AI 143659: am: CL 143472 Reduce dictionary size.
  Changed the tree structure to have variable-length nodes to save an average of 21% on the dictionary size.
  Created a shortened English dictionary for Dream - 50K words.
  Added a shortened Spanish dictionary for Dream - 32K words.
  Original author: yamasani
  Merged from: //branches/cupcake/...

Automated import of CL 143659
diff --git a/dictionary/src/dictionary.cpp b/dictionary/src/dictionary.cpp
index 6aecb63..b37f4c9 100644
--- a/dictionary/src/dictionary.cpp
+++ b/dictionary/src/dictionary.cpp
@@ -85,10 +85,14 @@
 Dictionary::getAddress(int *pos)
 {
     int address = 0;
-    address += (mDict[*pos] & 0x7F) << 16;
-    address += (mDict[*pos + 1] & 0xFF) << 8;
-    address += (mDict[*pos + 2] & 0xFF);
-    *pos += 3;
+    if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
+        *pos += 1;
+    } else {
+        address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
+        address += (mDict[*pos + 1] & 0xFF) << 8;
+        address += (mDict[*pos + 2] & 0xFF);
+        *pos += 3;
+    }
     return address;
 }
 
@@ -193,7 +197,8 @@
         unsigned short lowerC = toLowerCase(c, depth);
         bool terminal = getTerminal(&pos);
         int childrenAddress = getAddress(&pos);
-        int freq = getFreq(&pos);
+        int freq = 1;
+        if (terminal) freq = getFreq(&pos);
         // If we are only doing completions, no need to look at the typed characters.
         if (completion) {
             mWord[depth] = c;
@@ -266,7 +271,9 @@
                 }
             }
         }
-        getFreq(&pos);
+        if (terminal) {
+            getFreq(&pos);
+        }
         // There could be two instances of each alphabet - upper and lower case. So continue
         // looking ...
     }
diff --git a/dictionary/src/dictionary.h b/dictionary/src/dictionary.h
index 8574e07..b13e977 100644
--- a/dictionary/src/dictionary.h
+++ b/dictionary/src/dictionary.h
@@ -19,35 +19,44 @@
 
 namespace latinime {
 
+// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
+#define ADDRESS_MASK 0x3FFFFF
+
+// The bit that decides if an address follows in the next 22 bits
+#define FLAG_ADDRESS_MASK 0x40
+// The bit that decides if this is a terminal node for a word. The node could still have children,
+// if the word has other endings.
+#define FLAG_TERMINAL_MASK 0x80
+
 class Dictionary {
 public:
     Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
-    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, 
+    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
         int maxWordLength, int maxWords, int maxAlternatives);
     bool isValidWord(unsigned short *word, int length);
     void setAsset(void *asset) { mAsset = asset; }
     void *getAsset() { return mAsset; }
     ~Dictionary();
-    
+
 private:
 
     int getAddress(int *pos);
-    bool getTerminal(int *pos) { return (mDict[*pos] & 0x80) > 0; }
+    bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
     int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
     int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
     unsigned short getChar(int *pos);
     int wideStrLen(unsigned short *str);
-    
+
     bool sameAsTyped(unsigned short *word, int length);
     bool addWord(unsigned short *word, int length, int frequency);
     unsigned short toLowerCase(unsigned short c, int depth);
-    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency, 
+    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
             int inputIndex);
     bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
 
     unsigned char *mDict;
     void *mAsset;
-    
+
     int *mFrequencies;
     int mMaxWords;
     int mMaxWordLength;
@@ -57,7 +66,7 @@
     int mInputLength;
     int mMaxAlternatives;
     unsigned short mWord[128];
-    
+
     int mFullWordMultiplier;
     int mTypedLetterMultiplier;
 };