AI 143659: am: CL 143472 Reduce dictionary size.
Changed the tree structure to use variable-length nodes, saving an average of 21% on the dictionary size.
Created a shortened English dictionary for Dream - 50K words.
Added a shortened Spanish dictionary for Dream - 32K words.
Original author: yamasani
Merged from: //branches/cupcake/...
Automated import of CL 143659
diff --git a/dictionary/src/dictionary.h b/dictionary/src/dictionary.h
index 8574e07..b13e977 100644
--- a/dictionary/src/dictionary.h
+++ b/dictionary/src/dictionary.h
@@ -19,35 +19,44 @@
namespace latinime {
+// A 22-bit address limits the dictionary to ~4MB, which on average is about 200k-300k words
+#define ADDRESS_MASK 0x3FFFFF
+
+// The flag bit that indicates whether an address follows in the next 22 bits
+#define FLAG_ADDRESS_MASK 0x40
+// The flag bit that indicates whether this is a terminal node for a word. The node can still
+// have children if the word has other endings.
+#define FLAG_TERMINAL_MASK 0x80
+
class Dictionary {
public:
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
- int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+ int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives);
bool isValidWord(unsigned short *word, int length);
void setAsset(void *asset) { mAsset = asset; }
void *getAsset() { return mAsset; }
~Dictionary();
-
+
private:
int getAddress(int *pos);
- bool getTerminal(int *pos) { return (mDict[*pos] & 0x80) > 0; }
+ bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
unsigned short getChar(int *pos);
int wideStrLen(unsigned short *str);
-
+
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c, int depth);
- void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
+ void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex);
bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
unsigned char *mDict;
void *mAsset;
-
+
int *mFrequencies;
int mMaxWords;
int mMaxWordLength;
@@ -57,7 +66,7 @@
int mInputLength;
int mMaxAlternatives;
unsigned short mWord[128];
-
+
int mFullWordMultiplier;
int mTypedLetterMultiplier;
};