New dict format, step 2

Move some methods around and make some of them static

Bug: 4392433
Change-Id: I2bbe98aec118a416d21d1e293638e1d324505b9b
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index cb8f507..91e3c81 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -265,8 +265,7 @@
     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
 }
 
-void UnigramDictionary::registerNextLetter(
-        unsigned short c, int *nextLetters, int nextLettersSize) {
+static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
     if (c < nextLettersSize) {
         nextLetters[c]++;
     }
@@ -322,7 +321,7 @@
     return false;
 }
 
-unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) {
+static inline unsigned short toBaseLowerCase(unsigned short c) {
     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
         c = BASE_CHARS[c];
     }
@@ -924,4 +923,47 @@
         return false;
     }
 }
+
+// TODO: use uint32_t instead of unsigned short
+bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
+    // The latest dictionary version begins the lookup at DICTIONARY_HEADER_SIZE; older
+    // versions begin at position 0.
+    const int startPos = IS_LATEST_DICT_VERSION ? DICTIONARY_HEADER_SIZE : 0;
+    // The word is valid unless the recursive lookup reports NOT_VALID_WORD.
+    return isValidWordRec(startPos, word, 0, length) != NOT_VALID_WORD;
+}
+
+int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+    // Looks for word[offset] among the nodes read at pos, recursing into a matching node's
+    // children for later characters. Returns pos + 1 past the matching terminal node (per the
+    // original note, the address of the word's bigram data), or NOT_VALID_WORD if not found.
+    const int nodeCount = Dictionary::getCount(DICT_ROOT, &pos);
+    const unsigned short wanted = word[offset];
+    const bool atLastChar = (offset == length - 1);
+    for (int i = 0; i < nodeCount; ++i) {
+        // pos is handed by pointer to each reader, so the order of these calls matters.
+        const unsigned short nodeChar = Dictionary::getChar(DICT_ROOT, &pos);
+        const int isTerminal = Dictionary::getTerminal(DICT_ROOT, &pos);
+        const int childAddress = Dictionary::getAddress(DICT_ROOT, &pos);
+        if (nodeChar == wanted) {
+            if (atLastChar) {
+                if (isTerminal) {
+                    return pos + 1;
+                }
+            } else if (childAddress != 0) {
+                const int found = isValidWordRec(childAddress, word, offset + 1, length);
+                if (found > 0) {
+                    return found;
+                }
+            }
+        }
+        // Not matched here: the same letter can appear twice (upper and lower case), so step
+        // over this node's frequency data, when it is terminal, and keep looking.
+        if (isTerminal) {
+            Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);
+        }
+    }
+    return NOT_VALID_WORD;
+}
+
 } // namespace latinime