Merge "Clean up revertLastWord()"

commit: 59eeb1cf121950b2e673680a525ce019115ab5a0 [log] [tgz]
author: Ken Wakasa <kwakasa@google.com> Mon Aug 01 00:11:18 2011 -0700
committer: Android (Google) Code Review <android-gerrit@google.com> Mon Aug 01 00:11:18 2011 -0700
tree: da7a49f3f229c3bcd87dd009b4c67707a511c79b
parent: db2c0919cfd839d7036697b41e986fa897dc78df [diff]
parent: 9351550dc6af7859e5280e16144c9386a37b976d [diff]
diff --git a/native/Android.mk b/native/Android.mk
index bc246a9..04819e4 100644
--- a/native/Android.mk
+++ b/native/Android.mk

@@ -8,15 +8,13 @@
 # To suppress compiler warnings for unused variables/functions used for debug features etc.
 LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
 
-# Use the new dictionary format
-LOCAL_CFLAGS += -DNEW_DICTIONARY_FORMAT
-
 LOCAL_SRC_FILES := \
     jni/com_android_inputmethod_keyboard_ProximityInfo.cpp \
     jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
     jni/jni_common.cpp \
     src/bigram_dictionary.cpp \
     src/char_utils.cpp \
+    src/correction_state.cpp \
     src/dictionary.cpp \
     src/proximity_info.cpp \
     src/unigram_dictionary.cpp

diff --git a/native/src/correction_state.cpp b/native/src/correction_state.cpp
new file mode 100644
index 0000000..aa5efce
--- /dev/null
+++ b/native/src/correction_state.cpp

@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LOG_TAG "LatinIME: correction_state.cpp"
+
+#include "correction_state.h"
+
+namespace latinime {
+
+// Creates a state with no proximity info, zero input length and no correction position set.
+// Positions start negative because checkState() treats a position as set when it is >= 0.
+CorrectionState::CorrectionState()
+    : mProximityInfo(NULL), mInputLength(0), mSkipPos(-1), mExcessivePos(-1),
+    mTransposedPos(-1) {
+}
+
+// Records the parameters of the correction that is about to be evaluated.
+// The ProximityInfo pointer is stored as given; it is never freed by this class.
+// skipPos, excessivePos and transposedPos are returned unchanged by the matching getters;
+// a position counts as set when it is >= 0 (see checkState()).
+void CorrectionState::setCorrectionParams(const ProximityInfo *pi, const int inputLength,
+        const int skipPos, const int excessivePos, const int transposedPos) {
+    mProximityInfo = pi;
+    // Store the input length too; the parameter used to be accepted and then ignored.
+    mInputLength = inputLength;
+    mSkipPos = skipPos;
+    mExcessivePos = excessivePos;
+    mTransposedPos = transposedPos;
+}
+
+// Sanity check that does nothing unless DEBUG_DICT is enabled: asserts that at most one of
+// the skip, excessive and transposed positions is set (>= 0).
+void CorrectionState::checkState() {
+    if (DEBUG_DICT) {
+        int inputCount = 0;
+        if (mSkipPos >= 0) ++inputCount;
+        if (mExcessivePos >= 0) ++inputCount;
+        if (mTransposedPos >= 0) ++inputCount;
+        // TODO: remove this assert
+        assert(inputCount <= 1);
+    }
+}
+
+CorrectionState::~CorrectionState() {
+}
+
+} // namespace latinime

diff --git a/native/src/correction_state.h b/native/src/correction_state.h
new file mode 100644
index 0000000..5b73925
--- /dev/null
+++ b/native/src/correction_state.h

@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CORRECTION_STATE_H
+#define LATINIME_CORRECTION_STATE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ProximityInfo;
+
+class CorrectionState { // Carries the skip/excessive/transposed positions of a correction.
+public:
+    CorrectionState();
+    void setCorrectionParams(const ProximityInfo *pi, const int inputLength, const int skipPos,
+        const int excessivePos, const int transposedPos); // stores the values read by the getters
+    void checkState(); // debug-only assertion, defined in correction_state.cpp
+    virtual ~CorrectionState();
+    int getSkipPos() const { // skipPos last passed to setCorrectionParams()
+        return mSkipPos;
+    }
+    int getExcessivePos() const { // excessivePos last passed to setCorrectionParams()
+        return mExcessivePos;
+    }
+    int getTransposedPos() const { // transposedPos last passed to setCorrectionParams()
+        return mTransposedPos;
+    }
+private:
+    const ProximityInfo *mProximityInfo; // only forward-declared here, so held by pointer
+    int mInputLength; // presumably the inputLength given to setCorrectionParams(); no accessor
+    int mSkipPos;
+    int mExcessivePos;
+    int mTransposedPos;
+};
+} // namespace latinime
+#endif // LATINIME_CORRECTION_STATE_H

diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index c45393f..bed92cf 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp

@@ -78,7 +78,7 @@
     return getProximityCharsAt(index)[0];
 }
 
-bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
+inline bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
     const int *chars = getProximityCharsAt(index);
     int i = 0;
     while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE) {
@@ -114,8 +114,10 @@
 // in their list. The non-accented version of the character should be considered
 // "close", but not the other keys close to the non-accented version.
 ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
-        const int index, const unsigned short c, const int skipPos,
-        const int excessivePos, const int transposedPos) const {
+        const int index, const unsigned short c, CorrectionState *correctionState) const {
+    const int skipPos = correctionState->getSkipPos();
+    const int excessivePos = correctionState->getExcessivePos();
+    const int transposedPos = correctionState->getTransposedPos();
     const int *currentChars = getProximityCharsAt(index);
     const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
 

diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 435a601..b28191d 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h

@@ -23,6 +23,8 @@
 
 namespace latinime {
 
+class CorrectionState;
+
 class ProximityInfo {
 public:
     typedef enum {                             // Used as a return value for character comparison
@@ -42,8 +44,7 @@
     bool existsCharInProximityAt(const int index, const int c) const;
     bool existsAdjacentProximityChars(const int index) const;
     ProximityType getMatchedProximityId(
-            const int index, const unsigned short c, const int skipPos,
-            const int excessivePos, const int transposedPos) const;
+            const int index, const unsigned short c, CorrectionState *correctionState) const;
     bool sameAsTyped(const unsigned short *word, int length) const;
 private:
     int getStartIndexFromCoordinates(const int x, const int y) const;

diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index bccd37a..f0bb384 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp

@@ -24,9 +24,7 @@
 #include "dictionary.h"
 #include "unigram_dictionary.h"
 
-#ifdef NEW_DICTIONARY_FORMAT
 #include "binary_format.h"
-#endif // NEW_DICTIONARY_FORMAT
 
 namespace latinime {
 
@@ -39,28 +37,23 @@
 UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
         int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
         const bool isLatestDictVersion)
-#ifndef NEW_DICTIONARY_FORMAT
-    : DICT_ROOT(streamStart),
-#else // NEW_DICTIONARY_FORMAT
     : DICT_ROOT(streamStart + NEW_DICTIONARY_HEADER_SIZE),
-#endif // NEW_DICTIONARY_FORMAT
     MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
     MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
     TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
-#ifndef NEW_DICTIONARY_FORMAT
-    ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),
-#else // NEW_DICTIONARY_FORMAT
       // TODO : remove this variable.
     ROOT_POS(0),
-#endif // NEW_DICTIONARY_FORMAT
     BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
     MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
     if (DEBUG_DICT) {
         LOGI("UnigramDictionary - constructor");
     }
+    mCorrectionState = new CorrectionState();
 }
 
-UnigramDictionary::~UnigramDictionary() {}
+UnigramDictionary::~UnigramDictionary() {
+    delete mCorrectionState; // releases the CorrectionState allocated with new in the constructor
+}
 
 static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
         const int MAX_PROXIMITY_CHARS) {
@@ -362,6 +355,8 @@
         assert(excessivePos < mInputLength);
         assert(missingPos < mInputLength);
     }
+    mCorrectionState->setCorrectionParams(mProximityInfo, mInputLength, skipPos, excessivePos,
+            transposedPos);
     int rootPosition = ROOT_POS;
     // Get the number of children of root, then increment the position
     int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@@ -389,8 +384,8 @@
             // depth will never be greater than maxDepth because in that case,
             // needsToTraverseChildrenNodes should be false
             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
-                    maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
-                    excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
+                    maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs,
+                    nextLetters, nextLettersSize, mCorrectionState, &childCount,
                     &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
                     &siblingPos, &outputIndex);
             // Update next sibling pos
@@ -521,8 +516,12 @@
 }
 
 inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
-        const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
-        const int freq, const bool sameLength) const {
+        const int matchWeight, const int freq, const bool sameLength,
+        CorrectionState *correctionState) const {
+    const int skipPos = correctionState->getSkipPos();
+    const int excessivePos = correctionState->getExcessivePos();
+    const int transposedPos = correctionState->getTransposedPos();
+
     // TODO: Demote by edit distance
     int finalFreq = freq * matchWeight;
     if (skipPos >= 0) {
@@ -587,16 +586,16 @@
 
 inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
         const uint8_t* const root, const uint8_t flags, const int pos,
-        const int inputIndex, const int matchWeight, const int skipPos,
-        const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
-        int* nextLetters, const int nextLettersSize) {
+        const int inputIndex, const int matchWeight, const int freq, const bool sameLength,
+        int* nextLetters, const int nextLettersSize, CorrectionState *correctionState) {
+    const int skipPos = correctionState->getSkipPos();
 
     const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
     if (isSameAsTyped) return;
 
     if (depth >= MIN_SUGGEST_DEPTH) {
-        const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
-                excessivePos, transposedPos, freq, sameLength);
+        const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight,
+                freq, sameLength, correctionState);
         if (!isSameAsTyped)
             addWord(word, depth + 1, finalFreq);
     }
@@ -647,282 +646,6 @@
     return true;
 }
 
-#ifndef NEW_DICTIONARY_FORMAT
-// The following functions will be entirely replaced with new implementations.
-void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
-        const int excessivePos, const int transposedPos,int *nextLetters,
-        const int nextLettersSize) {
-    int initialPosition = initialPos;
-    const int count = Dictionary::getCount(DICT_ROOT, &initialPosition);
-    getWordsRec(count, initialPosition, 0,
-            min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
-            mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,
-            nextLettersSize);
-}
-
-void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, const int matchWeight,
-        const int inputIndex, const int diffs, const int skipPos, const int excessivePos,
-        const int transposedPos, int *nextLetters, const int nextLettersSize) {
-    int siblingPos = pos;
-    for (int i = 0; i < childrenCount; ++i) {
-        int newCount;
-        int newChildPosition;
-        bool newTraverseAllNodes;
-        int newMatchRate;
-        int newInputIndex;
-        int newDiffs;
-        int newSiblingPos;
-        int newOutputIndex;
-        const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
-                traverseAllNodes, matchWeight, inputIndex, diffs,
-                skipPos, excessivePos, transposedPos,
-                nextLetters, nextLettersSize,
-                &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
-                &newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);
-        siblingPos = newSiblingPos;
-
-        if (needsToTraverseChildrenNodes) {
-            getWordsRec(newCount, newChildPosition, newOutputIndex, maxDepth, newTraverseAllNodes,
-                    newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
-                    nextLetters, nextLettersSize);
-        }
-    }
-}
-
-inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
-        const int inputLength, unsigned short *word) {
-    int pos = ROOT_POS;
-    int count = Dictionary::getCount(DICT_ROOT, &pos);
-    int maxFreq = 0;
-    int depth = 0;
-    unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];
-    bool terminal = false;
-
-    mStackChildCount[0] = count;
-    mStackSiblingPos[0] = pos;
-
-    while (depth >= 0) {
-        if (mStackChildCount[depth] > 0) {
-            --mStackChildCount[depth];
-            int firstChildPos;
-            int newFreq;
-            int siblingPos = mStackSiblingPos[depth];
-            const bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(siblingPos,
-                    startInputIndex, depth, newWord, &firstChildPos, &count, &terminal, &newFreq,
-                    &siblingPos);
-            mStackSiblingPos[depth] = siblingPos;
-            if (depth == (inputLength - 1)) {
-                // Traverse sibling node
-                if (terminal) {
-                    if (newFreq > maxFreq) {
-                        for (int i = 0; i < inputLength; ++i) word[i] = newWord[i];
-                        if (DEBUG_DICT && DEBUG_NODE) {
-#ifdef FLAG_DBG
-                            char s[inputLength + 1];
-                            for (int i = 0; i < inputLength; ++i) s[i] = word[i];
-                            s[inputLength] = 0;
-                            LOGI("New missing space word found: %d > %d (%s), %d, %d",
-                                    newFreq, maxFreq, s, inputLength, depth);
-#endif
-                        }
-                        maxFreq = newFreq;
-                    }
-                }
-            } else if (needsToTraverseChildrenNodes) {
-                // Traverse children nodes
-                ++depth;
-                mStackChildCount[depth] = count;
-                mStackSiblingPos[depth] = firstChildPos;
-            }
-        } else {
-            // Traverse parent node
-            --depth;
-        }
-    }
-
-    word[inputLength] = 0;
-    return maxFreq;
-}
-
-inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,
-        const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
-        int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
-    const int inputIndex = startInputIndex + depth;
-    unsigned short c;
-    *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,
-            &c, newChildPosition, newTerminal, newFreq);
-    const unsigned int inputC = mProximityInfo->getPrimaryCharAt(inputIndex);
-    if (DEBUG_DICT) {
-        assert(inputC <= U_SHORT_MAX);
-    }
-    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
-    const bool matched = (inputC == baseLowerC || inputC == c);
-    const bool hasChild = *newChildPosition != 0;
-    if (matched) {
-        word[depth] = c;
-        if (DEBUG_DICT && DEBUG_NODE) {
-            LOGI("Node(%c, %c)<%d>, %d, %d", inputC, c, matched, hasChild, *newFreq);
-            if (*newTerminal) {
-                LOGI("Terminal %d", *newFreq);
-            }
-        }
-        if (hasChild) {
-            *newCount = Dictionary::getCount(DICT_ROOT, newChildPosition);
-            return true;
-        } else {
-            return false;
-        }
-    } else {
-        // If this node is not user typed character, this method treats this word as unmatched.
-        // Thus newTerminal shouldn't be true.
-        *newTerminal = false;
-        return false;
-    }
-}
-
-// TODO: use uint32_t instead of unsigned short
-bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
-    if (IS_LATEST_DICT_VERSION) {
-        return (getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
-    } else {
-        return (getBigramPosition(0, word, 0, length) != NOT_VALID_WORD);
-    }
-}
-
-
-// Require strict exact match.
-int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
-        int length) const {
-    // returns address of bigram data of that word
-    // return -99 if not found
-
-    int count = Dictionary::getCount(DICT_ROOT, &pos);
-    unsigned short currentChar = (unsigned short) word[offset];
-    for (int j = 0; j < count; j++) {
-        unsigned short c = Dictionary::getChar(DICT_ROOT, &pos);
-        int terminal = Dictionary::getTerminal(DICT_ROOT, &pos);
-        int childPos = Dictionary::getAddress(DICT_ROOT, &pos);
-        if (c == currentChar) {
-            if (offset == length - 1) {
-                if (terminal) {
-                    return (pos+1);
-                }
-            } else {
-                if (childPos != 0) {
-                    int t = getBigramPosition(childPos, word, offset + 1, length);
-                    if (t > 0) {
-                        return t;
-                    }
-                }
-            }
-        }
-        if (terminal) {
-            Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);
-        }
-        // There could be two instances of each alphabet - upper and lower case. So continue
-        // looking ...
-    }
-    return NOT_VALID_WORD;
-}
-
-// The following functions will be modified.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
-        const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
-        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
-        int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
-        bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
-        int *nextSiblingPosition, int *nextOutputIndex) {
-    if (DEBUG_DICT) {
-        int inputCount = 0;
-        if (skipPos >= 0) ++inputCount;
-        if (excessivePos >= 0) ++inputCount;
-        if (transposedPos >= 0) ++inputCount;
-        assert(inputCount <= 1);
-    }
-    unsigned short c;
-    int childPosition;
-    bool terminal;
-    int freq;
-    bool isSameAsUserTypedLength = false;
-
-    const int pos = initialPos;
-    const int depth = initialDepth;
-    const int traverseAllNodes = initialTraverseAllNodes;
-    const int diffs = initialDiffs;
-
-    const uint8_t flags = 0; // No flags for now
-
-    if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
-
-    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
-            &c, &childPosition, &terminal, &freq);
-    *nextOutputIndex = depth + 1;
-
-    const bool needsToTraverseChildrenNodes = childPosition != 0;
-
-    // If we are only doing traverseAllNodes, no need to look at the typed characters.
-    if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
-        mWord[depth] = c;
-        if (traverseAllNodes && terminal) {
-            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                       excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
-        }
-        if (!needsToTraverseChildrenNodes) return false;
-        *newTraverseAllNodes = traverseAllNodes;
-        *newMatchRate = matchWeight;
-        *newDiffs = diffs;
-        *newInputIndex = inputIndex;
-    } else {
-        int inputIndexForProximity = inputIndex;
-
-        if (transposedPos >= 0) {
-            if (inputIndex == transposedPos) ++inputIndexForProximity;
-            if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
-        }
-
-        ProximityInfo::ProximityType matchedProximityCharId = mProximityInfo->getMatchedProximityId(
-                inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
-        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) return false;
-        mWord[depth] = c;
-        // If inputIndex is greater than mInputLength, that means there is no
-        // proximity chars. So, we don't need to check proximity.
-        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-            multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
-        }
-        bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
-                || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
-        if (isSameAsUserTypedLength && terminal) {
-            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                    excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
-        }
-        if (!needsToTraverseChildrenNodes) return false;
-        // Start traversing all nodes after the index exceeds the user typed length
-        *newTraverseAllNodes = isSameAsUserTypedLength;
-        *newMatchRate = matchWeight;
-        *newDiffs = diffs
-                + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
-        *newInputIndex = inputIndex + 1;
-    }
-    // Optimization: Prune out words that are too long compared to how much was typed.
-    if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {
-        return false;
-    }
-
-    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
-    // TODO: Check if this can be isSameAsUserTypedLength only.
-    if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {
-        *newTraverseAllNodes = true;
-    }
-    // get the count of nodes and increment childAddress.
-    *newCount = Dictionary::getCount(DICT_ROOT, &childPosition);
-    *newChildPosition = childPosition;
-    if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);
-    return needsToTraverseChildrenNodes;
-}
-
-#else // NEW_DICTIONARY_FORMAT
-
 // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
 // interface.
 inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
@@ -1081,16 +804,15 @@
 // given level, as output into newCount when traversing this level's parent.
 inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
         const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
-        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
-        int *nextLetters, const int nextLettersSize, int *newCount, int *newChildrenPosition,
+        const int initialDiffs, int *nextLetters, const int nextLettersSize,
+        CorrectionState *correctionState, int *newCount, int *newChildrenPosition,
         bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
         int *nextSiblingPosition, int *newOutputIndex) {
+    const int skipPos = correctionState->getSkipPos();
+    const int excessivePos = correctionState->getExcessivePos();
+    const int transposedPos = correctionState->getTransposedPos();
     if (DEBUG_DICT) {
-        int inputCount = 0;
-        if (skipPos >= 0) ++inputCount;
-        if (excessivePos >= 0) ++inputCount;
-        if (transposedPos >= 0) ++inputCount;
-        assert(inputCount <= 1);
+        correctionState->checkState();
     }
     int pos = initialPos;
     int depth = initialDepth;
@@ -1146,8 +868,8 @@
                 // The frequency should be here, because we come here only if this is actually
                 // a terminal node, and we are on its last char.
                 const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                           excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
+                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight,
+                        freq, false, nextLetters, nextLettersSize, mCorrectionState);
             }
             if (!hasChildren) {
                 // If we don't have children here, that means we finished processing all
@@ -1170,7 +892,7 @@
             }
 
             int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
-                    inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
+                    inputIndexForProximity, c, mCorrectionState);
             if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
                 // We found that this is an unrelated character, so we should give up traversing
                 // this node and its children entirely.
@@ -1197,8 +919,8 @@
                     || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
             if (isSameAsUserTypedLength && isTerminal) {
                 const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                        excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
+                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight,
+                        freq, true, nextLetters, nextLettersSize, mCorrectionState);
             }
             // This character matched the typed character (enough to traverse the node at least)
             // so we just evaluated it. Now we should evaluate this virtual node's children - that
@@ -1276,6 +998,4 @@
     return true;
 }
 
-#endif // NEW_DICTIONARY_FORMAT
-
 } // namespace latinime

diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 97198ef..41e3818 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h

@@ -18,6 +18,7 @@
 #define LATINIME_UNIGRAM_DICTIONARY_H
 
 #include <stdint.h>
+#include "correction_state.h"
 #include "defines.h"
 #include "proximity_info.h"
 
@@ -30,7 +31,6 @@
 class UnigramDictionary {
 
 public:
-#ifdef NEW_DICTIONARY_FORMAT
 
     // Mask and flags for children address type selection.
     static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
@@ -62,21 +62,16 @@
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-#endif // NEW_DICTIONARY_FORMAT
 
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
             const bool isLatestDictVersion);
-#ifndef NEW_DICTIONARY_FORMAT
-    bool isValidWord(unsigned short *word, int length);
-#else // NEW_DICTIONARY_FORMAT
     bool isValidWord(const uint16_t* const inWord, const int length) const;
-#endif // NEW_DICTIONARY_FORMAT
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize, const int flags,
             unsigned short *outWords, int *frequencies);
-    ~UnigramDictionary();
+    virtual ~UnigramDictionary();
 
 private:
     void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
@@ -99,42 +94,25 @@
             const int secondWordStartPos, const int secondWordLength, const bool isSpaceProximity);
     bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
     bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
-    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq,
-            const bool sameLength) const;
+    int calculateFinalFreq(const int inputIndex, const int depth, const int snr,
+            const int freq, const bool sameLength, CorrectionState *correctionState) const;
     void onTerminal(unsigned short int* word, const int depth,
             const uint8_t* const root, const uint8_t flags, const int pos,
-            const int inputIndex, const int matchWeight, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
-            int *nextLetters, const int nextLettersSize);
+            const int inputIndex, const int matchWeight, const int freq, const bool sameLength,
+            int* nextLetters, const int nextLettersSize, CorrectionState *correctionState);
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
     // Process a node by considering proximity, missing and excessive character
     bool processCurrentNode(const int initialPos, const int initialDepth,
-            const int maxDepth, const bool initialTraverseAllNodes, const int snr, int inputIndex,
-            const int initialDiffs, const int skipPos, const int excessivePos,
-            const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount,
-            int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex,
-            int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
+            const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
+            const int initialDiffs, int *nextLetters, const int nextLettersSize,
+            CorrectionState *correctionState, int *newCount, int *newChildPosition,
+            bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
+            int *nextSiblingPosition, int *nextOutputIndex);
     int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
             unsigned short *word);
-#ifndef NEW_DICTIONARY_FORMAT
-    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
-            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
-            const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
-            const int nextLettersSize);
-    // Keep getWordsOld for comparing performance between getWords and getWordsOld
-    void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
-            const int excessivePos, const int transposedPos, int *nextLetters,
-            const int nextLettersSize);
-    // Process a node by considering missing space
-    bool processCurrentNodeForExactMatch(const int firstChildPos,
-            const int startInputIndex, const int depth, unsigned short *word,
-            int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
-#else // NEW_DICTIONARY_FORMAT
     int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
             short unsigned int* outWord);
-#endif // NEW_DICTIONARY_FORMAT
 
     const uint8_t* const DICT_ROOT;
     const int MAX_WORD_LENGTH;
@@ -158,7 +136,8 @@
 
     int *mFrequencies;
     unsigned short *mOutputChars;
-    const ProximityInfo *mProximityInfo;
+    ProximityInfo *mProximityInfo;
+    CorrectionState *mCorrectionState;
     int mInputLength;
     // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
commit	59eeb1cf121950b2e673680a525ce019115ab5a0	[log] [tgz]
author	Ken Wakasa <kwakasa@google.com>	Mon Aug 01 00:11:18 2011 -0700
committer	Android (Google) Code Review <android-gerrit@google.com>	Mon Aug 01 00:11:18 2011 -0700
tree	da7a49f3f229c3bcd87dd009b4c67707a511c79b
parent	db2c0919cfd839d7036697b41e986fa897dc78df [diff]
parent	9351550dc6af7859e5280e16144c9386a37b976d [diff]