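Add functionality to limit the maximum number of correction errors

Correction::setCorrectionParams() now takes a maxErrors argument, and
processCharAndCalcState() returns UNRELATED once the number of skipped,
excessive and transposed characters exceeds it. The root traversal call
passes DEFAULT_MAX_ERRORS (2); the two-word suggestion paths (missing
space and space proximity) pass MAX_ERRORS_FOR_TWO_WORDS (1) and now set
doAutoCompletion to false.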

Before
==== test finished, terminate logcat =====
(0)  121.97 (0.28%)
(1)  42032.07 (95.46%)
(2)  11.03 (0.03%)
(3)  12.19 (0.03%)
(4)  10.02 (0.02%)
(5)  1417.41 (3.22%)
(6)  258.43 (0.59%)
(20) 50.20 (0.11%)
Total 44033.07 (sum of others 43913.32)

After
==== test finished, terminate logcat =====
(0)  110.81 (0.29%)
(1)  36416.11 (94.47%)
(2)  10.06 (0.03%)
(3)  9.45 (0.02%)
(4)  9.83 (0.03%)
(5)  1535.52 (3.98%)
(6)  290.25 (0.75%)
(20) 40.57 (0.11%)
Total 38546.83 (sum of others 38422.60)

Change-Id: Iffd24ce0b2dc422c8c6085d5be5f6bfdaf59ca7d
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 364913f..2da82dc 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -32,48 +32,6 @@
 // edit distance funcitons //
 /////////////////////////////
 
-#if 0 /* no longer used */
-inline static int editDistance(
-        int* editDistanceTable, const unsigned short* input,
-        const int inputLength, const unsigned short* output, const int outputLength) {
-    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
-    int* dp = editDistanceTable;
-    const int li = inputLength + 1;
-    const int lo = outputLength + 1;
-    for (int i = 0; i < li; ++i) {
-        dp[lo * i] = i;
-    }
-    for (int i = 0; i < lo; ++i) {
-        dp[i] = i;
-    }
-
-    for (int i = 0; i < li - 1; ++i) {
-        for (int j = 0; j < lo - 1; ++j) {
-            const uint32_t ci = toBaseLowerCase(input[i]);
-            const uint32_t co = toBaseLowerCase(output[j]);
-            const uint16_t cost = (ci == co) ? 0 : 1;
-            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
-                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
-            if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1])
-                    && co == toBaseLowerCase(input[i - 1])) {
-                dp[(i + 1) * lo + (j + 1)] = min(
-                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
-            }
-        }
-    }
-
-    if (DEBUG_EDIT_DISTANCE) {
-        LOGI("IN = %d, OUT = %d", inputLength, outputLength);
-        for (int i = 0; i < li; ++i) {
-            for (int j = 0; j < lo; ++j) {
-                LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
-            }
-        }
-    }
-    return dp[li * lo - 1];
-}
-#endif
-
 inline static void initEditDistance(int *editDistanceTable) {
     for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
         editDistanceTable[i] = i;
@@ -145,7 +103,7 @@
 
 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
         const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
-        const bool useFullEditDistance, const bool doAutoCompletion) {
+        const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
     // TODO: remove
     mTransposedPos = transposedPos;
     mExcessivePos = excessivePos;
@@ -159,6 +117,7 @@
     mMissingSpacePos = missingSpacePos;
     mUseFullEditDistance = useFullEditDistance;
     mDoAutoCompletion = doAutoCompletion;
+    mMaxErrors = maxErrors;
 }
 
 void Correction::checkState() {
@@ -314,12 +273,17 @@
 Correction::CorrectionType Correction::processCharAndCalcState(
         const int32_t c, const bool isTerminal) {
     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+    if (correctionCount > mMaxErrors) {
+        return UNRELATED;
+    }
+
     // TODO: Change the limit if we'll allow two or more corrections
     const bool noCorrectionsHappenedSoFar = correctionCount == 0;
     const bool canTryCorrection = noCorrectionsHappenedSoFar;
     int proximityIndex = 0;
     mDistances[mOutputIndex] = NOT_A_DISTANCE;
 
+    // Skip checking this node
     if (mNeedsToTraverseAllNodes || isQuote(c)) {
         bool incremented = false;
         if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -344,6 +308,7 @@
         return processSkipChar(c, isTerminal, incremented);
     }
 
+    // Check possible corrections.
     if (mExcessivePos >= 0) {
         if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
             mExcessivePos = mOutputIndex;
@@ -394,7 +359,12 @@
     }
 
     // TODO: Change the limit if we'll allow two or more proximity chars with corrections
-    const bool checkProximityChars = noCorrectionsHappenedSoFar ||  mProximityCount == 0;
+    // Workaround: when mMaxErrors is 1, allow only a single error in total,
+    // counting a proximity correction as an error too.
+    const bool checkProximityChars = (mMaxErrors > 1)
+            ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+            : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
     ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
             ? ProximityInfo::EQUIVALENT_CHAR
             : mProximityInfo->getMatchedProximityId(
@@ -934,4 +904,46 @@
     return totalFreq;
 }
 
+#if 0 /* no longer used. keep just for reference */
+inline static int editDistance(
+        int* editDistanceTable, const unsigned short* input,
+        const int inputLength, const unsigned short* output, const int outputLength) {
+    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+    int* dp = editDistanceTable;
+    const int li = inputLength + 1;
+    const int lo = outputLength + 1;
+    for (int i = 0; i < li; ++i) {
+        dp[lo * i] = i;
+    }
+    for (int i = 0; i < lo; ++i) {
+        dp[i] = i;
+    }
+
+    for (int i = 0; i < li - 1; ++i) {
+        for (int j = 0; j < lo - 1; ++j) {
+            const uint32_t ci = toBaseLowerCase(input[i]);
+            const uint32_t co = toBaseLowerCase(output[j]);
+            const uint16_t cost = (ci == co) ? 0 : 1;
+            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+            if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1])
+                    && co == toBaseLowerCase(input[i - 1])) {
+                dp[(i + 1) * lo + (j + 1)] = min(
+                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+            }
+        }
+    }
+
+    if (DEBUG_EDIT_DISTANCE) {
+        LOGI("IN = %d, OUT = %d", inputLength, outputLength);
+        for (int i = 0; i < li; ++i) {
+            for (int j = 0; j < lo; ++j) {
+                LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+            }
+        }
+    }
+    return dp[li * lo - 1];
+}
+#endif
+
 } // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
index 4a8d1fa..e55be8d 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -45,7 +45,7 @@
     // TODO: remove
     void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
             const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
-            const bool doAutoCompletion);
+            const bool doAutoCompletion, const int maxErrors);
     void checkState();
     bool initProcessState(const int index);
 
@@ -118,6 +118,7 @@
     int mMissingSpacePos;
     int mTerminalInputIndex;
     int mTerminalOutputIndex;
+    int mMaxErrors;
 
     // The following arrays are state buffer.
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 7cf1919..ac9f53e 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -261,7 +261,7 @@
     // TODO: Remove setCorrectionParams
     correction->setCorrectionParams(0, 0, 0,
             -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
-            true /* doAutoCompletion */);
+            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
     int rootPosition = ROOT_POS;
     // Get the number of children of root, then increment the position
     int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
@@ -296,7 +296,7 @@
         Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
     correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
             -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
-            useFullEditDistance, true /* doAutoCompletion */);
+            useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
     getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
 }
 
@@ -305,7 +305,7 @@
         Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) {
     correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
             -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
-            useFullEditDistance, true /* doAutoCompletion */);
+            useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
     getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue);
 }
 
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 0b01265..f5cb438 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -61,6 +61,10 @@
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
 
+    // Error tolerances
+    static const int DEFAULT_MAX_ERRORS = 2;
+    static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
+
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
             const bool isLatestDictVersion);