Merge "Check adjacent proximity chars for insertion for typing"
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index fb60139..d5df6b6 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -53,10 +53,10 @@
         dic_nodes_cache.cpp) \
     $(addprefix suggest/core/dictionary/, \
         bigram_dictionary.cpp \
-        binary_dictionary_bigrams_reading_utils.cpp \
         binary_dictionary_format_utils.cpp \
         binary_dictionary_header.cpp \
         binary_dictionary_header_reading_utils.cpp \
+        binary_dictionary_terminal_attributes_reading_utils.cpp \
         bloom_filter.cpp \
         byte_array_utils.cpp \
         dictionary.cpp \
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index 0856840..f2b48e9 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -18,8 +18,8 @@
 #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
 
 #include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
 #include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
 
 namespace latinime {
 
@@ -35,15 +35,17 @@
     }
 
     AK_FORCE_INLINE void next() {
-        mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
+        mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
                 mBinaryDictionaryInfo, &mPos);
-        mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
-                mBinaryDictionaryInfo, mBigramFlags, &mPos);
-        mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
+        mBigramPos =
+                BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
+                        mBinaryDictionaryInfo, mBigramFlags, &mPos);
+        mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
     }
 
     AK_FORCE_INLINE int getProbability() const {
-        return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
+        return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
+                mBigramFlags);
     }
 
     AK_FORCE_INLINE int getBigramPos() const {
@@ -59,7 +61,7 @@
 
     const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
     int mPos;
-    BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
+    BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
     int mBigramPos;
     bool mHasNext;
 };
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
deleted file mode 100644
index 78a54b1..0000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
-
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-// Flag for presence of more attributes
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
-// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
-        BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int BinaryDictionaryBigramsReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-
-/* static */ int BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
-        const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
-        int *const pos) {
-    int offset = 0;
-    const int origin = *pos;
-    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
-            offset = ByteArrayUtils::readUint8andAdvancePosition(
-                    binaryDictionaryInfo->getDictRoot(), pos);
-            break;
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
-            offset = ByteArrayUtils::readUint16andAdvancePosition(
-                    binaryDictionaryInfo->getDictRoot(), pos);
-            break;
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
-            offset = ByteArrayUtils::readUint24andAdvancePosition(
-                    binaryDictionaryInfo->getDictRoot(), pos);
-            break;
-    }
-    if (isOffsetNegative(flags)) {
-        return origin - offset;
-    } else {
-        return origin + offset;
-    }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
new file mode 100644
index 0000000..0a7509c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
+
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils; // Shorthand used below.
+
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; // Shifts the address-type bits down to bit 0.
+
+/* static */ int TaUtils::getBigramAddressAndForwardPointer(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
+        int *const pos) {
+    int offset = 0; // Stays 0 when flags match none of the address-size cases below.
+    const int origin = *pos; // Position on entry; the returned address is relative to it.
+    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+            offset = ByteArrayUtils::readUint8andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+            offset = ByteArrayUtils::readUint16andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+            offset = ByteArrayUtils::readUint24andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+    }
+    if (isOffsetNegative(flags)) { // Offset is read unsigned; this flag carries its sign.
+        return origin - offset;
+    } else {
+        return origin + offset;
+    }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
similarity index 61%
rename from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
rename to native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index e71f2a1..f38fd5a 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
+#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
+#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
 
 #include <stdint.h>
 
@@ -25,10 +25,26 @@
 
 namespace latinime {
 
-class BinaryDictionaryBigramsReadingUtils {
+class BinaryDictionaryTerminalAttributesReadingUtils {
  public:
-    typedef uint8_t BigramFlags;
+    typedef uint8_t TerminalAttributeFlags;
+    typedef TerminalAttributeFlags BigramFlags;
 
+    static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+        return ByteArrayUtils::readUint8andAdvancePosition(
+                binaryDictionaryInfo->getDictRoot(), pos); // Flags are a single uint8_t.
+    }
+
+    static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
+        return flags & MASK_ATTRIBUTE_PROBABILITY; // Keeps only the probability bits of flags.
+    }
+
+    static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) {
+        return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; // Set when more attributes are present.
+    }
+
+    // Bigrams reading methods
     static AK_FORCE_INLINE void skipExistingBigrams(
             const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
         BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
@@ -39,41 +55,27 @@
         *pos += attributeAddressSize(flags);
     }
 
-    static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(
-            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
-        return ByteArrayUtils::readUint8andAdvancePosition(
-                binaryDictionaryInfo->getDictRoot(), pos);
-    }
+    static int getBigramAddressAndForwardPointer(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
+                    int *const pos);
 
-    static AK_FORCE_INLINE int getBigramProbability(const BigramFlags flags) {
-        return flags & MASK_ATTRIBUTE_PROBABILITY;
-    }
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
 
-    static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+    static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+    static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+    static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+    static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+    static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+    static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
+    static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
+    static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+    static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
         return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
     }
 
-    static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
-        return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
-    }
-
-    static int getBigramAddressAndForwardPointer(
-            const BinaryDictionaryInfo *const binaryDictionaryInfo,
-            const BigramFlags flags, int *const pos);
-
- private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryBigramsReadingUtils);
-
-    static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
-    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
-    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
-    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
-    static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
-    static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
-    static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
-    static const int ATTRIBUTE_ADDRESS_SHIFT;
-
-    static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+    static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) {
         return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
         /* Note: this is a value-dependant optimization of what may probably be
            more readably written this way:
@@ -87,4 +89,4 @@
     }
 };
 }
-#endif /* LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H */
+#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index a8f16c8..173a612 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -36,6 +36,7 @@
 const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
 const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
+const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
 
 /**
  * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -148,6 +149,8 @@
             &doubleLetterTerminalIndex, &doubleLetterLevel);
 
     int maxScore = S_INT_MIN;
+    int bestExactMatchedNodeTerminalIndex = -1;
+    int bestExactMatchedNodeOutputWordIndex = -1;
     // Output suggestion results here
     for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
             ++terminalIndex) {
@@ -186,7 +189,6 @@
         const int finalScore = SCORING->calculateFinalScore(
                 compoundDistance, traverseSession->getInputSize(),
                 isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord()));
-
         maxScore = max(maxScore, finalScore);
 
         if (TRAVERSAL->allowPartialCommit()) {
@@ -200,6 +202,25 @@
         if (isValidWord) {
             outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
             frequencies[outputWordIndex] = finalScore;
+            if (isSafeExactMatch) {
+                // Demote every exact match except the most probable one among all exact
+                // matches.
+                const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
+                        || terminals[bestExactMatchedNodeTerminalIndex].getProbability()
+                                < terminalDicNode->getProbability();
+                const int outputWordIndexToBeDemoted = isBestTerminal ?
+                        bestExactMatchedNodeOutputWordIndex : outputWordIndex;
+                if (outputWordIndexToBeDemoted >= 0) {
+                    frequencies[outputWordIndexToBeDemoted] -=
+                            FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
+                }
+                if (isBestTerminal) {
+                    // Updates the best exact matched node index.
+                    bestExactMatchedNodeTerminalIndex = terminalIndex;
+                    // Updates the best exact matched output word index.
+                    bestExactMatchedNodeOutputWordIndex = outputWordIndex;
+                }
+            }
             // Populate the outputChars array with the suggested word.
             const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
             terminalDicNode->outputResult(&outputCodePoints[startIndex]);
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 875cbe4..752bde9 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -82,6 +82,8 @@
 
     // Threshold for autocorrection classifier
     static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
+    // Final score penalty applied to exact matches that are not the most probable one.
+    static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
 
     const Traversal *const TRAVERSAL;
     const Scoring *const SCORING;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 7333dbe..e098f35 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -169,6 +169,9 @@
 
     float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
             const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
+        // We promote exact matches here to prevent them from being pruned. The final score of
+        // exact match nodes might be demoted later in Suggest::outputSuggestions if there are
+        // multiple exact matches.
         const float languageImprobability = (dicNode->isExactMatch()) ?
                 0.0f : dicNodeLanguageImprobability;
         return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;