Merge "Tag the whitelisted entries in native code." into jb-mr1-dev
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 2ee4077..4cabc84 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -52,6 +52,8 @@
 
     // Mask for attribute frequency, stored on 4 bits inside the flags byte.
     static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
+    // The numeric value of the shortcut frequency that means 'whitelist'.
+    static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
 
     // Mask and flags for attribute address type selection.
     static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
@@ -99,6 +101,7 @@
     static bool hasChildrenInFlags(const uint8_t flags);
     static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
             int *pos);
+    static int getAttributeFrequencyFromFlags(const int flags);
     static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
             const int length, const bool forceLowerCaseSearch);
     static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@@ -340,6 +343,10 @@
     }
 }
 
+inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
+    return flags & MASK_ATTRIBUTE_FREQUENCY;  // Frequency is the low 4 bits (mask 0x0F).
+}
+
 // This function gets the byte position of the last chargroup of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_VALID_WORD.
 inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index e1f9db9..e55da01 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -157,6 +157,10 @@
     }
 }
 
+bool Correction::sameAsTyped() {  // Checks mWord[0..mOutputIndex) against the input codes.
+    return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
+}
+
 int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
         const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
     return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index 81623a4..57e7b71 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -105,6 +105,7 @@
             const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
             const bool doAutoCompletion, const int maxErrors);
     void checkState();
+    bool sameAsTyped();
     bool initProcessState(const int index);
 
     int getInputIndex();
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
index 76d4551..474c407 100644
--- a/native/jni/src/proximity_info_state.h
+++ b/native/jni/src/proximity_info_state.h
@@ -160,6 +160,21 @@
         return mTouchPositionCorrectionEnabled;
     }
 
+    inline bool sameAsTyped(const unsigned short *word, int length) const {
+        // True iff word matches, code for code, the entries of mInputCodes taken one every
+        // MAX_PROXIMITY_CHARS_SIZE_INTERNAL ints; a length mismatch is an immediate false.
+        if (mInputLength != length) {
+            return false;
+        }
+        const int *typedCode = mInputCodes;
+        for (int i = 0; i != length; ++i, typedCode += MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
+            if (static_cast<unsigned int>(word[i]) != static_cast<unsigned int>(*typedCode)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
  private:
     DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
     /////////////////////////////////////////
@@ -179,21 +194,6 @@
         return mInputXCoordinates && mInputYCoordinates;
     }
 
-    bool sameAsTyped(const unsigned short *word, int length) const {
-        if (length != mInputLength) {
-            return false;
-        }
-        const int *inputCodes = mInputCodes;
-        while (length--) {
-            if ((unsigned int) *inputCodes != (unsigned int) *word) {
-                return false;
-            }
-            inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
-            word++;
-        }
-        return true;
-    }
-
     // const
     const ProximityInfo *mProximityInfo;
     bool mHasTouchPositionCorrectionData;
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index d633645..1ae9c7c 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -46,7 +46,7 @@
         // Gets the shortcut target itself as a uint16_t string. For parameters and return value
         // see BinaryFormat::getWordAtAddress.
         // TODO: make the output an uint32_t* to handle the whole unicode range.
-        inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) {
+        inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
             const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
             mHasNextShortcutTarget =
                     0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
@@ -56,6 +56,7 @@
                 if (NOT_A_CHARACTER == charCode) break;
                 outWord[i] = (uint16_t)charCode;
             }
+            *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
             mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
             return i;
         }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 9f7ab53..cc6d39a 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -391,8 +391,12 @@
         const int finalProbability =
                 correction->getFinalProbability(probability, &wordPointer, &wordLength);
         if (finalProbability != NOT_A_PROBABILITY) {
-            addWord(wordPointer, wordLength, finalProbability, masterQueue,
-                    Dictionary::KIND_CORRECTION);
+            if (0 != finalProbability) {
+                // If the probability is 0, we don't want to add this word. However we still
+                // want to add its shortcuts (including a possible whitelist entry) if any.
+                addWord(wordPointer, wordLength, finalProbability, masterQueue,
+                        Dictionary::KIND_CORRECTION);
+            }
 
             const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
             // Please note that the shortcut candidates will be added to the master queue only.
@@ -407,10 +411,21 @@
                 // with the same score. For the moment we use -1 to make sure the shortcut will
                 // never be in front of the word.
                 uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+                int shortcutFrequency;
                 const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
-                        MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
-                addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
-                        masterQueue, Dictionary::KIND_CORRECTION);
+                        MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
+                int shortcutScore;
+                int kind;
+                if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
+                        && correction->sameAsTyped()) {
+                    shortcutScore = S_INT_MAX;
+                    kind = Dictionary::KIND_WHITELIST;
+                } else {
+                    shortcutScore = shortcutProbability;
+                    kind = Dictionary::KIND_CORRECTION;
+                }
+                addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
+                        masterQueue, kind);
             }
         }
     }