Fill up a map of bigram addresses for lookup.

We don't want to do a linear search on each terminal when there
may be 100+ bigrams for a given word, because that would be
disastrous for performance. Also, we need to resolve each bigram
address anyway.
This change resolves the addresses up front and puts them in a
balanced tree (std::map) so that each lookup will be O(log(n)).

Bug: 6313806
Change-Id: Ibf088035870b9acb41e948f0ab7af4726f2cee24
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index e05e9d6..67f9628 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -135,6 +135,7 @@
 // If the word is not found or has no bigrams, this function returns 0.
 int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
         const int prevWordLength) {
+    if (0 >= prevWordLength) return 0;
     const uint8_t* const root = DICT;
     int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
 
@@ -152,6 +153,22 @@
     return pos;
 }
 
+void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
+        const int prevWordLength, std::map<int, int> *map) {
+    const uint8_t* const root = DICT;
+    int pos = getBigramListPositionForWord(prevWord, prevWordLength);
+    if (0 == pos) return;
+
+    int bigramFlags;
+    do {
+        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+        const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+                &pos);
+        (*map)[bigramPos] = frequency;
+    } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+}
+
 bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
     // Checks whether this word starts with same character or neighboring characters of
     // what user typed.
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 76f9039..b123321 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -17,6 +17,7 @@
 #ifndef LATINIME_BIGRAM_DICTIONARY_H
 #define LATINIME_BIGRAM_DICTIONARY_H
 
+#include <map>
 #include <stdint.h>
 
 namespace latinime {
@@ -28,6 +29,8 @@
     int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
     int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
+    void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
+            std::map<int, int> *map);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h
index b13052c..376ba59 100644
--- a/native/jni/src/debug.h
+++ b/native/jni/src/debug.h
@@ -22,7 +22,7 @@
 
 static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
         const unsigned int length) {
-    int i = 0;
+    unsigned int i = 0;
     for (; i <= length && input[i] != 0; ++i)
         output[i] = input[i] & 0xFF;
     output[i] = 0;
@@ -31,10 +31,10 @@
 
 static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
         unsigned char* output, const unsigned int length, unsigned char c) {
-    int i = 0;
+    unsigned int i = 0;
     for (; i <= length && input[i] != 0; ++i)
         output[i] = input[i] & 0xFF;
-    output[i-1] = c;
+    if (i > 0) output[i-1] = c;
     output[i] = 0;
     return output;
 }
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index e0feeaf..a2b0491 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -17,6 +17,8 @@
 #ifndef LATINIME_DICTIONARY_H
 #define LATINIME_DICTIONARY_H
 
+#include <map>
+
 #include "bigram_dictionary.h"
 #include "char_utils.h"
 #include "correction.h"
@@ -39,6 +41,9 @@
         // If none, it's zero.
         const int bigramListPosition = !prevWordChars ? 0
                 : mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
+        std::map<int, int> bigramMap;
+        mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
+                &bigramMap);
         return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
                 mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
                 useFullEditDistance, outWords, frequencies);