Merge "Change expandable binary dict write to a temp file first."
diff --git a/java/res/values/attrs.xml b/java/res/values/attrs.xml
index c78013f..e14633a 100644
--- a/java/res/values/attrs.xml
+++ b/java/res/values/attrs.xml
@@ -64,7 +64,7 @@
         <attr name="keyShiftedLetterHintRatio" format="float" />
         <!-- Horizontal padding of left/right aligned key label to the edge of the key. -->
         <attr name="keyLabelHorizontalPadding" format="dimension" />
-        <!-- Top and right padding of hint letter to the edge of the key.-->
+        <!-- Right padding of hint letter to the edge of the key.-->
         <attr name="keyHintLetterPadding" format="dimension" />
         <!-- Bottom padding of popup hint letter "..." to the edge of the key.-->
         <attr name="keyPopupHintLetterPadding" format="dimension" />
diff --git a/java/res/xml/rowkeys_symbols3.xml b/java/res/xml/rowkeys_symbols3.xml
index 294e94d..0518c19 100644
--- a/java/res/xml/rowkeys_symbols3.xml
+++ b/java/res/xml/rowkeys_symbols3.xml
@@ -21,10 +21,6 @@
 <merge
     xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin"
 >
-    <Key
-        latin:keyStyle="toMoreSymbolKeyStyle"
-        latin:keyWidth="15%p"
-        latin:visualInsetsRight="1%p" />
     <!-- U+00A1: "¡" INVERTED EXCLAMATION MARK -->
     <Key
         latin:keyLabel="!"
@@ -45,8 +41,4 @@
     <Key
         latin:keyLabel="!text/keylabel_for_symbols_question"
         latin:moreKeys="!text/more_keys_for_symbols_question" />
-    <Key
-        latin:keyStyle="deleteKeyStyle"
-        latin:keyWidth="fillRight"
-        latin:visualInsetsLeft="1%p" />
 </merge>
diff --git a/java/res/xml/rowkeys_symbols_shift1.xml b/java/res/xml/rowkeys_symbols_shift1.xml
index 199e449..fea8ae3 100644
--- a/java/res/xml/rowkeys_symbols_shift1.xml
+++ b/java/res/xml/rowkeys_symbols_shift1.xml
@@ -34,10 +34,11 @@
     <!-- U+221A: "√" SQUARE ROOT -->
     <Key
         latin:keyLabel="&#x221A;" />
-    <!-- U+03C0: "π" GREEK SMALL LETTER PI -->
+    <!-- U+03C0: "π" GREEK SMALL LETTER PI
+         U+03A0: "Π" GREEK CAPITAL LETTER PI  -->
     <Key
         latin:keyLabel="&#x03C0;"
-        latin:moreKeys="Π" />
+        latin:moreKeys="&#x03A0;" />
     <!-- U+00F7: "÷" DIVISION SIGN -->
     <Key
         latin:keyLabel="&#x00F7;" />
diff --git a/java/res/xml/rowkeys_symbols_shift3.xml b/java/res/xml/rowkeys_symbols_shift3.xml
index f5db0fe..a35af21 100644
--- a/java/res/xml/rowkeys_symbols_shift3.xml
+++ b/java/res/xml/rowkeys_symbols_shift3.xml
@@ -21,10 +21,6 @@
 <merge
     xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin"
 >
-    <Key
-        latin:keyStyle="backFromMoreSymbolKeyStyle"
-        latin:keyWidth="15%p"
-        latin:visualInsetsRight="1%p" />
     <!-- U+2122: "™" TRADE MARK SIGN -->
     <Key
         latin:keyLabel="&#x2122;" />
@@ -43,8 +39,4 @@
         latin:keyLabel="\\" />
     <include
         latin:keyboardLayout="@xml/keys_less_greater" />
-    <Key
-        latin:keyStyle="deleteKeyStyle"
-        latin:keyWidth="fillRight"
-        latin:visualInsetsLeft="1%p" />
 </merge>
diff --git a/java/res/xml/rows_symbols.xml b/java/res/xml/rows_symbols.xml
index 2a73ffb..bd1a57e 100644
--- a/java/res/xml/rows_symbols.xml
+++ b/java/res/xml/rows_symbols.xml
@@ -40,8 +40,16 @@
     <Row
         latin:keyWidth="10%p"
     >
+        <Key
+            latin:keyStyle="toMoreSymbolKeyStyle"
+            latin:keyWidth="15%p"
+            latin:visualInsetsRight="1%p" />
         <include
             latin:keyboardLayout="@xml/rowkeys_symbols3" />
+        <Key
+            latin:keyStyle="deleteKeyStyle"
+            latin:keyWidth="fillRight"
+            latin:visualInsetsLeft="1%p" />
     </Row>
     <include
         latin:keyboardLayout="@xml/row_symbols4" />
diff --git a/java/res/xml/rows_symbols_shift.xml b/java/res/xml/rows_symbols_shift.xml
index 0ab9715..9c03d90 100644
--- a/java/res/xml/rows_symbols_shift.xml
+++ b/java/res/xml/rows_symbols_shift.xml
@@ -40,8 +40,16 @@
     <Row
         latin:keyWidth="10%p"
     >
+        <Key
+            latin:keyStyle="backFromMoreSymbolKeyStyle"
+            latin:keyWidth="15%p"
+            latin:visualInsetsRight="1%p" />
         <include
             latin:keyboardLayout="@xml/rowkeys_symbols_shift3" />
+        <Key
+            latin:keyStyle="deleteKeyStyle"
+            latin:keyWidth="fillRight"
+            latin:visualInsetsLeft="1%p" />
     </Row>
     <include
         latin:keyboardLayout="@xml/row_symbols_shift4" />
diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardView.java b/java/src/com/android/inputmethod/keyboard/KeyboardView.java
index d5bd7fd..4a91353 100644
--- a/java/src/com/android/inputmethod/keyboard/KeyboardView.java
+++ b/java/src/com/android/inputmethod/keyboard/KeyboardView.java
@@ -662,10 +662,10 @@
                 hintY = -mFontMetrics.top + params.mKeyShiftedLetterHintPadding;
                 paint.setTextAlign(Align.CENTER);
             } else { // key.hasHintLetter()
-                // The hint label is placed at top-right corner of the key. Used mainly on phone.
+                // The hint letter is placed at top-right corner of the key. Used mainly on phone.
                 hintX = keyWidth - params.mKeyHintLetterPadding
                         - getCharWidth(KEY_NUMERIC_HINT_LABEL_REFERENCE_CHAR, paint) / 2;
-                hintY = -paint.ascent() + params.mKeyHintLetterPadding;
+                hintY = -paint.ascent();
                 paint.setTextAlign(Align.CENTER);
             }
             canvas.drawText(hint, 0, hint.length(), hintX, hintY, paint);
diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java
index d0f27a9..9dc1786 100644
--- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java
+++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java
@@ -25,7 +25,23 @@
 
 /**
  * !!!!! DO NOT EDIT THIS FILE !!!!!
- * This file is generated by tools/maketext.
+ *
+ * This file is generated by tools/maketext. The base template file is
+ *   tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl
+ *
+ * This file must be updated when any text resources in keyboard layout files have been changed.
+ * These text resources are referred to as "!text/<resource_name>" in keyboard XML definitions,
+ * and should be defined in
+ *   tools/maketext/res/values-<locale>/donottranslate-more-keys.xml
+ *
+ * To update this file, please run the following commands.
+ *   $ cd $ANDROID_BUILD_TOP
+ *   $ mmm packages/inputmethods/LatinIME/tools/maketext
+ *   $ maketext -java packages/inputmethods/LatinIME/java/src
+ *
+ * The updated source file will be generated to the following path (this file).
+ *   packages/inputmethods/LatinIME/java/src/com/android/inputmethod/keyboard/internal/
+ *   KeyboardTextsSet.java
  */
 public final class KeyboardTextsSet {
     // Language to texts map.
diff --git a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java
index 613c203..994b917 100644
--- a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java
+++ b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java
@@ -366,6 +366,7 @@
             final Preference pref = mSubtypePrefGroup.getPreference(i);
             if (pref instanceof SubtypePreference) {
                 final InputMethodSubtype subtype = ((SubtypePreference)pref).getSubtype();
+                if (subtype == null) continue;
                 if (sb.length() > 0) {
                     sb.append(AdditionalSubtype.PREF_SUBTYPE_SEPARATOR);
                 }
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 9429ef4..a644ec0 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -83,11 +83,11 @@
     private native long openNative(String sourceDir, long dictOffset, long dictSize,
             int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
     private native void closeNative(long dict);
-    private native boolean isValidWordNative(long dict, char[] word, int wordLength);
+    private native boolean isValidWordNative(long dict, int[] word, int wordLength);
     private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
             int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
             boolean useFullEditDistance, char[] outputChars, int[] scores);
-    private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
+    private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength,
             int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
             int maxWordLength, int maxBigrams);
     private static native double calcNormalizedScoreNative(
@@ -105,7 +105,7 @@
             final WordCallback callback) {
         if (mNativeDict == 0) return;
 
-        char[] chars = previousWord.toString().toCharArray();
+        int[] codePoints = StringUtils.toCodePointArray(previousWord.toString());
         Arrays.fill(mOutputChars_bigrams, (char) 0);
         Arrays.fill(mBigramScores, 0);
 
@@ -115,8 +115,8 @@
             mInputCodes[0] = codes.getCodeAt(0);
         }
 
-        int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
-                mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
+        int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes,
+                codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
         if (count > MAX_BIGRAMS) {
             count = MAX_BIGRAMS;
         }
@@ -200,7 +200,7 @@
     @Override
     public boolean isValidWord(CharSequence word) {
         if (word == null) return false;
-        char[] chars = word.toString().toCharArray();
+        int[] chars = StringUtils.toCodePointArray(word.toString());
         return isValidWordNative(mNativeDict, chars, chars.length);
     }
 
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index e1978fc..c638118 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1793,6 +1793,24 @@
     public void pickSuggestionManually(final int index, final CharSequence suggestion,
             int x, int y) {
         final SuggestedWords suggestedWords = mSuggestionsView.getSuggestions();
+        final InputConnection ic = getCurrentInputConnection();
+        if (ic != null) ic.beginBatchEdit();
+
+        // If this is a punctuation picked from the suggestion strip, pass it to onCodeInput
+        if (suggestion.length() == 1 && isShowingPunctuationList()) {
+            // Word separators are suggested before the user inputs something.
+            // So, LatinImeLogger logs "" as a user's input.
+            LatinImeLogger.logOnManualSuggestion("", suggestion.toString(), index, suggestedWords);
+            // Rely on onCodeInput to do the complicated swapping/stripping logic consistently.
+            if (ProductionFlag.IS_EXPERIMENTAL) {
+                ResearchLogger.latinIME_punctuationSuggestion(index, suggestion, x, y);
+            }
+            final int primaryCode = suggestion.charAt(0);
+            onCodeInput(primaryCode,
+                    KeyboardActionListener.SUGGESTION_STRIP_COORDINATE,
+                    KeyboardActionListener.SUGGESTION_STRIP_COORDINATE);
+            return;
+        }
 
         if (SPACE_STATE_PHANTOM == mSpaceState && suggestion.length() > 0) {
             int firstChar = Character.codePointAt(suggestion, 0);
@@ -1810,7 +1828,6 @@
             }
             mKeyboardSwitcher.updateShiftState();
             resetComposingState(true /* alsoResetLastComposedWord */);
-            final InputConnection ic = getCurrentInputConnection();
             if (ic != null) {
                 final CompletionInfo completionInfo = mApplicationSpecifiedCompletions[index];
                 ic.commitCompletion(completionInfo);
@@ -1822,21 +1839,6 @@
             return;
         }
 
-        // If this is a punctuation picked from the suggestion strip, pass it to onCodeInput
-        if (suggestion.length() == 1 && isShowingPunctuationList()) {
-            // Word separators are suggested before the user inputs something.
-            // So, LatinImeLogger logs "" as a user's input.
-            LatinImeLogger.logOnManualSuggestion("", suggestion.toString(), index, suggestedWords);
-            // Rely on onCodeInput to do the complicated swapping/stripping logic consistently.
-            if (ProductionFlag.IS_EXPERIMENTAL) {
-                ResearchLogger.latinIME_punctuationSuggestion(index, suggestion, x, y);
-            }
-            final int primaryCode = suggestion.charAt(0);
-            onCodeInput(primaryCode,
-                    KeyboardActionListener.SUGGESTION_STRIP_COORDINATE,
-                    KeyboardActionListener.SUGGESTION_STRIP_COORDINATE);
-            return;
-        }
         // We need to log before we commit, because the word composer will store away the user
         // typed word.
         final String replacedWord = mWordComposer.getTypedWord().toString();
@@ -1889,6 +1891,7 @@
                 mHandler.postUpdateSuggestions();
             }
         }
+        if (null != ic) ic.endBatchEdit();
     }
 
     /**
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 97df98e..cc98010 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -174,6 +174,13 @@
 
     private static final int MAX_TERMINAL_FREQUENCY = 255;
 
+    // Arbitrary limit to how many passes we consider address size compression should
+    // terminate in. At the time of this writing, our largest dictionary completes
+    // compression in five passes.
+    // If the number of passes exceeds this number, makedict bails with an exception on
+    // suspicion that a bug might be causing an infinite loop.
+    private static final int MAX_PASSES = 24;
+
     /**
      * A class grouping utility function for our specific character encoding.
      */
@@ -510,14 +517,22 @@
      * Each node stores its tentative address. During dictionary address computing, these
      * are not final, but they can be used to compute the node size (the node size depends
      * on the address of the children because the number of bytes necessary to store an
-     * address depends on its numeric value.
+     * address depends on its numeric value). The return value indicates whether the node
+     * contents (as in, any of the addresses stored in the cache fields) have changed with
+     * respect to their previous value.
      *
      * @param node the node to compute the size of.
      * @param dict the dictionary in which the word/attributes are to be found.
+     * @return false if none of the cached addresses inside the node changed, true otherwise.
      */
-    private static void computeActualNodeSize(Node node, FusionDictionary dict) {
+    private static boolean computeActualNodeSize(Node node, FusionDictionary dict) {
+        boolean changed = false;
         int size = getGroupCountSize(node);
         for (CharGroup group : node.mData) {
+            if (group.mCachedAddress != node.mCachedAddress + size) {
+                changed = true;
+                group.mCachedAddress = node.mCachedAddress + size;
+            }
             int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
             if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE;
             if (null != group.mChildren) {
@@ -538,7 +553,11 @@
             group.mCachedSize = groupSize;
             size += groupSize;
         }
-        node.mCachedSize = size;
+        if (node.mCachedSize != size) {
+            node.mCachedSize = size;
+            changed = true;
+        }
+        return changed;
     }
 
     /**
@@ -594,13 +613,14 @@
             changesDone = false;
             for (Node n : flatNodes) {
                 final int oldNodeSize = n.mCachedSize;
-                computeActualNodeSize(n, dict);
+                final boolean changed = computeActualNodeSize(n, dict);
                 final int newNodeSize = n.mCachedSize;
                 if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
-                if (oldNodeSize != newNodeSize) changesDone = true;
+                changesDone |= changed;
             }
             stackNodes(flatNodes);
             ++passes;
+            if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
         } while (changesDone);
 
         final Node lastNode = flatNodes.get(flatNodes.size() - 1);
@@ -1122,6 +1142,12 @@
         }
     }
 
+    // The word cache here is a stopgap bandaid for the catastrophic performance of this
+    // method: it performs direct, unbuffered random access to the file and may be called
+    // hundreds of thousands of times, which is not reasonable without some kind of cache.
+    // NOTE(review): entries are keyed by address only, not by source file - confirm callers.
+    // TODO: perform buffered I/O here and in other places in the code.
+    private static TreeMap<Integer, String> wordCache = new TreeMap<Integer, String>();
     /**
      * Finds, as a string, the word at the address passed as an argument.
      *
@@ -1131,8 +1157,10 @@
      * @return the word, as a string.
      * @throws IOException if the file can't be read.
      */
-    private static String getWordAtAddress(RandomAccessFile source, long headerSize,
+    private static String getWordAtAddress(final RandomAccessFile source, final long headerSize,
             int address) throws IOException {
+        final String cachedString = wordCache.get(address);
+        if (null != cachedString) return cachedString;
         final long originalPointer = source.getFilePointer();
         source.seek(headerSize);
         final int count = readCharGroupCount(source);
@@ -1171,6 +1199,7 @@
             }
         }
         source.seek(originalPointer);
+        wordCache.put(address, result);
         return result;
     }
 
diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java
index a173713..26a9415 100644
--- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java
+++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java
@@ -171,7 +171,7 @@
 
         public boolean mMoreSuggestionsAvailable;
 
-        public final TextView mWordToSaveView;
+        private final TextView mWordToSaveView;
         private final TextView mLeftwardsArrowView;
         private final TextView mHintToSaveView;
 
@@ -477,7 +477,7 @@
         }
 
         public void layoutAddToDictionaryHint(CharSequence word, ViewGroup stripView,
-                int stripWidth, CharSequence hintText) {
+                int stripWidth, CharSequence hintText, OnClickListener listener) {
             final int width = stripWidth - mDividerWidth - mPadding * 2;
 
             final TextView wordView = mWordToSaveView;
@@ -508,6 +508,18 @@
             stripView.addView(hintView);
             setLayoutWeight(
                     hintView, 1.0f - mCenterSuggestionWeight, ViewGroup.LayoutParams.MATCH_PARENT);
+
+            wordView.setOnClickListener(listener);
+            leftArrowView.setOnClickListener(listener);
+            hintView.setOnClickListener(listener);
+        }
+
+        public CharSequence getAddToDictionaryWord() {
+            return (CharSequence)mWordToSaveView.getTag();
+        }
+
+        public boolean isAddToDictionaryShowing(View v) {
+            return v == mWordToSaveView || v == mHintToSaveView || v == mLeftwardsArrowView;
         }
 
         private static void setLayoutWeight(View v, float weight, int height) {
@@ -620,7 +632,6 @@
         }
 
         mParams = new SuggestionsViewParams(context, attrs, defStyle, mWords, mDividers, mInfos);
-        mParams.mWordToSaveView.setOnClickListener(this);
 
         mMoreSuggestionsContainer = inflater.inflate(R.layout.more_suggestions, null);
         mMoreSuggestionsView = (MoreSuggestionsView)mMoreSuggestionsContainer
@@ -676,12 +687,12 @@
 
     public boolean isShowingAddToDictionaryHint() {
         return mSuggestionsStrip.getChildCount() > 0
-                && mSuggestionsStrip.getChildAt(0) == mParams.mWordToSaveView;
+                && mParams.isAddToDictionaryShowing(mSuggestionsStrip.getChildAt(0));
     }
 
     public void showAddToDictionaryHint(CharSequence word, CharSequence hintText) {
         clear();
-        mParams.layoutAddToDictionaryHint(word, mSuggestionsStrip, getWidth(), hintText);
+        mParams.layoutAddToDictionaryHint(word, mSuggestionsStrip, getWidth(), hintText, this);
     }
 
     public boolean dismissAddToDictionaryHint() {
@@ -851,8 +862,8 @@
 
     @Override
     public void onClick(View view) {
-        if (view == mParams.mWordToSaveView) {
-            addToDictionary((CharSequence)view.getTag());
+        if (mParams.isAddToDictionaryShowing(view)) {
+            addToDictionary(mParams.getAddToDictionaryWord());
             clear();
             return;
         }
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 2ef72e1..3e72ce6 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -153,30 +153,30 @@
 }
 
 static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
-        jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
+        jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
         jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) {
     Dictionary *dictionary = (Dictionary*)dict;
     if (!dictionary) return 0;
-    jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0);
+    jint *prevWord = env->GetIntArrayElements(prevWordArray, 0);
     int *inputCodes = env->GetIntArrayElements(inputArray, 0);
     jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
     int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
-    int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
+    int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes,
             inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
     env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
     env->ReleaseCharArrayElements(outputArray, outputChars, 0);
     env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
-    env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT);
+    env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT);
     return count;
 }
 
 static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
-        jcharArray wordArray, jint wordLength) {
+        jintArray wordArray, jint wordLength) {
     Dictionary *dictionary = (Dictionary*)dict;
     if (!dictionary) return (jboolean) false;
-    jchar *word = env->GetCharArrayElements(wordArray, 0);
-    jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
-    env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
+    jint *word = env->GetIntArrayElements(wordArray, 0);
+    jboolean result = dictionary->isValidWord(word, wordLength);
+    env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
     return result;
 }
 
@@ -236,8 +236,8 @@
     {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
     {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
             (void*)latinime_BinaryDictionary_getSuggestions},
-    {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
-    {"getBigramsNative", "(J[CI[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
+    {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+    {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
     {"calcNormalizedScoreNative", "([CI[CII)D",
             (void*)latinime_BinaryDictionary_calcNormalizedScore},
     {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 320b0af..927381f 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -96,7 +96,7 @@
  * and the bigrams are used to boost unigram result scores, it makes little sense to
  * reduce their scope to the ones that match the first letter.
  */
-int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes,
+int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes,
         int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
         int maxBigrams) {
     // TODO: remove unused arguments, and refrain from storing stuff in members of this class
@@ -134,7 +134,7 @@
 // Returns a pointer to the start of the bigram list.
 // If the word is not found or has no bigrams, this function returns 0.
 int BigramDictionary::getBigramListForWord(const uint8_t* const root,
-        const unsigned short *prevWord, const int prevWordLength) {
+        const int32_t *prevWord, const int prevWordLength) {
     int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
 
     if (NOT_VALID_WORD == pos) return 0;
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 1612131..07e47f0 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -25,10 +25,10 @@
 class BigramDictionary {
  public:
     BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
-    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
+    int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
     int getBigramListForWord(const uint8_t* const root,
-        const unsigned short *prevWord, const int prevWordLength);
+        const int32_t *prevWord, const int prevWordLength);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index f593024..d5d67c1 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -62,10 +62,11 @@
     static bool hasChildrenInFlags(const uint8_t flags);
     static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
             int *pos);
-    static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord,
+    static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
             const int length);
     static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
             uint16_t* outWord);
+    static int getProbability(const int bigramListPosition, const int unigramFreq);
 
     // Flags for special processing
     // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -304,7 +305,7 @@
 // This function gets the byte position of the last chargroup of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_VALID_WORD.
 inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
-        const uint16_t* const inWord, const int length) {
+        const int32_t* const inWord, const int length) {
     int pos = 0;
     int wordPos = 0;
 
@@ -313,7 +314,7 @@
         // there was no match (or we would have found it).
         if (wordPos > length) return NOT_VALID_WORD;
         int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
-        const uint16_t wChar = inWord[wordPos];
+        const int32_t wChar = inWord[wordPos];
         while (true) {
             // If there are no more character groups in this node, it means we could not
             // find a matching character for this depth, therefore there is no match.
@@ -517,6 +518,14 @@
     return 0;
 }
 
+// This should probably return a probability in log space.
+inline int BinaryFormat::getProbability(const int bigramListPosition, const int unigramFreq) {
+    // TODO: use the bigram list position to get the bigram probability. If the bigram
+    // is not found, use the unigram frequency.
+    // TODO: if the unigram frequency is used, compute the actual probability
+    return unigramFreq;
+}
+
 } // namespace latinime
 
 #endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 087219e..376e9a1 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -165,28 +165,28 @@
             wordCount, this, isSpaceProximity, word);
 }
 
-int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
-    return getFinalFreqInternal(freq, word, wordLength, mInputLength);
+int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
+    return getFinalProbabilityInternal(probability, word, wordLength, mInputLength);
 }
 
-int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
-        const int inputLength) {
-    return getFinalFreqInternal(freq, word, wordLength, inputLength);
+int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
+        int *wordLength, const int inputLength) {
+    return getFinalProbabilityInternal(probability, word, wordLength, inputLength);
 }
 
-int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
-        const int inputLength) {
+int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
+        int *wordLength, const int inputLength) {
     const int outputIndex = mTerminalOutputIndex;
     const int inputIndex = mTerminalInputIndex;
     *wordLength = outputIndex + 1;
     if (outputIndex < MIN_SUGGEST_DEPTH) {
-        return NOT_A_FREQUENCY;
+        return NOT_A_PROBABILITY;
     }
 
     *word = mWord;
-    int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
-            inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
-    return finalFreq;
+    int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
+            inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength);
+    return finalProbability;
 }
 
 bool Correction::initProcessState(const int outputIndex) {
@@ -649,8 +649,8 @@
 //////////////////////
 
 /* static */
-int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
-        const int freq, int* editDistanceTable, const Correction* correction,
+int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
+        const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction,
         const int inputLength) {
     const int excessivePos = correction->getExcessivePos();
     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index ee55c96..1b4e4bf 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -132,9 +132,9 @@
     int getFreqForSplitMultipleWords(
             const int *freqArray, const int *wordLengthArray, const int wordCount,
             const bool isSpaceProximity, const unsigned short *word);
-    int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
-    int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
-            const int inputLength);
+    int getFinalProbability(const int probability, unsigned short **word, int* wordLength);
+    int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
+            int* wordLength, const int inputLength);
 
     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
 
@@ -156,8 +156,8 @@
 
     class RankingAlgorithm {
      public:
-        static int calculateFinalFreq(const int inputIndex, const int depth,
-                const int freq, int *editDistanceTable, const Correction* correction,
+        static int calculateFinalProbability(const int inputIndex, const int depth,
+                const int probability, int *editDistanceTable, const Correction* correction,
                 const int inputLength);
         static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
                 const int wordCount, const Correction* correction, const bool isSpaceProximity,
@@ -182,8 +182,8 @@
             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
     inline CorrectionType processUnrelatedCorrectionType();
     inline void addCharToCurrentWord(const int32_t c);
-    inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength,
-            const int inputLength);
+    inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
+            int* wordLength, const int inputLength);
 
     const int TYPED_LETTER_MULTIPLIER;
     const int FULL_WORD_MULTIPLIER;
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index e882c37..c99f8a8 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -172,7 +172,7 @@
 #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
 #define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4
 #define NOT_AN_INDEX -1
-#define NOT_A_FREQUENCY -1
+#define NOT_A_PROBABILITY -1
 
 #define KEYCODE_SPACE ' '
 
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 90ec207..9dc2072 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -54,7 +54,7 @@
     delete mBigramDictionary;
 }
 
-bool Dictionary::isValidWord(unsigned short *word, int length) {
+bool Dictionary::isValidWord(const int32_t *word, int length) {
     return mUnigramDictionary->isValidWord(word, length);
 }
 
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 66a5c21..5b9ddb3 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -35,18 +35,22 @@
     int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
             int *codes, int codesSize, bool useFullEditDistance, unsigned short *outWords,
             int *frequencies) {
+        // bigramListPosition is, as an int, the offset of the bigram list in the file.
+        // If none, it's zero.
+        // TODO: get this from the bigram dictionary instance
+        const int bigramListPosition = 0;
         return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
-                mCorrection, xcoordinates, ycoordinates, codes,
-                codesSize, useFullEditDistance, outWords, frequencies);
+                mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
+                useFullEditDistance, outWords, frequencies);
     }
 
-    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
+    int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
         return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
                 maxWordLength, maxBigrams);
     }
 
-    bool isValidWord(unsigned short *word, int length);
+    bool isValidWord(const int32_t *word, int length);
     void *getDict() { return (void *)mDict; }
     int getDictSize() { return mDictSize; }
     int getMmapFd() { return mMmapFd; }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index ab8570e..0c759d4 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -98,7 +98,8 @@
 void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
         int *xCoordinatesBuffer, int *yCoordinatesBuffer,
-        const int codesBufferSize, const bool useFullEditDistance, const int *codesSrc,
+        const int codesBufferSize, const int bigramListPosition,
+        const bool useFullEditDistance, const int *codesSrc,
         const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
         WordsPriorityQueuePool *queuePool,
         const digraph_t* const digraphs, const unsigned int digraphsSize) {
@@ -127,8 +128,8 @@
                         replacementCodePoint;
                 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
                         codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
-                        useFullEditDistance, codesSrc + i + 1, codesRemain - i - 1,
-                        currentDepth + 1, codesDest + i, correction,
+                        bigramListPosition, useFullEditDistance, codesSrc + i + 1,
+                        codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
                         queuePool, digraphs, digraphsSize);
 
                 // Copy the second char of the digraph in place, then continue processing on
@@ -137,9 +138,9 @@
                 memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
                 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
                         codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
-                        useFullEditDistance, codesSrc + i, codesRemain - i, currentDepth + 1,
-                        codesDest + i, correction, queuePool,
-                        digraphs, digraphsSize);
+                        bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i,
+                        currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
+                        digraphsSize);
                 return;
             }
         }
@@ -160,14 +161,16 @@
     }
 
     getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
-            startIndex + codesRemain, useFullEditDistance, correction,
+            startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction,
             queuePool);
 }
 
+// bigramListPosition is the offset in the file to the list of bigrams for the previous word.
 int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
         WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
         const int *ycoordinates, const int *codes, const int codesSize,
-        const bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
+        const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords,
+        int *frequencies) {
 
     queuePool->clearAll();
     Correction* masterCorrection = correction;
@@ -177,8 +180,8 @@
         int xCoordinatesBuffer[codesSize];
         int yCoordinatesBuffer[codesSize];
         getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
-                xCoordinatesBuffer, yCoordinatesBuffer,
-                codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
+                xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
+                useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
                 queuePool, GERMAN_UMLAUT_DIGRAPHS,
                 sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
     } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
@@ -186,13 +189,13 @@
         int xCoordinatesBuffer[codesSize];
         int yCoordinatesBuffer[codesSize];
         getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
-                xCoordinatesBuffer, yCoordinatesBuffer,
-                codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
+                xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
+                useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
                 queuePool, FRENCH_LIGATURES_DIGRAPHS,
                 sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
     } else { // Normal processing
         getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
-                useFullEditDistance, masterCorrection, queuePool);
+                bigramListPosition, useFullEditDistance, masterCorrection, queuePool);
     }
 
     PROF_START(20);
@@ -225,16 +228,16 @@
 
 void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int *ycoordinates, const int *codes,
-        const int inputLength, const bool useFullEditDistance, Correction *correction,
-        WordsPriorityQueuePool *queuePool) {
+        const int inputLength, const int bigramListPosition, const bool useFullEditDistance,
+        Correction *correction, WordsPriorityQueuePool *queuePool) {
 
     PROF_OPEN;
     PROF_START(0);
     PROF_END(0);
 
     PROF_START(1);
-    getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance,
-            inputLength, correction, queuePool);
+    getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition,
+            useFullEditDistance, inputLength, correction, queuePool);
     PROF_END(1);
 
     PROF_START(2);
@@ -305,15 +308,16 @@
 
 void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int *ycoordinates, const int *codes,
-        const bool useFullEditDistance, const int inputLength, Correction *correction,
-        WordsPriorityQueuePool *queuePool) {
+        const int bigramListPosition, const bool useFullEditDistance, const int inputLength,
+        Correction *correction, WordsPriorityQueuePool *queuePool) {
     initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
-    getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
-            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
+    getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction,
+            queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
 }
 
 void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
-        const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
+        const int inputLength, const int bigramListPosition,
+        Correction *correction, WordsPriorityQueuePool *queuePool,
         const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
     // TODO: Remove setCorrectionParams
     correction->setCorrectionParams(0, 0, 0,
@@ -333,8 +337,8 @@
             int firstChildPos;
 
             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
-                    correction, &childCount, &firstChildPos, &siblingPos, queuePool,
-                    currentWordIndex);
+                    bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos,
+                    queuePool, currentWordIndex);
             // Update next sibling pos
             correction->setTreeSiblingPos(outputIndex, siblingPos);
 
@@ -349,7 +353,7 @@
     }
 }
 
-inline void UnigramDictionary::onTerminal(const int freq,
+inline void UnigramDictionary::onTerminal(const int probability,
         const TerminalAttributes& terminalAttributes, Correction *correction,
         WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
         const int currentWordIndex) {
@@ -361,26 +365,28 @@
 
     if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
         WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
-        if (finalFreq != NOT_A_FREQUENCY) {
-            addWord(wordPointer, wordLength, finalFreq, masterQueue);
+        const int finalProbability =
+                correction->getFinalProbability(probability, &wordPointer, &wordLength);
+        if (finalProbability != NOT_A_PROBABILITY) {
+            addWord(wordPointer, wordLength, finalProbability, masterQueue);
 
-            const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0;
+            const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
             // Please note that the shortcut candidates will be added to the master queue only.
             TerminalAttributes::ShortcutIterator iterator =
                     terminalAttributes.getShortcutIterator();
             while (iterator.hasNextShortcutTarget()) {
                 // TODO: addWord only supports weak ordering, meaning we have no means
                 // to control the order of the shortcuts relative to one another or to the word.
-                // We need to either modulate the frequency of each shortcut according
-                // to its own shortcut frequency or to make the queue
+                // We need to either modulate the probability of each shortcut according
+                // to its own shortcut probability or to make the queue
                 // so that the insert order is protected inside the queue for words
                 // with the same score. For the moment we use -1 to make sure the shortcut will
                 // never be in front of the word.
                 uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
                 const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                         MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
-                addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue);
+                addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
+                        masterQueue);
             }
         }
     }
@@ -393,9 +399,9 @@
         if (!subQueue) {
             return;
         }
-        const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
-                inputIndex);
-        addWord(wordPointer, wordLength, finalFreq, subQueue);
+        const int finalProbability = correction->getFinalProbabilityForSubQueue(
+                probability, &wordPointer, &wordLength, inputIndex);
+        addWord(wordPointer, wordLength, finalProbability, subQueue);
     }
 }
 
@@ -424,8 +430,10 @@
             initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
                     codes + offset, inputWordLength, correction);
             queuePool->clearSubQueue(currentWordIndex);
-            getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
-                    queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
+            // TODO: pass the bigram list for substring suggestion
+            getSuggestionCandidates(useFullEditDistance, inputWordLength,
+                    0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */,
+                    MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
             if (DEBUG_DICT) {
                 if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
                     AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
@@ -730,7 +738,7 @@
     return maxFreq;
 }
 
-bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const {
+bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
     return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
 }
 
@@ -755,7 +763,7 @@
 // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
 // given level, as output into newCount when traversing this level's parent.
 inline bool UnigramDictionary::processCurrentNode(const int initialPos,
-        Correction *correction, int *newCount,
+        const int bigramListPosition, Correction *correction, int *newCount,
         int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
         const int currentWordIndex) {
     if (DEBUG_DICT) {
@@ -834,11 +842,14 @@
     if (isTerminalNode) {
         // The frequency should be here, because we come here only if this is actually
         // a terminal node, and we are on its last char.
-        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+        const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
         const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
         const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
         TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
-        onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
+        // The bigramListPosition is the offset in the file of the bigrams for the previous word,
+        // or zero if we don't know of any bigrams for it.
+        const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq);
+        onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
                 currentWordIndex);
 
         // If there are more chars in this node, then this virtual node has children.
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 4479cd9..0cc59ba 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -71,37 +71,38 @@
 
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
-    bool isValidWord(const uint16_t* const inWord, const int length) const;
+    bool isValidWord(const int32_t* const inWord, const int length) const;
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
-            Correction *correction, const int *xcoordinates,
-            const int *ycoordinates, const int *codes, const int codesSize,
+            Correction *correction, const int *xcoordinates, const int *ycoordinates,
+            const int *codes, const int codesSize, const int bigramListPosition,
             const bool useFullEditDistance, unsigned short *outWords, int *frequencies);
     virtual ~UnigramDictionary();
 
  private:
     void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int inputLength,
-            const bool useFullEditDistance, Correction *correction,
+            const int bigramListPosition, const bool useFullEditDistance, Correction *correction,
             WordsPriorityQueuePool *queuePool);
     int getDigraphReplacement(const int *codes, const int i, const int codesSize,
             const digraph_t* const digraphs, const unsigned int digraphsSize) const;
     void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
-        int *xCoordinatesBuffer, int *yCoordinatesBuffer,
-        const int codesBufferSize, const bool useFullEditDistance, const int* codesSrc,
+        int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
+        const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc,
         const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
         WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
         const unsigned int digraphsSize);
     void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
     void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
-            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
-            const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
-    void getSuggestionCandidates(
+            const int *ycoordinates, const int *codes, const int bigramListPosition,
             const bool useFullEditDistance, const int inputLength, Correction *correction,
-            WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors,
-            const int currentWordIndex);
+            WordsPriorityQueuePool* queuePool);
+    void getSuggestionCandidates(
+            const bool useFullEditDistance, const int inputLength, const int bigramListPosition,
+            Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
+            const int maxErrors, const int currentWordIndex);
     void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
             const int *xcoordinates, const int *ycoordinates, const int *codes,
             const bool useFullEditDistance, const int inputLength,
@@ -113,9 +114,9 @@
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
     // Process a node by considering proximity, missing and excessive character
-    bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
-            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
-            const int currentWordIndex);
+    bool processCurrentNode(const int initialPos, const int bigramListPosition,
+            Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition,
+            WordsPriorityQueuePool *queuePool, const int currentWordIndex);
     int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
             ProximityInfo *proximityInfo, unsigned short *word);
     int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
diff --git a/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl b/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl
index 5a19338..8df94c1 100644
--- a/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl
+++ b/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl
@@ -25,7 +25,23 @@
 
 /**
  * !!!!! DO NOT EDIT THIS FILE !!!!!
- * This file is generated by tools/maketext.
+ *
+ * This file is generated by tools/maketext. The base template file is
+ *   tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl
+ *
+ * This file must be updated when any text resources in keyboard layout files have been changed.
+ * These text resources are referred to as "!text/<resource_name>" in keyboard XML definitions,
+ * and should be defined in
+ *   tools/maketext/res/values-<locale>/donottranslate-more-keys.xml
+ *
+ * To update this file, please run the following commands.
+ *   $ cd $ANDROID_BUILD_TOP
+ *   $ mmm packages/inputmethods/LatinIME/tools/maketext
+ *   $ maketext -java packages/inputmethods/LatinIME/java/src
+ *
+ * The updated source file will be written to the following path (this file).
+ *   packages/inputmethods/LatinIME/java/src/com/android/inputmethod/keyboard/internal/
+ *   KeyboardTextsSet.java
  */
 public final class KeyboardTextsSet {
     // Language to texts map.