Extend JNI interface to get bigrams in WordProperty.

Bug: 12810574
Change-Id: Ia4b88d02ea8790a5c47d32376cc0b84c3e071ddd
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 95823da..6e0cdf2 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -68,11 +68,12 @@
     private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
     private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
 
-    // Format to get unigram historical info from native side via getWordPropertyNative().
-    private static final int FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
-    private static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 0;
-    private static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 1;
-    private static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 2;
+    // Format to get probability and historical info from native side via getWordPropertyNative().
+    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
+    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
+    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
+    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
+    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;
 
     private long mNativeDict;
     private final Locale mLocale;
@@ -144,9 +145,9 @@
     private static native int getProbabilityNative(long dict, int[] word);
     private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
     private static native void getWordPropertyNative(long dict, int[] word,
-            int[] outCodePoints, boolean[] outFlags, int[] outProbability,
-            int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
-            ArrayList<Integer> outShortcutProbabilities);
+            int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
+            ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
+            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
     private static native int getSuggestionsNative(long dict, long proximityInfo,
             long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
             int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -313,22 +314,22 @@
         final int[] codePoints = StringUtils.toCodePointArray(word);
         final int[] outCodePoints = new int[MAX_WORD_LENGTH];
         final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
-        final int[] outProbability = new int[1];
-        final int[] outHistoricalInfo =
-                new int[FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
+        final int[] outProbabilityInfo =
+                new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
+        final ArrayList<int[]> outBigramTargets = CollectionUtils.newArrayList();
+        final ArrayList<int[]> outBigramProbabilityInfo = CollectionUtils.newArrayList();
         final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
         final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
-        getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
-                outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+        getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo,
+                outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+                outShortcutProbabilities);
         return new WordProperty(codePoints,
                 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
                 outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
                 outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
-                outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
-                outHistoricalInfo[FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
-                outHistoricalInfo[FORMAT_WORD_PROPERTY_LEVEL_INDEX],
-                outHistoricalInfo[FORMAT_WORD_PROPERTY_COUNT_INDEX],
-                outShortcutTargets, outShortcutProbabilities);
+                outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo,
+                outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+                outShortcutProbabilities);
     }
 
     // Add a unigram entry to binary dictionary with unigram attributes in native code.
diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
index d6c0f90..ba9b114 100644
--- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
@@ -32,15 +32,30 @@
     public final boolean mIsBlacklisted;
     public final boolean mHasBigrams;
     public final boolean mHasShortcuts;
-    public final int mProbability;
-    // mTimestamp, mLevel and mCount are historical info. These values are depend on the
-    // implementation in native code; thus, we must not use them and have any assumptions about
-    // them except for tests.
-    public final int mTimestamp;
-    public final int mLevel;
-    public final int mCount;
+    public final ProbabilityInfo mProbabilityInfo;
+    public final ArrayList<WeightedString> mBigramTargets = CollectionUtils.newArrayList();
+    public final ArrayList<ProbabilityInfo> mBigramProbabilityInfo = CollectionUtils.newArrayList();
     public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
 
+    // TODO: Use this kind of Probability class for dictionary read/write code under the makedict
+    // package.
+    public static final class ProbabilityInfo {
+        public final int mProbability;
+        // mTimestamp, mLevel and mCount are historical info. These values depend on the
+        // implementation in native code; thus, we must not use them or make any assumptions about
+        // them except in tests.
+        public final int mTimestamp;
+        public final int mLevel;
+        public final int mCount;
+
+        public ProbabilityInfo(final int[] probabilityInfo) {
+            mProbability = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX];
+            mTimestamp = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX];
+            mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
+            mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
+        }
+    }
+
     private static int getCodePointCount(final int[] codePoints) {
         for (int i = 0; i < codePoints.length; i++) {
             if (codePoints[i] == 0) {
@@ -53,18 +68,29 @@
     // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
     public WordProperty(final int[] codePoints, final boolean isNotAWord,
             final boolean isBlacklisted, final boolean hasBigram,
-            final boolean hasShortcuts, final int probability, final int timestamp,
-            final int level, final int count, final ArrayList<int[]> shortcutTargets,
+            final boolean hasShortcuts, final int[] probabilityInfo,
+            final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
+            final ArrayList<int[]> shortcutTargets,
             final ArrayList<Integer> shortcutProbabilities) {
         mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
         mIsNotAWord = isNotAWord;
         mIsBlacklisted = isBlacklisted;
         mHasBigrams = hasBigram;
         mHasShortcuts = hasShortcuts;
-        mProbability = probability;
-        mTimestamp = timestamp;
-        mLevel = level;
-        mCount = count;
+        mProbabilityInfo = new ProbabilityInfo(probabilityInfo);
+
+        final int bigramTargetCount = bigramTargets.size();
+        for (int i = 0; i < bigramTargetCount; i++) {
+            final int[] bigramTargetCodePointArray = bigramTargets.get(i);
+            final String bigramTargetString = new String(bigramTargetCodePointArray,
+                    0 /* offset */, getCodePointCount(bigramTargetCodePointArray));
+            final ProbabilityInfo bigramProbability =
+                    new ProbabilityInfo(bigramProbabilityInfo.get(i));
+            mBigramTargets.add(
+                    new WeightedString(bigramTargetString, bigramProbability.mProbability));
+            mBigramProbabilityInfo.add(bigramProbability);
+        }
+
         final int shortcutTargetCount = shortcutTargets.size();
         for (int i = 0; i < shortcutTargetCount; i++) {
             final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
@@ -77,6 +103,6 @@
 
     @UsedForTesting
     public boolean isValid() {
-        return mProbability != BinaryDictionary.NOT_A_PROBABILITY;
+        return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
     }
 }
\ No newline at end of file
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 16a3fe8..8f3f8e2 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -262,16 +262,17 @@
 
 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
         jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
-        jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
-        jobject outShortcutProbabilities) {
+        jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
+        jobject outShortcutTargets, jobject outShortcutProbabilities) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return;
     const jsize wordLength = env->GetArrayLength(word);
     int wordCodePoints[wordLength];
     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
     const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
-    wordProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
-            outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+    wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
+            outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+            outShortcutProbabilities);
 }
 
 static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -521,7 +522,8 @@
     },
     {
         const_cast<char *>("getWordPropertyNative"),
-        const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
+        const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
+                "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
     },
     {
diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp
index 4a260a9..d8c330b 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/word_property.cpp
@@ -19,20 +19,23 @@
 namespace latinime {
 
 void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
-        jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
-        jobject outShortcutTargets, jobject outShortcutProbabilities) const {
+        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+        jobject outBigramProbabilities, jobject outShortcutTargets,
+        jobject outShortcutProbabilities) const {
     env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
     jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
     env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
-    env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
-    int historicalInfo[] = {mTimestamp, mLevel, mCount};
-    env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
-            historicalInfo);
+    int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
+    env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+            probabilityInfo);
 
     jclass integerClass = env->FindClass("java/lang/Integer");
     jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
     jclass arrayListClass = env->FindClass("java/util/ArrayList");
     jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+
+    // TODO: Output bigrams.
+    // Output shortcuts.
     const int shortcutTargetCount = mShortcuts.size();
     for (int i = 0; i < shortcutTargetCount; ++i) {
         const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h
index 69c8808..cc06b1b 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/word_property.h
@@ -78,8 +78,8 @@
               mShortcuts(*shortcuts) {}
 
     void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
-            jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
-            jobject outShortcutProbabilities) const;
+            jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
+            jobject outShortcutTargets, jobject outShortcutProbabilities) const;
 
  private:
     DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 844fcbb..5294bb0 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -871,11 +871,11 @@
         }
     }
 
-    public void testGetUnigramProperties() {
-        testGetUnigramProperties(FormatSpec.VERSION4);
+    public void testGetWordProperties() {
+        testGetWordProperties(FormatSpec.VERSION4);
     }
 
-    private void testGetUnigramProperties(final int formatVersion) {
+    private void testGetWordProperties(final int formatVersion) {
         final long seed = System.currentTimeMillis();
         final Random random = new Random(seed);
         final int ITERATION_COUNT = 1000;
@@ -892,8 +892,8 @@
                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
 
-        final WordProperty invalidUnigramProperty = binaryDictionary.getWordProperty("dummyWord");
-        assertFalse(invalidUnigramProperty.isValid());
+        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
+        assertFalse(invalidWordProperty.isValid());
 
         for (int i = 0; i < ITERATION_COUNT; i++) {
             final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -904,15 +904,15 @@
             binaryDictionary.addUnigramWord(word, unigramProbability,
                     null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
                     isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
-            final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
-            assertEquals(word, unigramProperty.mCodePoints);
-            assertTrue(unigramProperty.isValid());
-            assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
-            assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
-            assertEquals(false, unigramProperty.mHasBigrams);
-            assertEquals(false, unigramProperty.mHasShortcuts);
-            assertEquals(unigramProbability, unigramProperty.mProbability);
-            assertTrue(unigramProperty.mShortcutTargets.isEmpty());
+            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+            assertEquals(word, wordProperty.mCodePoints);
+            assertTrue(wordProperty.isValid());
+            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
+            assertEquals(isBlacklisted, wordProperty.mIsBlacklisted);
+            assertEquals(false, wordProperty.mHasBigrams);
+            assertEquals(false, wordProperty.mHasShortcuts);
+            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
+            assertTrue(wordProperty.mShortcutTargets.isEmpty());
         }
     }
 
@@ -936,28 +936,28 @@
         binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
                 shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                 0 /* timestamp */);
-        WordProperty unigramProperty = binaryDictionary.getWordProperty("aaa");
-        assertEquals(1, unigramProperty.mShortcutTargets.size());
-        assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
-        assertEquals(shortcutProbability, unigramProperty.mShortcutTargets.get(0).mFrequency);
+        WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
+        assertEquals(1, wordProperty.mShortcutTargets.size());
+        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+        assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency);
         final int updatedShortcutProbability = 2;
         binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
                 updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                 0 /* timestamp */);
-        unigramProperty = binaryDictionary.getWordProperty("aaa");
-        assertEquals(1, unigramProperty.mShortcutTargets.size());
-        assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
+        wordProperty = binaryDictionary.getWordProperty("aaa");
+        assertEquals(1, wordProperty.mShortcutTargets.size());
+        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
         assertEquals(updatedShortcutProbability,
-                unigramProperty.mShortcutTargets.get(0).mFrequency);
+                wordProperty.mShortcutTargets.get(0).mFrequency);
         binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
                 shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                 0 /* timestamp */);
         final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
         shortcutTargets.put("zzz", updatedShortcutProbability);
         shortcutTargets.put("yyy", shortcutProbability);
-        unigramProperty = binaryDictionary.getWordProperty("aaa");
-        assertEquals(2, unigramProperty.mShortcutTargets.size());
-        for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+        wordProperty = binaryDictionary.getWordProperty("aaa");
+        assertEquals(2, wordProperty.mShortcutTargets.size());
+        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
             shortcutTargets.remove(shortcutTarget.mWord);
@@ -965,9 +965,9 @@
         shortcutTargets.put("zzz", updatedShortcutProbability);
         shortcutTargets.put("yyy", shortcutProbability);
         binaryDictionary.flushWithGC();
-        unigramProperty = binaryDictionary.getWordProperty("aaa");
-        assertEquals(2, unigramProperty.mShortcutTargets.size());
-        for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+        wordProperty = binaryDictionary.getWordProperty("aaa");
+        assertEquals(2, wordProperty.mShortcutTargets.size());
+        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
             shortcutTargets.remove(shortcutTarget.mWord);
@@ -1034,14 +1034,15 @@
         }
 
         for (final String word : words) {
-            final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
-            assertEquals((int)unigramProbabilities.get(word), unigramProperty.mProbability);
+            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+            assertEquals((int)unigramProbabilities.get(word),
+                    wordProperty.mProbabilityInfo.mProbability);
             if (!shortcutTargets.containsKey(word)) {
                 // The word does not have shortcut targets.
                 continue;
             }
-            assertEquals(shortcutTargets.get(word).size(), unigramProperty.mShortcutTargets.size());
-            for (final WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+            assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
+            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                 final String targetCodePonts = shortcutTarget.mWord;
                 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
                         shortcutTarget.mFrequency);