Fix bug and Add large test for decaying dictionary.

- GC gets failure when the dictionary become empty.
- Useless unigrams are sometimes not removed.

Bug: 10197478
Change-Id: I8d1479c01efba61a81f03bc077da6bcb4797a940
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 541e697..fd29698 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -52,6 +52,10 @@
     public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
     @UsedForTesting
     public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+    @UsedForTesting
+    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+    @UsedForTesting
+    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
 
     private long mNativeDict;
     private final Locale mLocale;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index a17a0ac..5724c5d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -39,7 +39,7 @@
             return false;
         }
         if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
-            isUselessPtNode = false;
+            isUselessPtNode = true;
         }
     }
     if (mChildrenValue > 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 3ca2f2a..9755120 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -60,6 +60,7 @@
 
         bool onDescend(const int ptNodeArrayPos) {
             mValueStack.push_back(0);
+            mChildrenValue = 0;
             return true;
         }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 31e3fb4..3d07c9d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -37,6 +37,8 @@
 // BinaryDictionaryDecayingTests.
 const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
 const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
 const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
         "SET_NEEDS_TO_DECAY_FOR_TESTING";
 const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
@@ -355,6 +357,14 @@
         snprintf(outResult, maxResultLength, "%d", mUnigramCount);
     } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
         snprintf(outResult, maxResultLength, "%d", mBigramCount);
+    } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
+                        DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
+    } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
+                        DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
     } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
         mNeedsToDecayForTesting = true;
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 903f65e..be97ee1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -102,6 +102,8 @@
 
     static const char *const UNIGRAM_COUNT_QUERY;
     static const char *const BIGRAM_COUNT_QUERY;
+    static const char *const MAX_UNIGRAM_COUNT_QUERY;
+    static const char *const MAX_BIGRAM_COUNT_QUERY;
     static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
     static const int MAX_DICT_EXTENDED_REGION_SIZE;
     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index 601ee66..f108c21 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -93,6 +93,12 @@
     if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
         return false;
     }
+    if (isEnd()) {
+        // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
+        if (!listener->onReadingPtNodeArrayTail()) {
+            return false;
+        }
+    }
     pushReadingStateToStack();
     while (!isEnd()) {
         if (alreadyVisitedAllPtNodesInArray) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index 512a4d8..a71c069 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -279,7 +279,9 @@
         } else {
             mReadingState = mReadingStateStack.back();
             mReadingStateStack.pop_back();
-            fetchPtNodeInfo();
+            if (!isEnd()) {
+                fetchPtNodeInfo();
+            }
         }
     }
 };
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 19ca354..1632fd0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -93,8 +93,7 @@
     for (int i = 0; i < decayIterationCount; ++i) {
         const float currentRate = static_cast<float>(currentEncodedProbability)
                 / static_cast<float>(MAX_ENCODED_PROBABILITY);
-        const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
-                + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+        const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
         const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
         if (thresholdToDecay < randValue) {
             currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index ded8eaa..cecdd2f 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -19,13 +19,16 @@
 import android.test.AndroidTestCase;
 import android.test.suitebuilder.annotation.LargeTest;
 
+import com.android.inputmethod.latin.makedict.CodePointUtils;
 import com.android.inputmethod.latin.makedict.FormatSpec;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Random;
 
 @LargeTest
 public class BinaryDictionaryDecayingTests extends AndroidTestCase {
@@ -179,4 +182,55 @@
         binaryDictionary.close();
         dictFile.delete();
     }
+
+    public void testAddManyUnigramsToDecayingDict() {
+        final int unigramCount = 30000;
+        final int unigramTypedCount = 100000;
+        final int codePointSetSize = 50;
+        final long seed = System.currentTimeMillis();
+        final Random random = new Random(seed);
+
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+        final ArrayList<String> words = new ArrayList<String>();
+
+        for (int i = 0; i < unigramCount; i++) {
+            final String word = CodePointUtils.generateWord(random, codePointSet);
+            words.add(word);
+        }
+
+        final int maxUnigramCount = Integer.parseInt(
+                binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
+        for (int i = 0; i < unigramTypedCount; i++) {
+            final String word = words.get(random.nextInt(words.size()));
+            binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY);
+
+            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+                final int unigramCountBeforeGC =
+                        Integer.parseInt(binaryDictionary.getPropertyForTests(
+                                BinaryDictionary.UNIGRAM_COUNT_QUERY));
+                while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+                    binaryDictionary.flushWithGC();
+                }
+                final int unigramCountAfterGC =
+                        Integer.parseInt(binaryDictionary.getPropertyForTests(
+                                BinaryDictionary.UNIGRAM_COUNT_QUERY));
+                assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
+            }
+        }
+
+        assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
+                BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
+        assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
+                BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
+    }
 }