Stochastic decay.

Bug: 6669677
Change-Id: Ib2d9228b951c77dab7a8675ce9db60677e87e771
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 9c3dc39..8753c6e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -43,7 +43,7 @@
     }
     *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
     *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
-    if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
+    if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
         // This bigram is too weak to output.
         *outBigramPos = NOT_A_DICT_POS;
     } else {
@@ -261,8 +261,8 @@
             const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
                     bigramFlags);
             const int probabilityToWrite = mIsDecayingDict ?
-                    ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
-                            originalProbability, probability) : probability;
+                    ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+                            probability) : probability;
             const BigramListReadWriteUtils::BigramFlags updatedFlags =
                     BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
                             probabilityToWrite);
@@ -294,7 +294,7 @@
         int *const writingPos) {
     // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
     const int probabilityToWrite = mIsDecayingDict ?
-            ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
+            ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
                     probability;
     return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
             probabilityToWrite, false /* hasNext */, writingPos);
@@ -365,9 +365,9 @@
     *outRemoved = false;
     if (mIsDecayingDict) {
         // Update bigram probability for decaying.
-        const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
+        const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
                 BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
-        if (ForgettingCurveUtils::isValidBigram(newProbability)) {
+        if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
             // Write new probability.
             const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
                     BigramListReadWriteUtils::setProbabilityInFlags(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 26ea6ab..324b530 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -29,14 +29,14 @@
     bool isUselessPtNode = !node->isTerminal();
     if (node->isTerminal() && mIsDecayingDict) {
         const int newProbability =
-                ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
+                ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
         int writingPos = node->getProbabilityFieldPos();
         // Update probability.
         if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
                 mBuffer, newProbability, &writingPos)) {
             return false;
         }
-        if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
+        if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
             isUselessPtNode = false;
         }
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 9dc2abe..70a9ee5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -545,7 +545,7 @@
 int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
         const int newProbability) {
     if (mNeedsToDecay) {
-        return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
+        return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
                 newProbability);
     } else {
         return newProbability;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index c525a8a..62a19a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <stdlib.h>
+
 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
 
 #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -26,106 +28,91 @@
 const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
 
 const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
-const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
-const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
-const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
-const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
-const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
+const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
+const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
+const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
+// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
+// duration of the decay is approximately 66hours.
+const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
 
 /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
-        const int encodedBigramProbabilityDelta) {
+        const int encodedBigramProbability) {
     if (encodedUnigramProbability == NOT_A_PROBABILITY) {
         return NOT_A_PROBABILITY;
-    } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
-        const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
-                encodedUnigramProbability));
-        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+    } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
+        return backoff(decodeUnigramProbability(encodedUnigramProbability));
     } else {
-        const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
-                decodeUnigramProbability(encodedUnigramProbability),
-                decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
-        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+        const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
+        const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
+        return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
     }
 }
 
-/* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
+// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
+// (i.e. unigram probability + bigram probability delta).
+/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
         const int originalEncodedProbability, const int newProbability) {
     if (originalEncodedProbability == NOT_A_PROBABILITY) {
-        // The unigram is not in this dictionary.
-        if (newProbability == NOT_A_PROBABILITY) {
-            // The unigram is not in other dictionaries.
-            return 0;
-        } else {
-            return MIN_VALID_UNIGRAM_PROBABILITY;
-        }
-    } else {
-        if (newProbability != NOT_A_PROBABILITY
-                && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
-            return MIN_VALID_UNIGRAM_PROBABILITY;
-        }
-        return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
-    }
-}
-
-/* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
-    return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
-}
-
-/* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
-        const int encodedProbabilityDelta) {
-    return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
-}
-
-/* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
-        const int originalEncodedProbabilityDelta, const int newProbability) {
-    if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
         // The bigram relation is not in this dictionary.
         if (newProbability == NOT_A_PROBABILITY) {
             // The bigram target is not in other dictionaries.
             return 0;
         } else {
-            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+            return MIN_VALID_ENCODED_PROBABILITY;
         }
     } else {
         if (newProbability != NOT_A_PROBABILITY
-                && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
-            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+                && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+            return MIN_VALID_ENCODED_PROBABILITY;
         }
-        return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
-                MAX_BIGRAM_PROBABILITY_DELTA);
+        return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
     }
 }
 
-/* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
-    return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
+/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
+    return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
 }
 
-/* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
-    return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
+    const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+    // TODO: Implement the decay in more proper way.
+    const float currentRate = static_cast<float>(currentEncodedProbability)
+            / static_cast<float>(MAX_ENCODED_PROBABILITY);
+    const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
+            + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
+    const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+    if (thresholdToDecay < randValue) {
+        return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
+    } else {
+        return currentEncodedProbability;
+    }
 }
 
 /* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
-    const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
+    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
     if (probability < 0) {
         return NOT_A_PROBABILITY;
     } else {
-        return min(probability, MAX_UNIGRAM_PROBABILITY);
+        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
     }
 }
 
-/* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
-        const int encodedProbabilityDelta) {
-    const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
-    if (probabilityDelta < 0) {
+/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
+    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
+    if (probability < 0) {
         return NOT_A_PROBABILITY;
     } else {
-        return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
+        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
     }
 }
 
-/* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
-    return rawProbability;
+// See comments in ProbabilityUtils::backoff().
+/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    } else {
+        return max(unigramProbability - 8, 0);
+    }
 }
 
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 11ffb2e..281f76a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -24,7 +24,6 @@
 // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
 // required to introduced to each terminal PtNode and bigram entry.
 // TODO: Quit using bigram probability to indicate the delta.
-// TODO: Quit using bigram probability delta.
 class ForgettingCurveUtils {
  public:
     static const int MAX_UNIGRAM_COUNT;
@@ -33,38 +32,30 @@
     static const int MAX_BIGRAM_COUNT_AFTER_GC;
 
     static int getProbability(const int encodedUnigramProbability,
-            const int encodedBigramProbabilityDelta);
+            const int encodedBigramProbability);
 
-    static int getUpdatedUnigramProbability(const int originalEncodedProbability,
+    static int getUpdatedEncodedProbability(const int originalEncodedProbability,
             const int newProbability);
 
-    static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
-            const int newProbability);
+    static int isValidEncodedProbability(const int encodedProbability);
 
-    static int isValidUnigram(const int encodedUnigramProbability);
-
-    static int isValidBigram(const int encodedProbabilityDelta);
-
-    static int getUnigramProbabilityToSave(const int encodedProbability);
-
-    static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
+    static int getEncodedProbabilityToSave(const int encodedProbability);
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
 
     static const int MAX_COMPUTED_PROBABILITY;
-    static const int MAX_UNIGRAM_PROBABILITY;
-    static const int MIN_VALID_UNIGRAM_PROBABILITY;
-    static const int UNIGRAM_PROBABILITY_STEP;
-    static const int MAX_BIGRAM_PROBABILITY_DELTA;
-    static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
-    static const int BIGRAM_PROBABILITY_DELTA_STEP;
+    static const int MAX_ENCODED_PROBABILITY;
+    static const int MIN_VALID_ENCODED_PROBABILITY;
+    static const int ENCODED_PROBABILITY_STEP;
+
+    static const float MIN_PROBABILITY_TO_DECAY;
 
     static int decodeUnigramProbability(const int encodedProbability);
 
-    static int decodeBigramProbabilityDelta(const int encodedProbability);
+    static int decodeBigramProbability(const int encodedProbability);
 
-    static int getDecayedProbability(const int rawProbability);
+    static int backoff(const int unigramProbability);
 };
 } // namespace latinime
 #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 8a439fc..b2d31c2 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -50,14 +50,18 @@
     }
 
     private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
-        binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
-        binaryDictionary.flushWithGC();
+        // Entries having low probability would be suppressed once in 2 GCs.
+        final int count = 2;
+        for (int i = 0; i < count; i++) {
+            binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
+            binaryDictionary.flushWithGC();
+        }
     }
 
     private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
         // Currently, probabilities are decayed when GC is run. All entries that have never been
-        // typed in 32 GCs are removed.
-        final int count = 32;
+        // typed in 128 GCs would be removed.
+        final int count = 128;
         for (int i = 0; i < count; i++) {
             binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
             binaryDictionary.flushWithGC();