Have ver4 support decaying dictionary

Bug: 11073222

Change-Id: I7f0002c4743ab3bb1ebaac1bca6e367e6b220010
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 628d3ab..dc2adb4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -20,6 +20,7 @@
 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
 
 namespace latinime {
 
@@ -46,10 +47,12 @@
         if (!mBigramDictContent->createNewBigramList(terminalId)) {
             return false;
         }
+        const int probabilityToWrite = getUpdatedProbability(
+                NOT_A_PROBABILITY /* originalProbability */, newProbability);
         // Write an entry.
-        int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
-        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
-                false /* hasNext */, newTargetTerminalId, &writingPos)) {
+        const int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
+        if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
+                newTargetTerminalId, writingPos)) {
             return false;
         }
         if (outAddedNewEntry) {
@@ -61,19 +64,18 @@
     const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
     if (entryPosToUpdate != NOT_A_DICT_POS) {
         // Overwrite existing entry.
-        int readingPos = entryPosToUpdate;
         bool hasNext = false;
         int probability = NOT_A_PROBABILITY;
         int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
-        mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
-                &targetTerminalId, &readingPos);
+        mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
+                entryPosToUpdate);
+        const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
         if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
             // Reuse invalid entry.
             *outAddedNewEntry = true;
         }
-        int writingPos = entryPosToUpdate;
-        return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
-                newTargetTerminalId, &writingPos);
+        return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
+                newTargetTerminalId, entryPosToUpdate);
     }
 
     // Add new entry to the bigram list.
@@ -83,7 +85,9 @@
     }
     // Write new entry at a head position of the bigram list.
     int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
-    if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+    const int probabilityToWrite = getUpdatedProbability(
+            NOT_A_PROBABILITY /* originalProbability */, newProbability);
+    if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
             true /* hasNext */, newTargetTerminalId, &writingPos)) {
         return false;
     }
@@ -105,20 +109,18 @@
         // Bigram entry doesn't exist.
         return false;
     }
-    int readingPos = entryPosToUpdate;
     bool hasNext = false;
     int probability = NOT_A_PROBABILITY;
     int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
-    mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
-            &originalTargetTerminalId, &readingPos);
+    mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
+            entryPosToUpdate);
     if (targetTerminalId != originalTargetTerminalId) {
         // Bigram entry doesn't exist.
         return false;
     }
-    int writingPos = entryPosToUpdate;
     // Remove bigram entry by overwriting target terminal Id.
-    return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
-            Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+    return mBigramDictContent->writeBigramEntry(probability, hasNext,
+            Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
 }
 
 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
@@ -143,9 +145,28 @@
                 targetTerminalId);
         if (targetPtNodePos == NOT_A_DICT_POS) {
             // Invalidate bigram entry.
-            int writingPos = entryPos;
-            return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
-                    Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+            if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
+                    Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+                return false;
+            }
+        } else if (mNeedsToDecayWhenUpdating) {
+            probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
+                    probability, mHeaderPolicy);
+            if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
+                if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
+                        entryPos)) {
+                    return false;
+                }
+                *outBigramCount += 1;
+            } else {
+                // Remove entry.
+                if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
+                        Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+                    return false;
+                }
+            }
+        } else {
+            *outBigramCount += 1;
         }
     }
     return true;
@@ -192,4 +213,14 @@
     return invalidEntryPos;
 }
 
+int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
+        const int newProbability) const {
+    if (!mNeedsToDecayWhenUpdating) {
+        // Decaying is off: the probability passed in is used as it is.
+        return newProbability;
+    }
+    // Decaying is on: the forgetting curve helper computes the value from both probabilities.
+    return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability);
+}
+
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index 5b7d5b5..ed8bdb8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -23,14 +23,18 @@
 namespace latinime {
 
 class BigramDictContent;
+class DictionaryHeaderStructurePolicy;
 class TerminalPositionLookupTable;
 
 class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
  public:
     Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
-            const TerminalPositionLookupTable *const terminalPositionLookupTable)
+            const TerminalPositionLookupTable *const terminalPositionLookupTable,
+            const DictionaryHeaderStructurePolicy *const headerPolicy,
+            const bool needsToDecayWhenUpdating)
             : mBigramDictContent(bigramDictContent),
-              mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+              mTerminalPositionLookupTable(terminalPositionLookupTable),
+              mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
 
     void getNextBigram(int *const outBigramPos, int *const outProbability,
             bool *const outHasNext, int *const bigramEntryPos) const;
@@ -54,8 +58,12 @@
 
     int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
 
+    int getUpdatedProbability(const int originalProbability, const int newProbability) const;
+
     BigramDictContent *const mBigramDictContent;
     const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+    const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
+    const bool mNeedsToDecayWhenUpdating;
 };
 } // namespace latinime
 #endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 9066876..4cd9672 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -103,6 +103,7 @@
     return true;
 }
 
+// Returns whether GC for the bigram list succeeded.
 bool BigramDictContent::runGCBigramList(const int bigramListPos,
         const BigramDictContent *const sourceBigramDictContent, const int toPos,
         const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
@@ -121,9 +122,8 @@
         TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
                 terminalIdMap->find(targetTerminalId);
         if (it == terminalIdMap->end()) {
-            AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
-                    targetTerminalId, terminalIdMap->size());
-            return false;
+            // Target word has been removed.
+            continue;
         }
         if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
                 &writingPos)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index ec0d756..cf380f4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -38,6 +38,13 @@
             : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
                       Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
 
+    void getBigramEntry(int *const outProbability, bool *const outHasNext,
+            int *const outTargetTerminalId, const int bigramEntryPos) const {
+        // Read through a scratch cursor; the advanced position is not reported to the caller.
+        int cursor = bigramEntryPos;
+        getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId, &cursor);
+    }
+
     void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
             int *const outTargetTerminalId, int *const bigramEntryPos) const;
 
@@ -50,6 +57,13 @@
         return addressLookupTable->get(terminalId);
     }
 
+    bool writeBigramEntry(const int probability, const bool hasNext, const int targetTerminalId,
+            const int entryWritingPos) {
+        // hasNext is a flag, hence bool. Returns the result of the underlying positional write.
+        int pos = entryWritingPos;
+        return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, &pos);
+    }
+
     bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
             const int targetTerminalId, int *const entryWritingPos);
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 6fe978d..3606a2a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -43,7 +43,8 @@
                       false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
               mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
               mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
-                      mBuffers.get()->getTerminalPositionLookupTable()),
+                      mBuffers.get()->getTerminalPositionLookupTable(), &mHeaderPolicy,
+                      mHeaderPolicy.isDecayingDict()),
               mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
                       mBuffers.get()->getTerminalPositionLookupTable()),
               mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index f141d52..ea03c72 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -83,7 +83,7 @@
     Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
             mBuffers->getProbabilityDictContent());
     Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(),
-            mBuffers->getTerminalPositionLookupTable());
+            mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
     Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
             mBuffers->getTerminalPositionLookupTable());
     Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
@@ -134,7 +134,8 @@
     Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
             buffersToWrite->getProbabilityDictContent());
     Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
-            buffersToWrite->getTerminalPositionLookupTable());
+            buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
+            false /* needsToDecay */);
     Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
             buffersToWrite->getTerminalPositionLookupTable());
     Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 12b1caa..140ab63 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -72,26 +72,63 @@
         }
     }
 
-    private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
-        final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
+    private File createEmptyDictionaryAndGetFile(final String dictId,
+            final int formatVersion) throws IOException {
+        if (formatVersion == 3) {
+            return createEmptyVer3DictionaryAndGetFile(dictId);
+        }
+        if (formatVersion == 4) {
+            return createEmptyVer4DictionaryAndGetFile(dictId);
+        }
+        final String message = "Dictionary format version " + formatVersion + " is not supported.";
+        throw new IOException(message); // No helper exists for other versions.
+    }
+    private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
+        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
                 getContext().getCacheDir());
+        file.delete(); // Free the temp file's unique name; the result is not checked.
+        file.mkdir(); // Recreate that name as a directory; its path is the dict path below.
         Map<String, String> attributeMap = new HashMap<String, String>();
         attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
                 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
         attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
                 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
         if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
-                3 /* dictVersion */, attributeMap)) {
+                4 /* dictVersion */, attributeMap)) {
+            return new File(file, FormatSpec.TRIE_FILE_EXTENSION); // Child of the directory.
+        } else {
+            throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
+                    + FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
+        }
+    }
+
+    private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
+        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
+                getContext().getCacheDir());
+        file.delete(); // Remove the temp file (result unchecked); only its path is used below.
+        Map<String, String> attributeMap = new HashMap<String, String>();
+        attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
+                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
+        attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
+                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
+        if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
+                attributeMap)) {
             return file;
         } else {
-            throw new IOException("Empty dictionary cannot be created.");
+            throw new IOException(
+                    "Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
         }
     }
 
     public void testAddValidAndInvalidWords() {
+        testAddValidAndInvalidWords(3 /* formatVersion */); // Runs once per format version.
+        testAddValidAndInvalidWords(4 /* formatVersion */);
+    }
+
+    private void testAddValidAndInvalidWords(final int formatVersion) {
         File dictFile = null;
         try {
-            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
         } catch (IOException e) {
             fail("IOException while writing an initial dictionary : " + e);
         }
@@ -111,7 +148,6 @@
         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
         assertTrue(binaryDictionary.isValidWord("b"));
 
-        final int unigramProbability = binaryDictionary.getFrequency("a");
         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
         assertFalse(binaryDictionary.isValidBigram("a", "b"));
         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
@@ -136,9 +172,14 @@
     }
 
     public void testDecayingProbability() {
+        testDecayingProbability(3 /* formatVersion */); // Runs once per format version.
+        testDecayingProbability(4 /* formatVersion */);
+    }
+
+    private void testDecayingProbability(final int formatVersion) {
         File dictFile = null;
         try {
-            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
         } catch (IOException e) {
             fail("IOException while writing an initial dictionary : " + e);
         }
@@ -190,6 +231,11 @@
     }
 
     public void testAddManyUnigramsToDecayingDict() {
+        testAddManyUnigramsToDecayingDict(3 /* formatVersion */); // Runs once per format version.
+        testAddManyUnigramsToDecayingDict(4 /* formatVersion */);
+    }
+
+    private void testAddManyUnigramsToDecayingDict(final int formatVersion) {
         final int unigramCount = 30000;
         final int unigramTypedCount = 100000;
         final int codePointSetSize = 50;
@@ -198,7 +244,7 @@
 
         File dictFile = null;
         try {
-            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
         } catch (IOException e) {
             fail("IOException while writing an initial dictionary : " + e);
         }
@@ -242,6 +288,11 @@
     }
 
     public void testAddManyBigramsToDecayingDict() {
+        testAddManyBigramsToDecayingDict(3 /* formatVersion */); // Runs once per format version.
+        testAddManyBigramsToDecayingDict(4 /* formatVersion */);
+    }
+
+    private void testAddManyBigramsToDecayingDict(final int formatVersion) {
         final int unigramCount = 5000;
         final int bigramCount = 30000;
         final int bigramTypedCount = 100000;
@@ -251,7 +302,7 @@
 
         File dictFile = null;
         try {
-            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
         } catch (IOException e) {
             fail("IOException while writing an initial dictionary : " + e);
         }