Add DynamicLanguageModelProbabilityUtils.

Bug: 14425059
Change-Id: Ia58ab3f0ead02798046d182a9464dcbd95f086bc
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 72bddbb..55bb683 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -71,6 +71,7 @@
         ver4_patricia_trie_writing_helper.cpp \
         ver4_pt_node_array_reader.cpp) \
     $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
+        dynamic_language_model_probability_utils.cpp \
         language_model_dict_content.cpp \
         language_model_dict_content_global_counters.cpp \
         shortcut_dict_content.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp
new file mode 100644
index 0000000..b0fbb3e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h"
+
+namespace latinime {
+
+// These counts are used to provide stable probabilities even if the user's input count is small.
+const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNT_FOR_UNIGRAMS = 8192;
+const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNT_FOR_BIGRAMS = 2;
+const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNT_FOR_TRIGRAMS = 2;
+
+// These are encoded backoff weights.
+// Note that we give positive value for trigrams that means the weight is more than 1.
+// TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
+const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHT_FOR_UNIGRAMS = -32;
+const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHT_FOR_BIGRAMS = 0;
+const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHT_FOR_TRIGRAMS = 8;
+
+// This value is used to remove too old entries from the dictionary.
+const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =
+        300 * 24 * 60 * 60; // 300 days
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h
new file mode 100644
index 0000000..88bc58f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H
+#define LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H
+
+#include <algorithm>
+
+#include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
+#include "utils/time_keeper.h"
+
+namespace latinime {
+
+class DynamicLanguageModelProbabilityUtils {
+ public:
+    static float computeRawProbabilityFromCounts(const int count, const int contextCount,
+            const int matchedWordCountInContext) {
+        int minCount = 0;
+        switch (matchedWordCountInContext) {
+            case 1:
+                minCount = ASSUMED_MIN_COUNT_FOR_UNIGRAMS;
+                break;
+            case 2:
+                minCount = ASSUMED_MIN_COUNT_FOR_BIGRAMS;
+                break;
+            case 3:
+                minCount = ASSUMED_MIN_COUNT_FOR_TRIGRAMS;
+                break;
+            default:
+                AKLOGE("computeRawProbabilityFromCounts is called with invalid "
+                        "matchedWordCountInContext (%d).", matchedWordCountInContext);
+                ASSERT(false);
+                return 0.0f;
+        }
+        return static_cast<float>(count) / static_cast<float>(std::max(contextCount, minCount));
+    }
+
+    static float backoff(const int ngramProbability, const int matchedWordCountInContext) {
+        int probability = NOT_A_PROBABILITY;
+
+        switch (matchedWordCountInContext) {
+            case 1:
+                probability = ngramProbability + ENCODED_BACKOFF_WEIGHT_FOR_UNIGRAMS;
+                break;
+            case 2:
+                probability = ngramProbability + ENCODED_BACKOFF_WEIGHT_FOR_BIGRAMS;
+                break;
+            case 3:
+                probability = ngramProbability + ENCODED_BACKOFF_WEIGHT_FOR_TRIGRAMS;
+                break;
+            default:
+                AKLOGE("backoff is called with invalid matchedWordCountInContext (%d).",
+                        matchedWordCountInContext);
+                ASSERT(false);
+                return NOT_A_PROBABILITY;
+        }
+        return std::min(std::max(probability, NOT_A_PROBABILITY), MAX_PROBABILITY);
+    }
+
+    static int getDecayedProbability(const int probability, const HistoricalInfo historicalInfo) {
+        const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp();
+        if (elapsedTime < 0) {
+            AKLOGE("The elapsed time is negatime value. Timestamp overflow?");
+            return NOT_A_PROBABILITY;
+        }
+        // TODO: Improve this logic.
+        // We don't modify probability depending on the elapsed time.
+        return probability;
+    }
+
+    static int shouldRemoveEntryDuringGC(const HistoricalInfo historicalInfo) {
+        // TODO: Improve this logic.
+        const int elapsedTime = TimeKeeper::peekCurrentTime() - historicalInfo.getTimestamp();
+        return elapsedTime > DURATION_TO_DISCARD_ENTRY_IN_SECONDS;
+    }
+
+    static int getPriorityToPreventFromEviction(const HistoricalInfo historicalInfo) {
+        // TODO: Improve this logic.
+        // More recently input entries get higher priority.
+        return historicalInfo.getTimestamp();
+    }
+
+private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
+
+    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram.");
+
+    static const int ASSUMED_MIN_COUNT_FOR_UNIGRAMS;
+    static const int ASSUMED_MIN_COUNT_FOR_BIGRAMS;
+    static const int ASSUMED_MIN_COUNT_FOR_TRIGRAMS;
+
+    static const int ENCODED_BACKOFF_WEIGHT_FOR_UNIGRAMS;
+    static const int ENCODED_BACKOFF_WEIGHT_FOR_BIGRAMS;
+    static const int ENCODED_BACKOFF_WEIGHT_FOR_TRIGRAMS;
+
+    static const int DURATION_TO_DISCARD_ENTRY_IN_SECONDS;
+};
+
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_LANGUAGE_MODEL_PROBABILITY_UTILS_H */