Merge "Further fixes to treat 0-frequency words"
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index 1e2494e..8f07ce2 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -31,6 +31,7 @@
 
 const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
         NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_A_PERFECT_MATCH = NOT_AN_ERROR;
 
 const ErrorTypeUtils::ErrorType
         ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index fd1d5fc..e92c509 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -52,6 +52,10 @@
         return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
     }
 
+    static bool isPerfectMatch(const ErrorType containedErrorTypes) {
+        return (containedErrorTypes & ~ERRORS_TREATED_AS_A_PERFECT_MATCH) == 0;
+    }
+
     static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
         return (containedErrorTypes
                 & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
@@ -73,6 +77,7 @@
     DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
 
     static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+    static const ErrorType ERRORS_TREATED_AS_A_PERFECT_MATCH;
     static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
 };
 } // namespace latinime
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index ce3684a..b9dda83 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -30,7 +30,7 @@
  public:
     virtual int calculateFinalScore(const float compoundDistance, const int inputSize,
             const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
-            const bool boostExactMatches) const = 0;
+            const bool boostExactMatches, const bool hasProbabilityZero) const = 0;
     virtual void getMostProbableString(const DicTraverseSession *const traverseSession,
             const float weightOfLangModelVsSpatialModel,
             SuggestionResults *const outSuggestionResults) const = 0;
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index 23103b9..74db959 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -161,7 +161,7 @@
             compoundDistance, traverseSession->getInputSize(),
             terminalDicNode->getContainedErrorTypes(),
             (forceCommitMultiWords && terminalDicNode->hasMultipleWords()),
-            boostExactMatches);
+            boostExactMatches, wordAttributes.getProbability() == 0);
 
     // Don't output invalid or blocked offensive words. However, we still need to submit their
     // shortcuts if any.
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index a6f9a8b..856808a 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -24,6 +24,7 @@
 const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
 
 const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f;
+const float ScoringParams::PERFECT_MATCH_PROMOTION = 1.1f;
 const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f;
 const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f;
 const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index b8f8895..6f327a3 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -34,6 +34,7 @@
     static const int THRESHOLD_SHORT_WORD_LENGTH;
 
     static const float EXACT_MATCH_PROMOTION;
+    static const float PERFECT_MATCH_PROMOTION;
     static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
     static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
     static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 0240bcf..6acd767 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -44,23 +44,50 @@
 
     AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize,
             const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
-            const bool boostExactMatches) const {
+            const bool boostExactMatches, const bool hasProbabilityZero) const {
         const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
                 + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
         float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance;
         if (forceCommit) {
             score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD;
         }
-        if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
-            score += ScoringParams::EXACT_MATCH_PROMOTION;
-            if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
-                score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+        if (hasProbabilityZero) {
+            // Previously, when both legitimate 0-frequency words (such as distracters) and
+            // offensive words were encoded in the same way, distracters would never show up
+            // when the user blocked offensive words (the default setting, as well as the
+            // setting for regression tests).
+            //
+            // When b/11031090 was fixed and a separate encoding was used for offensive words,
+            // 0-frequency words would no longer be blocked when they were an "exact match"
+            // (where case mismatches and accent mismatches would be considered an "exact
+            // match"). The exact match boosting functionality meant that, for example, when
+            // the user typed "mt" they would be suggested the word "Mt", although they most
+            // probably meant to type "my".
+            //
+            // For this reason, we introduced this change, which does the following:
+            // * Defines the "perfect match" as a really exact match, with no room for case or
+            // accent mismatches
+            // * When the target word has probability zero (as "Mt" does, because it is a
+            // distracter), ONLY boost its score if it is a perfect match.
+            //
+            // By doing this, when the user types "mt", the word "Mt" will NOT be boosted, and
+            // they will get "my". However, if the user makes an explicit effort to type "Mt",
+            // we do boost the word "Mt" so that the user's input is not autocorrected to "My".
+            if (boostExactMatches && ErrorTypeUtils::isPerfectMatch(containedErrorTypes)) {
+                score += ScoringParams::PERFECT_MATCH_PROMOTION;
             }
-            if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
-                score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
-            }
-            if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
-                score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+        } else {
+            if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
+                score += ScoringParams::EXACT_MATCH_PROMOTION;
+                if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
+                    score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+                }
+                if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
+                    score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+                }
+                if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
+                    score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+                }
             }
         }
         return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);