Merge "Move distracter filter to dictionary facilitator."
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6223f86..5ad2114 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -341,8 +341,8 @@
         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
     }
     // Use 1 for count to indicate the word has inputted.
-    const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-            probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, isNotAWord,
+            isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
     dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
 }
 
@@ -450,8 +450,9 @@
             shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
         }
         // Use 1 for count to indicate the word has inputted.
-        const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-                unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+        const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, isNotAWord,
+                isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
+                &shortcuts);
         dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
         if (word0) {
             jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a..bcf7d59 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -82,6 +82,12 @@
 
 void Dictionary::addUnigramEntry(const int *const word, const int length,
         const UnigramProperty *const unigramProperty) {
+    if (unigramProperty->representsBeginningOfSentence()
+            && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+                    ->supportsBeginningOfSentence()) {
+        AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+        return;
+    }
     TimeKeeper::setCurrentTime();
     mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
 }
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d255105..902eb00 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@
     };
 
     UnigramProperty()
-            : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
-              mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
+            : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+              mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+              mShortcuts() {}
 
-    UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
-            const int timestamp, const int level, const int count,
-            const std::vector<ShortcutProperty> *const shortcuts)
-            : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const int timestamp, const int level,
+            const int count, const std::vector<ShortcutProperty> *const shortcuts)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
               mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
 
+    bool representsBeginningOfSentence() const {
+        return mRepresentsBeginningOfSentence;
+    }
+
     bool isNotAWord() const {
         return mIsNotAWord;
     }
@@ -94,6 +100,7 @@
     DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
 
     // TODO: Make members const.
+    bool mRepresentsBeginningOfSentence;
     bool mIsNotAWord;
     bool mIsBlacklisted;
     int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629..a612276 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@
 
     virtual const std::vector<int> *getLocale() const = 0;
 
+    virtual bool supportsBeginningOfSentence() const = 0;
+
  protected:
     DictionaryHeaderStructurePolicy() {}
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d151..281c5a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -246,6 +246,10 @@
         return &mLocale;
     }
 
+    bool supportsBeginningOfSentence() const {
+        return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+    }
+
  private:
     DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
index 97e1120..557a0b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
@@ -432,8 +432,8 @@
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e..b2e60a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,16 @@
     }
 
+    // Returns whether there is at least one code point, CharUtils::isInUnicodeSpace() rejects
+    // the first code point, and isNotAWord() is set.
     AK_FORCE_INLINE bool representsNonWordInfo() const {
-        return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+        return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+                && isNotAWord();
+    }
+
+    // Returns whether there is at least one code point, the first code point equals
+    // CODE_POINT_BEGINNING_OF_SENTENCE, and isNotAWord() is set.
+    AK_FORCE_INLINE bool representsBeginningOfSentence() const {
+        return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
                 && isNotAWord();
     }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba..a6a470c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@
             shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
 }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e..1858441 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@
             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
         }
         readingHelper.readNextSiblingNode(ptNodeParams);
-        if (!ptNodeParams.representsNonWordInfo()) {
+        if (ptNodeParams.representsNonWordInfo()) {
             // Skip PtNodes that represent non-word information.
             continue;
         }
@@ -430,8 +430,8 @@
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);