Add Beginning-of-Sentence info in UnigramProperty.

Bug: 14119293
Bug: 14425059
Change-Id: I8a894352568377d32468e5563f4e89af00d22944
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6223f86..5ad2114 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -341,8 +341,8 @@
         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
     }
     // Use 1 for count to indicate the word has inputted.
-    const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-            probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+    const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+            isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
     dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
 }
 
@@ -450,8 +450,9 @@
             shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
         }
         // Use 1 for count to indicate the word has inputted.
-        const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-                unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+        const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+                isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
+                &shortcuts);
         dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
         if (word0) {
             jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a..bcf7d59 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -82,6 +82,12 @@
 
 void Dictionary::addUnigramEntry(const int *const word, const int length,
         const UnigramProperty *const unigramProperty) {
+    if (unigramProperty->representsBeginningOfSentence()
+            && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+                    ->supportsBeginningOfSentence()) {
+        AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+        return;
+    }
     TimeKeeper::setCurrentTime();
     mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
 }
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d255105..902eb00 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@
     };
 
     UnigramProperty()
-            : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
-              mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
+            : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+              mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+              mShortcuts() {}
 
-    UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
-            const int timestamp, const int level, const int count,
-            const std::vector<ShortcutProperty> *const shortcuts)
-            : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const int timestamp, const int level,
+            const int count, const std::vector<ShortcutProperty> *const shortcuts)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
               mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
 
+    bool representsBeginningOfSentence() const {
+        return mRepresentsBeginningOfSentence;
+    }
+
     bool isNotAWord() const {
         return mIsNotAWord;
     }
@@ -94,6 +100,7 @@
     DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
 
     // TODO: Make members const.
+    bool mRepresentsBeginningOfSentence;
     bool mIsNotAWord;
     bool mIsBlacklisted;
     int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629..a612276 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@
 
     virtual const std::vector<int> *getLocale() const = 0;
 
+    virtual bool supportsBeginningOfSentence() const = 0;
+
  protected:
     DictionaryHeaderStructurePolicy() {}
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d151..281c5a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -246,6 +246,10 @@
         return &mLocale;
     }
 
+    bool supportsBeginningOfSentence() const {
+        return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+    }
+
  private:
     DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
index 97e1120..557a0b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
@@ -432,8 +432,8 @@
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e..33e60e2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -164,6 +164,11 @@
                 && isNotAWord();
     }
 
+    AK_FORCE_INLINE int representsBeginningOfSentence() const {
+        return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
+                && isNotAWord();
+    }
+
     // Parent node position
     AK_FORCE_INLINE int getParentPos() const {
         return mParentPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba..a6a470c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@
             shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
 }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e..4495def 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -430,8 +430,8 @@
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);