Move word flags to language model dict content.

Bug: 14425059
Change-Id: I64712e5c83d0bc241e6f0f16117ab47b5d75bd4b
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index 902eb00..65c8333 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -71,6 +71,11 @@
         return mIsBlacklisted;
     }
 
+    bool isPossiblyOffensive() const {
+        // TODO: Have dedicated flag.
+        return mProbability == 0;
+    }
+
     bool hasShortcuts() const {
         return !mShortcuts.empty();
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 35f0f76..89094c8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -38,7 +38,7 @@
             0 /* nextLevelBitmapEntryIndex */, outNgramCount);
 }
 
-int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds,
+const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds,
         const int wordId, const HeaderPolicy *const headerPolicy) const {
     int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
     bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex();
@@ -60,17 +60,24 @@
         }
         const ProbabilityEntry probabilityEntry =
                 ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
+        int probability = NOT_A_PROBABILITY;
         if (mHasHistoricalInfo) {
-            const int probability = ForgettingCurveUtils::decodeProbability(
+            const int rawProbability = ForgettingCurveUtils::decodeProbability(
                     probabilityEntry.getHistoricalInfo(), headerPolicy)
                             + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
-            return std::min(probability, MAX_PROBABILITY);
+            probability = std::min(rawProbability, MAX_PROBABILITY);
         } else {
-            return probabilityEntry.getProbability();
+            probability = probabilityEntry.getProbability();
         }
+        // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in
+        // probabilityEntry.
+        const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId);
+        return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(),
+                unigramProbabilityEntry.isBlacklisted(),
+                unigramProbabilityEntry.isPossiblyOffensive());
     }
     // Cannot find the word.
-    return NOT_A_PROBABILITY;
+    return WordAttributes();
 }
 
 ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index a793af4..b7e4af9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "defines.h"
+#include "suggest/core/dictionary/word_attributes.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -128,7 +129,7 @@
             const LanguageModelDictContent *const originalContent,
             int *const outNgramCount);
 
-    int getWordProbability(const WordIdArrayView prevWordIds, const int wordId,
+    const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId,
             const HeaderPolicy *const headerPolicy) const;
 
     ProbabilityEntry getProbabilityEntry(const int wordId) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index f1bf12c..e1e10ca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -49,7 +49,9 @@
 
     // Create from unigram property.
     ProbabilityEntry(const UnigramProperty *const unigramProperty)
-            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())),
+            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
+                    unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+                    unigramProperty->isPossiblyOffensive())),
               mProbability(unigramProperty->getProbability()),
               mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
                       unigramProperty->getCount()) {}
@@ -85,6 +87,18 @@
         return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
     }
 
+    bool isNotAWord() const {
+        return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
+    }
+
+    bool isBlacklisted() const {
+        return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
+    }
+
+    bool isPossiblyOffensive() const {
+        return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
+    }
+
     uint64_t encode(const bool hasHistoricalInfo) const {
         uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
         if (hasHistoricalInfo) {
@@ -142,10 +156,20 @@
                 (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
     }
 
-    static uint8_t createFlags(const bool representsBeginningOfSentence) {
+    static uint8_t createFlags(const bool representsBeginningOfSentence,
+            const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
         uint8_t flags = 0;
         if (representsBeginningOfSentence) {
-            flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
+            flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
+        }
+        if (isNotAWord) {
+            flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
+        }
+        if (isBlacklisted) {
+            flags |= Ver4DictConstants::FLAG_BLACKLISTED;
+        }
+        if (isPossiblyOffensive) {
+            flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
         }
         return flags;
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 39822b9..8e6cb97 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -54,6 +54,9 @@
 
 const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
 const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;
+const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4;
+const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8;
+const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10;
 
 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index dfcdd4d..600b5ff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -52,6 +52,9 @@
     // Flags in probability entry.
     static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
     static const uint8_t FLAG_NOT_A_VALID_ENTRY;
+    static const uint8_t FLAG_NOT_A_WORD;
+    static const uint8_t FLAG_BLACKLISTED;
+    static const uint8_t FLAG_POSSIBLY_OFFENSIVE;
 
     static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
     static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 75ec169..a1a33d2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -191,7 +191,6 @@
             ptNodeWritingPos);
 }
 
-
 bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
         const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
         int *const ptNodeWritingPos) {
@@ -341,8 +340,8 @@
             ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
         return false;
     }
-    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
-            isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+    return updatePtNodeFlags(nodePos, isTerminal,
+            ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
 }
 
 // TODO: Move probability handling code to LanguageModelDictContent.
@@ -361,14 +360,13 @@
     }
 }
 
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
-        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, const bool isTerminal,
         const bool hasMultipleChars) {
     // Create node flags and write them.
     PatriciaTrieReadingUtils::NodeFlags nodeFlags =
-            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
-                    false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars,
-                    CHILDREN_POSITION_FIELD_SIZE);
+            PatriciaTrieReadingUtils::createAndGetFlags(false /* isNotAWord */,
+                    false /* isBlacklisted */, isTerminal, false /* hasShortcutTargets */,
+                    false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
     if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
         AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
         return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 08b7d38..1791527 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -103,8 +103,7 @@
             const ProbabilityEntry *const originalProbabilityEntry,
             const ProbabilityEntry *const probabilityEntry) const;
 
-    bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
-            const bool isTerminal, const bool hasMultipleChars);
+    bool updatePtNodeFlags(const int ptNodePos, const bool isTerminal, const bool hasMultipleChars);
 
     static const int CHILDREN_POSITION_FIELD_SIZE;
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index cca0c29..16ff6c6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -63,14 +63,10 @@
             // valid terminal DicNode.
             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
         }
-        readingHelper.readNextSiblingNode(ptNodeParams);
-        if (ptNodeParams.representsNonWordInfo()) {
-            // Skip PtNodes that represent non-word information.
-            continue;
-        }
         const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
         childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
                 wordId, ptNodeParams.getCodePointArrayView());
+        readingHelper.readNextSiblingNode(ptNodeParams);
     }
     if (readingHelper.isError()) {
         mIsCorrupted = true;
@@ -117,13 +113,8 @@
     if (wordId == NOT_A_WORD_ID) {
         return WordAttributes();
     }
-    const int ptNodePos =
-            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
-    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
-    const int probability = mBuffers->getLanguageModelDictContent()->getWordProbability(
-            prevWordIds, wordId, mHeaderPolicy);
-    return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
-            probability == 0);
+    return mBuffers->getLanguageModelDictContent()->getWordAttributes(prevWordIds, wordId,
+            mHeaderPolicy);
 }
 
 int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
@@ -131,15 +122,10 @@
     if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) {
         return NOT_A_PROBABILITY;
     }
-    const int ptNodePos =
-            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
-    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
-    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
-        return NOT_A_PROBABILITY;
-    }
     const ProbabilityEntry probabilityEntry =
             mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(prevWordIds, wordId);
-    if (!probabilityEntry.isValid()) {
+    if (!probabilityEntry.isValid() || probabilityEntry.isBlacklisted()
+            || probabilityEntry.isNotAWord()) {
         return NOT_A_PROBABILITY;
     }
     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
@@ -511,10 +497,10 @@
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
-            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
-            historicalInfo->getCount(), &shortcuts);
+    const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
+            probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
+            probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
+            historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
 }