Merge "Move distracter filter to dictionary facilitator."
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6223f86..5ad2114 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -341,8 +341,8 @@
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
- const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+ const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+ isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
@@ -450,8 +450,9 @@
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
- const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+ const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+ isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
+ &shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a..bcf7d59 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -82,6 +82,12 @@
void Dictionary::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
+ if (unigramProperty->representsBeginningOfSentence()
+ && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+ ->supportsBeginningOfSentence()) {
+ AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+ return;
+ }
TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d255105..902eb00 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@
};
UnigramProperty()
- : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
+ : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+ mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+ mShortcuts() {}
- UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, const int level, const int count,
- const std::vector<ShortcutProperty> *const shortcuts)
- : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp, const int level,
+ const int count, const std::vector<ShortcutProperty> *const shortcuts)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+ bool representsBeginningOfSentence() const {
+ return mRepresentsBeginningOfSentence;
+ }
+
bool isNotAWord() const {
return mIsNotAWord;
}
@@ -94,6 +100,7 @@
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
// TODO: Make members const.
+ bool mRepresentsBeginningOfSentence;
bool mIsNotAWord;
bool mIsBlacklisted;
int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629..a612276 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@
virtual const std::vector<int> *getLocale() const = 0;
+ virtual bool supportsBeginningOfSentence() const = 0;
+
protected:
DictionaryHeaderStructurePolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d151..281c5a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -246,6 +246,10 @@
return &mLocale;
}
+ bool supportsBeginningOfSentence() const {
+ return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
index 97e1120..557a0b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
@@ -432,8 +432,8 @@
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e..b2e60a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,12 @@
}
AK_FORCE_INLINE bool representsNonWordInfo() const {
- return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ && isNotAWord();
+ }
+
+ AK_FORCE_INLINE int representsBeginningOfSentence() const {
+ return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
&& isNotAWord();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba..a6a470c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e..1858441 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
readingHelper.readNextSiblingNode(ptNodeParams);
- if (!ptNodeParams.representsNonWordInfo()) {
+ if (ptNodeParams.representsNonWordInfo()) {
// Skip PtNodes that represent non-word information.
continue;
}
@@ -430,8 +430,8 @@
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);