Merge "Fix tests."
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 0675de6..85d6d43 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -167,6 +167,14 @@
if (lastBitmapEntryIndex == TrieMap::INVALID_INDEX) {
return TrieMap::INVALID_INDEX;
}
+ const int oldestPrevWordId = prevWordIds[prevWordIds.size() - 1];
+ const TrieMap::Result result = mTrieMap.get(oldestPrevWordId, lastBitmapEntryIndex);
+ if (!result.mIsValid) {
+ if (!mTrieMap.put(oldestPrevWordId,
+ ProbabilityEntry().encode(mHasHistoricalInfo), lastBitmapEntryIndex)) {
+ return TrieMap::INVALID_INDEX;
+ }
+ }
return mTrieMap.getNextLevelBitmapEntryIndex(prevWordIds[prevWordIds.size() - 1],
lastBitmapEntryIndex);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index 3dfaba7..f1bf12c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -36,7 +36,8 @@
// Dummy entry
ProbabilityEntry()
- : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+ : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
+ mHistoricalInfo() {}
// Entry without historical information
ProbabilityEntry(const int flags, const int probability)
@@ -61,7 +62,7 @@
bigramProperty->getCount()) {}
bool isValid() const {
- return (mProbability != NOT_A_PROBABILITY) || hasHistoricalInfo();
+ return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
}
bool hasHistoricalInfo() const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 9acf2d4..39822b9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -53,6 +53,7 @@
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
+const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 9703531..dfcdd4d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -51,6 +51,7 @@
static const int WORD_COUNT_FIELD_SIZE;
// Flags in probability entry.
static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
+ static const uint8_t FLAG_NOT_A_VALID_ENTRY;
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index b808c90..2af218a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -120,16 +120,15 @@
const int ptNodePos =
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
- // TODO: Support n-gram.
const int probability = mBuffers->getLanguageModelDictContent()->getWordProbability(
- prevWordIds.limit(1 /* maxSize */), wordId, mHeaderPolicy);
+ prevWordIds, wordId, mHeaderPolicy);
return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
probability == 0);
}
int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
const int wordId) const {
- if (wordId == NOT_A_WORD_ID) {
+ if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) {
return NOT_A_PROBABILITY;
}
const int ptNodePos =
@@ -138,10 +137,8 @@
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
- // TODO: Support n-gram.
const ProbabilityEntry probabilityEntry =
- mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(
- prevWordIds.limit(1 /* maxSize */), wordId);
+ mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(prevWordIds, wordId);
if (!probabilityEntry.isValid()) {
return NOT_A_PROBABILITY;
}
@@ -164,16 +161,18 @@
if (prevWordIds.empty()) {
return;
}
- // TODO: Support n-gram.
const auto languageModelDictContent = mBuffers->getLanguageModelDictContent();
- for (const auto entry : languageModelDictContent->getProbabilityEntries(
- prevWordIds.limit(1 /* maxSize */))) {
- const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
- const int probability = probabilityEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(
- probabilityEntry.getHistoricalInfo(), mHeaderPolicy) :
- probabilityEntry.getProbability();
- listener->onVisitEntry(probability, entry.getWordId());
+ for (size_t i = 1; i <= prevWordIds.size(); ++i) {
+ for (const auto entry : languageModelDictContent->getProbabilityEntries(
+ prevWordIds.limit(i))) {
+ const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(
+ probabilityEntry.getHistoricalInfo(), mHeaderPolicy)
+ + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */) :
+ probabilityEntry.getProbability();
+ listener->onVisitEntry(probability, entry.getWordId());
+ }
}
}
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
index c5849d0..06f82df 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
@@ -29,7 +29,7 @@
TEST(LanguageModelDictContentTest, TestUnigramProbability) {
LanguageModelDictContent languageModelDictContent(false /* useHistoricalInfo */);
- const int flag = 0xFF;
+ const int flag = 0xF0;
const int probability = 10;
const int wordId = 100;
const ProbabilityEntry probabilityEntry(flag, probability);