Use word id to get code points of the word.
Bug: 14425059
Change-Id: I81accffcdf5abe447c33ffc3a8e8315f9a4cde7f
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 9562431..8f9b2aa 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -67,8 +67,8 @@
mDictStructurePolicy(dictStructurePolicy) {}
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
- const int targetPtNodePos) {
- if (targetPtNodePos == NOT_A_DICT_POS) {
+ const int targetWordId) {
+ if (targetWordId == NOT_A_WORD_ID) {
return;
}
if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
@@ -78,8 +78,8 @@
int targetWordCodePoints[MAX_WORD_LENGTH];
int unigramProbability = 0;
const int codePointCount = mDictStructurePolicy->
- getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
- MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
+ getCodePointsAndProbabilityAndReturnCodePointCount(targetWordId, MAX_WORD_LENGTH,
+ targetWordCodePoints, &unigramProbability);
if (codePointCount <= 0) {
return;
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 732d3b1..50951fb 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -120,7 +120,7 @@
NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const suggestionResults,
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
- virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
+ virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 36fa6e8..aeeb66f 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -50,7 +50,7 @@
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
virtual int getWordId(const CodePointArrayView wordCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 28c5eb1..929dc3d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -91,9 +91,10 @@
}
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+ const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
readingHelper.initWithPtNodePos(ptNodePos);
const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability);
@@ -492,8 +493,8 @@
// Word (unigram) probability
int word1Probability = NOT_A_PROBABILITY;
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
- &word1Probability);
+ getWordIdFromTerminalPtNodePos(word1TerminalPtNodePos), MAX_WORD_LENGTH,
+ bigramWord1CodePoints, &word1Probability);
const std::vector<int> word1(bigramWord1CodePoints,
bigramWord1CodePoints + codePointCount);
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
@@ -550,7 +551,8 @@
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
*outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints,
+ &unigramProbability);
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index bead2ff..562c219 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -86,7 +86,7 @@
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 88fe3ef..8aa7234 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -57,7 +57,7 @@
}
}
-// This retrieves code points and the probability of the word by its terminal position.
+// This retrieves code points and the probability of the word by its id.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
@@ -68,16 +68,16 @@
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
- * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
- * what is stored as the "bigram position" in each bigram)
+ * wordId: Id of the word we are searching for.
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
* outUnigramProbability: a pointer to an int to write the probability into.
* Return value : the code point count, of 0 if the word was not found.
*/
// TODO: Split this function to be more readable
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
+ const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
@@ -410,8 +410,8 @@
if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
int word1Probability = NOT_A_PROBABILITY;
const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
- &word1Probability);
+ getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH,
+ bigramWord1CodePoints, &word1Probability);
const std::vector<int> word1(bigramWord1CodePoints,
bigramWord1CodePoints + word1CodePointCount);
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
@@ -465,8 +465,9 @@
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
- MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints,
+ &unigramProbability);
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 08c3e1d..66df527 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -61,7 +61,7 @@
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index e11b94c..094ce42 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -81,9 +81,11 @@
}
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+ const int ptNodePos =
+ mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
readingHelper.initWithPtNodePos(ptNodePos);
const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability);
@@ -488,18 +490,13 @@
// TODO: Support n-gram.
std::vector<BigramProperty> bigrams;
const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
- const TerminalPositionLookupTable *const terminalPositionLookupTable =
- mBuffers->getTerminalPositionLookupTable();
int bigramWord1CodePoints[MAX_WORD_LENGTH];
for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
prevWordIds)) {
- const int word1TerminalPtNodePos =
- terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
// Word (unigram) probability
int word1Probability = NOT_A_PROBABILITY;
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
- &word1Probability);
+ entry.getWordId(), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability);
const std::vector<int> word1(bigramWord1CodePoints,
bigramWord1CodePoints + codePointCount);
const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
@@ -553,9 +550,11 @@
return 0;
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
+ const PtNodeParams ptNodeParams =
+ mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPtNodePos);
int unigramProbability = NOT_A_PROBABILITY;
*outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ ptNodeParams.getTerminalId(), MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 9dc83d8..0b8eec4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -63,7 +63,7 @@
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int wordId, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;