Fill up a map of bigram addresses for lookup.
We don't want to do a linear search on each terminal when there
may be 100+ bigrams for a given word because that would be
disastrous for performance. Also, we need to resolve each bigram
address anyway.
This change resolves the addresses at first and puts them in a
balanced tree so that lookup will be O(log(n)).
Bug: 6313806
Change-Id: Ibf088035870b9acb41e948f0ab7af4726f2cee24
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index e05e9d6..67f9628 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -135,6 +135,7 @@
// If the word is not found or has no bigrams, this function returns 0.
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
const int prevWordLength) {
+ if (0 >= prevWordLength) return 0;
const uint8_t* const root = DICT;
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
@@ -152,6 +153,22 @@
return pos;
}
+void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
+ const int prevWordLength, std::map<int, int> *map) {
+ const uint8_t* const root = DICT;
+ int pos = getBigramListPositionForWord(prevWord, prevWordLength);
+ if (0 == pos) return;
+
+ int bigramFlags;
+ do {
+ bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+ &pos);
+ (*map)[bigramPos] = frequency;
+ } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+}
+
bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.