Fill up a map of bigram addresses for lookup.
We don't want to do a linear search on each terminal: there may
be 100+ bigrams for a given word, so that would be disastrous
for performance. Besides, we need to resolve each bigram address
anyway.
This change resolves the addresses up front and puts them in a
balanced tree (std::map) so that each lookup is O(log(n)).
Bug: 6313806
Change-Id: Ibf088035870b9acb41e948f0ab7af4726f2cee24
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 76f9039..b123321 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -17,6 +17,7 @@
#ifndef LATINIME_BIGRAM_DICTIONARY_H
#define LATINIME_BIGRAM_DICTIONARY_H
+#include <map>
#include <stdint.h>
namespace latinime {
@@ -28,6 +29,8 @@
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
+ void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
+ std::map<int, int> *map);
~BigramDictionary();
private:
bool addWordBigram(unsigned short *word, int length, int frequency);