Fill up a map of bigram addresses for lookup. We don't want to do a linear search on each terminal when there may be 100+ bigrams for a given word, because that would be disastrous for performance. We need to resolve each bigram address anyway, so this change resolves all the addresses up front and stores them in a balanced tree (std::map), making each lookup O(log(n)). Bug: 6313806 Change-Id: Ibf088035870b9acb41e948f0ab7af4726f2cee24

commit: 1ff8dc47be1734555af1c0c011ea6cf72b395a43 [log] [tgz]
author: Jean Chalard <jchalard@google.com> Wed May 02 16:00:24 2012 +0900
committer: Jean Chalard <jchalard@google.com> Wed May 02 17:50:44 2012 +0900
tree: 7aa3a9c6575bbbeaa586d7f0af23dd1416487f05
parent: a1c89d9dbf3d0462b1e83dda8b2e376d7aa9886f [diff] [blame]
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 76f9039..b123321 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h

@@ -17,6 +17,7 @@
 #ifndef LATINIME_BIGRAM_DICTIONARY_H
 #define LATINIME_BIGRAM_DICTIONARY_H
 
+#include <map>
 #include <stdint.h>
 
 namespace latinime {
@@ -28,6 +29,8 @@
     int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
     int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
+    void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
+            std::map<int, int> *map);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
commit	1ff8dc47be1734555af1c0c011ea6cf72b395a43	[log] [tgz]
author	Jean Chalard <jchalard@google.com>	Wed May 02 16:00:24 2012 +0900
committer	Jean Chalard <jchalard@google.com>	Wed May 02 17:50:44 2012 +0900
tree	7aa3a9c6575bbbeaa586d7f0af23dd1416487f05
parent	a1c89d9dbf3d0462b1e83dda8b2e376d7aa9886f [diff] [blame]