Read multi-byte char group counts
Change-Id: Idc62382f1c814e9bd1466c9f7dda1fcc8ba4137d
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 9944fa2..1d74998 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -61,7 +61,9 @@
}
inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
- return dict[(*pos)++];
+ const int msb = dict[(*pos)++];
+ if (msb < 0x80) return msb;
+ return ((msb & 0x7F) << 8) | dict[(*pos)++];
}
inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index cd73fe3..ca7f0be 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -507,9 +507,10 @@
int maxFreq = -1;
const uint8_t* const root = DICT_ROOT;
- mStackChildCount[0] = root[0];
+ int startPos = 0;
+ mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
mStackInputIndex[0] = 0;
- mStackSiblingPos[0] = 1;
+ mStackSiblingPos[0] = startPos;
while (depth >= 0) {
const int charGroupCount = mStackChildCount[depth];
int pos = mStackSiblingPos[depth];