blob: 1a39f585b4b721ebf044363cc51c64699fb29052 [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070022//#define LOG_TAG "dictionary.cpp"
Ken Wakasa826269c2010-04-27 10:28:14 +090023//#include <cutils/log.h>
24#define LOGI
The Android Open Source Project923bf412009-03-13 15:11:42 -070025
26#include "dictionary.h"
27#include "basechars.h"
Ken Wakasa707505e2010-04-21 02:35:47 +090028#include "char_utils.h"
The Android Open Source Project923bf412009-03-13 15:11:42 -070029
30#define DEBUG_DICT 0
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070031#define DICTIONARY_VERSION_MIN 200
32#define DICTIONARY_HEADER_SIZE 2
33#define NOT_VALID_WORD -99
The Android Open Source Project923bf412009-03-13 15:11:42 -070034
35namespace latinime {
36
37Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
38{
39 mDict = (unsigned char*) dict;
40 mTypedLetterMultiplier = typedLetterMultiplier;
41 mFullWordMultiplier = fullWordMultiplier;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070042 getVersionNumber();
The Android Open Source Project923bf412009-03-13 15:11:42 -070043}
44
45Dictionary::~Dictionary()
46{
47}
48
49int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasani1b62ff12010-02-05 14:07:04 -080050 int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
51 int *nextLetters, int nextLettersSize)
The Android Open Source Project923bf412009-03-13 15:11:42 -070052{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070053 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070054 mFrequencies = frequencies;
55 mOutputChars = outWords;
56 mInputCodes = codes;
57 mInputLength = codesSize;
58 mMaxAlternatives = maxAlternatives;
59 mMaxWordLength = maxWordLength;
60 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070061 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070062 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
Amith Yamasani1b62ff12010-02-05 14:07:04 -080063 mNextLettersFrequencies = nextLetters;
64 mNextLettersSize = nextLettersSize;
The Android Open Source Project923bf412009-03-13 15:11:42 -070065
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070066 if (checkIfDictVersionIsLatest()) {
67 getWordsRec(DICTIONARY_HEADER_SIZE, 0, mInputLength * 3, false, 1, 0, 0);
68 } else {
69 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
70 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070071
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070072 // Get the word count
73 suggWords = 0;
74 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
75 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
Amith Yamasani1b62ff12010-02-05 14:07:04 -080076
77 if (DEBUG_DICT) {
78 LOGI("Next letters: ");
79 for (int k = 0; k < nextLettersSize; k++) {
80 if (mNextLettersFrequencies[k] > 0) {
81 LOGI("%c = %d,", k, mNextLettersFrequencies[k]);
82 }
83 }
84 LOGI("\n");
85 }
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070086 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070087}
88
Amith Yamasani1b62ff12010-02-05 14:07:04 -080089void
90Dictionary::registerNextLetter(unsigned short c)
91{
92 if (c < mNextLettersSize) {
93 mNextLettersFrequencies[c]++;
94 }
95}
96
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070097void
98Dictionary::getVersionNumber()
99{
100 mVersion = (mDict[0] & 0xFF);
101 mBigram = (mDict[1] & 0xFF);
102 LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram);
103}
104
105// Checks whether it has the latest dictionary or the old dictionary
106bool
107Dictionary::checkIfDictVersionIsLatest()
108{
109 return (mVersion >= DICTIONARY_VERSION_MIN) && (mBigram == 1 || mBigram == 0);
110}
111
The Android Open Source Project923bf412009-03-13 15:11:42 -0700112unsigned short
113Dictionary::getChar(int *pos)
114{
115 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
116 // If the code is 255, then actual 16 bit code follows (in big endian)
117 if (ch == 0xFF) {
118 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
119 (*pos) += 2;
120 }
121 return ch;
122}
123
124int
125Dictionary::getAddress(int *pos)
126{
127 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700128 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
129 *pos += 1;
130 } else {
131 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
132 address += (mDict[*pos + 1] & 0xFF) << 8;
133 address += (mDict[*pos + 2] & 0xFF);
134 *pos += 3;
135 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700136 return address;
137}
138
139int
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700140Dictionary::getFreq(int *pos)
141{
142 int freq = mDict[(*pos)++] & 0xFF;
143
144 if (checkIfDictVersionIsLatest()) {
145 // skipping bigram
146 int bigramExist = (mDict[*pos] & FLAG_BIGRAM_READ);
147 if (bigramExist > 0) {
148 int nextBigramExist = 1;
149 while (nextBigramExist > 0) {
150 (*pos) += 3;
151 nextBigramExist = (mDict[(*pos)++] & FLAG_BIGRAM_CONTINUED);
152 }
153 } else {
154 (*pos)++;
155 }
156 }
157
158 return freq;
159}
160
161int
The Android Open Source Project923bf412009-03-13 15:11:42 -0700162Dictionary::wideStrLen(unsigned short *str)
163{
164 if (!str) return 0;
165 unsigned short *end = str;
166 while (*end)
167 end++;
168 return end - str;
169}
170
171bool
172Dictionary::addWord(unsigned short *word, int length, int frequency)
173{
174 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700175 if (DEBUG_DICT) {
176 char s[length + 1];
177 for (int i = 0; i <= length; i++) s[i] = word[i];
178 LOGI("Found word = %s, freq = %d : \n", s, frequency);
179 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700180
181 // Find the right insertion point
182 int insertAt = 0;
183 while (insertAt < mMaxWords) {
184 if (frequency > mFrequencies[insertAt]
185 || (mFrequencies[insertAt] == frequency
186 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
187 break;
188 }
189 insertAt++;
190 }
191 if (insertAt < mMaxWords) {
192 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
193 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
194 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
195 mFrequencies[insertAt] = frequency;
196 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
197 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
198 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
199 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
200 while (length--) {
201 *dest++ = *word++;
202 }
203 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700204 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
205 return true;
206 }
207 return false;
208}
209
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700210bool
211Dictionary::addWordBigram(unsigned short *word, int length, int frequency)
212{
213 word[length] = 0;
214 if (DEBUG_DICT) {
215 char s[length + 1];
216 for (int i = 0; i <= length; i++) s[i] = word[i];
217 LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency);
218 }
219
220 // Find the right insertion point
221 int insertAt = 0;
222 while (insertAt < mMaxBigrams) {
223 if (frequency > mBigramFreq[insertAt]
224 || (mBigramFreq[insertAt] == frequency
225 && length < wideStrLen(mBigramChars + insertAt * mMaxWordLength))) {
226 break;
227 }
228 insertAt++;
229 }
230 LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
231 if (insertAt < mMaxBigrams) {
232 memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
233 (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
234 (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
235 mBigramFreq[insertAt] = frequency;
236 memmove((char*) mBigramChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
237 (char*) mBigramChars + (insertAt ) * mMaxWordLength * sizeof(short),
238 (mMaxBigrams - insertAt - 1) * sizeof(short) * mMaxWordLength);
239 unsigned short *dest = mBigramChars + (insertAt ) * mMaxWordLength;
240 while (length--) {
241 *dest++ = *word++;
242 }
243 *dest = 0; // NULL terminate
244 if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt);
245 return true;
246 }
247 return false;
248}
249
The Android Open Source Project923bf412009-03-13 15:11:42 -0700250unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700251Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700252 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
253 c = BASE_CHARS[c];
254 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700255 if (c >='A' && c <= 'Z') {
256 c |= 32;
257 } else if (c > 127) {
Ken Wakasa707505e2010-04-21 02:35:47 +0900258 c = latin_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700259 }
260 return c;
261}
262
263bool
264Dictionary::sameAsTyped(unsigned short *word, int length)
265{
266 if (length != mInputLength) {
267 return false;
268 }
269 int *inputCodes = mInputCodes;
270 while (length--) {
271 if ((unsigned int) *inputCodes != (unsigned int) *word) {
272 return false;
273 }
274 inputCodes += mMaxAlternatives;
275 word++;
276 }
277 return true;
278}
279
// Apostrophe. getWordsRec() may step over it in a dictionary word when the user did
// not type one. Never modified, hence const.
static const char QUOTE = '\'';
281
282void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700283Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
284 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700285{
286 // Optimization: Prune out words that are too long compared to how much was typed.
287 if (depth > maxDepth) {
288 return;
289 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700290 if (diffs > mMaxEditDistance) {
291 return;
292 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700293 int count = getCount(&pos);
294 int *currentChars = NULL;
295 if (mInputLength <= inputIndex) {
296 completion = true;
297 } else {
298 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
299 }
300
301 for (int i = 0; i < count; i++) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700302 // -- at char
The Android Open Source Project923bf412009-03-13 15:11:42 -0700303 unsigned short c = getChar(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700304 // -- at flag/add
Amith Yamasanif1150882009-08-07 14:04:24 -0700305 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700306 bool terminal = getTerminal(&pos);
307 int childrenAddress = getAddress(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700308 // -- after address or flag
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700309 int freq = 1;
310 if (terminal) freq = getFreq(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700311 // -- after add or freq
312
The Android Open Source Project923bf412009-03-13 15:11:42 -0700313 // If we are only doing completions, no need to look at the typed characters.
314 if (completion) {
315 mWord[depth] = c;
316 if (terminal) {
317 addWord(mWord, depth + 1, freq * snr);
Amith Yamasani1b62ff12010-02-05 14:07:04 -0800318 if (depth >= mInputLength && mSkipPos < 0) {
319 registerNextLetter(mWord[mInputLength]);
320 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700321 }
322 if (childrenAddress != 0) {
323 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700324 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700325 }
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700326 } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) {
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700327 // Skip the ' or other letter and continue deeper
328 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700329 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700330 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700331 }
332 } else {
333 int j = 0;
334 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700335 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700336 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700337 mWord[depth] = c;
338 if (mInputLength == inputIndex + 1) {
339 if (terminal) {
340 if (//INCLUDE_TYPED_WORD_IF_VALID ||
341 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700342 int finalFreq = freq * snr * addedWeight;
343 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
344 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700345 }
346 }
347 if (childrenAddress != 0) {
348 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700349 maxDepth, true, snr * addedWeight, inputIndex + 1,
350 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700351 }
352 } else if (childrenAddress != 0) {
353 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700354 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700355 }
356 }
357 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700358 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700359 }
360 }
361 }
362}
363
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700364int
365Dictionary::getBigramAddress(int *pos, bool advance)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700366{
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700367 int address = 0;
368
369 address += (mDict[*pos] & 0x3F) << 16;
370 address += (mDict[*pos + 1] & 0xFF) << 8;
371 address += (mDict[*pos + 2] & 0xFF);
372
373 if (advance) {
374 *pos += 3;
375 }
376
377 return address;
378}
379
380int
381Dictionary::getBigramFreq(int *pos)
382{
383 int freq = mDict[(*pos)++] & FLAG_BIGRAM_FREQ;
384
385 return freq;
386}
387
388
389int
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700390Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes, int codesSize,
391 unsigned short *bigramChars, int *bigramFreq, int maxWordLength, int maxBigrams,
392 int maxAlternatives)
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700393{
394 mBigramFreq = bigramFreq;
395 mBigramChars = bigramChars;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700396 mInputCodes = codes;
397 mInputLength = codesSize;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700398 mMaxWordLength = maxWordLength;
399 mMaxBigrams = maxBigrams;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700400 mMaxAlternatives = maxAlternatives;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700401
402 if (mBigram == 1 && checkIfDictVersionIsLatest()) {
403 int pos = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
404 LOGI("Pos -> %d\n", pos);
405 if (pos < 0) {
406 return 0;
407 }
408
409 int bigramCount = 0;
410 int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
411 if (bigramExist > 0) {
412 int nextBigramExist = 1;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700413 while (nextBigramExist > 0 && bigramCount < maxBigrams) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700414 int bigramAddress = getBigramAddress(&pos, true);
415 int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]);
416 // search for all bigrams and store them
417 searchForTerminalNode(bigramAddress, frequency);
418 nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
419 bigramCount++;
420 }
421 }
422
423 return bigramCount;
424 }
425 return 0;
426}
427
428void
429Dictionary::searchForTerminalNode(int addressLookingFor, int frequency)
430{
431 // track word with such address and store it in an array
432 unsigned short word[mMaxWordLength];
433
434 int pos;
435 int followDownBranchAddress = DICTIONARY_HEADER_SIZE;
436 bool found = false;
437 char followingChar = ' ';
438 int depth = -1;
439
440 while(!found) {
441 bool followDownAddressSearchStop = false;
442 bool firstAddress = true;
443 bool haveToSearchAll = true;
444
445 if (depth >= 0) {
446 word[depth] = (unsigned short) followingChar;
447 }
448 pos = followDownBranchAddress; // pos start at count
449 int count = mDict[pos] & 0xFF;
450 LOGI("count - %d\n",count);
451 pos++;
452 for (int i = 0; i < count; i++) {
453 // pos at data
454 pos++;
455 // pos now at flag
456 if (!getFirstBitOfByte(&pos)) { // non-terminal
457 if (!followDownAddressSearchStop) {
458 int addr = getBigramAddress(&pos, false);
459 if (addr > addressLookingFor) {
460 followDownAddressSearchStop = true;
461 if (firstAddress) {
462 firstAddress = false;
463 haveToSearchAll = true;
464 } else if (!haveToSearchAll) {
465 break;
466 }
467 } else {
468 followDownBranchAddress = addr;
469 followingChar = (char)(0xFF & mDict[pos-1]);
470 if (firstAddress) {
471 firstAddress = false;
472 haveToSearchAll = false;
473 }
474 }
475 }
476 pos += 3;
477 } else if (getFirstBitOfByte(&pos)) { // terminal
478 if (addressLookingFor == (pos-1)) { // found !!
479 depth++;
480 word[depth] = (0xFF & mDict[pos-1]);
481 found = true;
482 break;
483 }
484 if (getSecondBitOfByte(&pos)) { // address + freq (4 byte)
485 if (!followDownAddressSearchStop) {
486 int addr = getBigramAddress(&pos, false);
487 if (addr > addressLookingFor) {
488 followDownAddressSearchStop = true;
489 if (firstAddress) {
490 firstAddress = false;
491 haveToSearchAll = true;
492 } else if (!haveToSearchAll) {
493 break;
494 }
495 } else {
496 followDownBranchAddress = addr;
497 followingChar = (char)(0xFF & mDict[pos-1]);
498 if (firstAddress) {
499 firstAddress = false;
500 haveToSearchAll = true;
501 }
502 }
503 }
504 pos += 4;
505 } else { // freq only (2 byte)
506 pos += 2;
507 }
508
509 // skipping bigram
510 int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
511 if (bigramExist > 0) {
512 int nextBigramExist = 1;
513 while (nextBigramExist > 0) {
514 pos += 3;
515 nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
516 }
517 } else {
518 pos++;
519 }
520 }
521 }
522 depth++;
523 if (followDownBranchAddress == 0) {
524 LOGI("ERROR!!! Cannot find bigram!!");
525 break;
526 }
527 }
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700528 if (checkFirstCharacter(word)) {
529 addWordBigram(word, depth, frequency);
530 }
531}
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700532
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700533bool
534Dictionary::checkFirstCharacter(unsigned short *word)
535{
536 // Checks whether this word starts with same character or neighboring characters of
537 // what user typed.
538
539 int *inputCodes = mInputCodes;
540 int maxAlt = mMaxAlternatives;
541 while (maxAlt > 0) {
542 if ((unsigned int) *inputCodes == (unsigned int) *word) {
543 return true;
544 }
545 inputCodes++;
546 maxAlt--;
547 }
548 return false;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700549}
550
551bool
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700552Dictionary::isValidWord(unsigned short *word, int length)
553{
554 if (checkIfDictVersionIsLatest()) {
555 return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
556 } else {
557 return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
558 }
559}
560
561int
The Android Open Source Project923bf412009-03-13 15:11:42 -0700562Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700563 // returns address of bigram data of that word
564 // return -99 if not found
565
The Android Open Source Project923bf412009-03-13 15:11:42 -0700566 int count = getCount(&pos);
567 unsigned short currentChar = (unsigned short) word[offset];
568 for (int j = 0; j < count; j++) {
569 unsigned short c = getChar(&pos);
570 int terminal = getTerminal(&pos);
571 int childPos = getAddress(&pos);
572 if (c == currentChar) {
573 if (offset == length - 1) {
574 if (terminal) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700575 return (pos+1);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700576 }
577 } else {
578 if (childPos != 0) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700579 int t = isValidWordRec(childPos, word, offset + 1, length);
580 if (t > 0) {
581 return t;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700582 }
583 }
584 }
585 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700586 if (terminal) {
587 getFreq(&pos);
588 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700589 // There could be two instances of each alphabet - upper and lower case. So continue
590 // looking ...
591 }
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700592 return NOT_VALID_WORD;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700593}
594
595
596} // namespace latinime