/*
**
** Copyright 2009, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
satok15dc33d2010-12-01 15:37:31 +090022
23#ifdef FLAG_DBG
24#define LOG_TAG "LatinIME: dictionary.cpp"
25#include <cutils/log.h>
26#define DEBUG_DICT 1
27#else // FLAG_DBG
Ken Wakasa826269c2010-04-27 10:28:14 +090028#define LOGI
satok15dc33d2010-12-01 15:37:31 +090029#define DEBUG_DICT 0
30#endif // FLAG_DBG
The Android Open Source Project923bf412009-03-13 15:11:42 -070031
32#include "dictionary.h"
33#include "basechars.h"
Ken Wakasa707505e2010-04-21 02:35:47 +090034#include "char_utils.h"
The Android Open Source Project923bf412009-03-13 15:11:42 -070035
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070036#define DICTIONARY_VERSION_MIN 200
37#define DICTIONARY_HEADER_SIZE 2
38#define NOT_VALID_WORD -99
The Android Open Source Project923bf412009-03-13 15:11:42 -070039
40namespace latinime {
41
42Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
43{
satok15dc33d2010-12-01 15:37:31 +090044 LOGI("Dictionary - constructor");
The Android Open Source Project923bf412009-03-13 15:11:42 -070045 mDict = (unsigned char*) dict;
46 mTypedLetterMultiplier = typedLetterMultiplier;
47 mFullWordMultiplier = fullWordMultiplier;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070048 getVersionNumber();
The Android Open Source Project923bf412009-03-13 15:11:42 -070049}
50
51Dictionary::~Dictionary()
52{
53}
54
55int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasani1b62ff12010-02-05 14:07:04 -080056 int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
57 int *nextLetters, int nextLettersSize)
The Android Open Source Project923bf412009-03-13 15:11:42 -070058{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070059 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070060 mFrequencies = frequencies;
61 mOutputChars = outWords;
62 mInputCodes = codes;
63 mInputLength = codesSize;
64 mMaxAlternatives = maxAlternatives;
65 mMaxWordLength = maxWordLength;
66 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070067 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070068 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
Amith Yamasani1b62ff12010-02-05 14:07:04 -080069 mNextLettersFrequencies = nextLetters;
70 mNextLettersSize = nextLettersSize;
The Android Open Source Project923bf412009-03-13 15:11:42 -070071
Jae Yong Sung937d5ad2010-06-30 20:28:04 -070072 if (checkIfDictVersionIsLatest()) {
73 getWordsRec(DICTIONARY_HEADER_SIZE, 0, mInputLength * 3, false, 1, 0, 0);
74 } else {
75 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
76 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070077
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070078 // Get the word count
79 suggWords = 0;
80 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
81 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
Amith Yamasani1b62ff12010-02-05 14:07:04 -080082
83 if (DEBUG_DICT) {
84 LOGI("Next letters: ");
85 for (int k = 0; k < nextLettersSize; k++) {
86 if (mNextLettersFrequencies[k] > 0) {
87 LOGI("%c = %d,", k, mNextLettersFrequencies[k]);
88 }
89 }
90 LOGI("\n");
91 }
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070092 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070093}
94
Amith Yamasani1b62ff12010-02-05 14:07:04 -080095void
96Dictionary::registerNextLetter(unsigned short c)
97{
98 if (c < mNextLettersSize) {
99 mNextLettersFrequencies[c]++;
100 }
101}
102
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700103void
104Dictionary::getVersionNumber()
105{
106 mVersion = (mDict[0] & 0xFF);
107 mBigram = (mDict[1] & 0xFF);
108 LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram);
109}
110
111// Checks whether it has the latest dictionary or the old dictionary
112bool
113Dictionary::checkIfDictVersionIsLatest()
114{
115 return (mVersion >= DICTIONARY_VERSION_MIN) && (mBigram == 1 || mBigram == 0);
116}
117
The Android Open Source Project923bf412009-03-13 15:11:42 -0700118unsigned short
119Dictionary::getChar(int *pos)
120{
121 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
122 // If the code is 255, then actual 16 bit code follows (in big endian)
123 if (ch == 0xFF) {
124 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
125 (*pos) += 2;
126 }
127 return ch;
128}
129
130int
131Dictionary::getAddress(int *pos)
132{
133 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700134 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
135 *pos += 1;
136 } else {
137 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
138 address += (mDict[*pos + 1] & 0xFF) << 8;
139 address += (mDict[*pos + 2] & 0xFF);
140 *pos += 3;
141 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700142 return address;
143}
144
145int
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700146Dictionary::getFreq(int *pos)
147{
148 int freq = mDict[(*pos)++] & 0xFF;
149
150 if (checkIfDictVersionIsLatest()) {
151 // skipping bigram
152 int bigramExist = (mDict[*pos] & FLAG_BIGRAM_READ);
153 if (bigramExist > 0) {
154 int nextBigramExist = 1;
155 while (nextBigramExist > 0) {
156 (*pos) += 3;
157 nextBigramExist = (mDict[(*pos)++] & FLAG_BIGRAM_CONTINUED);
158 }
159 } else {
160 (*pos)++;
161 }
162 }
163
164 return freq;
165}
166
167int
The Android Open Source Project923bf412009-03-13 15:11:42 -0700168Dictionary::wideStrLen(unsigned short *str)
169{
170 if (!str) return 0;
171 unsigned short *end = str;
172 while (*end)
173 end++;
174 return end - str;
175}
176
177bool
178Dictionary::addWord(unsigned short *word, int length, int frequency)
179{
180 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700181 if (DEBUG_DICT) {
182 char s[length + 1];
183 for (int i = 0; i <= length; i++) s[i] = word[i];
184 LOGI("Found word = %s, freq = %d : \n", s, frequency);
185 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700186
187 // Find the right insertion point
188 int insertAt = 0;
189 while (insertAt < mMaxWords) {
190 if (frequency > mFrequencies[insertAt]
191 || (mFrequencies[insertAt] == frequency
192 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
193 break;
194 }
195 insertAt++;
196 }
197 if (insertAt < mMaxWords) {
198 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
199 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
200 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
201 mFrequencies[insertAt] = frequency;
202 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
203 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
204 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
205 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
206 while (length--) {
207 *dest++ = *word++;
208 }
209 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700210 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
211 return true;
212 }
213 return false;
214}
215
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700216bool
217Dictionary::addWordBigram(unsigned short *word, int length, int frequency)
218{
219 word[length] = 0;
220 if (DEBUG_DICT) {
221 char s[length + 1];
222 for (int i = 0; i <= length; i++) s[i] = word[i];
223 LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency);
224 }
225
226 // Find the right insertion point
227 int insertAt = 0;
228 while (insertAt < mMaxBigrams) {
229 if (frequency > mBigramFreq[insertAt]
230 || (mBigramFreq[insertAt] == frequency
231 && length < wideStrLen(mBigramChars + insertAt * mMaxWordLength))) {
232 break;
233 }
234 insertAt++;
235 }
236 LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
237 if (insertAt < mMaxBigrams) {
238 memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
239 (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
240 (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
241 mBigramFreq[insertAt] = frequency;
242 memmove((char*) mBigramChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
243 (char*) mBigramChars + (insertAt ) * mMaxWordLength * sizeof(short),
244 (mMaxBigrams - insertAt - 1) * sizeof(short) * mMaxWordLength);
245 unsigned short *dest = mBigramChars + (insertAt ) * mMaxWordLength;
246 while (length--) {
247 *dest++ = *word++;
248 }
249 *dest = 0; // NULL terminate
250 if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt);
251 return true;
252 }
253 return false;
254}
255
The Android Open Source Project923bf412009-03-13 15:11:42 -0700256unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700257Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700258 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
259 c = BASE_CHARS[c];
260 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700261 if (c >='A' && c <= 'Z') {
262 c |= 32;
263 } else if (c > 127) {
Ken Wakasa707505e2010-04-21 02:35:47 +0900264 c = latin_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700265 }
266 return c;
267}
268
269bool
270Dictionary::sameAsTyped(unsigned short *word, int length)
271{
272 if (length != mInputLength) {
273 return false;
274 }
275 int *inputCodes = mInputCodes;
276 while (length--) {
277 if ((unsigned int) *inputCodes != (unsigned int) *word) {
278 return false;
279 }
280 inputCodes += mMaxAlternatives;
281 word++;
282 }
283 return true;
284}
285
286static char QUOTE = '\'';
287
288void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700289Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
290 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700291{
292 // Optimization: Prune out words that are too long compared to how much was typed.
293 if (depth > maxDepth) {
294 return;
295 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700296 if (diffs > mMaxEditDistance) {
297 return;
298 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700299 int count = getCount(&pos);
300 int *currentChars = NULL;
301 if (mInputLength <= inputIndex) {
302 completion = true;
303 } else {
304 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
305 }
306
307 for (int i = 0; i < count; i++) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700308 // -- at char
The Android Open Source Project923bf412009-03-13 15:11:42 -0700309 unsigned short c = getChar(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700310 // -- at flag/add
Amith Yamasanif1150882009-08-07 14:04:24 -0700311 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700312 bool terminal = getTerminal(&pos);
313 int childrenAddress = getAddress(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700314 // -- after address or flag
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700315 int freq = 1;
316 if (terminal) freq = getFreq(&pos);
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700317 // -- after add or freq
318
The Android Open Source Project923bf412009-03-13 15:11:42 -0700319 // If we are only doing completions, no need to look at the typed characters.
320 if (completion) {
321 mWord[depth] = c;
322 if (terminal) {
323 addWord(mWord, depth + 1, freq * snr);
Amith Yamasani1b62ff12010-02-05 14:07:04 -0800324 if (depth >= mInputLength && mSkipPos < 0) {
325 registerNextLetter(mWord[mInputLength]);
326 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700327 }
328 if (childrenAddress != 0) {
329 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700330 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700331 }
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700332 } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) {
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700333 // Skip the ' or other letter and continue deeper
334 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700335 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700336 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700337 }
338 } else {
339 int j = 0;
340 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700341 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700342 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700343 mWord[depth] = c;
344 if (mInputLength == inputIndex + 1) {
345 if (terminal) {
346 if (//INCLUDE_TYPED_WORD_IF_VALID ||
347 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700348 int finalFreq = freq * snr * addedWeight;
349 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
350 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700351 }
352 }
353 if (childrenAddress != 0) {
354 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700355 maxDepth, true, snr * addedWeight, inputIndex + 1,
356 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700357 }
358 } else if (childrenAddress != 0) {
359 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700360 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700361 }
362 }
363 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700364 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700365 }
366 }
367 }
368}
369
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700370int
371Dictionary::getBigramAddress(int *pos, bool advance)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700372{
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700373 int address = 0;
374
375 address += (mDict[*pos] & 0x3F) << 16;
376 address += (mDict[*pos + 1] & 0xFF) << 8;
377 address += (mDict[*pos + 2] & 0xFF);
378
379 if (advance) {
380 *pos += 3;
381 }
382
383 return address;
384}
385
386int
387Dictionary::getBigramFreq(int *pos)
388{
389 int freq = mDict[(*pos)++] & FLAG_BIGRAM_FREQ;
390
391 return freq;
392}
393
394
395int
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700396Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes, int codesSize,
397 unsigned short *bigramChars, int *bigramFreq, int maxWordLength, int maxBigrams,
398 int maxAlternatives)
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700399{
400 mBigramFreq = bigramFreq;
401 mBigramChars = bigramChars;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700402 mInputCodes = codes;
403 mInputLength = codesSize;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700404 mMaxWordLength = maxWordLength;
405 mMaxBigrams = maxBigrams;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700406 mMaxAlternatives = maxAlternatives;
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700407
408 if (mBigram == 1 && checkIfDictVersionIsLatest()) {
409 int pos = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
410 LOGI("Pos -> %d\n", pos);
411 if (pos < 0) {
412 return 0;
413 }
414
415 int bigramCount = 0;
416 int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
417 if (bigramExist > 0) {
418 int nextBigramExist = 1;
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700419 while (nextBigramExist > 0 && bigramCount < maxBigrams) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700420 int bigramAddress = getBigramAddress(&pos, true);
421 int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]);
422 // search for all bigrams and store them
423 searchForTerminalNode(bigramAddress, frequency);
424 nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
425 bigramCount++;
426 }
427 }
428
429 return bigramCount;
430 }
431 return 0;
432}
433
434void
435Dictionary::searchForTerminalNode(int addressLookingFor, int frequency)
436{
437 // track word with such address and store it in an array
438 unsigned short word[mMaxWordLength];
439
440 int pos;
441 int followDownBranchAddress = DICTIONARY_HEADER_SIZE;
442 bool found = false;
443 char followingChar = ' ';
444 int depth = -1;
445
446 while(!found) {
447 bool followDownAddressSearchStop = false;
448 bool firstAddress = true;
449 bool haveToSearchAll = true;
450
451 if (depth >= 0) {
452 word[depth] = (unsigned short) followingChar;
453 }
454 pos = followDownBranchAddress; // pos start at count
455 int count = mDict[pos] & 0xFF;
456 LOGI("count - %d\n",count);
457 pos++;
458 for (int i = 0; i < count; i++) {
459 // pos at data
460 pos++;
461 // pos now at flag
462 if (!getFirstBitOfByte(&pos)) { // non-terminal
463 if (!followDownAddressSearchStop) {
464 int addr = getBigramAddress(&pos, false);
465 if (addr > addressLookingFor) {
466 followDownAddressSearchStop = true;
467 if (firstAddress) {
468 firstAddress = false;
469 haveToSearchAll = true;
470 } else if (!haveToSearchAll) {
471 break;
472 }
473 } else {
474 followDownBranchAddress = addr;
475 followingChar = (char)(0xFF & mDict[pos-1]);
476 if (firstAddress) {
477 firstAddress = false;
478 haveToSearchAll = false;
479 }
480 }
481 }
482 pos += 3;
483 } else if (getFirstBitOfByte(&pos)) { // terminal
484 if (addressLookingFor == (pos-1)) { // found !!
485 depth++;
486 word[depth] = (0xFF & mDict[pos-1]);
487 found = true;
488 break;
489 }
490 if (getSecondBitOfByte(&pos)) { // address + freq (4 byte)
491 if (!followDownAddressSearchStop) {
492 int addr = getBigramAddress(&pos, false);
493 if (addr > addressLookingFor) {
494 followDownAddressSearchStop = true;
495 if (firstAddress) {
496 firstAddress = false;
497 haveToSearchAll = true;
498 } else if (!haveToSearchAll) {
499 break;
500 }
501 } else {
502 followDownBranchAddress = addr;
503 followingChar = (char)(0xFF & mDict[pos-1]);
504 if (firstAddress) {
505 firstAddress = false;
506 haveToSearchAll = true;
507 }
508 }
509 }
510 pos += 4;
511 } else { // freq only (2 byte)
512 pos += 2;
513 }
514
515 // skipping bigram
516 int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
517 if (bigramExist > 0) {
518 int nextBigramExist = 1;
519 while (nextBigramExist > 0) {
520 pos += 3;
521 nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
522 }
523 } else {
524 pos++;
525 }
526 }
527 }
528 depth++;
529 if (followDownBranchAddress == 0) {
530 LOGI("ERROR!!! Cannot find bigram!!");
531 break;
532 }
533 }
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700534 if (checkFirstCharacter(word)) {
535 addWordBigram(word, depth, frequency);
536 }
537}
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700538
Jae Yong Sung80aa14f2010-07-26 11:43:29 -0700539bool
540Dictionary::checkFirstCharacter(unsigned short *word)
541{
542 // Checks whether this word starts with same character or neighboring characters of
543 // what user typed.
544
545 int *inputCodes = mInputCodes;
546 int maxAlt = mMaxAlternatives;
547 while (maxAlt > 0) {
548 if ((unsigned int) *inputCodes == (unsigned int) *word) {
549 return true;
550 }
551 inputCodes++;
552 maxAlt--;
553 }
554 return false;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700555}
556
557bool
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700558Dictionary::isValidWord(unsigned short *word, int length)
559{
560 if (checkIfDictVersionIsLatest()) {
561 return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
562 } else {
563 return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
564 }
565}
566
567int
The Android Open Source Project923bf412009-03-13 15:11:42 -0700568Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700569 // returns address of bigram data of that word
570 // return -99 if not found
571
The Android Open Source Project923bf412009-03-13 15:11:42 -0700572 int count = getCount(&pos);
573 unsigned short currentChar = (unsigned short) word[offset];
574 for (int j = 0; j < count; j++) {
575 unsigned short c = getChar(&pos);
576 int terminal = getTerminal(&pos);
577 int childPos = getAddress(&pos);
578 if (c == currentChar) {
579 if (offset == length - 1) {
580 if (terminal) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700581 return (pos+1);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700582 }
583 } else {
584 if (childPos != 0) {
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700585 int t = isValidWordRec(childPos, word, offset + 1, length);
586 if (t > 0) {
587 return t;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700588 }
589 }
590 }
591 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700592 if (terminal) {
593 getFreq(&pos);
594 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700595 // There could be two instances of each alphabet - upper and lower case. So continue
596 // looking ...
597 }
Jae Yong Sung937d5ad2010-06-30 20:28:04 -0700598 return NOT_VALID_WORD;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700599}
600
601
602} // namespace latinime