blob: 17a87a708ccb7b16be0053d8b787dd83d0211b51 [file] [log] [blame]
satok30088252010-12-01 21:22:15 +09001/*
2**
3** Copyright 2010, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
22
satoke808e432010-12-02 14:53:24 +090023#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
satok30088252010-12-01 21:22:15 +090024
satok30088252010-12-01 21:22:15 +090025#include "basechars.h"
26#include "char_utils.h"
satoke808e432010-12-02 14:53:24 +090027#include "dictionary.h"
28#include "unigram_dictionary.h"
satok30088252010-12-01 21:22:15 +090029
30namespace latinime {
31
Jean Chalardc2bbc6a2011-02-25 17:56:53 +090032const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
33 { { 'a', 'e' },
34 { 'o', 'e' },
35 { 'u', 'e' } };
36
satoke808e432010-12-02 14:53:24 +090037UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier,
satok662fe692010-12-08 17:05:39 +090038 int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
satok18c28f42010-12-02 18:11:54 +090039 const bool isLatestDictVersion)
Tadashi G. Takaoka887f11e2011-02-10 20:53:58 +090040 : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
satok662fe692010-12-08 17:05:39 +090041 MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
42 TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
Jean Chalardc2bbc6a2011-02-25 17:56:53 +090043 ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),
44 BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)) {
satoka3d78f62010-12-09 22:08:33 +090045 if (DEBUG_DICT) LOGI("UnigramDictionary - constructor");
satok30088252010-12-01 21:22:15 +090046}
47
satok18c28f42010-12-02 18:11:54 +090048UnigramDictionary::~UnigramDictionary() {}
satok30088252010-12-01 21:22:15 +090049
// Returns the size, in BYTES, of a buffer able to hold codesSize input positions
// of MAX_PROXIMITY_CHARS codes each. `codes` is only used for its element type.
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
        const int MAX_PROXIMITY_CHARS) {
    const unsigned int bytesPerPosition = sizeof(codes[0]) * MAX_PROXIMITY_CHARS;
    return bytesPerPosition * codesSize;
}
54
55bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {
56
57 // There can't be a digraph if we don't have at least 2 characters to examine
58 if (i + 2 > codesSize) return false;
59
60 // Search for the first char of some digraph
61 int lastDigraphIndex = -1;
62 const int thisChar = codes[i * MAX_PROXIMITY_CHARS];
63 for (lastDigraphIndex = sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]) - 1;
64 lastDigraphIndex >= 0; --lastDigraphIndex) {
65 if (thisChar == GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].first) break;
66 }
67 // No match: return early
68 if (lastDigraphIndex < 0) return false;
69
70 // It's an interesting digraph if the second char matches too.
71 return GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];
72}
73
74// Mostly the same arguments as the non-recursive version, except:
75// codes is the original value. It points to the start of the work buffer, and gets passed as is.
76// codesSize is the size of the user input (thus, it is the size of codesSrc).
77// codesDest is the current point in the work buffer.
78// codesSrc is the current point in the user-input, original, content-unmodified buffer.
79// codesRemain is the remaining size in codesSrc.
80void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
81 const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
82 const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
83 int* codesDest, unsigned short* outWords, int* frequencies) {
84
85 for (int i = 0; i < codesRemain; ++i) {
86 if (isDigraph(codesSrc, i, codesRemain)) {
87 // Found a digraph. We will try both spellings. eg. the word is "pruefen"
88
89 // Copy the word up to the first char of the digraph, then continue processing
90 // on the remaining part of the word, skipping the second char of the digraph.
91 // In our example, copy "pru" and continue running on "fen"
92 memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);
93 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
94 codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS,
95 codesRemain - i - 1, codesDest + i * MAX_PROXIMITY_CHARS,
96 outWords, frequencies);
97
98 // Copy the second char of the digraph in place, then continue processing on
99 // the remaining part of the word.
100 // In our example, after "pru" in the buffer copy the "e", and continue running on "fen"
101 memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,
102 BYTES_IN_ONE_CHAR);
103 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
104 codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i,
105 codesDest + i * MAX_PROXIMITY_CHARS, outWords, frequencies);
106 return;
107 }
108 }
109
110 // If we come here, we hit the end of the word: let's check it against the dictionary.
111 // In our example, we'll come here once for "prufen" and then once for "pruefen".
112 // If the word contains several digraphs, we'll come it for the product of them.
113 // eg. if the word is "ueberpruefen" we'll test, in order, against
114 // "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
115 const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;
116 if (0 != remainingBytes)
117 memcpy(codesDest, codesSrc, remainingBytes);
118
119 getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
120 (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
121}
122
123int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
124 const int *ycoordinates, const int *codes, const int codesSize, const int flags,
125 unsigned short *outWords, int *frequencies) {
126
127 if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
128 { // Incrementally tune the word and try all possibilities
129 int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
130 getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
131 codesSize, flags, codes, codesSize, codesBuffer, outWords, frequencies);
132 } else { // Normal processing
133 getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
134 outWords, frequencies);
135 }
136
137 PROF_START(6);
138 // Get the word count
139 int suggestedWordsCount = 0;
140 while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
141 suggestedWordsCount++;
142 }
143
144 if (DEBUG_DICT) {
145 LOGI("Returning %d words", suggestedWordsCount);
146 LOGI("Next letters: ");
147 for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
148 if (mNextLettersFrequency[k] > 0) {
149 LOGI("%c = %d,", k, mNextLettersFrequency[k]);
150 }
151 }
152 }
153 PROF_END(6);
154 PROF_CLOSE;
155 return suggestedWordsCount;
156}
157
158void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
159 const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
160 unsigned short *outWords, int *frequencies) {
161
satok61e2f852011-01-05 14:13:07 +0900162 PROF_OPEN;
163 PROF_START(0);
satok30088252010-12-01 21:22:15 +0900164 initSuggestions(codes, codesSize, outWords, frequencies);
satok54fe9e02010-12-13 14:42:35 +0900165 if (DEBUG_DICT) assert(codesSize == mInputLength);
166
satok8fbd5522011-02-22 17:28:55 +0900167 if (DEBUG_PROXIMITY_INFO) {
168 for (int i = 0; i < codesSize; ++i) {
169 LOGI("Input[%d] x = %d, y = %d", i, xcoordinates[i], ycoordinates[i]);
170 }
171 }
172
satoka3d78f62010-12-09 22:08:33 +0900173 const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
satok61e2f852011-01-05 14:13:07 +0900174 PROF_END(0);
satok30088252010-12-01 21:22:15 +0900175
satok61e2f852011-01-05 14:13:07 +0900176 PROF_START(1);
Tadashi G. Takaoka887f11e2011-02-10 20:53:58 +0900177 getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);
satok61e2f852011-01-05 14:13:07 +0900178 PROF_END(1);
179
180 PROF_START(2);
satok662fe692010-12-08 17:05:39 +0900181 // Suggestion with missing character
182 if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
satok30088252010-12-01 21:22:15 +0900183 for (int i = 0; i < codesSize; ++i) {
satokcdbbea72010-12-08 16:04:16 +0900184 if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
satok54fe9e02010-12-13 14:42:35 +0900185 getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
satokcdbbea72010-12-08 16:04:16 +0900186 }
187 }
satok61e2f852011-01-05 14:13:07 +0900188 PROF_END(2);
satokcdbbea72010-12-08 16:04:16 +0900189
satok61e2f852011-01-05 14:13:07 +0900190 PROF_START(3);
satok662fe692010-12-08 17:05:39 +0900191 // Suggestion with excessive character
satok54fe9e02010-12-13 14:42:35 +0900192 if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER
193 && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {
satokcdbbea72010-12-08 16:04:16 +0900194 for (int i = 0; i < codesSize; ++i) {
satok54fe9e02010-12-13 14:42:35 +0900195 if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
196 getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
satok30088252010-12-01 21:22:15 +0900197 }
198 }
satok61e2f852011-01-05 14:13:07 +0900199 PROF_END(3);
satok30088252010-12-01 21:22:15 +0900200
satok61e2f852011-01-05 14:13:07 +0900201 PROF_START(4);
satoka3d78f62010-12-09 22:08:33 +0900202 // Suggestion with transposed characters
203 // Only suggest words that length is mInputLength
204 if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
205 for (int i = 0; i < codesSize; ++i) {
206 if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
satok54fe9e02010-12-13 14:42:35 +0900207 getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
satoka3d78f62010-12-09 22:08:33 +0900208 }
209 }
satok61e2f852011-01-05 14:13:07 +0900210 PROF_END(4);
satoka3d78f62010-12-09 22:08:33 +0900211
satok61e2f852011-01-05 14:13:07 +0900212 PROF_START(5);
satok662fe692010-12-08 17:05:39 +0900213 // Suggestions with missing space
satok54fe9e02010-12-13 14:42:35 +0900214 if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
215 && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
satok662fe692010-12-08 17:05:39 +0900216 for (int i = 1; i < codesSize; ++i) {
217 if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
218 getMissingSpaceWords(mInputLength, i);
219 }
220 }
satok61e2f852011-01-05 14:13:07 +0900221 PROF_END(5);
satok30088252010-12-01 21:22:15 +0900222}
223
Jean Chalardc2bbc6a2011-02-25 17:56:53 +0900224void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
225 unsigned short *outWords, int *frequencies) {
satokf5cded12010-12-06 21:28:24 +0900226 if (DEBUG_DICT) LOGI("initSuggest");
satok30088252010-12-01 21:22:15 +0900227 mFrequencies = frequencies;
228 mOutputChars = outWords;
229 mInputCodes = codes;
230 mInputLength = codesSize;
231 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
232}
233
satok715514d2010-12-02 20:19:59 +0900234void UnigramDictionary::registerNextLetter(
235 unsigned short c, int *nextLetters, int nextLettersSize) {
satok30088252010-12-01 21:22:15 +0900236 if (c < nextLettersSize) {
237 nextLetters[c]++;
238 }
239}
240
satok662fe692010-12-08 17:05:39 +0900241// TODO: We need to optimize addWord by using STL or something
satok28bd03b2010-12-03 16:39:16 +0900242bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
satok30088252010-12-01 21:22:15 +0900243 word[length] = 0;
satok662fe692010-12-08 17:05:39 +0900244 if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
satok30088252010-12-01 21:22:15 +0900245 char s[length + 1];
246 for (int i = 0; i <= length; i++) s[i] = word[i];
satok662fe692010-12-08 17:05:39 +0900247 LOGI("Found word = %s, freq = %d", s, frequency);
satok30088252010-12-01 21:22:15 +0900248 }
satokf5cded12010-12-06 21:28:24 +0900249 if (length > MAX_WORD_LENGTH) {
250 if (DEBUG_DICT) LOGI("Exceeded max word length.");
251 return false;
252 }
satok30088252010-12-01 21:22:15 +0900253
254 // Find the right insertion point
255 int insertAt = 0;
256 while (insertAt < MAX_WORDS) {
satok715514d2010-12-02 20:19:59 +0900257 if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
258 && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
satok30088252010-12-01 21:22:15 +0900259 break;
260 }
261 insertAt++;
262 }
263 if (insertAt < MAX_WORDS) {
satokcdbbea72010-12-08 16:04:16 +0900264 if (DEBUG_DICT) {
265 char s[length + 1];
266 for (int i = 0; i <= length; i++) s[i] = word[i];
satok662fe692010-12-08 17:05:39 +0900267 LOGI("Added word = %s, freq = %d", s, frequency);
satokcdbbea72010-12-08 16:04:16 +0900268 }
satok30088252010-12-01 21:22:15 +0900269 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
270 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
271 (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
272 mFrequencies[insertAt] = frequency;
273 memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
satok715514d2010-12-02 20:19:59 +0900274 (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
satok30088252010-12-01 21:22:15 +0900275 (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
satok715514d2010-12-02 20:19:59 +0900276 unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
satok30088252010-12-01 21:22:15 +0900277 while (length--) {
278 *dest++ = *word++;
279 }
280 *dest = 0; // NULL terminate
satok662fe692010-12-08 17:05:39 +0900281 if (DEBUG_DICT) LOGI("Added word at %d", insertAt);
satok30088252010-12-01 21:22:15 +0900282 return true;
283 }
284 return false;
285}
286
Jean Chalardf5f834a2011-02-22 15:12:46 +0900287unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) {
satok30088252010-12-01 21:22:15 +0900288 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
289 c = BASE_CHARS[c];
290 }
291 if (c >='A' && c <= 'Z') {
292 c |= 32;
293 } else if (c > 127) {
294 c = latin_tolower(c);
295 }
296 return c;
297}
298
satok28bd03b2010-12-03 16:39:16 +0900299bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
satok30088252010-12-01 21:22:15 +0900300 if (length != mInputLength) {
301 return false;
302 }
Jean Chalardc2bbc6a2011-02-25 17:56:53 +0900303 const int *inputCodes = mInputCodes;
satok30088252010-12-01 21:22:15 +0900304 while (length--) {
305 if ((unsigned int) *inputCodes != (unsigned int) *word) {
306 return false;
307 }
satok662fe692010-12-08 17:05:39 +0900308 inputCodes += MAX_PROXIMITY_CHARS;
satok30088252010-12-01 21:22:15 +0900309 word++;
310 }
311 return true;
312}
313
// Apostrophe: needsToSkipCurrentNode() skips over it in dictionary words when the
// user did not type one.
static const char QUOTE = '\'';
// Separator inserted between the two words of a missing-space suggestion.
static const char SPACE = ' ';
satok30088252010-12-01 21:22:15 +0900316
satok54fe9e02010-12-13 14:42:35 +0900317void UnigramDictionary::getSuggestionCandidates(const int skipPos,
satoka3d78f62010-12-09 22:08:33 +0900318 const int excessivePos, const int transposedPos, int *nextLetters,
319 const int nextLettersSize, const int maxDepth) {
satok54fe9e02010-12-13 14:42:35 +0900320 if (DEBUG_DICT) {
321 LOGI("getSuggestionCandidates %d", maxDepth);
322 assert(transposedPos + 1 < mInputLength);
323 assert(excessivePos < mInputLength);
324 assert(missingPos < mInputLength);
325 }
satok662fe692010-12-08 17:05:39 +0900326 int rootPosition = ROOT_POS;
satokd2997922010-12-07 13:08:39 +0900327 // Get the number of child of root, then increment the position
328 int childCount = Dictionary::getCount(DICT, &rootPosition);
329 int depth = 0;
330
331 mStackChildCount[0] = childCount;
332 mStackTraverseAll[0] = (mInputLength <= 0);
333 mStackNodeFreq[0] = 1;
334 mStackInputIndex[0] = 0;
335 mStackDiffs[0] = 0;
336 mStackSiblingPos[0] = rootPosition;
337
satok662fe692010-12-08 17:05:39 +0900338 // Depth first search
satokd2997922010-12-07 13:08:39 +0900339 while (depth >= 0) {
340 if (mStackChildCount[depth] > 0) {
341 --mStackChildCount[depth];
342 bool traverseAllNodes = mStackTraverseAll[depth];
Jean Chalardf5f834a2011-02-22 15:12:46 +0900343 int matchWeight = mStackNodeFreq[depth];
satokd2997922010-12-07 13:08:39 +0900344 int inputIndex = mStackInputIndex[depth];
345 int diffs = mStackDiffs[depth];
346 int siblingPos = mStackSiblingPos[depth];
347 int firstChildPos;
satoka3d78f62010-12-09 22:08:33 +0900348 // depth will never be greater than maxDepth because in that case,
satokd2997922010-12-07 13:08:39 +0900349 // needsToTraverseChildrenNodes should be false
350 const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900351 maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
352 excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
353 &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
354 &siblingPos);
satok662fe692010-12-08 17:05:39 +0900355 // Update next sibling pos
satokd2997922010-12-07 13:08:39 +0900356 mStackSiblingPos[depth] = siblingPos;
357 if (needsToTraverseChildrenNodes) {
358 // Goes to child node
359 ++depth;
360 mStackChildCount[depth] = childCount;
361 mStackTraverseAll[depth] = traverseAllNodes;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900362 mStackNodeFreq[depth] = matchWeight;
satokd2997922010-12-07 13:08:39 +0900363 mStackInputIndex[depth] = inputIndex;
364 mStackDiffs[depth] = diffs;
365 mStackSiblingPos[depth] = firstChildPos;
366 }
367 } else {
satokcdbbea72010-12-08 16:04:16 +0900368 // Goes to parent sibling node
satokd2997922010-12-07 13:08:39 +0900369 --depth;
370 }
371 }
372}
373
// Scales *freq by `rate` percent, in place.
// For rates above 1000000 the division by 100 is applied before the
// multiplication; otherwise it is applied after, which keeps more precision.
// The product is computed in 64 bits and the result is clamped to the int
// range, so a large frequency saturates instead of overflowing.
inline static void multiplyRate(const int rate, int *freq) {
    long long scaled;
    if (rate > 1000000) {
        scaled = static_cast<long long>(*freq / 100) * rate;
    } else {
        scaled = static_cast<long long>(*freq) * rate / 100;
    }
    if (scaled > INT_MAX) {
        scaled = INT_MAX;
    } else if (scaled < INT_MIN) {
        scaled = INT_MIN;
    }
    *freq = static_cast<int>(scaled);
}
381
satok662fe692010-12-08 17:05:39 +0900382bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
satokaee09dc2010-12-09 19:21:51 +0900383 if (missingSpacePos <= 0 || missingSpacePos >= inputLength
384 || inputLength >= MAX_WORD_LENGTH) return false;
satok662fe692010-12-08 17:05:39 +0900385 const int newWordLength = inputLength + 1;
386 // Allocating variable length array on stack
387 unsigned short word[newWordLength];
satokaee09dc2010-12-09 19:21:51 +0900388 const int firstFreq = getBestWordFreq(0, missingSpacePos, mWord);
389 if (DEBUG_DICT) LOGI("First freq: %d", firstFreq);
390 if (firstFreq <= 0) return false;
391
satok662fe692010-12-08 17:05:39 +0900392 for (int i = 0; i < missingSpacePos; ++i) {
satokaee09dc2010-12-09 19:21:51 +0900393 word[i] = mWord[i];
satok662fe692010-12-08 17:05:39 +0900394 }
satokaee09dc2010-12-09 19:21:51 +0900395
396 const int secondFreq = getBestWordFreq(missingSpacePos, inputLength - missingSpacePos, mWord);
satoka3d78f62010-12-09 22:08:33 +0900397 if (DEBUG_DICT) LOGI("Second freq: %d", secondFreq);
satokaee09dc2010-12-09 19:21:51 +0900398 if (secondFreq <= 0) return false;
399
satok662fe692010-12-08 17:05:39 +0900400 word[missingSpacePos] = SPACE;
401 for (int i = (missingSpacePos + 1); i < newWordLength; ++i) {
satokaee09dc2010-12-09 19:21:51 +0900402 word[i] = mWord[i - missingSpacePos - 1];
satok662fe692010-12-08 17:05:39 +0900403 }
satokaee09dc2010-12-09 19:21:51 +0900404
405 int pairFreq = ((firstFreq + secondFreq) / 2);
406 for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
satokf7425bb2011-01-05 16:37:53 +0900407 multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &pairFreq);
satok662fe692010-12-08 17:05:39 +0900408 addWord(word, newWordLength, pairFreq);
409 return true;
410}
411
412// Keep this for comparing spec to new getWords
413void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
satoka3d78f62010-12-09 22:08:33 +0900414 const int excessivePos, const int transposedPos,int *nextLetters,
415 const int nextLettersSize) {
satok662fe692010-12-08 17:05:39 +0900416 int initialPosition = initialPos;
417 const int count = Dictionary::getCount(DICT, &initialPosition);
418 getWordsRec(count, initialPosition, 0,
419 min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
satoka3d78f62010-12-09 22:08:33 +0900420 mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,
421 nextLettersSize);
satok662fe692010-12-08 17:05:39 +0900422}
423
satok68319262010-12-03 19:38:08 +0900424void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900425 const int maxDepth, const bool traverseAllNodes, const int matchWeight,
426 const int inputIndex, const int diffs, const int skipPos, const int excessivePos,
427 const int transposedPos, int *nextLetters, const int nextLettersSize) {
satok48e432c2010-12-06 17:38:58 +0900428 int siblingPos = pos;
satok68319262010-12-03 19:38:08 +0900429 for (int i = 0; i < childrenCount; ++i) {
satok48e432c2010-12-06 17:38:58 +0900430 int newCount;
431 int newChildPosition;
satokd2997922010-12-07 13:08:39 +0900432 const int newDepth = depth + 1;
satok48e432c2010-12-06 17:38:58 +0900433 bool newTraverseAllNodes;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900434 int newMatchRate;
satok48e432c2010-12-06 17:38:58 +0900435 int newInputIndex;
436 int newDiffs;
437 int newSiblingPos;
438 const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900439 traverseAllNodes, matchWeight, inputIndex, diffs,
440 skipPos, excessivePos, transposedPos,
satoka3d78f62010-12-09 22:08:33 +0900441 nextLetters, nextLettersSize,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900442 &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
satok48e432c2010-12-06 17:38:58 +0900443 &newInputIndex, &newDiffs, &newSiblingPos);
444 siblingPos = newSiblingPos;
satok30088252010-12-01 21:22:15 +0900445
satok48e432c2010-12-06 17:38:58 +0900446 if (needsToTraverseChildrenNodes) {
447 getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900448 newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
satoka3d78f62010-12-09 22:08:33 +0900449 nextLetters, nextLettersSize);
satok30088252010-12-01 21:22:15 +0900450 }
451 }
452}
453
Jean Chalarda5d58492011-02-18 17:50:58 +0900454static const int TWO_31ST_DIV_255 = ((1 << 31) - 1) / 255;
455static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
456 return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
457}
satok58c49b92011-01-27 03:23:39 +0900458inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900459 const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
Jean Chalard07a84062011-03-03 10:22:10 +0900460 const int freq, const bool sameLength) const {
satoka3d78f62010-12-09 22:08:33 +0900461 // TODO: Demote by edit distance
Jean Chalardf5f834a2011-02-22 15:12:46 +0900462 int finalFreq = freq * matchWeight;
Jean Chalard07a84062011-03-03 10:22:10 +0900463 if (skipPos >= 0) {
464 if (mInputLength >= 3) {
465 multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
466 (mInputLength - 2) / (mInputLength - 1), &finalFreq);
467 } else {
468 finalFreq = 0;
469 }
470 }
satokf7425bb2011-01-05 16:37:53 +0900471 if (transposedPos >= 0) multiplyRate(
472 WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
satok54fe9e02010-12-13 14:42:35 +0900473 if (excessivePos >= 0) {
satokf7425bb2011-01-05 16:37:53 +0900474 multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
satok54fe9e02010-12-13 14:42:35 +0900475 if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
satokf7425bb2011-01-05 16:37:53 +0900476 multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
satok54fe9e02010-12-13 14:42:35 +0900477 }
478 }
satok58c49b92011-01-27 03:23:39 +0900479 int lengthFreq = TYPED_LETTER_MULTIPLIER;
480 for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900481 if (lengthFreq == matchWeight) {
Jean Chalard8dc754a2011-01-27 14:20:22 +0900482 if (depth > 1) {
483 if (DEBUG_DICT) LOGI("Found full matched word.");
484 multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
485 }
486 if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
Jean Chalarda5d58492011-02-18 17:50:58 +0900487 finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
Jean Chalard8dc754a2011-01-27 14:20:22 +0900488 }
satok58c49b92011-01-27 03:23:39 +0900489 }
satok54fe9e02010-12-13 14:42:35 +0900490 if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
491 return finalFreq;
492}
satoka3d78f62010-12-09 22:08:33 +0900493
satok54fe9e02010-12-13 14:42:35 +0900494inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
Jean Chalardf5f834a2011-02-22 15:12:46 +0900495 unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
satok54fe9e02010-12-13 14:42:35 +0900496 int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
497 const int transposedPos, const int freq) {
Jean Chalardf5f834a2011-02-22 15:12:46 +0900498 const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos,
satok58c49b92011-01-27 03:23:39 +0900499 transposedPos, freq, false);
satoka3d78f62010-12-09 22:08:33 +0900500 if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
satok54fe9e02010-12-13 14:42:35 +0900501 if (depth >= mInputLength && skipPos < 0) {
satok715514d2010-12-02 20:19:59 +0900502 registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
503 }
504}
505
506inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
Jean Chalardf5f834a2011-02-22 15:12:46 +0900507 unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
Jean Chalard8dc754a2011-01-27 14:20:22 +0900508 const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
satok54fe9e02010-12-13 14:42:35 +0900509 if (sameAsTyped(word, depth + 1)) return;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900510 const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
satok54fe9e02010-12-13 14:42:35 +0900511 excessivePos, transposedPos, freq, true);
512 // Proximity collection will promote a word of the same length as what user typed.
513 if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
satok715514d2010-12-02 20:19:59 +0900514}
satok28bd03b2010-12-03 16:39:16 +0900515
516inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
satok68319262010-12-03 19:38:08 +0900517 const int inputIndex, const int skipPos, const int depth) {
satok8fbd5522011-02-22 17:28:55 +0900518 const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
satok28bd03b2010-12-03 16:39:16 +0900519 // Skip the ' or other letter and continue deeper
520 return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
521}
522
satoke07baa62010-12-09 21:55:40 +0900523inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
Jean Chalard07a84062011-03-03 10:22:10 +0900524 const int inputLength) const {
satoke07baa62010-12-09 21:55:40 +0900525 if (inputIndex < 0 || inputIndex >= inputLength) return false;
526 const int currentChar = *getInputCharsAt(inputIndex);
527 const int leftIndex = inputIndex - 1;
528 if (leftIndex >= 0) {
Jean Chalardc2bbc6a2011-02-25 17:56:53 +0900529 const int *leftChars = getInputCharsAt(leftIndex);
satoke07baa62010-12-09 21:55:40 +0900530 int i = 0;
531 while (leftChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
532 if (leftChars[i++] == currentChar) return true;
533 }
534 }
535 const int rightIndex = inputIndex + 1;
536 if (rightIndex < inputLength) {
Jean Chalardc2bbc6a2011-02-25 17:56:53 +0900537 const int *rightChars = getInputCharsAt(rightIndex);
satoke07baa62010-12-09 21:55:40 +0900538 int i = 0;
539 while (rightChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
540 if (rightChars[i++] == currentChar) return true;
541 }
542 }
543 return false;
544}
545
Jean Chalarda5d58492011-02-18 17:50:58 +0900546
547// In the following function, c is the current character of the dictionary word
548// currently examined.
549// currentChars is an array containing the keys close to the character the
550// user actually typed at the same position. We want to see if c is in it: if so,
551// then the word contains at that position a character close to what the user
552// typed.
553// What the user typed is actually the first character of the array.
554// Notice : accented characters do not have a proximity list, so they are alone
555// in their list. The non-accented version of the character should be considered
556// "close", but not the other keys close to the non-accented version.
Jean Chalard8dc754a2011-01-27 14:20:22 +0900557inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
558 const int *currentChars, const unsigned short c, const int skipPos,
559 const int excessivePos, const int transposedPos) {
Jean Chalardf5f834a2011-02-22 15:12:46 +0900560 const unsigned short baseLowerC = toBaseLowerCase(c);
Jean Chalarda5d58492011-02-18 17:50:58 +0900561
562 // The first char in the array is what user typed. If it matches right away,
563 // that means the user typed that same char for this pos.
Jean Chalardf5f834a2011-02-22 15:12:46 +0900564 if (currentChars[0] == baseLowerC || currentChars[0] == c)
Jean Chalarda5d58492011-02-18 17:50:58 +0900565 return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
566
567 // If one of those is true, we should not check for close characters at all.
568 if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
569 return UNRELATED_CHAR;
570
571 // If the non-accented, lowercased version of that first character matches c,
572 // then we have a non-accented version of the accented character the user
573 // typed. Treat it as a close char.
Jean Chalardf5f834a2011-02-22 15:12:46 +0900574 if (toBaseLowerCase(currentChars[0]) == baseLowerC)
Jean Chalarda5d58492011-02-18 17:50:58 +0900575 return NEAR_PROXIMITY_CHAR;
576
577 // Not an exact nor an accent-alike match: search the list of close keys
578 int j = 1;
satoke07baa62010-12-09 21:55:40 +0900579 while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
Jean Chalardf5f834a2011-02-22 15:12:46 +0900580 const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
Jean Chalarda5d58492011-02-18 17:50:58 +0900581 if (matched) return NEAR_PROXIMITY_CHAR;
satok28bd03b2010-12-03 16:39:16 +0900582 ++j;
583 }
Jean Chalarda5d58492011-02-18 17:50:58 +0900584
585 // Was not included, signal this as an unrelated character.
Jean Chalard8dc754a2011-01-27 14:20:22 +0900586 return UNRELATED_CHAR;
satok28bd03b2010-12-03 16:39:16 +0900587}
588
satok48e432c2010-12-06 17:38:58 +0900589inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900590 const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
satoka3d78f62010-12-09 22:08:33 +0900591 const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
592 int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900593 bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
satoka3d78f62010-12-09 22:08:33 +0900594 int *nextSiblingPosition) {
595 if (DEBUG_DICT) {
596 int inputCount = 0;
597 if (skipPos >= 0) ++inputCount;
598 if (excessivePos >= 0) ++inputCount;
599 if (transposedPos >= 0) ++inputCount;
600 assert(inputCount <= 1);
601 }
satok48e432c2010-12-06 17:38:58 +0900602 unsigned short c;
603 int childPosition;
604 bool terminal;
605 int freq;
satokfd16f1d2011-01-27 16:25:16 +0900606 bool isSameAsUserTypedLength = false;
satokcdbbea72010-12-08 16:04:16 +0900607
satokfd16f1d2011-01-27 16:25:16 +0900608 if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
satokcdbbea72010-12-08 16:04:16 +0900609
satok48e432c2010-12-06 17:38:58 +0900610 *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
611 &childPosition, &terminal, &freq);
612
613 const bool needsToTraverseChildrenNodes = childPosition != 0;
614
615 // If we are only doing traverseAllNodes, no need to look at the typed characters.
616 if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
617 mWord[depth] = c;
618 if (traverseAllNodes && terminal) {
satok54fe9e02010-12-13 14:42:35 +0900619 onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
Jean Chalardf5f834a2011-02-22 15:12:46 +0900620 matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos,
621 freq);
satok48e432c2010-12-06 17:38:58 +0900622 }
623 if (!needsToTraverseChildrenNodes) return false;
624 *newTraverseAllNodes = traverseAllNodes;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900625 *newMatchRate = matchWeight;
satok48e432c2010-12-06 17:38:58 +0900626 *newDiffs = diffs;
627 *newInputIndex = inputIndex;
satok48e432c2010-12-06 17:38:58 +0900628 } else {
Jean Chalardc2bbc6a2011-02-25 17:56:53 +0900629 const int *currentChars = getInputCharsAt(inputIndex);
satoka3d78f62010-12-09 22:08:33 +0900630
631 if (transposedPos >= 0) {
632 if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;
633 if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;
634 }
635
636 int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
637 transposedPos);
Jean Chalard8dc754a2011-01-27 14:20:22 +0900638 if (UNRELATED_CHAR == matchedProximityCharId) return false;
satok48e432c2010-12-06 17:38:58 +0900639 mWord[depth] = c;
640 // If inputIndex is greater than mInputLength, that means there is no
641 // proximity chars. So, we don't need to check proximity.
Jean Chalard8dc754a2011-01-27 14:20:22 +0900642 if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
Jean Chalardf5f834a2011-02-22 15:12:46 +0900643 matchWeight = matchWeight * TYPED_LETTER_MULTIPLIER;
Jean Chalard8dc754a2011-01-27 14:20:22 +0900644 }
satokfd16f1d2011-01-27 16:25:16 +0900645 bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
646 || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
satok48e432c2010-12-06 17:38:58 +0900647 if (isSameAsUserTypedLength && terminal) {
Jean Chalardf5f834a2011-02-22 15:12:46 +0900648 onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight,
Jean Chalard8dc754a2011-01-27 14:20:22 +0900649 skipPos, excessivePos, transposedPos, freq);
satok48e432c2010-12-06 17:38:58 +0900650 }
651 if (!needsToTraverseChildrenNodes) return false;
652 // Start traversing all nodes after the index exceeds the user typed length
653 *newTraverseAllNodes = isSameAsUserTypedLength;
Jean Chalardf5f834a2011-02-22 15:12:46 +0900654 *newMatchRate = matchWeight;
Jean Chalard8dc754a2011-01-27 14:20:22 +0900655 *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
satok48e432c2010-12-06 17:38:58 +0900656 *newInputIndex = inputIndex + 1;
satok48e432c2010-12-06 17:38:58 +0900657 }
658 // Optimization: Prune out words that are too long compared to how much was typed.
satokd2997922010-12-07 13:08:39 +0900659 if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {
satok48e432c2010-12-06 17:38:58 +0900660 return false;
661 }
662
663 // If inputIndex is greater than mInputLength, that means there are no proximity chars.
satokfd16f1d2011-01-27 16:25:16 +0900664 // TODO: Check if this can be isSameAsUserTypedLength only.
665 if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {
satok48e432c2010-12-06 17:38:58 +0900666 *newTraverseAllNodes = true;
667 }
668 // get the count of nodes and increment childAddress.
669 *newCount = Dictionary::getCount(DICT, &childPosition);
670 *newChildPosition = childPosition;
671 if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);
672 return needsToTraverseChildrenNodes;
673}
674
satokaee09dc2010-12-09 19:21:51 +0900675inline int UnigramDictionary::getBestWordFreq(const int startInputIndex, const int inputLength,
676 unsigned short *word) {
satok662fe692010-12-08 17:05:39 +0900677 int pos = ROOT_POS;
678 int count = Dictionary::getCount(DICT, &pos);
satokaee09dc2010-12-09 19:21:51 +0900679 int maxFreq = 0;
680 int depth = 0;
681 unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];
satok662fe692010-12-08 17:05:39 +0900682 bool terminal = false;
683
satokaee09dc2010-12-09 19:21:51 +0900684 mStackChildCount[0] = count;
685 mStackSiblingPos[0] = pos;
686
687 while (depth >= 0) {
688 if (mStackChildCount[depth] > 0) {
689 --mStackChildCount[depth];
690 int firstChildPos;
691 int newFreq;
692 int siblingPos = mStackSiblingPos[depth];
693 const bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(siblingPos,
694 startInputIndex, depth, newWord, &firstChildPos, &count, &terminal, &newFreq,
695 &siblingPos);
696 mStackSiblingPos[depth] = siblingPos;
697 if (depth == (inputLength - 1)) {
698 // Traverse sibling node
699 if (terminal) {
700 if (newFreq > maxFreq) {
701 for (int i = 0; i < inputLength; ++i) word[i] = newWord[i];
702 if (DEBUG_DICT && DEBUG_NODE) {
703 char s[inputLength + 1];
704 for (int i = 0; i < inputLength; ++i) s[i] = word[i];
705 s[inputLength] = 0;
706 LOGI("New missing space word found: %d > %d (%s), %d, %d",
707 newFreq, maxFreq, s, inputLength, depth);
708 }
709 maxFreq = newFreq;
710 }
711 }
712 } else if (needsToTraverseChildrenNodes) {
713 // Traverse children nodes
714 ++depth;
715 mStackChildCount[depth] = count;
716 mStackSiblingPos[depth] = firstChildPos;
717 }
718 } else {
719 // Traverse parent node
720 --depth;
satok662fe692010-12-08 17:05:39 +0900721 }
722 }
satokaee09dc2010-12-09 19:21:51 +0900723
724 word[inputLength] = 0;
725 return maxFreq;
satok662fe692010-12-08 17:05:39 +0900726}
727
728inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,
satokaee09dc2010-12-09 19:21:51 +0900729 const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
730 int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
731 const int inputIndex = startInputIndex + depth;
satok8fbd5522011-02-22 17:28:55 +0900732 const int *currentChars = getInputCharsAt(inputIndex);
satok662fe692010-12-08 17:05:39 +0900733 unsigned short c;
satokaee09dc2010-12-09 19:21:51 +0900734 *siblingPos = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, firstChildPos, &c,
735 newChildPosition, newTerminal, newFreq);
736 const unsigned int inputC = currentChars[0];
737 if (DEBUG_DICT) assert(inputC <= U_SHORT_MAX);
Jean Chalardf5f834a2011-02-22 15:12:46 +0900738 const unsigned short baseLowerC = toBaseLowerCase(c);
739 const bool matched = (inputC == baseLowerC || inputC == c);
satokaee09dc2010-12-09 19:21:51 +0900740 const bool hasChild = *newChildPosition != 0;
741 if (matched) {
742 word[depth] = c;
743 if (DEBUG_DICT && DEBUG_NODE) {
744 LOGI("Node(%c, %c)<%d>, %d, %d", inputC, c, matched, hasChild, *newFreq);
745 if (*newTerminal) LOGI("Terminal %d", *newFreq);
satok662fe692010-12-08 17:05:39 +0900746 }
satokaee09dc2010-12-09 19:21:51 +0900747 if (hasChild) {
748 *newCount = Dictionary::getCount(DICT, newChildPosition);
749 return true;
750 } else {
751 return false;
752 }
753 } else {
754 // If this node is not user typed character, this method treats this word as unmatched.
755 // Thus newTerminal shouldn't be true.
756 *newTerminal = false;
757 return false;
satok662fe692010-12-08 17:05:39 +0900758 }
satok662fe692010-12-08 17:05:39 +0900759}
satok30088252010-12-01 21:22:15 +0900760} // namespace latinime