blob: 6e6f4418218813cc224d936ec583251c59636722 [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
24#include <unicode/uchar.h>
25
26//#define USE_ASSET_MANAGER
27
28#ifdef USE_ASSET_MANAGER
29#include <utils/AssetManager.h>
30#include <utils/Asset.h>
31#endif
32
33#include "dictionary.h"
34#include "basechars.h"
35
36#define DEBUG_DICT 0
37
38namespace latinime {
39
40Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
41{
42 mDict = (unsigned char*) dict;
43 mTypedLetterMultiplier = typedLetterMultiplier;
44 mFullWordMultiplier = fullWordMultiplier;
45}
46
47Dictionary::~Dictionary()
48{
49}
50
51int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasani1b62ff12010-02-05 14:07:04 -080052 int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
53 int *nextLetters, int nextLettersSize)
The Android Open Source Project923bf412009-03-13 15:11:42 -070054{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070055 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070056 mFrequencies = frequencies;
57 mOutputChars = outWords;
58 mInputCodes = codes;
59 mInputLength = codesSize;
60 mMaxAlternatives = maxAlternatives;
61 mMaxWordLength = maxWordLength;
62 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070063 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070064 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
Amith Yamasani1b62ff12010-02-05 14:07:04 -080065 mNextLettersFrequencies = nextLetters;
66 mNextLettersSize = nextLettersSize;
The Android Open Source Project923bf412009-03-13 15:11:42 -070067
Amith Yamasani322dc3d2009-07-15 18:30:47 -070068 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
The Android Open Source Project923bf412009-03-13 15:11:42 -070069
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070070 // Get the word count
71 suggWords = 0;
72 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
73 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
Amith Yamasani1b62ff12010-02-05 14:07:04 -080074
75 if (DEBUG_DICT) {
76 LOGI("Next letters: ");
77 for (int k = 0; k < nextLettersSize; k++) {
78 if (mNextLettersFrequencies[k] > 0) {
79 LOGI("%c = %d,", k, mNextLettersFrequencies[k]);
80 }
81 }
82 LOGI("\n");
83 }
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070084 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070085}
86
Amith Yamasani1b62ff12010-02-05 14:07:04 -080087void
88Dictionary::registerNextLetter(unsigned short c)
89{
90 if (c < mNextLettersSize) {
91 mNextLettersFrequencies[c]++;
92 }
93}
94
The Android Open Source Project923bf412009-03-13 15:11:42 -070095unsigned short
96Dictionary::getChar(int *pos)
97{
98 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
99 // If the code is 255, then actual 16 bit code follows (in big endian)
100 if (ch == 0xFF) {
101 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
102 (*pos) += 2;
103 }
104 return ch;
105}
106
107int
108Dictionary::getAddress(int *pos)
109{
110 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700111 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
112 *pos += 1;
113 } else {
114 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
115 address += (mDict[*pos + 1] & 0xFF) << 8;
116 address += (mDict[*pos + 2] & 0xFF);
117 *pos += 3;
118 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700119 return address;
120}
121
122int
123Dictionary::wideStrLen(unsigned short *str)
124{
125 if (!str) return 0;
126 unsigned short *end = str;
127 while (*end)
128 end++;
129 return end - str;
130}
131
132bool
133Dictionary::addWord(unsigned short *word, int length, int frequency)
134{
135 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700136 if (DEBUG_DICT) {
137 char s[length + 1];
138 for (int i = 0; i <= length; i++) s[i] = word[i];
139 LOGI("Found word = %s, freq = %d : \n", s, frequency);
140 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700141
142 // Find the right insertion point
143 int insertAt = 0;
144 while (insertAt < mMaxWords) {
145 if (frequency > mFrequencies[insertAt]
146 || (mFrequencies[insertAt] == frequency
147 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
148 break;
149 }
150 insertAt++;
151 }
152 if (insertAt < mMaxWords) {
153 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
154 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
155 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
156 mFrequencies[insertAt] = frequency;
157 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
158 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
159 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
160 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
161 while (length--) {
162 *dest++ = *word++;
163 }
164 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700165 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
166 return true;
167 }
168 return false;
169}
170
171unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700172Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700173 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
174 c = BASE_CHARS[c];
175 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700176 if (c >='A' && c <= 'Z') {
177 c |= 32;
178 } else if (c > 127) {
179 c = u_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700180 }
181 return c;
182}
183
184bool
185Dictionary::sameAsTyped(unsigned short *word, int length)
186{
187 if (length != mInputLength) {
188 return false;
189 }
190 int *inputCodes = mInputCodes;
191 while (length--) {
192 if ((unsigned int) *inputCodes != (unsigned int) *word) {
193 return false;
194 }
195 inputCodes += mMaxAlternatives;
196 word++;
197 }
198 return true;
199}
200
201static char QUOTE = '\'';
202
203void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700204Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
205 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700206{
207 // Optimization: Prune out words that are too long compared to how much was typed.
208 if (depth > maxDepth) {
209 return;
210 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700211 if (diffs > mMaxEditDistance) {
212 return;
213 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700214 int count = getCount(&pos);
215 int *currentChars = NULL;
216 if (mInputLength <= inputIndex) {
217 completion = true;
218 } else {
219 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
220 }
221
222 for (int i = 0; i < count; i++) {
223 unsigned short c = getChar(&pos);
Amith Yamasanif1150882009-08-07 14:04:24 -0700224 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700225 bool terminal = getTerminal(&pos);
226 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700227 int freq = 1;
228 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700229 // If we are only doing completions, no need to look at the typed characters.
230 if (completion) {
231 mWord[depth] = c;
232 if (terminal) {
233 addWord(mWord, depth + 1, freq * snr);
Amith Yamasani1b62ff12010-02-05 14:07:04 -0800234 if (depth >= mInputLength && mSkipPos < 0) {
235 registerNextLetter(mWord[mInputLength]);
236 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700237 }
238 if (childrenAddress != 0) {
239 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700240 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700241 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700242 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
243 // Skip the ' or other letter and continue deeper
244 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700245 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700246 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700247 }
248 } else {
249 int j = 0;
250 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700251 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700252 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700253 mWord[depth] = c;
254 if (mInputLength == inputIndex + 1) {
255 if (terminal) {
256 if (//INCLUDE_TYPED_WORD_IF_VALID ||
257 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700258 int finalFreq = freq * snr * addedWeight;
259 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
260 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700261 }
262 }
263 if (childrenAddress != 0) {
264 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700265 maxDepth, true, snr * addedWeight, inputIndex + 1,
266 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700267 }
268 } else if (childrenAddress != 0) {
269 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700270 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700271 }
272 }
273 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700274 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700275 }
276 }
277 }
278}
279
280bool
281Dictionary::isValidWord(unsigned short *word, int length)
282{
283 return isValidWordRec(0, word, 0, length);
284}
285
286bool
287Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
288 int count = getCount(&pos);
289 unsigned short currentChar = (unsigned short) word[offset];
290 for (int j = 0; j < count; j++) {
291 unsigned short c = getChar(&pos);
292 int terminal = getTerminal(&pos);
293 int childPos = getAddress(&pos);
294 if (c == currentChar) {
295 if (offset == length - 1) {
296 if (terminal) {
297 return true;
298 }
299 } else {
300 if (childPos != 0) {
301 if (isValidWordRec(childPos, word, offset + 1, length)) {
302 return true;
303 }
304 }
305 }
306 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700307 if (terminal) {
308 getFreq(&pos);
309 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700310 // There could be two instances of each alphabet - upper and lower case. So continue
311 // looking ...
312 }
313 return false;
314}
315
316
317} // namespace latinime