blob: 3f196a9ed7d52e39147b1f08d16d301ff3c488d5 [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
The Android Open Source Project923bf412009-03-13 15:11:42 -070024//#define USE_ASSET_MANAGER
25
26#ifdef USE_ASSET_MANAGER
27#include <utils/AssetManager.h>
28#include <utils/Asset.h>
29#endif
30
31#include "dictionary.h"
32#include "basechars.h"
Ken Wakasa707505e2010-04-21 02:35:47 +090033#include "char_utils.h"
The Android Open Source Project923bf412009-03-13 15:11:42 -070034
35#define DEBUG_DICT 0
36
37namespace latinime {
38
39Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
40{
41 mDict = (unsigned char*) dict;
42 mTypedLetterMultiplier = typedLetterMultiplier;
43 mFullWordMultiplier = fullWordMultiplier;
44}
45
46Dictionary::~Dictionary()
47{
48}
49
50int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasani1b62ff12010-02-05 14:07:04 -080051 int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
52 int *nextLetters, int nextLettersSize)
The Android Open Source Project923bf412009-03-13 15:11:42 -070053{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070054 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070055 mFrequencies = frequencies;
56 mOutputChars = outWords;
57 mInputCodes = codes;
58 mInputLength = codesSize;
59 mMaxAlternatives = maxAlternatives;
60 mMaxWordLength = maxWordLength;
61 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070062 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070063 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
Amith Yamasani1b62ff12010-02-05 14:07:04 -080064 mNextLettersFrequencies = nextLetters;
65 mNextLettersSize = nextLettersSize;
The Android Open Source Project923bf412009-03-13 15:11:42 -070066
Amith Yamasani322dc3d2009-07-15 18:30:47 -070067 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
The Android Open Source Project923bf412009-03-13 15:11:42 -070068
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070069 // Get the word count
70 suggWords = 0;
71 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
72 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
Amith Yamasani1b62ff12010-02-05 14:07:04 -080073
74 if (DEBUG_DICT) {
75 LOGI("Next letters: ");
76 for (int k = 0; k < nextLettersSize; k++) {
77 if (mNextLettersFrequencies[k] > 0) {
78 LOGI("%c = %d,", k, mNextLettersFrequencies[k]);
79 }
80 }
81 LOGI("\n");
82 }
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070083 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070084}
85
Amith Yamasani1b62ff12010-02-05 14:07:04 -080086void
87Dictionary::registerNextLetter(unsigned short c)
88{
89 if (c < mNextLettersSize) {
90 mNextLettersFrequencies[c]++;
91 }
92}
93
The Android Open Source Project923bf412009-03-13 15:11:42 -070094unsigned short
95Dictionary::getChar(int *pos)
96{
97 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
98 // If the code is 255, then actual 16 bit code follows (in big endian)
99 if (ch == 0xFF) {
100 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
101 (*pos) += 2;
102 }
103 return ch;
104}
105
106int
107Dictionary::getAddress(int *pos)
108{
109 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700110 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
111 *pos += 1;
112 } else {
113 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
114 address += (mDict[*pos + 1] & 0xFF) << 8;
115 address += (mDict[*pos + 2] & 0xFF);
116 *pos += 3;
117 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700118 return address;
119}
120
121int
122Dictionary::wideStrLen(unsigned short *str)
123{
124 if (!str) return 0;
125 unsigned short *end = str;
126 while (*end)
127 end++;
128 return end - str;
129}
130
131bool
132Dictionary::addWord(unsigned short *word, int length, int frequency)
133{
134 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700135 if (DEBUG_DICT) {
136 char s[length + 1];
137 for (int i = 0; i <= length; i++) s[i] = word[i];
138 LOGI("Found word = %s, freq = %d : \n", s, frequency);
139 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700140
141 // Find the right insertion point
142 int insertAt = 0;
143 while (insertAt < mMaxWords) {
144 if (frequency > mFrequencies[insertAt]
145 || (mFrequencies[insertAt] == frequency
146 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
147 break;
148 }
149 insertAt++;
150 }
151 if (insertAt < mMaxWords) {
152 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
153 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
154 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
155 mFrequencies[insertAt] = frequency;
156 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
157 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
158 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
159 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
160 while (length--) {
161 *dest++ = *word++;
162 }
163 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700164 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
165 return true;
166 }
167 return false;
168}
169
170unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700171Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700172 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
173 c = BASE_CHARS[c];
174 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700175 if (c >='A' && c <= 'Z') {
176 c |= 32;
177 } else if (c > 127) {
Ken Wakasa707505e2010-04-21 02:35:47 +0900178 c = latin_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700179 }
180 return c;
181}
182
183bool
184Dictionary::sameAsTyped(unsigned short *word, int length)
185{
186 if (length != mInputLength) {
187 return false;
188 }
189 int *inputCodes = mInputCodes;
190 while (length--) {
191 if ((unsigned int) *inputCodes != (unsigned int) *word) {
192 return false;
193 }
194 inputCodes += mMaxAlternatives;
195 word++;
196 }
197 return true;
198}
199
200static char QUOTE = '\'';
201
202void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700203Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
204 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700205{
206 // Optimization: Prune out words that are too long compared to how much was typed.
207 if (depth > maxDepth) {
208 return;
209 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700210 if (diffs > mMaxEditDistance) {
211 return;
212 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700213 int count = getCount(&pos);
214 int *currentChars = NULL;
215 if (mInputLength <= inputIndex) {
216 completion = true;
217 } else {
218 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
219 }
220
221 for (int i = 0; i < count; i++) {
222 unsigned short c = getChar(&pos);
Amith Yamasanif1150882009-08-07 14:04:24 -0700223 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700224 bool terminal = getTerminal(&pos);
225 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700226 int freq = 1;
227 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700228 // If we are only doing completions, no need to look at the typed characters.
229 if (completion) {
230 mWord[depth] = c;
231 if (terminal) {
232 addWord(mWord, depth + 1, freq * snr);
Amith Yamasani1b62ff12010-02-05 14:07:04 -0800233 if (depth >= mInputLength && mSkipPos < 0) {
234 registerNextLetter(mWord[mInputLength]);
235 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700236 }
237 if (childrenAddress != 0) {
238 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700239 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700240 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700241 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
242 // Skip the ' or other letter and continue deeper
243 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700244 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700245 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700246 }
247 } else {
248 int j = 0;
249 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700250 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700251 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700252 mWord[depth] = c;
253 if (mInputLength == inputIndex + 1) {
254 if (terminal) {
255 if (//INCLUDE_TYPED_WORD_IF_VALID ||
256 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700257 int finalFreq = freq * snr * addedWeight;
258 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
259 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700260 }
261 }
262 if (childrenAddress != 0) {
263 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700264 maxDepth, true, snr * addedWeight, inputIndex + 1,
265 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700266 }
267 } else if (childrenAddress != 0) {
268 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700269 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700270 }
271 }
272 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700273 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700274 }
275 }
276 }
277}
278
279bool
280Dictionary::isValidWord(unsigned short *word, int length)
281{
282 return isValidWordRec(0, word, 0, length);
283}
284
285bool
286Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
287 int count = getCount(&pos);
288 unsigned short currentChar = (unsigned short) word[offset];
289 for (int j = 0; j < count; j++) {
290 unsigned short c = getChar(&pos);
291 int terminal = getTerminal(&pos);
292 int childPos = getAddress(&pos);
293 if (c == currentChar) {
294 if (offset == length - 1) {
295 if (terminal) {
296 return true;
297 }
298 } else {
299 if (childPos != 0) {
300 if (isValidWordRec(childPos, word, offset + 1, length)) {
301 return true;
302 }
303 }
304 }
305 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700306 if (terminal) {
307 getFreq(&pos);
308 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700309 // There could be two instances of each alphabet - upper and lower case. So continue
310 // looking ...
311 }
312 return false;
313}
314
315
316} // namespace latinime