blob: 306aff52702070538bd7358fc97e72f6969b1b0a [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
24#include <unicode/uchar.h>
25
26//#define USE_ASSET_MANAGER
27
28#ifdef USE_ASSET_MANAGER
29#include <utils/AssetManager.h>
30#include <utils/Asset.h>
31#endif
32
33#include "dictionary.h"
34#include "basechars.h"
35
36#define DEBUG_DICT 0
37
38namespace latinime {
39
40Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
41{
42 mDict = (unsigned char*) dict;
43 mTypedLetterMultiplier = typedLetterMultiplier;
44 mFullWordMultiplier = fullWordMultiplier;
45}
46
47Dictionary::~Dictionary()
48{
49}
50
51int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasanic3df2d62009-06-04 12:20:45 -070052 int maxWordLength, int maxWords, int maxAlternatives, int skipPos)
The Android Open Source Project923bf412009-03-13 15:11:42 -070053{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070054 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070055 mFrequencies = frequencies;
56 mOutputChars = outWords;
57 mInputCodes = codes;
58 mInputLength = codesSize;
59 mMaxAlternatives = maxAlternatives;
60 mMaxWordLength = maxWordLength;
61 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070062 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070063 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
The Android Open Source Project923bf412009-03-13 15:11:42 -070064
Amith Yamasani322dc3d2009-07-15 18:30:47 -070065 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
The Android Open Source Project923bf412009-03-13 15:11:42 -070066
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070067 // Get the word count
68 suggWords = 0;
69 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
70 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
71 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070072}
73
74unsigned short
75Dictionary::getChar(int *pos)
76{
77 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
78 // If the code is 255, then actual 16 bit code follows (in big endian)
79 if (ch == 0xFF) {
80 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
81 (*pos) += 2;
82 }
83 return ch;
84}
85
86int
87Dictionary::getAddress(int *pos)
88{
89 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -070090 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
91 *pos += 1;
92 } else {
93 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
94 address += (mDict[*pos + 1] & 0xFF) << 8;
95 address += (mDict[*pos + 2] & 0xFF);
96 *pos += 3;
97 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070098 return address;
99}
100
101int
102Dictionary::wideStrLen(unsigned short *str)
103{
104 if (!str) return 0;
105 unsigned short *end = str;
106 while (*end)
107 end++;
108 return end - str;
109}
110
111bool
112Dictionary::addWord(unsigned short *word, int length, int frequency)
113{
114 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700115 if (DEBUG_DICT) {
116 char s[length + 1];
117 for (int i = 0; i <= length; i++) s[i] = word[i];
118 LOGI("Found word = %s, freq = %d : \n", s, frequency);
119 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700120
121 // Find the right insertion point
122 int insertAt = 0;
123 while (insertAt < mMaxWords) {
124 if (frequency > mFrequencies[insertAt]
125 || (mFrequencies[insertAt] == frequency
126 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
127 break;
128 }
129 insertAt++;
130 }
131 if (insertAt < mMaxWords) {
132 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
133 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
134 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
135 mFrequencies[insertAt] = frequency;
136 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
137 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
138 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
139 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
140 while (length--) {
141 *dest++ = *word++;
142 }
143 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700144 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
145 return true;
146 }
147 return false;
148}
149
150unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700151Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700152 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
153 c = BASE_CHARS[c];
154 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700155 if (c >='A' && c <= 'Z') {
156 c |= 32;
157 } else if (c > 127) {
158 c = u_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700159 }
160 return c;
161}
162
163bool
164Dictionary::sameAsTyped(unsigned short *word, int length)
165{
166 if (length != mInputLength) {
167 return false;
168 }
169 int *inputCodes = mInputCodes;
170 while (length--) {
171 if ((unsigned int) *inputCodes != (unsigned int) *word) {
172 return false;
173 }
174 inputCodes += mMaxAlternatives;
175 word++;
176 }
177 return true;
178}
179
180static char QUOTE = '\'';
181
182void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700183Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
184 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700185{
186 // Optimization: Prune out words that are too long compared to how much was typed.
187 if (depth > maxDepth) {
188 return;
189 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700190 if (diffs > mMaxEditDistance) {
191 return;
192 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700193 int count = getCount(&pos);
194 int *currentChars = NULL;
195 if (mInputLength <= inputIndex) {
196 completion = true;
197 } else {
198 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
199 }
200
201 for (int i = 0; i < count; i++) {
202 unsigned short c = getChar(&pos);
Amith Yamasanif1150882009-08-07 14:04:24 -0700203 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700204 bool terminal = getTerminal(&pos);
205 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700206 int freq = 1;
207 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700208 // If we are only doing completions, no need to look at the typed characters.
209 if (completion) {
210 mWord[depth] = c;
211 if (terminal) {
212 addWord(mWord, depth + 1, freq * snr);
213 }
214 if (childrenAddress != 0) {
215 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700216 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700217 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700218 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
219 // Skip the ' or other letter and continue deeper
220 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700221 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700222 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700223 }
224 } else {
225 int j = 0;
226 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700227 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700228 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700229 mWord[depth] = c;
230 if (mInputLength == inputIndex + 1) {
231 if (terminal) {
232 if (//INCLUDE_TYPED_WORD_IF_VALID ||
233 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700234 int finalFreq = freq * snr * addedWeight;
235 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
236 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700237 }
238 }
239 if (childrenAddress != 0) {
240 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700241 maxDepth, true, snr * addedWeight, inputIndex + 1,
242 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700243 }
244 } else if (childrenAddress != 0) {
245 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700246 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700247 }
248 }
249 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700250 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700251 }
252 }
253 }
254}
255
256bool
257Dictionary::isValidWord(unsigned short *word, int length)
258{
259 return isValidWordRec(0, word, 0, length);
260}
261
262bool
263Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
264 int count = getCount(&pos);
265 unsigned short currentChar = (unsigned short) word[offset];
266 for (int j = 0; j < count; j++) {
267 unsigned short c = getChar(&pos);
268 int terminal = getTerminal(&pos);
269 int childPos = getAddress(&pos);
270 if (c == currentChar) {
271 if (offset == length - 1) {
272 if (terminal) {
273 return true;
274 }
275 } else {
276 if (childPos != 0) {
277 if (isValidWordRec(childPos, word, offset + 1, length)) {
278 return true;
279 }
280 }
281 }
282 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700283 if (terminal) {
284 getFreq(&pos);
285 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700286 // There could be two instances of each alphabet - upper and lower case. So continue
287 // looking ...
288 }
289 return false;
290}
291
292
293} // namespace latinime