blob: 02843abc23b98e3a8a6f8b2503480fbfce5c7e49 [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
24#include <unicode/uchar.h>
25
26//#define USE_ASSET_MANAGER
27
28#ifdef USE_ASSET_MANAGER
29#include <utils/AssetManager.h>
30#include <utils/Asset.h>
31#endif
32
33#include "dictionary.h"
34#include "basechars.h"
35
36#define DEBUG_DICT 0
37
38namespace latinime {
39
40Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
41{
42 mDict = (unsigned char*) dict;
43 mTypedLetterMultiplier = typedLetterMultiplier;
44 mFullWordMultiplier = fullWordMultiplier;
45}
46
47Dictionary::~Dictionary()
48{
49}
50
51int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasanic3df2d62009-06-04 12:20:45 -070052 int maxWordLength, int maxWords, int maxAlternatives, int skipPos)
The Android Open Source Project923bf412009-03-13 15:11:42 -070053{
The Android Open Source Project923bf412009-03-13 15:11:42 -070054 mFrequencies = frequencies;
55 mOutputChars = outWords;
56 mInputCodes = codes;
57 mInputLength = codesSize;
58 mMaxAlternatives = maxAlternatives;
59 mMaxWordLength = maxWordLength;
60 mMaxWords = maxWords;
61 mWords = 0;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070062 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070063 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
The Android Open Source Project923bf412009-03-13 15:11:42 -070064
Amith Yamasani322dc3d2009-07-15 18:30:47 -070065 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
The Android Open Source Project923bf412009-03-13 15:11:42 -070066
67 if (DEBUG_DICT) LOGI("Returning %d words", mWords);
68 return mWords;
69}
70
71unsigned short
72Dictionary::getChar(int *pos)
73{
74 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
75 // If the code is 255, then actual 16 bit code follows (in big endian)
76 if (ch == 0xFF) {
77 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
78 (*pos) += 2;
79 }
80 return ch;
81}
82
83int
84Dictionary::getAddress(int *pos)
85{
86 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -070087 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
88 *pos += 1;
89 } else {
90 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
91 address += (mDict[*pos + 1] & 0xFF) << 8;
92 address += (mDict[*pos + 2] & 0xFF);
93 *pos += 3;
94 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070095 return address;
96}
97
98int
99Dictionary::wideStrLen(unsigned short *str)
100{
101 if (!str) return 0;
102 unsigned short *end = str;
103 while (*end)
104 end++;
105 return end - str;
106}
107
108bool
109Dictionary::addWord(unsigned short *word, int length, int frequency)
110{
111 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700112 if (DEBUG_DICT) {
113 char s[length + 1];
114 for (int i = 0; i <= length; i++) s[i] = word[i];
115 LOGI("Found word = %s, freq = %d : \n", s, frequency);
116 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700117
118 // Find the right insertion point
119 int insertAt = 0;
120 while (insertAt < mMaxWords) {
121 if (frequency > mFrequencies[insertAt]
122 || (mFrequencies[insertAt] == frequency
123 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
124 break;
125 }
126 insertAt++;
127 }
128 if (insertAt < mMaxWords) {
129 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
130 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
131 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
132 mFrequencies[insertAt] = frequency;
133 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
134 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
135 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
136 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
137 while (length--) {
138 *dest++ = *word++;
139 }
140 *dest = 0; // NULL terminate
141 // Update the word count
142 if (insertAt + 1 > mWords) mWords = insertAt + 1;
143 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
144 return true;
145 }
146 return false;
147}
148
149unsigned short
150Dictionary::toLowerCase(unsigned short c, const int depth) {
151 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
152 c = BASE_CHARS[c];
153 }
154 if (depth == 0) {
155 if (c >='A' && c <= 'Z') {
156 c |= 32;
157 } else if (c > 127) {
158 c = u_tolower(c);
159 }
160 }
161 return c;
162}
163
164bool
165Dictionary::sameAsTyped(unsigned short *word, int length)
166{
167 if (length != mInputLength) {
168 return false;
169 }
170 int *inputCodes = mInputCodes;
171 while (length--) {
172 if ((unsigned int) *inputCodes != (unsigned int) *word) {
173 return false;
174 }
175 inputCodes += mMaxAlternatives;
176 word++;
177 }
178 return true;
179}
180
181static char QUOTE = '\'';
182
183void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700184Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
185 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700186{
187 // Optimization: Prune out words that are too long compared to how much was typed.
188 if (depth > maxDepth) {
189 return;
190 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700191 if (diffs > mMaxEditDistance) {
192 return;
193 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700194 int count = getCount(&pos);
195 int *currentChars = NULL;
196 if (mInputLength <= inputIndex) {
197 completion = true;
198 } else {
199 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
200 }
201
202 for (int i = 0; i < count; i++) {
203 unsigned short c = getChar(&pos);
204 unsigned short lowerC = toLowerCase(c, depth);
205 bool terminal = getTerminal(&pos);
206 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700207 int freq = 1;
208 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700209 // If we are only doing completions, no need to look at the typed characters.
210 if (completion) {
211 mWord[depth] = c;
212 if (terminal) {
213 addWord(mWord, depth + 1, freq * snr);
214 }
215 if (childrenAddress != 0) {
216 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700217 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700218 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700219 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
220 // Skip the ' or other letter and continue deeper
221 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700222 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700223 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700224 }
225 } else {
226 int j = 0;
227 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700228 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700229 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700230 mWord[depth] = c;
231 if (mInputLength == inputIndex + 1) {
232 if (terminal) {
233 if (//INCLUDE_TYPED_WORD_IF_VALID ||
234 !sameAsTyped(mWord, depth + 1)) {
235 addWord(mWord, depth + 1,
236 (freq * snr * addedWeight * mFullWordMultiplier));
237 }
238 }
239 if (childrenAddress != 0) {
240 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700241 maxDepth, true, snr * addedWeight, inputIndex + 1,
242 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700243 }
244 } else if (childrenAddress != 0) {
245 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700246 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700247 }
248 }
249 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700250 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700251 }
252 }
253 }
254}
255
256bool
257Dictionary::isValidWord(unsigned short *word, int length)
258{
259 return isValidWordRec(0, word, 0, length);
260}
261
262bool
263Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
264 int count = getCount(&pos);
265 unsigned short currentChar = (unsigned short) word[offset];
266 for (int j = 0; j < count; j++) {
267 unsigned short c = getChar(&pos);
268 int terminal = getTerminal(&pos);
269 int childPos = getAddress(&pos);
270 if (c == currentChar) {
271 if (offset == length - 1) {
272 if (terminal) {
273 return true;
274 }
275 } else {
276 if (childPos != 0) {
277 if (isValidWordRec(childPos, word, offset + 1, length)) {
278 return true;
279 }
280 }
281 }
282 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700283 if (terminal) {
284 getFreq(&pos);
285 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700286 // There could be two instances of each alphabet - upper and lower case. So continue
287 // looking ...
288 }
289 return false;
290}
291
292
293} // namespace latinime