blob: 6fe7f4a18780f413598b0c6bc353474ff0a55b0e [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
24#include <unicode/uchar.h>
25
26//#define USE_ASSET_MANAGER
27
28#ifdef USE_ASSET_MANAGER
29#include <utils/AssetManager.h>
30#include <utils/Asset.h>
31#endif
32
33#include "dictionary.h"
34#include "basechars.h"
35
36#define DEBUG_DICT 0
37
38namespace latinime {
39
40Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
41{
42 mDict = (unsigned char*) dict;
43 mTypedLetterMultiplier = typedLetterMultiplier;
44 mFullWordMultiplier = fullWordMultiplier;
45}
46
47Dictionary::~Dictionary()
48{
49}
50
51int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasanic3df2d62009-06-04 12:20:45 -070052 int maxWordLength, int maxWords, int maxAlternatives, int skipPos)
The Android Open Source Project923bf412009-03-13 15:11:42 -070053{
The Android Open Source Project923bf412009-03-13 15:11:42 -070054 mFrequencies = frequencies;
55 mOutputChars = outWords;
56 mInputCodes = codes;
57 mInputLength = codesSize;
58 mMaxAlternatives = maxAlternatives;
59 mMaxWordLength = maxWordLength;
60 mMaxWords = maxWords;
61 mWords = 0;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070062 mSkipPos = skipPos;
The Android Open Source Project923bf412009-03-13 15:11:42 -070063
64 getWordsRec(0, 0, mInputLength * 3, false, 1, 0);
65
66 if (DEBUG_DICT) LOGI("Returning %d words", mWords);
67 return mWords;
68}
69
70unsigned short
71Dictionary::getChar(int *pos)
72{
73 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
74 // If the code is 255, then actual 16 bit code follows (in big endian)
75 if (ch == 0xFF) {
76 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
77 (*pos) += 2;
78 }
79 return ch;
80}
81
82int
83Dictionary::getAddress(int *pos)
84{
85 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -070086 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
87 *pos += 1;
88 } else {
89 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
90 address += (mDict[*pos + 1] & 0xFF) << 8;
91 address += (mDict[*pos + 2] & 0xFF);
92 *pos += 3;
93 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070094 return address;
95}
96
97int
98Dictionary::wideStrLen(unsigned short *str)
99{
100 if (!str) return 0;
101 unsigned short *end = str;
102 while (*end)
103 end++;
104 return end - str;
105}
106
107bool
108Dictionary::addWord(unsigned short *word, int length, int frequency)
109{
110 word[length] = 0;
111 if (DEBUG_DICT) LOGI("Found word = %s, freq = %d : \n", word, frequency);
112
113 // Find the right insertion point
114 int insertAt = 0;
115 while (insertAt < mMaxWords) {
116 if (frequency > mFrequencies[insertAt]
117 || (mFrequencies[insertAt] == frequency
118 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
119 break;
120 }
121 insertAt++;
122 }
123 if (insertAt < mMaxWords) {
124 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
125 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
126 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
127 mFrequencies[insertAt] = frequency;
128 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
129 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
130 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
131 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
132 while (length--) {
133 *dest++ = *word++;
134 }
135 *dest = 0; // NULL terminate
136 // Update the word count
137 if (insertAt + 1 > mWords) mWords = insertAt + 1;
138 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
139 return true;
140 }
141 return false;
142}
143
144unsigned short
145Dictionary::toLowerCase(unsigned short c, const int depth) {
146 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
147 c = BASE_CHARS[c];
148 }
149 if (depth == 0) {
150 if (c >='A' && c <= 'Z') {
151 c |= 32;
152 } else if (c > 127) {
153 c = u_tolower(c);
154 }
155 }
156 return c;
157}
158
159bool
160Dictionary::sameAsTyped(unsigned short *word, int length)
161{
162 if (length != mInputLength) {
163 return false;
164 }
165 int *inputCodes = mInputCodes;
166 while (length--) {
167 if ((unsigned int) *inputCodes != (unsigned int) *word) {
168 return false;
169 }
170 inputCodes += mMaxAlternatives;
171 word++;
172 }
173 return true;
174}
175
176static char QUOTE = '\'';
177
178void
179Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex)
180{
181 // Optimization: Prune out words that are too long compared to how much was typed.
182 if (depth > maxDepth) {
183 return;
184 }
185 int count = getCount(&pos);
186 int *currentChars = NULL;
187 if (mInputLength <= inputIndex) {
188 completion = true;
189 } else {
190 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
191 }
192
193 for (int i = 0; i < count; i++) {
194 unsigned short c = getChar(&pos);
195 unsigned short lowerC = toLowerCase(c, depth);
196 bool terminal = getTerminal(&pos);
197 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700198 int freq = 1;
199 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700200 // If we are only doing completions, no need to look at the typed characters.
201 if (completion) {
202 mWord[depth] = c;
203 if (terminal) {
204 addWord(mWord, depth + 1, freq * snr);
205 }
206 if (childrenAddress != 0) {
207 getWordsRec(childrenAddress, depth + 1, maxDepth,
208 completion, snr, inputIndex);
209 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700210 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
211 // Skip the ' or other letter and continue deeper
212 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700213 if (childrenAddress != 0) {
214 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex);
215 }
216 } else {
217 int j = 0;
218 while (currentChars[j] > 0) {
219 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
220 if (currentChars[j] == lowerC || currentChars[j] == c) {
221 mWord[depth] = c;
222 if (mInputLength == inputIndex + 1) {
223 if (terminal) {
224 if (//INCLUDE_TYPED_WORD_IF_VALID ||
225 !sameAsTyped(mWord, depth + 1)) {
226 addWord(mWord, depth + 1,
227 (freq * snr * addedWeight * mFullWordMultiplier));
228 }
229 }
230 if (childrenAddress != 0) {
231 getWordsRec(childrenAddress, depth + 1,
232 maxDepth, true, snr * addedWeight, inputIndex + 1);
233 }
234 } else if (childrenAddress != 0) {
235 getWordsRec(childrenAddress, depth + 1, maxDepth,
236 false, snr * addedWeight, inputIndex + 1);
237 }
238 }
239 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700240 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700241 }
242 }
243 }
244}
245
246bool
247Dictionary::isValidWord(unsigned short *word, int length)
248{
249 return isValidWordRec(0, word, 0, length);
250}
251
252bool
253Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
254 int count = getCount(&pos);
255 unsigned short currentChar = (unsigned short) word[offset];
256 for (int j = 0; j < count; j++) {
257 unsigned short c = getChar(&pos);
258 int terminal = getTerminal(&pos);
259 int childPos = getAddress(&pos);
260 if (c == currentChar) {
261 if (offset == length - 1) {
262 if (terminal) {
263 return true;
264 }
265 } else {
266 if (childPos != 0) {
267 if (isValidWordRec(childPos, word, offset + 1, length)) {
268 return true;
269 }
270 }
271 }
272 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700273 if (terminal) {
274 getFreq(&pos);
275 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700276 // There could be two instances of each alphabet - upper and lower case. So continue
277 // looking ...
278 }
279 return false;
280}
281
282
283} // namespace latinime