blob: b37f4c9265de9be74cd7472b58f6f994b4659b4a [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
22#include <cutils/log.h>
23
24#include <unicode/uchar.h>
25
26//#define USE_ASSET_MANAGER
27
28#ifdef USE_ASSET_MANAGER
29#include <utils/AssetManager.h>
30#include <utils/Asset.h>
31#endif
32
33#include "dictionary.h"
34#include "basechars.h"
35
36#define DEBUG_DICT 0
37
38namespace latinime {
39
40Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
41{
42 mDict = (unsigned char*) dict;
43 mTypedLetterMultiplier = typedLetterMultiplier;
44 mFullWordMultiplier = fullWordMultiplier;
45}
46
47Dictionary::~Dictionary()
48{
49}
50
51int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
52 int maxWordLength, int maxWords, int maxAlternatives)
53{
54 memset(frequencies, 0, maxWords * sizeof(*frequencies));
55 memset(outWords, 0, maxWords * maxWordLength * sizeof(*outWords));
56
57 mFrequencies = frequencies;
58 mOutputChars = outWords;
59 mInputCodes = codes;
60 mInputLength = codesSize;
61 mMaxAlternatives = maxAlternatives;
62 mMaxWordLength = maxWordLength;
63 mMaxWords = maxWords;
64 mWords = 0;
65
66 getWordsRec(0, 0, mInputLength * 3, false, 1, 0);
67
68 if (DEBUG_DICT) LOGI("Returning %d words", mWords);
69 return mWords;
70}
71
72unsigned short
73Dictionary::getChar(int *pos)
74{
75 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
76 // If the code is 255, then actual 16 bit code follows (in big endian)
77 if (ch == 0xFF) {
78 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
79 (*pos) += 2;
80 }
81 return ch;
82}
83
84int
85Dictionary::getAddress(int *pos)
86{
87 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -070088 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
89 *pos += 1;
90 } else {
91 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
92 address += (mDict[*pos + 1] & 0xFF) << 8;
93 address += (mDict[*pos + 2] & 0xFF);
94 *pos += 3;
95 }
The Android Open Source Project923bf412009-03-13 15:11:42 -070096 return address;
97}
98
99int
100Dictionary::wideStrLen(unsigned short *str)
101{
102 if (!str) return 0;
103 unsigned short *end = str;
104 while (*end)
105 end++;
106 return end - str;
107}
108
109bool
110Dictionary::addWord(unsigned short *word, int length, int frequency)
111{
112 word[length] = 0;
113 if (DEBUG_DICT) LOGI("Found word = %s, freq = %d : \n", word, frequency);
114
115 // Find the right insertion point
116 int insertAt = 0;
117 while (insertAt < mMaxWords) {
118 if (frequency > mFrequencies[insertAt]
119 || (mFrequencies[insertAt] == frequency
120 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
121 break;
122 }
123 insertAt++;
124 }
125 if (insertAt < mMaxWords) {
126 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
127 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
128 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
129 mFrequencies[insertAt] = frequency;
130 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
131 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
132 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
133 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
134 while (length--) {
135 *dest++ = *word++;
136 }
137 *dest = 0; // NULL terminate
138 // Update the word count
139 if (insertAt + 1 > mWords) mWords = insertAt + 1;
140 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
141 return true;
142 }
143 return false;
144}
145
146unsigned short
147Dictionary::toLowerCase(unsigned short c, const int depth) {
148 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
149 c = BASE_CHARS[c];
150 }
151 if (depth == 0) {
152 if (c >='A' && c <= 'Z') {
153 c |= 32;
154 } else if (c > 127) {
155 c = u_tolower(c);
156 }
157 }
158 return c;
159}
160
161bool
162Dictionary::sameAsTyped(unsigned short *word, int length)
163{
164 if (length != mInputLength) {
165 return false;
166 }
167 int *inputCodes = mInputCodes;
168 while (length--) {
169 if ((unsigned int) *inputCodes != (unsigned int) *word) {
170 return false;
171 }
172 inputCodes += mMaxAlternatives;
173 word++;
174 }
175 return true;
176}
177
178static char QUOTE = '\'';
179
180void
181Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex)
182{
183 // Optimization: Prune out words that are too long compared to how much was typed.
184 if (depth > maxDepth) {
185 return;
186 }
187 int count = getCount(&pos);
188 int *currentChars = NULL;
189 if (mInputLength <= inputIndex) {
190 completion = true;
191 } else {
192 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
193 }
194
195 for (int i = 0; i < count; i++) {
196 unsigned short c = getChar(&pos);
197 unsigned short lowerC = toLowerCase(c, depth);
198 bool terminal = getTerminal(&pos);
199 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700200 int freq = 1;
201 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700202 // If we are only doing completions, no need to look at the typed characters.
203 if (completion) {
204 mWord[depth] = c;
205 if (terminal) {
206 addWord(mWord, depth + 1, freq * snr);
207 }
208 if (childrenAddress != 0) {
209 getWordsRec(childrenAddress, depth + 1, maxDepth,
210 completion, snr, inputIndex);
211 }
212 } else if (c == QUOTE && currentChars[0] != QUOTE) {
213 // Skip the ' and continue deeper
214 mWord[depth] = QUOTE;
215 if (childrenAddress != 0) {
216 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex);
217 }
218 } else {
219 int j = 0;
220 while (currentChars[j] > 0) {
221 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
222 if (currentChars[j] == lowerC || currentChars[j] == c) {
223 mWord[depth] = c;
224 if (mInputLength == inputIndex + 1) {
225 if (terminal) {
226 if (//INCLUDE_TYPED_WORD_IF_VALID ||
227 !sameAsTyped(mWord, depth + 1)) {
228 addWord(mWord, depth + 1,
229 (freq * snr * addedWeight * mFullWordMultiplier));
230 }
231 }
232 if (childrenAddress != 0) {
233 getWordsRec(childrenAddress, depth + 1,
234 maxDepth, true, snr * addedWeight, inputIndex + 1);
235 }
236 } else if (childrenAddress != 0) {
237 getWordsRec(childrenAddress, depth + 1, maxDepth,
238 false, snr * addedWeight, inputIndex + 1);
239 }
240 }
241 j++;
242 }
243 }
244 }
245}
246
247bool
248Dictionary::isValidWord(unsigned short *word, int length)
249{
250 return isValidWordRec(0, word, 0, length);
251}
252
253bool
254Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
255 int count = getCount(&pos);
256 unsigned short currentChar = (unsigned short) word[offset];
257 for (int j = 0; j < count; j++) {
258 unsigned short c = getChar(&pos);
259 int terminal = getTerminal(&pos);
260 int childPos = getAddress(&pos);
261 if (c == currentChar) {
262 if (offset == length - 1) {
263 if (terminal) {
264 return true;
265 }
266 } else {
267 if (childPos != 0) {
268 if (isValidWordRec(childPos, word, offset + 1, length)) {
269 return true;
270 }
271 }
272 }
273 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700274 if (terminal) {
275 getFreq(&pos);
276 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700277 // There could be two instances of each alphabet - upper and lower case. So continue
278 // looking ...
279 }
280 return false;
281}
282
283
284} // namespace latinime