blob: e75beb5b7d03e527963046e35e4921225f363c09 [file] [log] [blame]
The Android Open Source Project923bf412009-03-13 15:11:42 -07001/*
2**
3** Copyright 2009, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <fcntl.h>
20#include <sys/mman.h>
21#include <string.h>
Ken Wakasa826269c2010-04-27 10:28:14 +090022//#include <cutils/log.h>
23#define LOGI
The Android Open Source Project923bf412009-03-13 15:11:42 -070024
25#include "dictionary.h"
26#include "basechars.h"
Ken Wakasa707505e2010-04-21 02:35:47 +090027#include "char_utils.h"
The Android Open Source Project923bf412009-03-13 15:11:42 -070028
29#define DEBUG_DICT 0
30
31namespace latinime {
32
33Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
34{
35 mDict = (unsigned char*) dict;
36 mTypedLetterMultiplier = typedLetterMultiplier;
37 mFullWordMultiplier = fullWordMultiplier;
38}
39
40Dictionary::~Dictionary()
41{
42}
43
44int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
Amith Yamasani1b62ff12010-02-05 14:07:04 -080045 int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
46 int *nextLetters, int nextLettersSize)
The Android Open Source Project923bf412009-03-13 15:11:42 -070047{
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070048 int suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070049 mFrequencies = frequencies;
50 mOutputChars = outWords;
51 mInputCodes = codes;
52 mInputLength = codesSize;
53 mMaxAlternatives = maxAlternatives;
54 mMaxWordLength = maxWordLength;
55 mMaxWords = maxWords;
Amith Yamasanic3df2d62009-06-04 12:20:45 -070056 mSkipPos = skipPos;
Amith Yamasani322dc3d2009-07-15 18:30:47 -070057 mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
Amith Yamasani1b62ff12010-02-05 14:07:04 -080058 mNextLettersFrequencies = nextLetters;
59 mNextLettersSize = nextLettersSize;
The Android Open Source Project923bf412009-03-13 15:11:42 -070060
Amith Yamasani322dc3d2009-07-15 18:30:47 -070061 getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
The Android Open Source Project923bf412009-03-13 15:11:42 -070062
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070063 // Get the word count
64 suggWords = 0;
65 while (suggWords < mMaxWords && mFrequencies[suggWords] > 0) suggWords++;
66 if (DEBUG_DICT) LOGI("Returning %d words", suggWords);
Amith Yamasani1b62ff12010-02-05 14:07:04 -080067
68 if (DEBUG_DICT) {
69 LOGI("Next letters: ");
70 for (int k = 0; k < nextLettersSize; k++) {
71 if (mNextLettersFrequencies[k] > 0) {
72 LOGI("%c = %d,", k, mNextLettersFrequencies[k]);
73 }
74 }
75 LOGI("\n");
76 }
Amith Yamasanid0e43ec2009-10-14 16:10:32 -070077 return suggWords;
The Android Open Source Project923bf412009-03-13 15:11:42 -070078}
79
Amith Yamasani1b62ff12010-02-05 14:07:04 -080080void
81Dictionary::registerNextLetter(unsigned short c)
82{
83 if (c < mNextLettersSize) {
84 mNextLettersFrequencies[c]++;
85 }
86}
87
The Android Open Source Project923bf412009-03-13 15:11:42 -070088unsigned short
89Dictionary::getChar(int *pos)
90{
91 unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
92 // If the code is 255, then actual 16 bit code follows (in big endian)
93 if (ch == 0xFF) {
94 ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF);
95 (*pos) += 2;
96 }
97 return ch;
98}
99
100int
101Dictionary::getAddress(int *pos)
102{
103 int address = 0;
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700104 if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
105 *pos += 1;
106 } else {
107 address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
108 address += (mDict[*pos + 1] & 0xFF) << 8;
109 address += (mDict[*pos + 2] & 0xFF);
110 *pos += 3;
111 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700112 return address;
113}
114
115int
116Dictionary::wideStrLen(unsigned short *str)
117{
118 if (!str) return 0;
119 unsigned short *end = str;
120 while (*end)
121 end++;
122 return end - str;
123}
124
125bool
126Dictionary::addWord(unsigned short *word, int length, int frequency)
127{
128 word[length] = 0;
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700129 if (DEBUG_DICT) {
130 char s[length + 1];
131 for (int i = 0; i <= length; i++) s[i] = word[i];
132 LOGI("Found word = %s, freq = %d : \n", s, frequency);
133 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700134
135 // Find the right insertion point
136 int insertAt = 0;
137 while (insertAt < mMaxWords) {
138 if (frequency > mFrequencies[insertAt]
139 || (mFrequencies[insertAt] == frequency
140 && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
141 break;
142 }
143 insertAt++;
144 }
145 if (insertAt < mMaxWords) {
146 memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
147 (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
148 (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
149 mFrequencies[insertAt] = frequency;
150 memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
151 (char*) mOutputChars + (insertAt ) * mMaxWordLength * sizeof(short),
152 (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
153 unsigned short *dest = mOutputChars + (insertAt ) * mMaxWordLength;
154 while (length--) {
155 *dest++ = *word++;
156 }
157 *dest = 0; // NULL terminate
The Android Open Source Project923bf412009-03-13 15:11:42 -0700158 if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
159 return true;
160 }
161 return false;
162}
163
164unsigned short
Amith Yamasanif1150882009-08-07 14:04:24 -0700165Dictionary::toLowerCase(unsigned short c) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700166 if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
167 c = BASE_CHARS[c];
168 }
Amith Yamasanif1150882009-08-07 14:04:24 -0700169 if (c >='A' && c <= 'Z') {
170 c |= 32;
171 } else if (c > 127) {
Ken Wakasa707505e2010-04-21 02:35:47 +0900172 c = latin_tolower(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700173 }
174 return c;
175}
176
177bool
178Dictionary::sameAsTyped(unsigned short *word, int length)
179{
180 if (length != mInputLength) {
181 return false;
182 }
183 int *inputCodes = mInputCodes;
184 while (length--) {
185 if ((unsigned int) *inputCodes != (unsigned int) *word) {
186 return false;
187 }
188 inputCodes += mMaxAlternatives;
189 word++;
190 }
191 return true;
192}
193
194static char QUOTE = '\'';
195
196void
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700197Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
198 int diffs)
The Android Open Source Project923bf412009-03-13 15:11:42 -0700199{
200 // Optimization: Prune out words that are too long compared to how much was typed.
201 if (depth > maxDepth) {
202 return;
203 }
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700204 if (diffs > mMaxEditDistance) {
205 return;
206 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700207 int count = getCount(&pos);
208 int *currentChars = NULL;
209 if (mInputLength <= inputIndex) {
210 completion = true;
211 } else {
212 currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
213 }
214
215 for (int i = 0; i < count; i++) {
216 unsigned short c = getChar(&pos);
Amith Yamasanif1150882009-08-07 14:04:24 -0700217 unsigned short lowerC = toLowerCase(c);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700218 bool terminal = getTerminal(&pos);
219 int childrenAddress = getAddress(&pos);
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700220 int freq = 1;
221 if (terminal) freq = getFreq(&pos);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700222 // If we are only doing completions, no need to look at the typed characters.
223 if (completion) {
224 mWord[depth] = c;
225 if (terminal) {
226 addWord(mWord, depth + 1, freq * snr);
Amith Yamasani1b62ff12010-02-05 14:07:04 -0800227 if (depth >= mInputLength && mSkipPos < 0) {
228 registerNextLetter(mWord[mInputLength]);
229 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700230 }
231 if (childrenAddress != 0) {
232 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700233 completion, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700234 }
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700235 } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
236 // Skip the ' or other letter and continue deeper
237 mWord[depth] = c;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700238 if (childrenAddress != 0) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700239 getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700240 }
241 } else {
242 int j = 0;
243 while (currentChars[j] > 0) {
The Android Open Source Project923bf412009-03-13 15:11:42 -0700244 if (currentChars[j] == lowerC || currentChars[j] == c) {
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700245 int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700246 mWord[depth] = c;
247 if (mInputLength == inputIndex + 1) {
248 if (terminal) {
249 if (//INCLUDE_TYPED_WORD_IF_VALID ||
250 !sameAsTyped(mWord, depth + 1)) {
Amith Yamasanif51d16a2009-08-10 17:22:39 -0700251 int finalFreq = freq * snr * addedWeight;
252 if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
253 addWord(mWord, depth + 1, finalFreq);
The Android Open Source Project923bf412009-03-13 15:11:42 -0700254 }
255 }
256 if (childrenAddress != 0) {
257 getWordsRec(childrenAddress, depth + 1,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700258 maxDepth, true, snr * addedWeight, inputIndex + 1,
259 diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700260 }
261 } else if (childrenAddress != 0) {
262 getWordsRec(childrenAddress, depth + 1, maxDepth,
Amith Yamasani322dc3d2009-07-15 18:30:47 -0700263 false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
The Android Open Source Project923bf412009-03-13 15:11:42 -0700264 }
265 }
266 j++;
Amith Yamasanic3df2d62009-06-04 12:20:45 -0700267 if (mSkipPos >= 0) break;
The Android Open Source Project923bf412009-03-13 15:11:42 -0700268 }
269 }
270 }
271}
272
273bool
274Dictionary::isValidWord(unsigned short *word, int length)
275{
276 return isValidWordRec(0, word, 0, length);
277}
278
279bool
280Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
281 int count = getCount(&pos);
282 unsigned short currentChar = (unsigned short) word[offset];
283 for (int j = 0; j < count; j++) {
284 unsigned short c = getChar(&pos);
285 int terminal = getTerminal(&pos);
286 int childPos = getAddress(&pos);
287 if (c == currentChar) {
288 if (offset == length - 1) {
289 if (terminal) {
290 return true;
291 }
292 } else {
293 if (childPos != 0) {
294 if (isValidWordRec(childPos, word, offset + 1, length)) {
295 return true;
296 }
297 }
298 }
299 }
Amith Yamasanicc3e5c72009-03-31 10:51:17 -0700300 if (terminal) {
301 getFreq(&pos);
302 }
The Android Open Source Project923bf412009-03-13 15:11:42 -0700303 // There could be two instances of each alphabet - upper and lower case. So continue
304 // looking ...
305 }
306 return false;
307}
308
309
310} // namespace latinime