blob: d9ef8f3b7cc5720be5d1a9c9113a74b1d6688e5b [file] [log] [blame]
/*
**
** Copyright 2009, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <stdio.h>
The Android Open Source Project923bf412009-03-13 15:11:42 -070019
satoke808e432010-12-02 14:53:24 +090020#define LOG_TAG "LatinIME: dictionary.cpp"
21
The Android Open Source Project923bf412009-03-13 15:11:42 -070022#include "dictionary.h"
satokd4952c82010-12-01 19:09:29 +090023
The Android Open Source Project923bf412009-03-13 15:11:42 -070024namespace latinime {
25
// TODO: Change the type of all keyCodes to uint32_t
Ken Wakasae90b3332011-01-07 15:01:51 +090027Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
28 int typedLetterMultiplier, int fullWordMultiplier,
satok30088252010-12-01 21:22:15 +090029 int maxWordLength, int maxWords, int maxAlternatives)
Ken Wakasae90b3332011-01-07 15:01:51 +090030 : mDict((unsigned char*) dict), mDictSize(dictSize),
31 mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
satoke808e432010-12-02 14:53:24 +090032 // Checks whether it has the latest dictionary or the old dictionary
satok662fe692010-12-08 17:05:39 +090033 IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
34 if (DEBUG_DICT) {
35 if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
36 LOGI("Max word length (%d) is greater than %d",
37 maxWordLength, MAX_WORD_LENGTH_INTERNAL);
Ken Wakasae90b3332011-01-07 15:01:51 +090038 LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
satok662fe692010-12-08 17:05:39 +090039 }
satok715514d2010-12-02 20:19:59 +090040 }
Ken Wakasae90b3332011-01-07 15:01:51 +090041 mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
satok18c28f42010-12-02 18:11:54 +090042 maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
Ken Wakasae90b3332011-01-07 15:01:51 +090043 mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
satok18c28f42010-12-02 18:11:54 +090044 IS_LATEST_DICT_VERSION, hasBigram(), this);
The Android Open Source Project923bf412009-03-13 15:11:42 -070045}
46
satok662fe692010-12-08 17:05:39 +090047Dictionary::~Dictionary() {
satok30088252010-12-01 21:22:15 +090048 delete mUnigramDictionary;
49 delete mBigramDictionary;
The Android Open Source Project923bf412009-03-13 15:11:42 -070050}
satoke808e432010-12-02 14:53:24 +090051
52bool Dictionary::hasBigram() {
Ken Wakasae90b3332011-01-07 15:01:51 +090053 return ((mDict[1] & 0xFF) == 1);
satoke808e432010-12-02 14:53:24 +090054}
55
Jean Chalarde93b1f222011-06-01 17:12:25 +090056// TODO: use uint32_t instead of unsigned short
satok8fbd5522011-02-22 17:28:55 +090057bool Dictionary::isValidWord(unsigned short *word, int length) {
satoke808e432010-12-02 14:53:24 +090058 if (IS_LATEST_DICT_VERSION) {
59 return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
60 } else {
61 return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
62 }
63}
64
65int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
66 // returns address of bigram data of that word
67 // return -99 if not found
68
Ken Wakasae90b3332011-01-07 15:01:51 +090069 int count = Dictionary::getCount(mDict, &pos);
satoke808e432010-12-02 14:53:24 +090070 unsigned short currentChar = (unsigned short) word[offset];
71 for (int j = 0; j < count; j++) {
Ken Wakasae90b3332011-01-07 15:01:51 +090072 unsigned short c = Dictionary::getChar(mDict, &pos);
73 int terminal = Dictionary::getTerminal(mDict, &pos);
74 int childPos = Dictionary::getAddress(mDict, &pos);
satoke808e432010-12-02 14:53:24 +090075 if (c == currentChar) {
76 if (offset == length - 1) {
77 if (terminal) {
78 return (pos+1);
79 }
80 } else {
81 if (childPos != 0) {
82 int t = isValidWordRec(childPos, word, offset + 1, length);
83 if (t > 0) {
84 return t;
85 }
86 }
87 }
88 }
89 if (terminal) {
Ken Wakasae90b3332011-01-07 15:01:51 +090090 Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos);
satoke808e432010-12-02 14:53:24 +090091 }
92 // There could be two instances of each alphabet - upper and lower case. So continue
93 // looking ...
94 }
95 return NOT_VALID_WORD;
96}
The Android Open Source Project923bf412009-03-13 15:11:42 -070097} // namespace latinime