The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 1 | /* |
| 2 | ** |
| 3 | ** Copyright 2009, The Android Open Source Project |
| 4 | ** |
| 5 | ** Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | ** you may not use this file except in compliance with the License. |
| 7 | ** You may obtain a copy of the License at |
| 8 | ** |
| 9 | ** http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | ** |
| 11 | ** Unless required by applicable law or agreed to in writing, software |
| 12 | ** distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | ** See the License for the specific language governing permissions and |
| 15 | ** limitations under the License. |
| 16 | */ |
| 17 | |
| 18 | #include <stdio.h> |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 19 | |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 20 | #define LOG_TAG "LatinIME: dictionary.cpp" |
| 21 | |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 22 | #include "dictionary.h" |
satok | d4952c8 | 2010-12-01 19:09:29 +0900 | [diff] [blame] | 23 | |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 24 | namespace latinime { |
| 25 | |
satok | 8fbd552 | 2011-02-22 17:28:55 +0900 | [diff] [blame^] | 26 | // TODO: Change the type of all keyCodes to uint32_t |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 27 | Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, |
| 28 | int typedLetterMultiplier, int fullWordMultiplier, |
satok | 3008825 | 2010-12-01 21:22:15 +0900 | [diff] [blame] | 29 | int maxWordLength, int maxWords, int maxAlternatives) |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 30 | : mDict((unsigned char*) dict), mDictSize(dictSize), |
| 31 | mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 32 | // Checks whether it has the latest dictionary or the old dictionary |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 33 | IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) { |
| 34 | if (DEBUG_DICT) { |
| 35 | if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { |
| 36 | LOGI("Max word length (%d) is greater than %d", |
| 37 | maxWordLength, MAX_WORD_LENGTH_INTERNAL); |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 38 | LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 39 | } |
satok | 715514d | 2010-12-02 20:19:59 +0900 | [diff] [blame] | 40 | } |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 41 | mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, |
satok | 18c28f4 | 2010-12-02 18:11:54 +0900 | [diff] [blame] | 42 | maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 43 | mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, |
satok | 18c28f4 | 2010-12-02 18:11:54 +0900 | [diff] [blame] | 44 | IS_LATEST_DICT_VERSION, hasBigram(), this); |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 45 | } |
| 46 | |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 47 | Dictionary::~Dictionary() { |
satok | 3008825 | 2010-12-01 21:22:15 +0900 | [diff] [blame] | 48 | delete mUnigramDictionary; |
| 49 | delete mBigramDictionary; |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 50 | } |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 51 | |
| 52 | bool Dictionary::hasBigram() { |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 53 | return ((mDict[1] & 0xFF) == 1); |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | // TODO: use uint16_t instead of unsigned short |
satok | 8fbd552 | 2011-02-22 17:28:55 +0900 | [diff] [blame^] | 57 | bool Dictionary::isValidWord(unsigned short *word, int length) { |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 58 | if (IS_LATEST_DICT_VERSION) { |
| 59 | return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); |
| 60 | } else { |
| 61 | return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { |
| 66 | // returns address of bigram data of that word |
| 67 | // return -99 if not found |
| 68 | |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 69 | int count = Dictionary::getCount(mDict, &pos); |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 70 | unsigned short currentChar = (unsigned short) word[offset]; |
| 71 | for (int j = 0; j < count; j++) { |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 72 | unsigned short c = Dictionary::getChar(mDict, &pos); |
| 73 | int terminal = Dictionary::getTerminal(mDict, &pos); |
| 74 | int childPos = Dictionary::getAddress(mDict, &pos); |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 75 | if (c == currentChar) { |
| 76 | if (offset == length - 1) { |
| 77 | if (terminal) { |
| 78 | return (pos+1); |
| 79 | } |
| 80 | } else { |
| 81 | if (childPos != 0) { |
| 82 | int t = isValidWordRec(childPos, word, offset + 1, length); |
| 83 | if (t > 0) { |
| 84 | return t; |
| 85 | } |
| 86 | } |
| 87 | } |
| 88 | } |
| 89 | if (terminal) { |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 90 | Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos); |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 91 | } |
| 92 | // There could be two instances of each alphabet - upper and lower case. So continue |
| 93 | // looking ... |
| 94 | } |
| 95 | return NOT_VALID_WORD; |
| 96 | } |
The Android Open Source Project | 923bf41 | 2009-03-13 15:11:42 -0700 | [diff] [blame] | 97 | } // namespace latinime |