/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_UNIGRAM_DICTIONARY_H
#define LATINIME_UNIGRAM_DICTIONARY_H

#include "defines.h"

satok30088252010-12-01 21:22:15 +090022namespace latinime {
23
satok30088252010-12-01 21:22:15 +090024class UnigramDictionary {
25public:
satoke808e432010-12-02 14:53:24 +090026 UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
27 int maxWordLength, int maxWords, int maxAlternatives, const bool isLatestDictVersion,
28 const bool hasBigram, Dictionary *parentDictionary);
satok30088252010-12-01 21:22:15 +090029 int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
30 int *nextLetters, int nextLettersSize);
31 int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
32 unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
33 int maxAlternatives);
satok30088252010-12-01 21:22:15 +090034 ~UnigramDictionary();
35
36private:
37 void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
38 int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters, int nextLettersSize);
39 void getVersionNumber();
40 bool checkIfDictVersionIsLatest();
41 int getAddress(int *pos);
42 int getBigramAddress(int *pos, bool advance);
43 int getFreq(int *pos);
44 int getBigramFreq(int *pos);
45 void searchForTerminalNode(int address, int frequency);
46
satoke808e432010-12-02 14:53:24 +090047 bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
48 bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
49 bool getTerminal(int *pos) { return (DICT[*pos] & FLAG_TERMINAL_MASK) > 0; }
satok30088252010-12-01 21:22:15 +090050 int wideStrLen(unsigned short *str);
51
52 bool sameAsTyped(unsigned short *word, int length);
53 bool checkFirstCharacter(unsigned short *word);
54 bool addWord(unsigned short *word, int length, int frequency);
55 bool addWordBigram(unsigned short *word, int length, int frequency);
56 unsigned short toLowerCase(unsigned short c);
57 void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
58 int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
59 void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
satok30088252010-12-01 21:22:15 +090060
satoke808e432010-12-02 14:53:24 +090061 const unsigned char *DICT;
satok30088252010-12-01 21:22:15 +090062
63 const int MAX_WORDS;
64 const int MAX_WORD_LENGTH;
65 const int MAX_ALTERNATIVES;
satoke808e432010-12-02 14:53:24 +090066 const bool IS_LATEST_DICT_VERSION;
67 const bool HAS_BIGRAM;
satok30088252010-12-01 21:22:15 +090068
satoke808e432010-12-02 14:53:24 +090069 Dictionary *mParentDictionary;
satok30088252010-12-01 21:22:15 +090070 int *mFrequencies;
71 int *mBigramFreq;
72 int mMaxBigrams;
73 unsigned short *mOutputChars;
74 unsigned short *mBigramChars;
75 int *mInputCodes;
76 int mInputLength;
77 unsigned short mWord[128];
78 int mMaxEditDistance;
79
80 int mFullWordMultiplier;
81 int mTypedLetterMultiplier;
82 int mVersion;
83 int mBigram;
84};
85
86// ----------------------------------------------------------------------------
87
88}; // namespace latinime

#endif // LATINIME_UNIGRAM_DICTIONARY_H