/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_UNIGRAM_DICTIONARY_H
#define LATINIME_UNIGRAM_DICTIONARY_H

#include "defines.h"

namespace latinime {

satok30088252010-12-01 21:22:15 +090024class UnigramDictionary {
25public:
satoke808e432010-12-02 14:53:24 +090026 UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
satok18c28f42010-12-02 18:11:54 +090027 int maxWordLength, int maxWords, int maxAlternatives, const bool isLatestDictVersion);
satok30088252010-12-01 21:22:15 +090028 int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
29 int *nextLetters, int nextLettersSize);
satok30088252010-12-01 21:22:15 +090030 ~UnigramDictionary();
31
32private:
33 void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
satok18c28f42010-12-02 18:11:54 +090034 int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters,
35 int nextLettersSize);
satok30088252010-12-01 21:22:15 +090036 void getVersionNumber();
37 bool checkIfDictVersionIsLatest();
38 int getAddress(int *pos);
satok30088252010-12-01 21:22:15 +090039 int getFreq(int *pos);
satok30088252010-12-01 21:22:15 +090040 int wideStrLen(unsigned short *str);
satok30088252010-12-01 21:22:15 +090041 bool sameAsTyped(unsigned short *word, int length);
satok30088252010-12-01 21:22:15 +090042 bool addWord(unsigned short *word, int length, int frequency);
satok30088252010-12-01 21:22:15 +090043 unsigned short toLowerCase(unsigned short c);
satok68319262010-12-03 19:38:08 +090044 void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
45 const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
46 const int skipPos, int *nextLetters, const int nextLettersSize);
47 void getWords(const int initialPos, const int inputLength, const int skipPos, int *nextLetters,
48 const int nextLettersSize);
satok30088252010-12-01 21:22:15 +090049 void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
satok715514d2010-12-02 20:19:59 +090050 void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
51 const int mInputLength, const int depth, const int snr, int *nextLetters,
52 const int nextLettersSize, const int skipPos, const int freq);
satok715514d2010-12-02 20:19:59 +090053 void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
54 const int snr, const int skipPos, const int freq, const int addedWeight);
satok28bd03b2010-12-03 16:39:16 +090055 bool needsToSkipCurrentNode(const unsigned short c,
satok68319262010-12-03 19:38:08 +090056 const int inputIndex, const int skipPos, const int depth);
satok28bd03b2010-12-03 16:39:16 +090057 int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
58 const unsigned short c, const int skipPos);
59
satoke808e432010-12-02 14:53:24 +090060 const unsigned char *DICT;
satok30088252010-12-01 21:22:15 +090061 const int MAX_WORDS;
62 const int MAX_WORD_LENGTH;
63 const int MAX_ALTERNATIVES;
satoke808e432010-12-02 14:53:24 +090064 const bool IS_LATEST_DICT_VERSION;
satok18c28f42010-12-02 18:11:54 +090065 const int TYPED_LETTER_MULTIPLIER;
66 const int FULL_WORD_MULTIPLIER;
satok30088252010-12-01 21:22:15 +090067
satok30088252010-12-01 21:22:15 +090068 int *mFrequencies;
satok30088252010-12-01 21:22:15 +090069 unsigned short *mOutputChars;
satok30088252010-12-01 21:22:15 +090070 int *mInputCodes;
71 int mInputLength;
satok715514d2010-12-02 20:19:59 +090072 // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
73 unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
satok30088252010-12-01 21:22:15 +090074 int mMaxEditDistance;
satok30088252010-12-01 21:22:15 +090075};

// ----------------------------------------------------------------------------

}; // namespace latinime

#endif // LATINIME_UNIGRAM_DICTIONARY_H