blob: e0feeafdaccf15b963a814727daea648ca66baec [file] [log] [blame]
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef LATINIME_DICTIONARY_H
18#define LATINIME_DICTIONARY_H
19
satok30088252010-12-01 21:22:15 +090020#include "bigram_dictionary.h"
satokd24df432011-07-14 15:43:42 +090021#include "char_utils.h"
satok1147c7b2011-12-14 15:04:58 +090022#include "correction.h"
satoke808e432010-12-02 14:53:24 +090023#include "defines.h"
satok8fbd5522011-02-22 17:28:55 +090024#include "proximity_info.h"
satok30088252010-12-01 21:22:15 +090025#include "unigram_dictionary.h"
satoka7e5a5a2011-12-15 16:49:12 +090026#include "words_priority_queue_pool.h"
satok30088252010-12-01 21:22:15 +090027
The Android Open Source Project923bf412009-03-13 15:11:42 -070028namespace latinime {
29
30class Dictionary {
Ken Wakasae12e9b52012-01-06 12:24:38 +090031 public:
Ken Wakasae90b3332011-01-07 15:01:51 +090032 Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
satok6ba8de22012-03-28 18:21:04 +090033 int fullWordMultiplier, int maxWordLength, int maxWords);
satok1147c7b2011-12-14 15:04:58 +090034
satok8fbd5522011-02-22 17:28:55 +090035 int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
Jean Chalard351864b2012-04-24 18:06:51 +090036 int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength,
37 bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
Jean Chalard4d9b2022012-04-23 19:25:28 +090038 // bigramListPosition is, as an int, the offset of the bigram list in the file.
39 // If none, it's zero.
Jean Chalard351864b2012-04-24 18:06:51 +090040 const int bigramListPosition = !prevWordChars ? 0
41 : mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
satoka7e5a5a2011-12-15 16:49:12 +090042 return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
Jean Chalard4d9b2022012-04-23 19:25:28 +090043 mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
44 useFullEditDistance, outWords, frequencies);
satok30088252010-12-01 21:22:15 +090045 }
46
Jean Chalard522a04e2012-04-23 15:37:07 +090047 int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
satok6ba8de22012-03-28 18:21:04 +090048 unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
satok18c28f42010-12-02 18:11:54 +090049 return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
satok6ba8de22012-03-28 18:21:04 +090050 maxWordLength, maxBigrams);
satok30088252010-12-01 21:22:15 +090051 }
satok8fbd5522011-02-22 17:28:55 +090052
Jean Chalard522a04e2012-04-23 15:37:07 +090053 bool isValidWord(const int32_t *word, int length);
Ken Wakasae90b3332011-01-07 15:01:51 +090054 void *getDict() { return (void *)mDict; }
55 int getDictSize() { return mDictSize; }
56 int getMmapFd() { return mMmapFd; }
57 int getDictBufAdjust() { return mDictBufAdjust; }
The Android Open Source Project923bf412009-03-13 15:11:42 -070058 ~Dictionary();
Amith Yamasanicc3e5c72009-03-31 10:51:17 -070059
satoke808e432010-12-02 14:53:24 +090060 // public static utility methods
61 // static inline methods should be defined in the header file
satok18c28f42010-12-02 18:11:54 +090062 static int wideStrLen(unsigned short *str);
Jean Chalard581335c2011-06-17 12:45:17 +090063
Ken Wakasae12e9b52012-01-06 12:24:38 +090064 private:
Ken Wakasae90b3332011-01-07 15:01:51 +090065 const unsigned char *mDict;
66
67 // Used only for the mmap version of dictionary loading, but we use these as dummy variables
68 // also for the malloc version.
69 const int mDictSize;
70 const int mMmapFd;
71 const int mDictBufAdjust;
72
satok30088252010-12-01 21:22:15 +090073 UnigramDictionary *mUnigramDictionary;
Ken Wakasae90b3332011-01-07 15:01:51 +090074 BigramDictionary *mBigramDictionary;
satoka7e5a5a2011-12-15 16:49:12 +090075 WordsPriorityQueuePool *mWordsPriorityQueuePool;
satok1147c7b2011-12-14 15:04:58 +090076 Correction *mCorrection;
The Android Open Source Project923bf412009-03-13 15:11:42 -070077};
78
// public static utility methods
// static inline methods should be defined in the header file
satok18c28f42010-12-02 18:11:54 +090081inline int Dictionary::wideStrLen(unsigned short *str) {
82 if (!str) return 0;
83 unsigned short *end = str;
84 while (*end)
85 end++;
86 return end - str;
87}
Ken Wakasace9e52a2011-06-18 13:09:55 +090088} // namespace latinime
89
The Android Open Source Project923bf412009-03-13 15:11:42 -070090#endif // LATINIME_DICTIONARY_H