Blame - native/src/unigram_dictionary.h - android_packages_inputmethods_LatinIME

blob: 28e4308b49444ca43b931403a51393c683c105c8 [file] [log] [blame]

satok	3008825	2010-12-01 21:22:15 +0900	[diff] [blame]	1	/*
				2	* Copyright (C) 2010 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef LATINIME_UNIGRAM_DICTIONARY_H
				18	#define LATINIME_UNIGRAM_DICTIONARY_H
				19
				namespace latinime {

				// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words.
				// The mask keeps the low 22 bits of a value (0x3FFFFF == (1 << 22) - 1).
				#define ADDRESS_MASK 0x3FFFFF

				// The bit that decides if an address follows in the next 22 bits
				#define FLAG_ADDRESS_MASK 0x40
				// The bit that decides if this is a terminal node for a word. The node could still have children,
				// if the word has other endings.
				#define FLAG_TERMINAL_MASK 0x80

				// Bit flags used when decoding bigram data.
				// NOTE(review): FLAG_BIGRAM_READ and FLAG_BIGRAM_CONTINUED share the value 0x80; presumably they
				// apply to different bytes of the bigram encoding -- confirm against the implementation file.
				#define FLAG_BIGRAM_READ 0x80
				#define FLAG_BIGRAM_CHILDEXIST 0x40
				#define FLAG_BIGRAM_CONTINUED 0x80
				// Low seven bits of a byte, i.e. everything except the 0x80 flag bit.
				#define FLAG_BIGRAM_FREQ 0x7F

				// Only ever referenced through a pointer in this header, so a forward declaration is enough.
				class Dictionary;

				// Declares the unigram dictionary lookup class. Only the small byte-level helpers are defined
				// inline here; every other member function is defined outside this header, so consult the
				// implementation file for the precise contracts.
				//
				// Conventions visible in this header:
				//  - Words and characters are carried as 'unsigned short' values / buffers.
				//  - Functions taking 'int *pos' receive a read cursor (an index into mDict) by pointer so that
				//    they can advance the caller's position.
				class UnigramDictionary {
				public:
				    // dict is an opaque pointer to the dictionary data (presumably what mDict refers to after
				    // construction -- confirm in the implementation file). The remaining arguments are tuning
				    // values and limits; parentDictionary is the owning Dictionary object.
				    UnigramDictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier,
				            int maxWordLength, int maxWords, int maxAlternatives, Dictionary *parentDictionary);

				    // codes / codesSize describe the input buffer; outWords and frequencies are caller-provided
				    // output buffers, and nextLetters / nextLettersSize an additional caller-provided int buffer.
				    // All buffers are passed by pointer.
				    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
				            int *nextLetters, int nextLettersSize);

				    // Bigram counterpart of getSuggestions: word / length identify a word buffer, and
				    // outWords / frequencies are caller-provided output buffers.
				    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
				            unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
				            int maxAlternatives);

				    // word points at 'length' characters to be checked.
				    bool isValidWord(unsigned short *word, int length);

				    ~UnigramDictionary();

				private:
				    void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
				    int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters,
				            int nextLettersSize);
				    void getVersionNumber();
				    bool checkIfDictVersionIsLatest();

				    // Cursor-based readers: each takes the read position by pointer.
				    int getAddress(int *pos);
				    int getBigramAddress(int *pos, bool advance);
				    int getFreq(int *pos);
				    int getBigramFreq(int *pos);
				    void searchForTerminalNode(int address, int frequency);

				    // True when the most significant bit (0x80) of the byte at index pos is set.
				    bool getFirstBitOfByte(int pos) const { return (mDict[pos] & 0x80) > 0; }
				    // True when the 0x40 bit of the byte at index pos is set.
				    bool getSecondBitOfByte(int pos) const { return (mDict[pos] & 0x40) > 0; }
				    // True when the terminal flag (FLAG_TERMINAL_MASK) of the byte at index pos is set.
				    bool getTerminal(int pos) const { return (mDict[pos] & FLAG_TERMINAL_MASK) > 0; }
				    // Returns the byte at *pos as a value in [0, 255] and advances *pos by one. The position is
				    // taken by pointer (like getAddress / getFreq / getChar) so the caller's cursor really moves;
				    // incrementing a by-value copy would silently leave the caller stuck on the same byte.
				    int getCount(int *pos) const { return mDict[(*pos)++] & 0xFF; }
				    unsigned short getChar(int *pos);
				    int wideStrLen(unsigned short *str);

				    bool sameAsTyped(unsigned short *word, int length);
				    bool checkFirstCharacter(unsigned short *word);
				    bool addWord(unsigned short *word, int length, int frequency);
				    bool addWordBigram(unsigned short *word, int length, int frequency);
				    unsigned short toLowerCase(unsigned short c);
				    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
				            int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
				    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
				    int isValidWordRec(int pos, unsigned short *word, int offset, int length);

				    // Dictionary bytes that the inline helpers above index into.
				    unsigned char *mDict;
				    Dictionary *mParentDictionary;

				    // Limits that cannot change after construction (const members).
				    const int MAX_WORDS;
				    const int MAX_WORD_LENGTH;
				    const int MAX_ALTERNATIVES;

				    // Buffers and per-query state.
				    // NOTE(review): ownership of the pointed-to buffers is not visible in this header.
				    int *mFrequencies;
				    int *mBigramFreq;
				    int mMaxBigrams;
				    unsigned short *mOutputChars;
				    unsigned short *mBigramChars;
				    int *mInputCodes;
				    int mInputLength;
				    // Fixed-size scratch buffer of 128 characters.
				    unsigned short mWord[128];
				    int mMaxEditDistance;

				    int mFullWordMultiplier;
				    int mTypedLetterMultiplier;
				    int mVersion;
				    int mBigram;
				};

				// ----------------------------------------------------------------------------

				} // namespace latinime
				103
				104	#endif // LATINIME_UNIGRAM_DICTIONARY_H