Blame - dictionary/src/dictionary.cpp - android_packages_inputmethods_LatinIME

blob: 6aecb6374972ff36ea570006f5abcaa538b5f26a [file] [log] [blame]

The Android Open Source Project	923bf41	2009-03-13 15:11:42 -0700	[diff] [blame^]	1	/*
				2	**
				3	** Copyright 2009, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <stdio.h>
				19	#include <fcntl.h>
				20	#include <sys/mman.h>
				21	#include <string.h>
				22	#include <cutils/log.h>
				23
				24	#include <unicode/uchar.h>
				25
				26	//#define USE_ASSET_MANAGER
				27
				28	#ifdef USE_ASSET_MANAGER
				29	#include <utils/AssetManager.h>
				30	#include <utils/Asset.h>
				31	#endif
				32
				33	#include "dictionary.h"
				34	#include "basechars.h"
				35
				36	#define DEBUG_DICT 0
				37
				38	namespace latinime {
				39
				40	Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
				41	{
				42	mDict = (unsigned char*) dict;
				43	mTypedLetterMultiplier = typedLetterMultiplier;
				44	mFullWordMultiplier = fullWordMultiplier;
				45	}
				46
				47	Dictionary::~Dictionary()
				48	{
				49	}
				50
				51	int Dictionary::getSuggestions(int codes, int codesSize, unsigned short outWords, int *frequencies,
				52	int maxWordLength, int maxWords, int maxAlternatives)
				53	{
				54	memset(frequencies, 0, maxWords * sizeof(*frequencies));
				55	memset(outWords, 0, maxWords * maxWordLength * sizeof(*outWords));
				56
				57	mFrequencies = frequencies;
				58	mOutputChars = outWords;
				59	mInputCodes = codes;
				60	mInputLength = codesSize;
				61	mMaxAlternatives = maxAlternatives;
				62	mMaxWordLength = maxWordLength;
				63	mMaxWords = maxWords;
				64	mWords = 0;
				65
				66	getWordsRec(0, 0, mInputLength * 3, false, 1, 0);
				67
				68	if (DEBUG_DICT) LOGI("Returning %d words", mWords);
				69	return mWords;
				70	}
				71
				72	unsigned short
				73	Dictionary::getChar(int *pos)
				74	{
				75	unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF);
				76	// If the code is 255, then actual 16 bit code follows (in big endian)
				77	if (ch == 0xFF) {
				78	ch = ((mDict[pos] & 0xFF) << 8) \| (mDict[pos + 1] & 0xFF);
				79	(*pos) += 2;
				80	}
				81	return ch;
				82	}
				83
				84	int
				85	Dictionary::getAddress(int *pos)
				86	{
				87	int address = 0;
				88	address += (mDict[*pos] & 0x7F) << 16;
				89	address += (mDict[*pos + 1] & 0xFF) << 8;
				90	address += (mDict[*pos + 2] & 0xFF);
				91	*pos += 3;
				92	return address;
				93	}
				94
				95	int
				96	Dictionary::wideStrLen(unsigned short *str)
				97	{
				98	if (!str) return 0;
				99	unsigned short *end = str;
				100	while (*end)
				101	end++;
				102	return end - str;
				103	}
				104
				105	bool
				106	Dictionary::addWord(unsigned short *word, int length, int frequency)
				107	{
				108	word[length] = 0;
				109	if (DEBUG_DICT) LOGI("Found word = %s, freq = %d : \n", word, frequency);
				110
				111	// Find the right insertion point
				112	int insertAt = 0;
				113	while (insertAt < mMaxWords) {
				114	if (frequency > mFrequencies[insertAt]
				115	\|\| (mFrequencies[insertAt] == frequency
				116	&& length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
				117	break;
				118	}
				119	insertAt++;
				120	}
				121	if (insertAt < mMaxWords) {
				122	memmove((char) mFrequencies + (insertAt + 1) sizeof(mFrequencies[0]),
				123	(char) mFrequencies + insertAt sizeof(mFrequencies[0]),
				124	(mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
				125	mFrequencies[insertAt] = frequency;
				126	memmove((char) mOutputChars + (insertAt + 1) mMaxWordLength * sizeof(short),
				127	(char) mOutputChars + (insertAt ) mMaxWordLength * sizeof(short),
				128	(mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
				129	unsigned short dest = mOutputChars + (insertAt ) mMaxWordLength;
				130	while (length--) {
				131	dest++ = word++;
				132	}
				133	*dest = 0; // NULL terminate
				134	// Update the word count
				135	if (insertAt + 1 > mWords) mWords = insertAt + 1;
				136	if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
				137	return true;
				138	}
				139	return false;
				140	}
				141
				142	unsigned short
				143	Dictionary::toLowerCase(unsigned short c, const int depth) {
				144	if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
				145	c = BASE_CHARS[c];
				146	}
				147	if (depth == 0) {
				148	if (c >='A' && c <= 'Z') {
				149	c \|= 32;
				150	} else if (c > 127) {
				151	c = u_tolower(c);
				152	}
				153	}
				154	return c;
				155	}
				156
				157	bool
				158	Dictionary::sameAsTyped(unsigned short *word, int length)
				159	{
				160	if (length != mInputLength) {
				161	return false;
				162	}
				163	int *inputCodes = mInputCodes;
				164	while (length--) {
				165	if ((unsigned int) inputCodes != (unsigned int) word) {
				166	return false;
				167	}
				168	inputCodes += mMaxAlternatives;
				169	word++;
				170	}
				171	return true;
				172	}
				173
				174	static char QUOTE = '\'';
				175
				176	void
				177	Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex)
				178	{
				179	// Optimization: Prune out words that are too long compared to how much was typed.
				180	if (depth > maxDepth) {
				181	return;
				182	}
				183	int count = getCount(&pos);
				184	int *currentChars = NULL;
				185	if (mInputLength <= inputIndex) {
				186	completion = true;
				187	} else {
				188	currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
				189	}
				190
				191	for (int i = 0; i < count; i++) {
				192	unsigned short c = getChar(&pos);
				193	unsigned short lowerC = toLowerCase(c, depth);
				194	bool terminal = getTerminal(&pos);
				195	int childrenAddress = getAddress(&pos);
				196	int freq = getFreq(&pos);
				197	// If we are only doing completions, no need to look at the typed characters.
				198	if (completion) {
				199	mWord[depth] = c;
				200	if (terminal) {
				201	addWord(mWord, depth + 1, freq * snr);
				202	}
				203	if (childrenAddress != 0) {
				204	getWordsRec(childrenAddress, depth + 1, maxDepth,
				205	completion, snr, inputIndex);
				206	}
				207	} else if (c == QUOTE && currentChars[0] != QUOTE) {
				208	// Skip the ' and continue deeper
				209	mWord[depth] = QUOTE;
				210	if (childrenAddress != 0) {
				211	getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex);
				212	}
				213	} else {
				214	int j = 0;
				215	while (currentChars[j] > 0) {
				216	int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
				217	if (currentChars[j] == lowerC \|\| currentChars[j] == c) {
				218	mWord[depth] = c;
				219	if (mInputLength == inputIndex + 1) {
				220	if (terminal) {
				221	if (//INCLUDE_TYPED_WORD_IF_VALID \|\|
				222	!sameAsTyped(mWord, depth + 1)) {
				223	addWord(mWord, depth + 1,
				224	(freq * snr * addedWeight * mFullWordMultiplier));
				225	}
				226	}
				227	if (childrenAddress != 0) {
				228	getWordsRec(childrenAddress, depth + 1,
				229	maxDepth, true, snr * addedWeight, inputIndex + 1);
				230	}
				231	} else if (childrenAddress != 0) {
				232	getWordsRec(childrenAddress, depth + 1, maxDepth,
				233	false, snr * addedWeight, inputIndex + 1);
				234	}
				235	}
				236	j++;
				237	}
				238	}
				239	}
				240	}
				241
				242	bool
				243	Dictionary::isValidWord(unsigned short *word, int length)
				244	{
				245	return isValidWordRec(0, word, 0, length);
				246	}
				247
				248	bool
				249	Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
				250	int count = getCount(&pos);
				251	unsigned short currentChar = (unsigned short) word[offset];
				252	for (int j = 0; j < count; j++) {
				253	unsigned short c = getChar(&pos);
				254	int terminal = getTerminal(&pos);
				255	int childPos = getAddress(&pos);
				256	if (c == currentChar) {
				257	if (offset == length - 1) {
				258	if (terminal) {
				259	return true;
				260	}
				261	} else {
				262	if (childPos != 0) {
				263	if (isValidWordRec(childPos, word, offset + 1, length)) {
				264	return true;
				265	}
				266	}
				267	}
				268	}
				269	getFreq(&pos);
				270	// There could be two instances of each alphabet - upper and lower case. So continue
				271	// looking ...
				272	}
				273	return false;
				274	}
				275
				276
				277	} // namespace latinime