Blame - native/src/unigram_dictionary.cpp - android_packages_inputmethods_LatinIME

2010-12-01 21:22:15 +0900

[diff] [blame]

/*

**

**

** Licensed under the Apache License, Version 2.0 (the "License");

6

** you may not use this file except in compliance with the License.

7

** You may obtain a copy of the License at

8

**

9

** http://www.apache.org/licenses/LICENSE-2.0

10

**

11

** Unless required by applicable law or agreed to in writing, software

12

** distributed under the License is distributed on an "AS IS" BASIS,

13

** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

** See the License for the specific language governing permissions and

15

** limitations under the License.

16

*/

17

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

18

#include <assert.h>

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

19

#include <string.h>

20

satok

e808e43

2010-12-02 14:53:24 +0900

[diff] [blame]

21

#define LOG_TAG "LatinIME: unigram_dictionary.cpp"

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

22

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

23

#include "basechars.h"

24

#include "char_utils.h"

satok

e808e43

2010-12-02 14:53:24 +0900

[diff] [blame]

25

#include "dictionary.h"

26

#include "unigram_dictionary.h"

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

namespace latinime {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

30

const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =

{ { 'a', 'e' },

{ 'o', 'e' },

{ 'u', 'e' } };

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

35

// TODO: check the header

36

UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

37

int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,

satok

18c28f4

2010-12-02 18:11:54 +0900

[diff] [blame]

38

const bool isLatestDictVersion)

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

39

: DICT_ROOT(streamStart),

40

MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

41

MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),

42

TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

43

ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

44

BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),

45

MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

46

if (DEBUG_DICT) {

47

LOGI("UnigramDictionary - constructor");

48

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

49

}

50

satok

18c28f4

2010-12-02 18:11:54 +0900

[diff] [blame]

51

UnigramDictionary::~UnigramDictionary() {}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

52

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

53

static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,

54

const int MAX_PROXIMITY_CHARS) {

55

return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;

56

}

57

58

bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {

59

60

// There can't be a digraph if we don't have at least 2 characters to examine

61

if (i + 2 > codesSize) return false;

62

63

// Search for the first char of some digraph

64

int lastDigraphIndex = -1;

65

const int thisChar = codes[i * MAX_PROXIMITY_CHARS];

66

for (lastDigraphIndex = sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]) - 1;

67

lastDigraphIndex >= 0; --lastDigraphIndex) {

68

if (thisChar == GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].first) break;

69

}

70

// No match: return early

71

if (lastDigraphIndex < 0) return false;

72

73

// It's an interesting digraph if the second char matches too.

74

return GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];

75

}

76

77

// Mostly the same arguments as the non-recursive version, except:

78

// codes is the original value. It points to the start of the work buffer, and gets passed as is.

79

// codesSize is the size of the user input (thus, it is the size of codesSrc).

80

// codesDest is the current point in the work buffer.

81

// codesSrc is the current point in the user-input, original, content-unmodified buffer.

82

// codesRemain is the remaining size in codesSrc.

83

void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,

84

const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,

85

const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,

satok

3c4bb77

2011-03-04 22:50:19 -0800

[diff] [blame]

86

const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

87

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

88

if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {

89

for (int i = 0; i < codesRemain; ++i) {

90

if (isDigraph(codesSrc, i, codesRemain)) {

91

// Found a digraph. We will try both spellings. eg. the word is "pruefen"

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

92

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

93

// Copy the word up to the first char of the digraph, then continue processing

94

// on the remaining part of the word, skipping the second char of the digraph.

95

// In our example, copy "pru" and continue running on "fen"

96

// Make i the index of the second char of the digraph for simplicity. Forgetting

97

// to do that results in an infinite recursion so take care!

98

++i;

99

memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);

100

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,

101

codesBuffer, codesBufferSize, flags,

102

codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,

103

currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,

104

frequencies);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

105

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

106

// Copy the second char of the digraph in place, then continue processing on

107

// the remaining part of the word.

108

// In our example, after "pru" in the buffer copy the "e", and continue on "fen"

109

memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,

110

BYTES_IN_ONE_CHAR);

111

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,

112

codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,

113

codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,

114

outWords, frequencies);

115

return;

116

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

}

}

// If we come here, we hit the end of the word: let's check it against the dictionary.

121

// In our example, we'll come here once for "prufen" and then once for "pruefen".

122

// If the word contains several digraphs, we'll come it for the product of them.

123

// eg. if the word is "ueberpruefen" we'll test, in order, against

124

// "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".

125

const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;

126

if (0 != remainingBytes)

127

memcpy(codesDest, codesSrc, remainingBytes);

128

129

getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,

130

(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);

131

}

132

133

int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,

134

const int *ycoordinates, const int *codes, const int codesSize, const int flags,

135

unsigned short *outWords, int *frequencies) {

136

137

if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)

138

{ // Incrementally tune the word and try all possibilities

139

int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];

140

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

141

codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

142

} else { // Normal processing

143

getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,

144

outWords, frequencies);

145

}

146

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

147

PROF_START(20);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

148

// Get the word count

149

int suggestedWordsCount = 0;

150

while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {

151

suggestedWordsCount++;

}

if (DEBUG_DICT) {

LOGI("Returning %d words", suggestedWordsCount);

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

156

/// Print the returned words

157

for (int j = 0; j < suggestedWordsCount; ++j) {

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

158

#ifdef FLAG_DBG

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

159

short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;

160

char s[MAX_WORD_LENGTH];

161

for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

162

#endif

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

163

LOGI("%s %i", s, mFrequencies[j]);

164

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

165

LOGI("Next letters: ");

166

for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {

167

if (mNextLettersFrequency[k] > 0) {

168

LOGI("%c = %d,", k, mNextLettersFrequency[k]);

169

}

170

}

171

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

172

PROF_END(20);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

173

PROF_CLOSE;

174

return suggestedWordsCount;

175

}

176

177

void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,

178

const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,

179

unsigned short *outWords, int *frequencies) {

180

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

181

PROF_OPEN;

182

PROF_START(0);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

183

initSuggestions(codes, codesSize, outWords, frequencies);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

184

if (DEBUG_DICT) assert(codesSize == mInputLength);

185

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

186

const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

187

PROF_END(0);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

188

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

189

PROF_START(1);

Tadashi G. Takaoka

887f11e

2011-02-10 20:53:58 +0900

[diff] [blame]

190

getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

191

PROF_END(1);

192

193

PROF_START(2);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

194

// Suggestion with missing character

195

if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

196

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

197

if (DEBUG_DICT) {

198

LOGI("--- Suggest missing characters %d", i);

199

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

200

getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

201

}

202

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

203

PROF_END(2);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

204

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

205

PROF_START(3);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

206

// Suggestion with excessive character

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

207

if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER

208

&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

209

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

210

if (DEBUG_DICT) {

211

LOGI("--- Suggest excessive characters %d", i);

212

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

213

getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

214

}

215

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

216

PROF_END(3);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

217

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

218

PROF_START(4);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

219

// Suggestion with transposed characters

220

// Only suggest words that length is mInputLength

221

if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {

222

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

223

if (DEBUG_DICT) {

224

LOGI("--- Suggest transposed characters %d", i);

225

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

226

getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

227

}

228

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

229

PROF_END(4);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

230

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

231

PROF_START(5);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

232

// Suggestions with missing space

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

233

if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER

234

&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

235

for (int i = 1; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

236

if (DEBUG_DICT) {

237

LOGI("--- Suggest missing space characters %d", i);

238

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

239

getMissingSpaceWords(mInputLength, i);

240

}

241

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

242

PROF_END(5);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

243

244

PROF_START(6);

Jean Chalard

e93b1f22

2011-06-01 17:12:25 +0900

[diff] [blame]

245

if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

246

// The first and last "mistyped spaces" are taken care of by excessive character handling

247

for (int i = 1; i < codesSize - 1; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

248

if (DEBUG_DICT) {

249

LOGI("--- Suggest words with proximity space %d", i);

250

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

251

const int x = xcoordinates[i];

252

const int y = ycoordinates[i];

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

253

if (DEBUG_PROXIMITY_INFO) {

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

254

LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",

255

i, x, y, proximityInfo->hasSpaceProximity(x, y));

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

256

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

257

if (proximityInfo->hasSpaceProximity(x, y)) {

258

getMistypedSpaceWords(mInputLength, i);

259

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

260

}

261

}

262

PROF_END(6);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

263

}

264

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

265

void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,

266

unsigned short *outWords, int *frequencies) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

267

if (DEBUG_DICT) {

268

LOGI("initSuggest");

269

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

270

mFrequencies = frequencies;

271

mOutputChars = outWords;

272

mInputCodes = codes;

273

mInputLength = codesSize;

274

mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;

275

}

276

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

277

static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

278

if (c < nextLettersSize) {

nextLetters[c]++;

}

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

283

// TODO: We need to optimize addWord by using STL or something

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

284

// TODO: This needs to take an const unsigned short* and not tinker with its contents

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

285

bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

286

word[length] = 0;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

287

if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

288

#ifdef FLAG_DBG

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

289

char s[length + 1];

290

for (int i = 0; i <= length; i++) s[i] = word[i];

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

291

#endif

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

292

LOGI("Found word = %s, freq = %d", s, frequency);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

293

}

satok

f5cded1

2010-12-06 21:28:24 +0900

[diff] [blame]

294

if (length > MAX_WORD_LENGTH) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

295

if (DEBUG_DICT) {

296

LOGI("Exceeded max word length.");

297

}

satok

f5cded1

2010-12-06 21:28:24 +0900

[diff] [blame]

298

return false;

299

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

300

301

// Find the right insertion point

302

int insertAt = 0;

303

while (insertAt < MAX_WORDS) {

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

304

// TODO: How should we sort words with the same frequency?

305

if (frequency > mFrequencies[insertAt]) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

break;

}

insertAt++;

}

if (insertAt < MAX_WORDS) {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

311

if (DEBUG_DICT) {

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

312

#ifdef FLAG_DBG

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

313

char s[length + 1];

314

for (int i = 0; i <= length; i++) s[i] = word[i];

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

315

#endif

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

316

LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

317

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

318

memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),

319

(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),

320

(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));

321

mFrequencies[insertAt] = frequency;

322

memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),

satok

715514d

2010-12-02 20:19:59 +0900

[diff] [blame]

323

(char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

324

(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);

satok

715514d

2010-12-02 20:19:59 +0900

[diff] [blame]

325

unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

while (length--) {

*dest++ = *word++;

}

*dest = 0; // NULL terminate

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

330

if (DEBUG_DICT) {

331

LOGI("Added word at %d", insertAt);

332

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

return true;

}

return false;

}

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

338

static inline unsigned short toBaseLowerCase(unsigned short c) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

339

if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {

340

c = BASE_CHARS[c];

341

}

342

if (c >='A' && c <= 'Z') {

343

c |= 32;

344

} else if (c > 127) {

345

c = latin_tolower(c);

}

return c;

}

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

350

bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

351

if (length != mInputLength) {

352

return false;

353

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

354

const int *inputCodes = mInputCodes;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

355

while (length--) {

356

if ((unsigned int) *inputCodes != (unsigned int) *word) {

357

return false;

358

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

359

inputCodes += MAX_PROXIMITY_CHARS;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

word++;

}

return true;

}

satok

2010-12-02 20:19:59 +0900

[diff] [blame]

365

static const char QUOTE = '\'';

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

366

static const char SPACE = ' ';

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

367

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

368

void UnigramDictionary::getSuggestionCandidates(const int skipPos,

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

369

const int excessivePos, const int transposedPos, int *nextLetters,

370

const int nextLettersSize, const int maxDepth) {

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

371

if (DEBUG_DICT) {

372

LOGI("getSuggestionCandidates %d", maxDepth);

373

assert(transposedPos + 1 < mInputLength);

374

assert(excessivePos < mInputLength);

375

assert(missingPos < mInputLength);

376

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

377

int rootPosition = ROOT_POS;

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

378

// Get the number of children of root, then increment the position

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

379

int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

380

int depth = 0;

381

382

mStackChildCount[0] = childCount;

383

mStackTraverseAll[0] = (mInputLength <= 0);

384

mStackNodeFreq[0] = 1;

385

mStackInputIndex[0] = 0;

386

mStackDiffs[0] = 0;

387

mStackSiblingPos[0] = rootPosition;

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

388

mStackOutputIndex[0] = 0;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

389

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

390

// Depth first search

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

391

while (depth >= 0) {

392

if (mStackChildCount[depth] > 0) {

393

--mStackChildCount[depth];

394

bool traverseAllNodes = mStackTraverseAll[depth];

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

395

int matchWeight = mStackNodeFreq[depth];

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

396

int inputIndex = mStackInputIndex[depth];

397

int diffs = mStackDiffs[depth];

398

int siblingPos = mStackSiblingPos[depth];

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

399

int outputIndex = mStackOutputIndex[depth];

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

400

int firstChildPos;

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

401

// depth will never be greater than maxDepth because in that case,

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

402

// needsToTraverseChildrenNodes should be false

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

403

const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

404

maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,

405

excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,

406

&firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

407

&siblingPos, &outputIndex);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

408

// Update next sibling pos

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

409

mStackSiblingPos[depth] = siblingPos;

410

if (needsToTraverseChildrenNodes) {

411

// Goes to child node

412

++depth;

413

mStackChildCount[depth] = childCount;

414

mStackTraverseAll[depth] = traverseAllNodes;

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

415

mStackNodeFreq[depth] = matchWeight;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

416

mStackInputIndex[depth] = inputIndex;

417

mStackDiffs[depth] = diffs;

418

mStackSiblingPos[depth] = firstChildPos;

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

419

mStackOutputIndex[depth] = outputIndex;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

420

}

421

} else {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

422

// Goes to parent sibling node

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

--depth;

}

}

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

428

static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;

429

static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {

430

return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);

431

}

432

433

static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;

434

inline static void multiplyIntCapped(const int multiplier, int *base) {

435

const int temp = *base;

436

if (temp != S_INT_MAX) {

437

// Branch if multiplier == 2 for the optimization

438

if (multiplier == 2) {

439

*base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;

440

} else {

441

const int tempRetval = temp * multiplier;

442

*base = tempRetval >= temp ? tempRetval : S_INT_MAX;

}

}

}

inline static int powerIntCapped(const int base, const int n) {

satok

0b6b0a5

2011-04-27 16:29:27 +0900

[diff] [blame]

448

if (base == 2) {

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

449

return n < 31 ? 1 << n : S_INT_MAX;

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

450

} else {

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

451

int ret = base;

452

for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);

return ret;

}

}

inline static void multiplyRate(const int rate, int *freq) {

458

if (*freq != S_INT_MAX) {

459

if (*freq > 1000000) {

460

*freq /= 100;

461

multiplyIntCapped(rate, freq);

462

} else {

463

multiplyIntCapped(rate, freq);

464

*freq /= 100;

465

}

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

}

}

satok

2011-04-19 13:58:42 +0900

[diff] [blame]

469

inline static int calcFreqForSplitTwoWords(

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

470

const int typedLetterMultiplier, const int firstWordLength, const int secondWordLength,

471

const int firstFreq, const int secondFreq, const bool isSpaceProximity) {

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

472

if (firstWordLength == 0 || secondWordLength == 0) {

473

return 0;

474

}

475

const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);

476

int tempFirstFreq = firstFreq;

477

multiplyRate(firstDemotionRate, &tempFirstFreq);

478

479

const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);

480

int tempSecondFreq = secondFreq;

481

multiplyRate(secondDemotionRate, &tempSecondFreq);

482

483

const int totalLength = firstWordLength + secondWordLength;

484

485

// Promote pairFreq with multiplying by 2, because the word length is the same as the typed

486

// length.

487

int totalFreq = tempFirstFreq + tempSecondFreq;

488

489

// This is a workaround to try offsetting the not-enough-demotion which will be done in

490

// calcNormalizedScore in Utils.java.

491

// In calcNormalizedScore the score will be demoted by (1 - 1 / length)

492

// but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by

493

// (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))

494

const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);

495

multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);

496

497

// At this moment, totalFreq is calculated by the following formula:

498

// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))

499

// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))

500

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

501

multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

502

503

// This is another workaround to offset the demotion which will be done in

504

// calcNormalizedScore in Utils.java.

505

// In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote

506

// the same amount because we already have adjusted the synthetic freq of this "missing or

507

// mistyped space" suggestion candidate above in this method.

508

const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);

509

multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);

510

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

511

if (isSpaceProximity) {

512

// A word pair with one space proximity correction

513

if (DEBUG_DICT) {

514

LOGI("Found a word pair with space proximity correction.");

515

}

516

multiplyIntCapped(typedLetterMultiplier, &totalFreq);

517

multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);

518

}

519

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

520

multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);

return totalFreq;

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

524

bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {

525

return getSplitTwoWordsSuggestion(

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

526

inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos, false);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

527

}

528

529

bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {

530

return getSplitTwoWordsSuggestion(

531

inputLength, 0, spaceProximityPos, spaceProximityPos + 1,

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

532

inputLength - spaceProximityPos - 1, true);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

533

}

534

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

535

inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

536

const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

537

const int freq, const bool sameLength) const {

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

538

// TODO: Demote by edit distance

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

539

int finalFreq = freq * matchWeight;

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

540

if (skipPos >= 0) {

satok

dc5301e

2011-04-11 16:14:45 +0900

[diff] [blame]

541

if (mInputLength >= 2) {

542

const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE

543

* (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)

544

/ (10 * mInputLength

545

- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

546

if (DEBUG_DICT_FULL) {

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

547

LOGI("Demotion rate for missing character is %d.", demotionRate);

548

}

satok

dc5301e

2011-04-11 16:14:45 +0900

[diff] [blame]

549

multiplyRate(demotionRate, &finalFreq);

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

} else {

finalFreq = 0;

}

}

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

554

if (transposedPos >= 0) multiplyRate(

555

WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

556

if (excessivePos >= 0) {

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

557

multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

558

if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

559

multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

560

}

561

}

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

562

int lengthFreq = TYPED_LETTER_MULTIPLIER;

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

563

multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

564

if (lengthFreq == matchWeight) {

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

565

// Full exact match

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

566

if (depth > 1) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

567

if (DEBUG_DICT) {

568

LOGI("Found full matched word.");

569

}

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

570

multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);

571

}

572

if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

573

finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

574

}

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

575

} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0 && depth > 0) {

satok

9d2a302

2011-04-14 19:13:34 +0900

[diff] [blame]

576

// A word with proximity corrections

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

577

if (DEBUG_DICT) {

578

LOGI("Found one proximity correction.");

579

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

580

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);

satok

9d2a302

2011-04-14 19:13:34 +0900

[diff] [blame]

581

multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

582

}

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

583

if (DEBUG_DICT) {

584

LOGI("calc: %d, %d", depth, sameLength);

585

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

586

if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

587

return finalFreq;

588

}

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

589

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

590

inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,

satok

6831926

2010-12-03 19:38:08 +0900

[diff] [blame]

591

const int inputIndex, const int skipPos, const int depth) {

satok

8fbd552

2011-02-22 17:28:55 +0900

[diff] [blame]

592

const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

593

// Skip the ' or other letter and continue deeper

594

return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;

595

}

596

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

597

inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

598

const int inputLength) const {

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

599

if (inputIndex < 0 || inputIndex >= inputLength) return false;

600

const int currentChar = *getInputCharsAt(inputIndex);

601

const int leftIndex = inputIndex - 1;

602

if (leftIndex >= 0) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

603

const int *leftChars = getInputCharsAt(leftIndex);

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

604

int i = 0;

605

while (leftChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {

606

if (leftChars[i++] == currentChar) return true;

607

}

608

}

609

const int rightIndex = inputIndex + 1;

610

if (rightIndex < inputLength) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

611

const int *rightChars = getInputCharsAt(rightIndex);

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

612

int i = 0;

613

while (rightChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {

614

if (rightChars[i++] == currentChar) return true;

}

}

return false;

}

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

620

// In the following function, c is the current character of the dictionary word

621

// currently examined.

622

// currentChars is an array containing the keys close to the character the

623

// user actually typed at the same position. We want to see if c is in it: if so,

624

// then the word contains at that position a character close to what the user

625

// typed.

626

// What the user typed is actually the first character of the array.

627

// Notice : accented characters do not have a proximity list, so they are alone

628

// in their list. The non-accented version of the character should be considered

629

// "close", but not the other keys close to the non-accented version.

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

630

inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(

631

const int *currentChars, const unsigned short c, const int skipPos,

632

const int excessivePos, const int transposedPos) {

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

633

const unsigned short baseLowerC = toBaseLowerCase(c);

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

634

635

// The first char in the array is what user typed. If it matches right away,

636

// that means the user typed that same char for this pos.

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

637

if (currentChars[0] == baseLowerC || currentChars[0] == c)

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

638

return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;

639

640

// If one of those is true, we should not check for close characters at all.

641

if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)

642

return UNRELATED_CHAR;

643

644

// If the non-accented, lowercased version of that first character matches c,

645

// then we have a non-accented version of the accented character the user

646

// typed. Treat it as a close char.

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

647

if (toBaseLowerCase(currentChars[0]) == baseLowerC)

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

648

return NEAR_PROXIMITY_CHAR;

649

650

// Not an exact nor an accent-alike match: search the list of close keys

651

int j = 1;

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

652

while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

653

const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

654

if (matched) return NEAR_PROXIMITY_CHAR;

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

655

++j;

656

}

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

657

658

// Was not included, signal this as an unrelated character.

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

659

return UNRELATED_CHAR;

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

660

}

661

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

662

inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

663

const uint8_t* const root, const uint8_t flags, const int pos,

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

664

const int inputIndex, const int matchWeight, const int skipPos,

665

const int excessivePos, const int transposedPos, const int freq, const bool sameLength,

666

int* nextLetters, const int nextLettersSize) {

667

668

const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

669

if (isSameAsTyped) return;

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

670

671

if (depth >= MIN_SUGGEST_DEPTH) {

672

const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,

673

excessivePos, transposedPos, freq, sameLength);

674

if (!isSameAsTyped)

675

addWord(word, depth + 1, finalFreq);

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

676

}

677

678

if (sameLength && depth >= mInputLength && skipPos < 0) {

679

registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);

}

}

Jean Chalard

2011-06-30 19:47:25 +0900

[diff] [blame]

683

bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,

684

const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,

685

const int secondWordLength, const bool isSpaceProximity) {

686

if (inputLength >= MAX_WORD_LENGTH) return false;

687

if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos

688

|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)

689

return false;

690

const int newWordLength = firstWordLength + secondWordLength + 1;

691

// Allocating variable length array on stack

692

unsigned short word[newWordLength];

693

const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);

694

if (DEBUG_DICT) {

695

LOGI("First freq: %d", firstFreq);

696

}

697

if (firstFreq <= 0) return false;

698

699

for (int i = 0; i < firstWordLength; ++i) {

word[i] = mWord[i];

}

const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);

704

if (DEBUG_DICT) {

705

LOGI("Second freq: %d", secondFreq);

706

}

707

if (secondFreq <= 0) return false;

708

709

word[firstWordLength] = SPACE;

710

for (int i = (firstWordLength + 1); i < newWordLength; ++i) {

711

word[i] = mWord[i - firstWordLength - 1];

712

}

713

714

int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,

715

secondWordLength, firstFreq, secondFreq, isSpaceProximity);

716

if (DEBUG_DICT) {

717

LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,

718

TYPED_LETTER_MULTIPLIER);

719

}

720

addWord(word, newWordLength, pairFreq);

return true;

}

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

724

#ifndef NEW_DICTIONARY_FORMAT

725

// TODO: Don't forget to bring inline functions back to over where they are used.

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

726

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

727

// The following functions will be entirely replaced with new implementations.

728

void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,

729

const int excessivePos, const int transposedPos,int *nextLetters,

730

const int nextLettersSize) {

731

int initialPosition = initialPos;

732

const int count = Dictionary::getCount(DICT_ROOT, &initialPosition);

733

getWordsRec(count, initialPosition, 0,

734

min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),

735

mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,

736

nextLettersSize);

737

}

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

738

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

739

void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,

740

const int maxDepth, const bool traverseAllNodes, const int matchWeight,

741

const int inputIndex, const int diffs, const int skipPos, const int excessivePos,

742

const int transposedPos, int *nextLetters, const int nextLettersSize) {

743

int siblingPos = pos;

744

for (int i = 0; i < childrenCount; ++i) {

745

int newCount;

746

int newChildPosition;

747

bool newTraverseAllNodes;

int newMatchRate;

int newInputIndex;

int newDiffs;

int newSiblingPos;

int newOutputIndex;

const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,

754

traverseAllNodes, matchWeight, inputIndex, diffs,

755

skipPos, excessivePos, transposedPos,

756

nextLetters, nextLettersSize,

757

&newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,

758

&newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);

759

siblingPos = newSiblingPos;

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

760

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

761

if (needsToTraverseChildrenNodes) {

762

getWordsRec(newCount, newChildPosition, newOutputIndex, maxDepth, newTraverseAllNodes,

763

newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,

764

nextLetters, nextLettersSize);

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

765

}

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

766

}

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

767

}

768

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

769

inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,

770

const int inputLength, unsigned short *word) {

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

771

int pos = ROOT_POS;

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

772

int count = Dictionary::getCount(DICT_ROOT, &pos);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

773

int maxFreq = 0;

774

int depth = 0;

775

unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

776

bool terminal = false;

777

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

778

mStackChildCount[0] = count;

779

mStackSiblingPos[0] = pos;

780

781

while (depth >= 0) {

782

if (mStackChildCount[depth] > 0) {

783

--mStackChildCount[depth];

784

int firstChildPos;

785

int newFreq;

786

int siblingPos = mStackSiblingPos[depth];

787

const bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(siblingPos,

788

startInputIndex, depth, newWord, &firstChildPos, &count, &terminal, &newFreq,

789

&siblingPos);

790

mStackSiblingPos[depth] = siblingPos;

791

if (depth == (inputLength - 1)) {

792

// Traverse sibling node

793

if (terminal) {

794

if (newFreq > maxFreq) {

795

for (int i = 0; i < inputLength; ++i) word[i] = newWord[i];

796

if (DEBUG_DICT && DEBUG_NODE) {

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

797

#ifdef FLAG_DBG

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

798

char s[inputLength + 1];

799

for (int i = 0; i < inputLength; ++i) s[i] = word[i];

800

s[inputLength] = 0;

Doug Kwan

2011-07-07 22:53:50 -0700

[diff] [blame^]

801

#endif

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

802

LOGI("New missing space word found: %d > %d (%s), %d, %d",

803

newFreq, maxFreq, s, inputLength, depth);

}

maxFreq = newFreq;

}

}

} else if (needsToTraverseChildrenNodes) {

809

// Traverse children nodes

810

++depth;

811

mStackChildCount[depth] = count;

812

mStackSiblingPos[depth] = firstChildPos;

813

}

814

} else {

815

// Traverse parent node

816

--depth;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

817

}

818

}

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

819

820

word[inputLength] = 0;

821

return maxFreq;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

822

}

823

824

inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

825

const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,

826

int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {

827

const int inputIndex = startInputIndex + depth;

satok

8fbd552

2011-02-22 17:28:55 +0900

[diff] [blame]

828

const int *currentChars = getInputCharsAt(inputIndex);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

829

unsigned short c;

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

830

*siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,

831

&c, newChildPosition, newTerminal, newFreq);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

832

const unsigned int inputC = currentChars[0];

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

833

if (DEBUG_DICT) {

834

assert(inputC <= U_SHORT_MAX);

835

}

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

836

const unsigned short baseLowerC = toBaseLowerCase(c);

837

const bool matched = (inputC == baseLowerC || inputC == c);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

838

const bool hasChild = *newChildPosition != 0;

839

if (matched) {

840

word[depth] = c;

841

if (DEBUG_DICT && DEBUG_NODE) {

842

LOGI("Node(%c, %c)<%d>, %d, %d", inputC, c, matched, hasChild, *newFreq);

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

843

if (*newTerminal) {

844

LOGI("Terminal %d", *newFreq);

845

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

846

}

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

847

if (hasChild) {

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

848

*newCount = Dictionary::getCount(DICT_ROOT, newChildPosition);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

return true;

} else {

return false;

}

} else {

// If this node is not user typed character, this method treats this word as unmatched.

855

// Thus newTerminal shouldn't be true.

856

*newTerminal = false;

857

return false;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

858

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

859

}

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

860

861

// TODO: use uint32_t instead of unsigned short

862

bool UnigramDictionary::isValidWord(unsigned short *word, int length) {

863

if (IS_LATEST_DICT_VERSION) {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

864

return (getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

865

} else {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

866

return (getBigramPosition(0, word, 0, length) != NOT_VALID_WORD);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

}

}

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

870

871

// Require strict exact match.

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

872

int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,

873

int length) const {

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

874

// returns address of bigram data of that word

875

// return -99 if not found

876

877

int count = Dictionary::getCount(DICT_ROOT, &pos);

878

unsigned short currentChar = (unsigned short) word[offset];

879

for (int j = 0; j < count; j++) {

880

unsigned short c = Dictionary::getChar(DICT_ROOT, &pos);

881

int terminal = Dictionary::getTerminal(DICT_ROOT, &pos);

882

int childPos = Dictionary::getAddress(DICT_ROOT, &pos);

883

if (c == currentChar) {

884

if (offset == length - 1) {

if (terminal) {

return (pos+1);

}

} else {

if (childPos != 0) {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

890

int t = getBigramPosition(childPos, word, offset + 1, length);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

if (t > 0) {

return t;

}

}

}

}

if (terminal) {

Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);

899

}

900

// There could be two instances of each alphabet - upper and lower case. So continue

901

// looking ...

902

}

903

return NOT_VALID_WORD;

904

}

905

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

906

// The following functions will be modified.

Jean Chalard

2011-06-30 19:23:16 +0900

[diff] [blame]

907

inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,

908

const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,

909

const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

910

int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,

911

bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,

912

int *nextSiblingPosition, int *nextOutputIndex) {

913

if (DEBUG_DICT) {

914

int inputCount = 0;

915

if (skipPos >= 0) ++inputCount;

916

if (excessivePos >= 0) ++inputCount;

917

if (transposedPos >= 0) ++inputCount;

918

assert(inputCount <= 1);

}

unsigned short c;

int childPosition;

bool terminal;

int freq;

bool isSameAsUserTypedLength = false;

925

Jean Chalard

2011-06-30 19:23:16 +0900

[diff] [blame]

926

const int pos = initialPos;

927

const int depth = initialDepth;

928

const int traverseAllNodes = initialTraverseAllNodes;

929

const int diffs = initialDiffs;

930

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

931

const uint8_t flags = 0; // No flags for now

932

933

if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;

934

935

*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,

936

&c, &childPosition, &terminal, &freq);

937

*nextOutputIndex = depth + 1;

938

939

const bool needsToTraverseChildrenNodes = childPosition != 0;

940

941

// If we are only doing traverseAllNodes, no need to look at the typed characters.

942

if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {

943

mWord[depth] = c;

944

if (traverseAllNodes && terminal) {

945

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

946

excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);

947

}

948

if (!needsToTraverseChildrenNodes) return false;

949

*newTraverseAllNodes = traverseAllNodes;

950

*newMatchRate = matchWeight;

951

*newDiffs = diffs;

952

*newInputIndex = inputIndex;

953

} else {

954

const int *currentChars = getInputCharsAt(inputIndex);

955

956

if (transposedPos >= 0) {

957

if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;

958

if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;

959

}

960

961

int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,

962

transposedPos);

963

if (UNRELATED_CHAR == matchedProximityCharId) return false;

964

mWord[depth] = c;

965

// If inputIndex is greater than mInputLength, that means there is no

966

// proximity chars. So, we don't need to check proximity.

967

if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {

968

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);

969

}

970

bool isSameAsUserTypedLength = mInputLength == inputIndex + 1

971

|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);

972

if (isSameAsUserTypedLength && terminal) {

973

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

974

excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);

975

}

976

if (!needsToTraverseChildrenNodes) return false;

977

// Start traversing all nodes after the index exceeds the user typed length

978

*newTraverseAllNodes = isSameAsUserTypedLength;

979

*newMatchRate = matchWeight;

980

*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);

981

*newInputIndex = inputIndex + 1;

982

}

983

// Optimization: Prune out words that are too long compared to how much was typed.

984

if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {

return false;

}

// If inputIndex is greater than mInputLength, that means there are no proximity chars.

989

// TODO: Check if this can be isSameAsUserTypedLength only.

990

if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {

991

*newTraverseAllNodes = true;

992

}

993

// get the count of nodes and increment childAddress.

994

*newCount = Dictionary::getCount(DICT_ROOT, &childPosition);

995

*newChildPosition = childPosition;

996

if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);

997

return needsToTraverseChildrenNodes;

998

}

999

1000

#else // NEW_DICTIONARY_FORMAT

Jean Chalard

2011-06-21 22:23:21 +0900

[diff] [blame]

1001

Jean Chalard

2011-06-30 19:23:16 +0900

[diff] [blame]

1002

inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,

1003

const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,

1004

const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,

Jean Chalard

2011-06-21 22:23:21 +0900

[diff] [blame]

1005

int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,

1006

bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,

Jean Chalard

432789a

2011-06-30 17:50:48 +0900

[diff] [blame]

1007

int *nextSiblingPosition, int *newOutputIndex) {

Jean Chalard

2011-06-21 22:23:21 +0900

[diff] [blame]

1008

if (DEBUG_DICT) {

1009

int inputCount = 0;

1010

if (skipPos >= 0) ++inputCount;

1011

if (excessivePos >= 0) ++inputCount;

1012

if (transposedPos >= 0) ++inputCount;

1013

assert(inputCount <= 1);

}

unsigned short c;

int childPosition;

bool terminal;

int freq;

bool isSameAsUserTypedLength = false;

1020

Jean Chalard

2011-06-30 19:23:16 +0900

[diff] [blame]

1021

int pos = initialPos;

1022

int depth = initialDepth;

1023

int traverseAllNodes = initialTraverseAllNodes;

1024

int diffs = initialDiffs;

1025

Jean Chalard

2011-06-21 22:23:21 +0900

[diff] [blame]

1026

const uint8_t flags = 0; // No flags for now

1027

1028

if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;

1029

1030

*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,

1031

&c, &childPosition, &terminal, &freq);

Jean Chalard

432789a

2011-06-30 17:50:48 +0900

[diff] [blame]

1032

*newOutputIndex = depth + 1;

Jean Chalard

2011-06-21 22:23:21 +0900

[diff] [blame]

1033

1034

const bool needsToTraverseChildrenNodes = childPosition != 0;

1035

1036

// If we are only doing traverseAllNodes, no need to look at the typed characters.

1037

if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {

1038

mWord[depth] = c;

1039

if (traverseAllNodes && terminal) {

1040

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

1041

excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);

1042

}

1043

if (!needsToTraverseChildrenNodes) return false;

1044

*newTraverseAllNodes = traverseAllNodes;

1045

*newMatchRate = matchWeight;

1046

*newDiffs = diffs;

1047

*newInputIndex = inputIndex;

1048

} else {

1049

const int *currentChars = getInputCharsAt(inputIndex);

1050

1051

if (transposedPos >= 0) {

1052

if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;

1053

if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;

1054

}

1055

1056

int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,

1057

transposedPos);

1058

if (UNRELATED_CHAR == matchedProximityCharId) return false;

1059

mWord[depth] = c;

1060

// If inputIndex is greater than mInputLength, that means there is no

1061

// proximity chars. So, we don't need to check proximity.

1062

if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {

1063

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);

1064

}

1065

bool isSameAsUserTypedLength = mInputLength == inputIndex + 1

1066

|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);

1067

if (isSameAsUserTypedLength && terminal) {

1068

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

1069

excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);

1070

}

1071

if (!needsToTraverseChildrenNodes) return false;

1072

// Start traversing all nodes after the index exceeds the user typed length

1073

*newTraverseAllNodes = isSameAsUserTypedLength;

1074

*newMatchRate = matchWeight;

1075

*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);

1076

*newInputIndex = inputIndex + 1;

1077

}

1078

// Optimization: Prune out words that are too long compared to how much was typed.

1079

if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {

return false;

}

// If inputIndex is greater than mInputLength, that means there are no proximity chars.

1084

// TODO: Check if this can be isSameAsUserTypedLength only.

1085

if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {

1086

*newTraverseAllNodes = true;

1087

}

1088

// get the count of nodes and increment childAddress.

1089

*newCount = Dictionary::getCount(DICT_ROOT, &childPosition);

1090

*newChildPosition = childPosition;

1091

if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);

1092

return needsToTraverseChildrenNodes;

1093

}

1094

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

1095

#endif // NEW_DICTIONARY_FORMAT

1096

satok