Blame - native/src/unigram_dictionary.cpp - android_packages_inputmethods_LatinIME

2010-12-01 21:22:15 +0900

[diff] [blame]

/*

**

**

** Licensed under the Apache License, Version 2.0 (the "License");

6

** you may not use this file except in compliance with the License.

7

** You may obtain a copy of the License at

8

**

9

** http://www.apache.org/licenses/LICENSE-2.0

10

**

11

** Unless required by applicable law or agreed to in writing, software

12

** distributed under the License is distributed on an "AS IS" BASIS,

13

** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

** See the License for the specific language governing permissions and

15

** limitations under the License.

16

*/

17

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

18

#include <assert.h>

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

19

#include <string.h>

20

satok

e808e43

2010-12-02 14:53:24 +0900

[diff] [blame]

21

#define LOG_TAG "LatinIME: unigram_dictionary.cpp"

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

22

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

23

#include "basechars.h"

24

#include "char_utils.h"

satok

e808e43

2010-12-02 14:53:24 +0900

[diff] [blame]

25

#include "dictionary.h"

26

#include "unigram_dictionary.h"

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

namespace latinime {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

30

const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =

{ { 'a', 'e' },

{ 'o', 'e' },

{ 'u', 'e' } };

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

35

// TODO: check the header

36

UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

37

int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,

satok

18c28f4

2010-12-02 18:11:54 +0900

[diff] [blame]

38

const bool isLatestDictVersion)

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

39

: DICT_ROOT(streamStart),

40

MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

41

MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),

42

TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

43

ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

44

BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),

45

MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

46

if (DEBUG_DICT) {

47

LOGI("UnigramDictionary - constructor");

48

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

49

}

50

satok

18c28f4

2010-12-02 18:11:54 +0900

[diff] [blame]

51

UnigramDictionary::~UnigramDictionary() {}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

52

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

53

static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,

54

const int MAX_PROXIMITY_CHARS) {

55

return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;

56

}

57

58

bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {

59

60

// There can't be a digraph if we don't have at least 2 characters to examine

61

if (i + 2 > codesSize) return false;

62

63

// Search for the first char of some digraph

64

int lastDigraphIndex = -1;

65

const int thisChar = codes[i * MAX_PROXIMITY_CHARS];

66

for (lastDigraphIndex = sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]) - 1;

67

lastDigraphIndex >= 0; --lastDigraphIndex) {

68

if (thisChar == GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].first) break;

69

}

70

// No match: return early

71

if (lastDigraphIndex < 0) return false;

72

73

// It's an interesting digraph if the second char matches too.

74

return GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];

75

}

76

77

// Mostly the same arguments as the non-recursive version, except:

78

// codes is the original value. It points to the start of the work buffer, and gets passed as is.

79

// codesSize is the size of the user input (thus, it is the size of codesSrc).

80

// codesDest is the current point in the work buffer.

81

// codesSrc is the current point in the user-input, original, content-unmodified buffer.

82

// codesRemain is the remaining size in codesSrc.

83

void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,

84

const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,

85

const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,

satok

3c4bb77

2011-03-04 22:50:19 -0800

[diff] [blame]

86

const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

87

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

88

if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {

89

for (int i = 0; i < codesRemain; ++i) {

90

if (isDigraph(codesSrc, i, codesRemain)) {

91

// Found a digraph. We will try both spellings. eg. the word is "pruefen"

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

92

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

93

// Copy the word up to the first char of the digraph, then continue processing

94

// on the remaining part of the word, skipping the second char of the digraph.

95

// In our example, copy "pru" and continue running on "fen"

96

// Make i the index of the second char of the digraph for simplicity. Forgetting

97

// to do that results in an infinite recursion so take care!

98

++i;

99

memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);

100

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,

101

codesBuffer, codesBufferSize, flags,

102

codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,

103

currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,

104

frequencies);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

105

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

106

// Copy the second char of the digraph in place, then continue processing on

107

// the remaining part of the word.

108

// In our example, after "pru" in the buffer copy the "e", and continue on "fen"

109

memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,

110

BYTES_IN_ONE_CHAR);

111

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,

112

codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,

113

codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,

114

outWords, frequencies);

115

return;

116

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

}

}

// If we come here, we hit the end of the word: let's check it against the dictionary.

121

// In our example, we'll come here once for "prufen" and then once for "pruefen".

122

// If the word contains several digraphs, we'll come it for the product of them.

123

// eg. if the word is "ueberpruefen" we'll test, in order, against

124

// "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".

125

const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;

126

if (0 != remainingBytes)

127

memcpy(codesDest, codesSrc, remainingBytes);

128

129

getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,

130

(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);

131

}

132

133

int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,

134

const int *ycoordinates, const int *codes, const int codesSize, const int flags,

135

unsigned short *outWords, int *frequencies) {

136

137

if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)

138

{ // Incrementally tune the word and try all possibilities

139

int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];

140

getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,

Jean Chalard

2011-03-04 12:17:48 +0900

[diff] [blame]

141

codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

142

} else { // Normal processing

143

getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,

144

outWords, frequencies);

145

}

146

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

147

PROF_START(20);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

148

// Get the word count

149

int suggestedWordsCount = 0;

150

while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {

151

suggestedWordsCount++;

}

if (DEBUG_DICT) {

LOGI("Returning %d words", suggestedWordsCount);

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

156

/// Print the returned words

157

for (int j = 0; j < suggestedWordsCount; ++j) {

158

short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;

159

char s[MAX_WORD_LENGTH];

160

for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];

161

LOGI("%s %i", s, mFrequencies[j]);

162

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

163

LOGI("Next letters: ");

164

for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {

165

if (mNextLettersFrequency[k] > 0) {

166

LOGI("%c = %d,", k, mNextLettersFrequency[k]);

167

}

168

}

169

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

170

PROF_END(20);

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

171

PROF_CLOSE;

172

return suggestedWordsCount;

173

}

174

175

void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,

176

const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,

177

unsigned short *outWords, int *frequencies) {

178

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

179

PROF_OPEN;

180

PROF_START(0);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

181

initSuggestions(codes, codesSize, outWords, frequencies);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

182

if (DEBUG_DICT) assert(codesSize == mInputLength);

183

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

184

const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

185

PROF_END(0);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

186

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

187

PROF_START(1);

Tadashi G. Takaoka

887f11e

2011-02-10 20:53:58 +0900

[diff] [blame]

188

getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

189

PROF_END(1);

190

191

PROF_START(2);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

192

// Suggestion with missing character

193

if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

194

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

195

if (DEBUG_DICT) {

196

LOGI("--- Suggest missing characters %d", i);

197

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

198

getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

199

}

200

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

201

PROF_END(2);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

202

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

203

PROF_START(3);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

204

// Suggestion with excessive character

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

205

if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER

206

&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

207

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

208

if (DEBUG_DICT) {

209

LOGI("--- Suggest excessive characters %d", i);

210

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

211

getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

212

}

213

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

214

PROF_END(3);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

215

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

216

PROF_START(4);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

217

// Suggestion with transposed characters

218

// Only suggest words that length is mInputLength

219

if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {

220

for (int i = 0; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

221

if (DEBUG_DICT) {

222

LOGI("--- Suggest transposed characters %d", i);

223

}

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

224

getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

225

}

226

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

227

PROF_END(4);

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

228

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

229

PROF_START(5);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

230

// Suggestions with missing space

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

231

if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER

232

&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

233

for (int i = 1; i < codesSize; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

234

if (DEBUG_DICT) {

235

LOGI("--- Suggest missing space characters %d", i);

236

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

237

getMissingSpaceWords(mInputLength, i);

238

}

239

}

satok

2011-01-05 14:13:07 +0900

[diff] [blame]

240

PROF_END(5);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

241

242

PROF_START(6);

Jean Chalard

e93b1f22

2011-06-01 17:12:25 +0900

[diff] [blame]

243

if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

244

// The first and last "mistyped spaces" are taken care of by excessive character handling

245

for (int i = 1; i < codesSize - 1; ++i) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

246

if (DEBUG_DICT) {

247

LOGI("--- Suggest words with proximity space %d", i);

248

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

249

const int x = xcoordinates[i];

250

const int y = ycoordinates[i];

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

251

if (DEBUG_PROXIMITY_INFO) {

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

252

LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",

253

i, x, y, proximityInfo->hasSpaceProximity(x, y));

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

254

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

255

if (proximityInfo->hasSpaceProximity(x, y)) {

256

getMistypedSpaceWords(mInputLength, i);

257

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

258

}

259

}

260

PROF_END(6);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

261

}

262

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

263

void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,

264

unsigned short *outWords, int *frequencies) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

265

if (DEBUG_DICT) {

266

LOGI("initSuggest");

267

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

268

mFrequencies = frequencies;

269

mOutputChars = outWords;

270

mInputCodes = codes;

271

mInputLength = codesSize;

272

mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;

273

}

274

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

275

static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

276

if (c < nextLettersSize) {

nextLetters[c]++;

}

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

281

// TODO: We need to optimize addWord by using STL or something

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

282

// TODO: This needs to take an const unsigned short* and not tinker with its contents

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

283

bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

284

word[length] = 0;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

285

if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

286

char s[length + 1];

287

for (int i = 0; i <= length; i++) s[i] = word[i];

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

288

LOGI("Found word = %s, freq = %d", s, frequency);

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

289

}

satok

f5cded1

2010-12-06 21:28:24 +0900

[diff] [blame]

290

if (length > MAX_WORD_LENGTH) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

291

if (DEBUG_DICT) {

292

LOGI("Exceeded max word length.");

293

}

satok

f5cded1

2010-12-06 21:28:24 +0900

[diff] [blame]

294

return false;

295

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

296

297

// Find the right insertion point

298

int insertAt = 0;

299

while (insertAt < MAX_WORDS) {

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

300

// TODO: How should we sort words with the same frequency?

301

if (frequency > mFrequencies[insertAt]) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

break;

}

insertAt++;

}

if (insertAt < MAX_WORDS) {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

307

if (DEBUG_DICT) {

308

char s[length + 1];

309

for (int i = 0; i <= length; i++) s[i] = word[i];

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

310

LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

311

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

312

memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),

313

(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),

314

(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));

315

mFrequencies[insertAt] = frequency;

316

memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),

satok

715514d

2010-12-02 20:19:59 +0900

[diff] [blame]

317

(char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

318

(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);

satok

715514d

2010-12-02 20:19:59 +0900

[diff] [blame]

319

unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

while (length--) {

*dest++ = *word++;

}

*dest = 0; // NULL terminate

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

324

if (DEBUG_DICT) {

325

LOGI("Added word at %d", insertAt);

326

}

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

return true;

}

return false;

}

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

332

static inline unsigned short toBaseLowerCase(unsigned short c) {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

333

if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {

334

c = BASE_CHARS[c];

335

}

336

if (c >='A' && c <= 'Z') {

337

c |= 32;

338

} else if (c > 127) {

339

c = latin_tolower(c);

}

return c;

}

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

344

bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

345

if (length != mInputLength) {

346

return false;

347

}

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

348

const int *inputCodes = mInputCodes;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

349

while (length--) {

350

if ((unsigned int) *inputCodes != (unsigned int) *word) {

351

return false;

352

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

353

inputCodes += MAX_PROXIMITY_CHARS;

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

word++;

}

return true;

}

satok

2010-12-02 20:19:59 +0900

[diff] [blame]

359

static const char QUOTE = '\'';

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

360

static const char SPACE = ' ';

satok

2010-12-01 21:22:15 +0900

[diff] [blame]

361

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

362

void UnigramDictionary::getSuggestionCandidates(const int skipPos,

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

363

const int excessivePos, const int transposedPos, int *nextLetters,

364

const int nextLettersSize, const int maxDepth) {

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

365

if (DEBUG_DICT) {

366

LOGI("getSuggestionCandidates %d", maxDepth);

367

assert(transposedPos + 1 < mInputLength);

368

assert(excessivePos < mInputLength);

369

assert(missingPos < mInputLength);

370

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

371

int rootPosition = ROOT_POS;

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

372

// Get the number of children of root, then increment the position

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

373

int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

374

int depth = 0;

375

376

mStackChildCount[0] = childCount;

377

mStackTraverseAll[0] = (mInputLength <= 0);

378

mStackNodeFreq[0] = 1;

379

mStackInputIndex[0] = 0;

380

mStackDiffs[0] = 0;

381

mStackSiblingPos[0] = rootPosition;

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

382

mStackOutputIndex[0] = 0;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

383

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

384

// Depth first search

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

385

while (depth >= 0) {

386

if (mStackChildCount[depth] > 0) {

387

--mStackChildCount[depth];

388

bool traverseAllNodes = mStackTraverseAll[depth];

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

389

int matchWeight = mStackNodeFreq[depth];

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

390

int inputIndex = mStackInputIndex[depth];

391

int diffs = mStackDiffs[depth];

392

int siblingPos = mStackSiblingPos[depth];

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

393

int outputIndex = mStackOutputIndex[depth];

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

394

int firstChildPos;

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

395

// depth will never be greater than maxDepth because in that case,

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

396

// needsToTraverseChildrenNodes should be false

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

397

const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

398

maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,

399

excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,

400

&firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

401

&siblingPos, &outputIndex);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

402

// Update next sibling pos

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

403

mStackSiblingPos[depth] = siblingPos;

404

if (needsToTraverseChildrenNodes) {

405

// Goes to child node

406

++depth;

407

mStackChildCount[depth] = childCount;

408

mStackTraverseAll[depth] = traverseAllNodes;

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

409

mStackNodeFreq[depth] = matchWeight;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

410

mStackInputIndex[depth] = inputIndex;

411

mStackDiffs[depth] = diffs;

412

mStackSiblingPos[depth] = firstChildPos;

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

413

mStackOutputIndex[depth] = outputIndex;

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

414

}

415

} else {

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

416

// Goes to parent sibling node

satok

2010-12-07 13:08:39 +0900

[diff] [blame]

--depth;

}

}

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

422

static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;

423

static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {

424

return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);

425

}

426

427

static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;

428

inline static void multiplyIntCapped(const int multiplier, int *base) {

429

const int temp = *base;

430

if (temp != S_INT_MAX) {

431

// Branch if multiplier == 2 for the optimization

432

if (multiplier == 2) {

433

*base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;

434

} else {

435

const int tempRetval = temp * multiplier;

436

*base = tempRetval >= temp ? tempRetval : S_INT_MAX;

}

}

}

inline static int powerIntCapped(const int base, const int n) {

satok

0b6b0a5

2011-04-27 16:29:27 +0900

[diff] [blame]

442

if (base == 2) {

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

443

return n < 31 ? 1 << n : S_INT_MAX;

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

444

} else {

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

445

int ret = base;

446

for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);

return ret;

}

}

inline static void multiplyRate(const int rate, int *freq) {

452

if (*freq != S_INT_MAX) {

453

if (*freq > 1000000) {

454

*freq /= 100;

455

multiplyIntCapped(rate, freq);

456

} else {

457

multiplyIntCapped(rate, freq);

458

*freq /= 100;

459

}

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

}

}

satok

2011-04-19 13:58:42 +0900

[diff] [blame]

463

inline static int calcFreqForSplitTwoWords(

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

464

const int typedLetterMultiplier, const int firstWordLength, const int secondWordLength,

465

const int firstFreq, const int secondFreq, const bool isSpaceProximity) {

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

466

if (firstWordLength == 0 || secondWordLength == 0) {

467

return 0;

468

}

469

const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);

470

int tempFirstFreq = firstFreq;

471

multiplyRate(firstDemotionRate, &tempFirstFreq);

472

473

const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);

474

int tempSecondFreq = secondFreq;

475

multiplyRate(secondDemotionRate, &tempSecondFreq);

476

477

const int totalLength = firstWordLength + secondWordLength;

478

479

// Promote pairFreq with multiplying by 2, because the word length is the same as the typed

480

// length.

481

int totalFreq = tempFirstFreq + tempSecondFreq;

482

483

// This is a workaround to try offsetting the not-enough-demotion which will be done in

484

// calcNormalizedScore in Utils.java.

485

// In calcNormalizedScore the score will be demoted by (1 - 1 / length)

486

// but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by

487

// (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))

488

const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);

489

multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);

490

491

// At this moment, totalFreq is calculated by the following formula:

492

// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))

493

// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))

494

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

495

multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

496

497

// This is another workaround to offset the demotion which will be done in

498

// calcNormalizedScore in Utils.java.

499

// In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote

500

// the same amount because we already have adjusted the synthetic freq of this "missing or

501

// mistyped space" suggestion candidate above in this method.

502

const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);

503

multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);

504

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

505

if (isSpaceProximity) {

506

// A word pair with one space proximity correction

507

if (DEBUG_DICT) {

508

LOGI("Found a word pair with space proximity correction.");

509

}

510

multiplyIntCapped(typedLetterMultiplier, &totalFreq);

511

multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);

512

}

513

satok

4c981d3

2011-04-19 13:58:42 +0900

[diff] [blame]

514

multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);

return totalFreq;

}

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

518

bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {

519

return getSplitTwoWordsSuggestion(

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

520

inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos, false);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

521

}

522

523

bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {

524

return getSplitTwoWordsSuggestion(

525

inputLength, 0, spaceProximityPos, spaceProximityPos + 1,

satok

2011-05-18 15:31:04 +0900

[diff] [blame]

526

inputLength - spaceProximityPos - 1, true);

satok

2011-03-04 06:06:45 -0800

[diff] [blame]

527

}

528

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

529

inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

530

const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

531

const int freq, const bool sameLength) const {

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

532

// TODO: Demote by edit distance

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

533

int finalFreq = freq * matchWeight;

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

534

if (skipPos >= 0) {

satok

dc5301e

2011-04-11 16:14:45 +0900

[diff] [blame]

535

if (mInputLength >= 2) {

536

const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE

537

* (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)

538

/ (10 * mInputLength

539

- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

540

if (DEBUG_DICT_FULL) {

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

541

LOGI("Demotion rate for missing character is %d.", demotionRate);

542

}

satok

dc5301e

2011-04-11 16:14:45 +0900

[diff] [blame]

543

multiplyRate(demotionRate, &finalFreq);

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

} else {

finalFreq = 0;

}

}

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

548

if (transposedPos >= 0) multiplyRate(

549

WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

550

if (excessivePos >= 0) {

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

551

multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

552

if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {

satok

2011-01-05 16:37:53 +0900

[diff] [blame]

553

multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

554

}

555

}

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

556

int lengthFreq = TYPED_LETTER_MULTIPLIER;

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

557

multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

558

if (lengthFreq == matchWeight) {

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

559

// Full exact match

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

560

if (depth > 1) {

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

561

if (DEBUG_DICT) {

562

LOGI("Found full matched word.");

563

}

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

564

multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);

565

}

566

if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

567

finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

568

}

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

569

} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0 && depth > 0) {

satok

9d2a302

2011-04-14 19:13:34 +0900

[diff] [blame]

570

// A word with proximity corrections

satok

72bc17e

2011-04-13 17:23:27 +0900

[diff] [blame]

571

if (DEBUG_DICT) {

572

LOGI("Found one proximity correction.");

573

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

574

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);

satok

9d2a302

2011-04-14 19:13:34 +0900

[diff] [blame]

575

multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);

satok

58c49b9

2011-01-27 03:23:39 +0900

[diff] [blame]

576

}

satok

9674f65

2011-04-20 17:15:27 +0900

[diff] [blame]

577

if (DEBUG_DICT) {

578

LOGI("calc: %d, %d", depth, sameLength);

579

}

satok

2011-04-26 14:50:54 +0900

[diff] [blame]

580

if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);

satok

2010-12-13 14:42:35 +0900

[diff] [blame]

581

return finalFreq;

582

}

satok

2010-12-09 22:08:33 +0900

[diff] [blame]

583

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

584

inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,

satok

6831926

2010-12-03 19:38:08 +0900

[diff] [blame]

585

const int inputIndex, const int skipPos, const int depth) {

satok

8fbd552

2011-02-22 17:28:55 +0900

[diff] [blame]

586

const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

587

// Skip the ' or other letter and continue deeper

588

return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;

589

}

590

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

591

inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,

Jean Chalard

2011-03-03 10:22:10 +0900

[diff] [blame]

592

const int inputLength) const {

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

593

if (inputIndex < 0 || inputIndex >= inputLength) return false;

594

const int currentChar = *getInputCharsAt(inputIndex);

595

const int leftIndex = inputIndex - 1;

596

if (leftIndex >= 0) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

597

const int *leftChars = getInputCharsAt(leftIndex);

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

598

int i = 0;

599

while (leftChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {

600

if (leftChars[i++] == currentChar) return true;

601

}

602

}

603

const int rightIndex = inputIndex + 1;

604

if (rightIndex < inputLength) {

Jean Chalard

2011-02-25 17:56:53 +0900

[diff] [blame]

605

const int *rightChars = getInputCharsAt(rightIndex);

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

606

int i = 0;

607

while (rightChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {

608

if (rightChars[i++] == currentChar) return true;

}

}

return false;

}

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

614

// In the following function, c is the current character of the dictionary word

615

// currently examined.

616

// currentChars is an array containing the keys close to the character the

617

// user actually typed at the same position. We want to see if c is in it: if so,

618

// then the word contains at that position a character close to what the user

619

// typed.

620

// What the user typed is actually the first character of the array.

621

// Notice : accented characters do not have a proximity list, so they are alone

622

// in their list. The non-accented version of the character should be considered

623

// "close", but not the other keys close to the non-accented version.

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

624

inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(

625

const int *currentChars, const unsigned short c, const int skipPos,

626

const int excessivePos, const int transposedPos) {

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

627

const unsigned short baseLowerC = toBaseLowerCase(c);

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

628

629

// The first char in the array is what user typed. If it matches right away,

630

// that means the user typed that same char for this pos.

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

631

if (currentChars[0] == baseLowerC || currentChars[0] == c)

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

632

return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;

633

634

// If one of those is true, we should not check for close characters at all.

635

if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)

636

return UNRELATED_CHAR;

637

638

// If the non-accented, lowercased version of that first character matches c,

639

// then we have a non-accented version of the accented character the user

640

// typed. Treat it as a close char.

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

641

if (toBaseLowerCase(currentChars[0]) == baseLowerC)

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

642

return NEAR_PROXIMITY_CHAR;

643

644

// Not an exact nor an accent-alike match: search the list of close keys

645

int j = 1;

satok

2010-12-09 21:55:40 +0900

[diff] [blame]

646

while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

647

const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

648

if (matched) return NEAR_PROXIMITY_CHAR;

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

649

++j;

650

}

Jean Chalard

2011-02-18 17:50:58 +0900

[diff] [blame]

651

652

// Was not included, signal this as an unrelated character.

Jean Chalard

2011-01-27 14:20:22 +0900

[diff] [blame]

653

return UNRELATED_CHAR;

satok

2010-12-03 16:39:16 +0900

[diff] [blame]

654

}

655

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

656

inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

657

const uint8_t* const root, const uint8_t flags, const int pos,

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

658

const int inputIndex, const int matchWeight, const int skipPos,

659

const int excessivePos, const int transposedPos, const int freq, const bool sameLength,

660

int* nextLetters, const int nextLettersSize) {

661

662

const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

663

if (isSameAsTyped) return;

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

664

665

if (depth >= MIN_SUGGEST_DEPTH) {

666

const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,

667

excessivePos, transposedPos, freq, sameLength);

668

if (!isSameAsTyped)

669

addWord(word, depth + 1, finalFreq);

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

670

}

671

672

if (sameLength && depth >= mInputLength && skipPos < 0) {

673

registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);

}

}

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

677

#ifndef NEW_DICTIONARY_FORMAT

678

// TODO: Don't forget to bring inline functions back to over where they are used.

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

679

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

680

// The following functions will be entirely replaced with new implementations.

681

void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,

682

const int excessivePos, const int transposedPos,int *nextLetters,

683

const int nextLettersSize) {

684

int initialPosition = initialPos;

685

const int count = Dictionary::getCount(DICT_ROOT, &initialPosition);

686

getWordsRec(count, initialPosition, 0,

687

min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),

688

mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,

689

nextLettersSize);

690

}

Jean Chalard

2011-06-17 15:36:26 +0900

[diff] [blame]

691

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

692

void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,

693

const int maxDepth, const bool traverseAllNodes, const int matchWeight,

694

const int inputIndex, const int diffs, const int skipPos, const int excessivePos,

695

const int transposedPos, int *nextLetters, const int nextLettersSize) {

696

int siblingPos = pos;

697

for (int i = 0; i < childrenCount; ++i) {

698

int newCount;

699

int newChildPosition;

700

bool newTraverseAllNodes;

int newMatchRate;

int newInputIndex;

int newDiffs;

int newSiblingPos;

int newOutputIndex;

const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,

707

traverseAllNodes, matchWeight, inputIndex, diffs,

708

skipPos, excessivePos, transposedPos,

709

nextLetters, nextLettersSize,

710

&newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,

711

&newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);

712

siblingPos = newSiblingPos;

satok

2010-12-08 16:04:16 +0900

[diff] [blame]

713

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

714

if (needsToTraverseChildrenNodes) {

715

getWordsRec(newCount, newChildPosition, newOutputIndex, maxDepth, newTraverseAllNodes,

716

newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,

717

nextLetters, nextLettersSize);

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

718

}

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

719

}

satok

2010-12-06 17:38:58 +0900

[diff] [blame]

720

}

721

Jean Chalard

2011-06-30 17:02:23 +0900

[diff] [blame]

722

inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,

723

const int inputLength, unsigned short *word) {

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

724

int pos = ROOT_POS;

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

725

int count = Dictionary::getCount(DICT_ROOT, &pos);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

726

int maxFreq = 0;

727

int depth = 0;

728

unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

729

bool terminal = false;

730

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

731

mStackChildCount[0] = count;

732

mStackSiblingPos[0] = pos;

733

734

while (depth >= 0) {

735

if (mStackChildCount[depth] > 0) {

736

--mStackChildCount[depth];

737

int firstChildPos;

738

int newFreq;

739

int siblingPos = mStackSiblingPos[depth];

740

const bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(siblingPos,

741

startInputIndex, depth, newWord, &firstChildPos, &count, &terminal, &newFreq,

742

&siblingPos);

743

mStackSiblingPos[depth] = siblingPos;

744

if (depth == (inputLength - 1)) {

745

// Traverse sibling node

746

if (terminal) {

747

if (newFreq > maxFreq) {

748

for (int i = 0; i < inputLength; ++i) word[i] = newWord[i];

749

if (DEBUG_DICT && DEBUG_NODE) {

750

char s[inputLength + 1];

751

for (int i = 0; i < inputLength; ++i) s[i] = word[i];

752

s[inputLength] = 0;

753

LOGI("New missing space word found: %d > %d (%s), %d, %d",

754

newFreq, maxFreq, s, inputLength, depth);

}

maxFreq = newFreq;

}

}

} else if (needsToTraverseChildrenNodes) {

760

// Traverse children nodes

761

++depth;

762

mStackChildCount[depth] = count;

763

mStackSiblingPos[depth] = firstChildPos;

764

}

765

} else {

766

// Traverse parent node

767

--depth;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

768

}

769

}

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

770

771

word[inputLength] = 0;

772

return maxFreq;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

773

}

774

775

inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

776

const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,

777

int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {

778

const int inputIndex = startInputIndex + depth;

satok

8fbd552

2011-02-22 17:28:55 +0900

[diff] [blame]

779

const int *currentChars = getInputCharsAt(inputIndex);

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

780

unsigned short c;

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

781

*siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,

782

&c, newChildPosition, newTerminal, newFreq);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

783

const unsigned int inputC = currentChars[0];

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

784

if (DEBUG_DICT) {

785

assert(inputC <= U_SHORT_MAX);

786

}

Jean Chalard

2011-02-22 15:12:46 +0900

[diff] [blame]

787

const unsigned short baseLowerC = toBaseLowerCase(c);

788

const bool matched = (inputC == baseLowerC || inputC == c);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

789

const bool hasChild = *newChildPosition != 0;

790

if (matched) {

791

word[depth] = c;

792

if (DEBUG_DICT && DEBUG_NODE) {

793

LOGI("Node(%c, %c)<%d>, %d, %d", inputC, c, matched, hasChild, *newFreq);

Ken Wakasa

2011-03-19 09:16:42 +0900

[diff] [blame]

794

if (*newTerminal) {

795

LOGI("Terminal %d", *newFreq);

796

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

797

}

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

798

if (hasChild) {

Jean Chalard

2011-06-16 20:55:16 +0900

[diff] [blame]

799

*newCount = Dictionary::getCount(DICT_ROOT, newChildPosition);

satok

2010-12-09 19:21:51 +0900

[diff] [blame]

return true;

} else {

return false;

}

} else {

// If this node is not user typed character, this method treats this word as unmatched.

806

// Thus newTerminal shouldn't be true.

807

*newTerminal = false;

808

return false;

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

809

}

satok

2010-12-08 17:05:39 +0900

[diff] [blame]

810

}

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

811

812

// TODO: use uint32_t instead of unsigned short

813

bool UnigramDictionary::isValidWord(unsigned short *word, int length) {

814

if (IS_LATEST_DICT_VERSION) {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

815

return (getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

816

} else {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

817

return (getBigramPosition(0, word, 0, length) != NOT_VALID_WORD);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

}

}

Jean Chalard

2011-06-16 22:51:11 +0900

[diff] [blame]

821

822

// Require strict exact match.

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

823

int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,

824

int length) const {

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

825

// returns address of bigram data of that word

826

// return -99 if not found

827

828

int count = Dictionary::getCount(DICT_ROOT, &pos);

829

unsigned short currentChar = (unsigned short) word[offset];

830

for (int j = 0; j < count; j++) {

831

unsigned short c = Dictionary::getChar(DICT_ROOT, &pos);

832

int terminal = Dictionary::getTerminal(DICT_ROOT, &pos);

833

int childPos = Dictionary::getAddress(DICT_ROOT, &pos);

834

if (c == currentChar) {

835

if (offset == length - 1) {

if (terminal) {

return (pos+1);

}

} else {

if (childPos != 0) {

Jean Chalard

2011-06-17 12:45:17 +0900

[diff] [blame]

841

int t = getBigramPosition(childPos, word, offset + 1, length);

Jean Chalard

2011-06-16 22:33:41 +0900

[diff] [blame]

if (t > 0) {

return t;

}

}

}

}

if (terminal) {

Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);

850

}

851

// There could be two instances of each alphabet - upper and lower case. So continue

852

// looking ...

853

}

854

return NOT_VALID_WORD;

855

}

856

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

857

858

// The following functions will be modified.

859

bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,

860

const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,

861

const int secondWordLength, const bool isSpaceProximity) {

862

if (inputLength >= MAX_WORD_LENGTH) return false;

863

if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos

864

|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)

865

return false;

866

const int newWordLength = firstWordLength + secondWordLength + 1;

867

// Allocating variable length array on stack

868

unsigned short word[newWordLength];

Jean Chalard

ffefdb6

2011-06-30 17:15:32 +0900

[diff] [blame^]

869

const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

870

if (DEBUG_DICT) {

871

LOGI("First freq: %d", firstFreq);

872

}

873

if (firstFreq <= 0) return false;

874

875

for (int i = 0; i < firstWordLength; ++i) {

word[i] = mWord[i];

}

Jean Chalard

2011-06-30 17:15:32 +0900

[diff] [blame^]

879

const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

880

if (DEBUG_DICT) {

881

LOGI("Second freq: %d", secondFreq);

882

}

883

if (secondFreq <= 0) return false;

884

885

word[firstWordLength] = SPACE;

886

for (int i = (firstWordLength + 1); i < newWordLength; ++i) {

887

word[i] = mWord[i - firstWordLength - 1];

888

}

889

890

int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,

891

secondWordLength, firstFreq, secondFreq, isSpaceProximity);

892

if (DEBUG_DICT) {

893

LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,

894

TYPED_LETTER_MULTIPLIER);

895

}

896

addWord(word, newWordLength, pairFreq);

return true;

}

inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,

901

const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,

902

const int diffs, const int skipPos, const int excessivePos, const int transposedPos,

903

int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,

904

bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,

905

int *nextSiblingPosition, int *nextOutputIndex) {

906

if (DEBUG_DICT) {

907

int inputCount = 0;

908

if (skipPos >= 0) ++inputCount;

909

if (excessivePos >= 0) ++inputCount;

910

if (transposedPos >= 0) ++inputCount;

911

assert(inputCount <= 1);

}

unsigned short c;

int childPosition;

bool terminal;

int freq;

bool isSameAsUserTypedLength = false;

918

919

const uint8_t flags = 0; // No flags for now

920

921

if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;

922

923

*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,

924

&c, &childPosition, &terminal, &freq);

925

*nextOutputIndex = depth + 1;

926

927

const bool needsToTraverseChildrenNodes = childPosition != 0;

928

929

// If we are only doing traverseAllNodes, no need to look at the typed characters.

930

if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {

931

mWord[depth] = c;

932

if (traverseAllNodes && terminal) {

933

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

934

excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);

935

}

936

if (!needsToTraverseChildrenNodes) return false;

937

*newTraverseAllNodes = traverseAllNodes;

938

*newMatchRate = matchWeight;

939

*newDiffs = diffs;

940

*newInputIndex = inputIndex;

941

} else {

942

const int *currentChars = getInputCharsAt(inputIndex);

943

944

if (transposedPos >= 0) {

945

if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;

946

if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;

947

}

948

949

int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,

950

transposedPos);

951

if (UNRELATED_CHAR == matchedProximityCharId) return false;

952

mWord[depth] = c;

953

// If inputIndex is greater than mInputLength, that means there is no

954

// proximity chars. So, we don't need to check proximity.

955

if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {

956

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);

957

}

958

bool isSameAsUserTypedLength = mInputLength == inputIndex + 1

959

|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);

960

if (isSameAsUserTypedLength && terminal) {

961

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

962

excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);

963

}

964

if (!needsToTraverseChildrenNodes) return false;

965

// Start traversing all nodes after the index exceeds the user typed length

966

*newTraverseAllNodes = isSameAsUserTypedLength;

967

*newMatchRate = matchWeight;

968

*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);

969

*newInputIndex = inputIndex + 1;

970

}

971

// Optimization: Prune out words that are too long compared to how much was typed.

972

if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {

return false;

}

// If inputIndex is greater than mInputLength, that means there are no proximity chars.

977

// TODO: Check if this can be isSameAsUserTypedLength only.

978

if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {

979

*newTraverseAllNodes = true;

980

}

981

// get the count of nodes and increment childAddress.

982

*newCount = Dictionary::getCount(DICT_ROOT, &childPosition);

983

*newChildPosition = childPosition;

984

if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);

985

return needsToTraverseChildrenNodes;

986

}

987

988

#else // NEW_DICTIONARY_FORMAT

Jean Chalard

85a1d1e

2011-06-21 22:23:21 +0900

[diff] [blame]

989

990

bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,

991

const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,

992

const int secondWordLength, const bool isSpaceProximity) {

993

if (inputLength >= MAX_WORD_LENGTH) return false;

994

if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos

995

|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)

996

return false;

997

const int newWordLength = firstWordLength + secondWordLength + 1;

998

// Allocating variable length array on stack

999

unsigned short word[newWordLength];

Jean Chalard

ffefdb6

2011-06-30 17:15:32 +0900

[diff] [blame^]

1000

const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);

Jean Chalard

85a1d1e

2011-06-21 22:23:21 +0900

[diff] [blame]

1001

if (DEBUG_DICT) {

1002

LOGI("First freq: %d", firstFreq);

1003

}

1004

if (firstFreq <= 0) return false;

1005

1006

for (int i = 0; i < firstWordLength; ++i) {

word[i] = mWord[i];

}

Jean Chalard

2011-06-30 17:15:32 +0900

[diff] [blame^]

1010

const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);

Jean Chalard

85a1d1e

2011-06-21 22:23:21 +0900

[diff] [blame]

1011

if (DEBUG_DICT) {

1012

LOGI("Second freq: %d", secondFreq);

1013

}

1014

if (secondFreq <= 0) return false;

1015

1016

word[firstWordLength] = SPACE;

1017

for (int i = (firstWordLength + 1); i < newWordLength; ++i) {

1018

word[i] = mWord[i - firstWordLength - 1];

1019

}

1020

1021

int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,

1022

secondWordLength, firstFreq, secondFreq, isSpaceProximity);

1023

if (DEBUG_DICT) {

1024

LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,

1025

TYPED_LETTER_MULTIPLIER);

1026

}

1027

addWord(word, newWordLength, pairFreq);

return true;

}

inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,

1032

const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,

1033

const int diffs, const int skipPos, const int excessivePos, const int transposedPos,

1034

int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,

1035

bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,

1036

int *nextSiblingPosition, int *nextOutputIndex) {

1037

if (DEBUG_DICT) {

1038

int inputCount = 0;

1039

if (skipPos >= 0) ++inputCount;

1040

if (excessivePos >= 0) ++inputCount;

1041

if (transposedPos >= 0) ++inputCount;

1042

assert(inputCount <= 1);

}

unsigned short c;

int childPosition;

bool terminal;

int freq;

bool isSameAsUserTypedLength = false;

1049

1050

const uint8_t flags = 0; // No flags for now

1051

1052

if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;

1053

1054

*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,

1055

&c, &childPosition, &terminal, &freq);

1056

*nextOutputIndex = depth + 1;

1057

1058

const bool needsToTraverseChildrenNodes = childPosition != 0;

1059

1060

// If we are only doing traverseAllNodes, no need to look at the typed characters.

1061

if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {

1062

mWord[depth] = c;

1063

if (traverseAllNodes && terminal) {

1064

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

1065

excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);

1066

}

1067

if (!needsToTraverseChildrenNodes) return false;

1068

*newTraverseAllNodes = traverseAllNodes;

1069

*newMatchRate = matchWeight;

1070

*newDiffs = diffs;

1071

*newInputIndex = inputIndex;

1072

} else {

1073

const int *currentChars = getInputCharsAt(inputIndex);

1074

1075

if (transposedPos >= 0) {

1076

if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;

1077

if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;

1078

}

1079

1080

int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,

1081

transposedPos);

1082

if (UNRELATED_CHAR == matchedProximityCharId) return false;

1083

mWord[depth] = c;

1084

// If inputIndex is greater than mInputLength, that means there is no

1085

// proximity chars. So, we don't need to check proximity.

1086

if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {

1087

multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);

1088

}

1089

bool isSameAsUserTypedLength = mInputLength == inputIndex + 1

1090

|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);

1091

if (isSameAsUserTypedLength && terminal) {

1092

onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,

1093

excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);

1094

}

1095

if (!needsToTraverseChildrenNodes) return false;

1096

// Start traversing all nodes after the index exceeds the user typed length

1097

*newTraverseAllNodes = isSameAsUserTypedLength;

1098

*newMatchRate = matchWeight;

1099

*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);

1100

*newInputIndex = inputIndex + 1;

1101

}

1102

// Optimization: Prune out words that are too long compared to how much was typed.

1103

if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {

return false;

}

// If inputIndex is greater than mInputLength, that means there are no proximity chars.

1108

// TODO: Check if this can be isSameAsUserTypedLength only.

1109

if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {

1110

*newTraverseAllNodes = true;

1111

}

1112

// get the count of nodes and increment childAddress.

1113

*newCount = Dictionary::getCount(DICT_ROOT, &childPosition);

1114

*newChildPosition = childPosition;

1115

if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);

1116

return needsToTraverseChildrenNodes;

1117

}

1118

Jean Chalard

2011-06-20 21:09:04 +0900

[diff] [blame]

1119

#endif // NEW_DICTIONARY_FORMAT

1120

satok