Fix auto-correction threshold and promote full matched words
Bug: 3374359
Bug: 3278422
"zbe" will be auto-corrected to "be" by fixing the s-line.
"teh" will be auto-corrected to "the" by promoting fully matched words.
Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
diff --git a/native/src/defines.h b/native/src/defines.h
index 71aaf28..7374526 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -134,6 +134,7 @@
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
+#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 3f9bcd7..06dd39a 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -347,9 +347,9 @@
}
}
-inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
- const int skipPos, const int excessivePos, const int transposedPos, const int freq,
- const bool sameLength) {
+inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
+ const int snr, const int skipPos, const int excessivePos, const int transposedPos,
+ const int freq, const bool sameLength) {
// TODO: Demote by edit distance
int finalFreq = freq * snr;
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -361,6 +361,12 @@
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
}
}
+ int lengthFreq = TYPED_LETTER_MULTIPLIER;
+ for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
+ if (depth > 1 && lengthFreq == snr) {
+ if (DEBUG_DICT) LOGI("Found full matched word.");
+ multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
+ }
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
return finalFreq;
}
@@ -369,8 +375,8 @@
unsigned short *word, const int inputIndex, const int depth, const int snr,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq) {
- const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
- freq, false);
+ const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
+ transposedPos, freq, false);
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
if (depth >= mInputLength && skipPos < 0) {
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
@@ -382,7 +388,7 @@
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
const int addedWeight) {
if (sameAsTyped(word, depth + 1)) return;
- const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
+ const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
excessivePos, transposedPos, freq, true);
// Proximity collection will promote a word of the same length as what user typed.
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 7f7b7bd..95f9655 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -52,7 +52,7 @@
const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
- int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
+ int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int inputIndex, const int depth, const int snr, int *nextLetters,