Merge "Check adjacent proximity chars for insertion for typing"
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index fb60139..d5df6b6 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -53,10 +53,10 @@
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \
- binary_dictionary_bigrams_reading_utils.cpp \
binary_dictionary_format_utils.cpp \
binary_dictionary_header.cpp \
binary_dictionary_header_reading_utils.cpp \
+ binary_dictionary_terminal_attributes_reading_utils.cpp \
bloom_filter.cpp \
byte_array_utils.cpp \
dictionary.cpp \
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index 0856840..f2b48e9 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -18,8 +18,8 @@
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
namespace latinime {
@@ -35,15 +35,17 @@
}
AK_FORCE_INLINE void next() {
- mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
+ mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
mBinaryDictionaryInfo, &mPos);
- mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
- mBinaryDictionaryInfo, mBigramFlags, &mPos);
- mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
+ mBigramPos =
+ BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
+ mBinaryDictionaryInfo, mBigramFlags, &mPos);
+ mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
}
AK_FORCE_INLINE int getProbability() const {
- return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
+ return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
+ mBigramFlags);
}
AK_FORCE_INLINE int getBigramPos() const {
@@ -59,7 +61,7 @@
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int mPos;
- BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
+ BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
int mBigramPos;
bool mHasNext;
};
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
deleted file mode 100644
index 78a54b1..0000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
-
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-// Flag for presence of more attributes
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
-// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int BinaryDictionaryBigramsReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-
-/* static */ int BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
- int *const pos) {
- int offset = 0;
- const int origin = *pos;
- switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
- offset = ByteArrayUtils::readUint8andAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
- offset = ByteArrayUtils::readUint16andAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
- offset = ByteArrayUtils::readUint24andAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- }
- if (isOffsetNegative(flags)) {
- return origin - offset;
- } else {
- return origin + offset;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
new file mode 100644
index 0000000..0a7509c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
+
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils;
+
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+
+/* static */ int TaUtils::getBigramAddressAndForwardPointer(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
+ int *const pos) {
+ int offset = 0;
+ const int origin = *pos;
+ switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8andAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16andAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24andAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), pos);
+ break;
+ }
+ if (isOffsetNegative(flags)) {
+ return origin - offset;
+ } else {
+ return origin + offset;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
similarity index 61%
rename from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
rename to native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index e71f2a1..f38fd5a 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
+#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
+#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
#include <stdint.h>
@@ -25,10 +25,26 @@
namespace latinime {
-class BinaryDictionaryBigramsReadingUtils {
+class BinaryDictionaryTerminalAttributesReadingUtils {
public:
- typedef uint8_t BigramFlags;
+ typedef uint8_t TerminalAttributeFlags;
+ typedef TerminalAttributeFlags BigramFlags;
+ static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+ return ByteArrayUtils::readUint8andAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), pos);
+ }
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // Bigrams reading methods
static AK_FORCE_INLINE void skipExistingBigrams(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
@@ -39,41 +55,27 @@
*pos += attributeAddressSize(flags);
}
- static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- return ByteArrayUtils::readUint8andAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- }
+ static int getBigramAddressAndForwardPointer(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
+ int *const pos);
- static AK_FORCE_INLINE int getBigramProbability(const BigramFlags flags) {
- return flags & MASK_ATTRIBUTE_PROBABILITY;
- }
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
- static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+ static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+ static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
}
- static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
- return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
- }
-
- static int getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const BigramFlags flags, int *const pos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryBigramsReadingUtils);
-
- static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
- static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
- static const int ATTRIBUTE_ADDRESS_SHIFT;
-
- static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+ static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) {
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
/* Note: this is a value-dependant optimization of what may probably be
more readably written this way:
@@ -87,4 +89,4 @@
}
};
}
-#endif /* LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H */
+#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index a8f16c8..173a612 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -36,6 +36,7 @@
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
+const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -148,6 +149,8 @@
&doubleLetterTerminalIndex, &doubleLetterLevel);
int maxScore = S_INT_MIN;
+ int bestExactMatchedNodeTerminalIndex = -1;
+ int bestExactMatchedNodeOutputWordIndex = -1;
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -186,7 +189,6 @@
const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord()));
-
maxScore = max(maxScore, finalScore);
if (TRAVERSAL->allowPartialCommit()) {
@@ -200,6 +202,25 @@
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
+ if (isSafeExactMatch) {
+ // Demote exact matches that are not the highest probable node among all exact
+ // matches.
+ const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
+ || terminals[bestExactMatchedNodeTerminalIndex].getProbability()
+ < terminalDicNode->getProbability();
+ const int outputWordIndexToBeDemoted = isBestTerminal ?
+ bestExactMatchedNodeOutputWordIndex : outputWordIndex;
+ if (outputWordIndexToBeDemoted >= 0) {
+ frequencies[outputWordIndexToBeDemoted] -=
+ FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
+ }
+ if (isBestTerminal) {
+ // Updates the best exact matched node index.
+ bestExactMatchedNodeTerminalIndex = terminalIndex;
+ // Updates the best exact matched output word index.
+ bestExactMatchedNodeOutputWordIndex = outputWordIndex;
+ }
+ }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 875cbe4..752bde9 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -82,6 +82,8 @@
// Threshold for autocorrection classifier
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
+ // Final score penalty to exact match words that are not the most probable exact match.
+ static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 7333dbe..e098f35 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -169,6 +169,9 @@
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
+ // We promote exact matches here to prevent them from being pruned. The final score of
+ // exact match nodes might be demoted later in Suggest::outputSuggestions if there are
+ // multiple exact matches.
const float languageImprobability = (dicNode->isExactMatch()) ?
0.0f : dicNodeLanguageImprobability;
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;