Merge "Consolidate attributes to EmojiPaletteView to control indicators"
diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
index 668eb92..743bc80 100644
--- a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
+++ b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
@@ -47,7 +47,7 @@
// used to identify the versions for upgrades. This should never change going forward.
private static final int METADATA_DATABASE_VERSION_WITH_CLIENTID = 6;
// The current database version.
- private static final int CURRENT_METADATA_DATABASE_VERSION = 8;
+ private static final int CURRENT_METADATA_DATABASE_VERSION = 9;
private final static long NOT_A_DOWNLOAD_ID = -1;
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index e7ab02a..b775406 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -191,7 +191,8 @@
private static native void closeNative(long dict);
private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
- private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+ private static native int getBigramProbabilityNative(long dict, int[] word0,
+ boolean isBeginningOfSentence, int[] word1);
private static native void getWordPropertyNative(long dict, int[] word,
int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
@@ -200,15 +201,17 @@
private static native void getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
- int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints,
- int[] outputScores, int[] outputIndices, int[] outputTypes,
- int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight);
+ int[] prevWordCodePointArray, boolean isBeginningOfSentence,
+ int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
+ int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
+ float[] inOutLanguageWeight);
private static native void addUnigramWordNative(long dict, int[] word, int probability,
- int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
- boolean isBlacklisted, int timestamp);
- private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
- int probability, int timestamp);
- private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
+ int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
+ boolean isNotAWord, boolean isBlacklisted, int timestamp);
+ private static native void addBigramWordsNative(long dict, int[] word0,
+ boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
+ private static native void removeBigramWordsNative(long dict, int[] word0,
+ boolean isBeginningOfSentence, int[] word1);
private static native int addMultipleDictionaryEntriesNative(long dict,
LanguageModelParam[] languageModelParams, int startIndex);
private static native String getPropertyNative(long dict, String query);
@@ -301,7 +304,8 @@
getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
inputPointers.getYCoordinates(), inputPointers.getTimes(),
inputPointers.getPointerIds(), mInputCodePoints, inputSize,
- mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount,
+ mNativeSuggestOptions.getOptions(), prevWordCodePointArray,
+ prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount,
mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes,
mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight);
if (inOutLanguageWeight != null) {
@@ -364,12 +368,13 @@
}
public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
- if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+ if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return NOT_A_PROBABILITY;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
- return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
+ return getBigramProbabilityNative(mNativeDict, codePoints0,
+ prevWordsInfo.mIsBeginningOfSentence, codePoints1);
}
public WordProperty getWordProperty(final String word) {
@@ -420,16 +425,17 @@
// Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramEntry(final String word, final int probability,
- final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
+ final String shortcutTarget, final int shortcutProbability,
+ final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
- if (TextUtils.isEmpty(word)) {
+ if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return;
}
final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null;
addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
- shortcutProbability, isNotAWord, isBlacklisted, timestamp);
+ shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp);
mHasUpdated = true;
}
@@ -437,23 +443,25 @@
public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
final int probability,
final int timestamp) {
- if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+ if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
- addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
+ addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+ codePoints1, probability, timestamp);
mHasUpdated = true;
}
// Remove an n-gram entry from the binary dictionary in native code.
public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
- if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+ if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
- removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
+ removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+ codePoints1);
mHasUpdated = true;
}
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index 67ca595..efc5a61 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -192,7 +192,6 @@
public static final int CODE_SPACE = ' ';
public static final int CODE_PERIOD = '.';
public static final int CODE_COMMA = ',';
- public static final int CODE_ARMENIAN_PERIOD = 0x0589;
public static final int CODE_DASH = '-';
public static final int CODE_SINGLE_QUOTE = '\'';
public static final int CODE_DOUBLE_QUOTE = '"';
@@ -208,6 +207,8 @@
public static final int CODE_CLOSING_SQUARE_BRACKET = ']';
public static final int CODE_CLOSING_CURLY_BRACKET = '}';
public static final int CODE_CLOSING_ANGLE_BRACKET = '>';
+ public static final int CODE_INVERTED_QUESTION_MARK = 0xBF; // ¿
+ public static final int CODE_INVERTED_EXCLAMATION_MARK = 0xA1; // ¡
/**
* Special keys code. Must be negative.
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
index 3fb76b1..538bfc0 100644
--- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
@@ -224,7 +224,7 @@
*/
private void addNameLocked(final String name) {
int len = StringUtils.codePointCount(name);
- PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+ PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
// TODO: Better tokenization for non-Latin writing systems
for (int i = 0; i < len; i++) {
if (Character.isLetter(name.codePointAt(i))) {
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
index a979167..2123638 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
@@ -19,14 +19,18 @@
import android.content.Context;
import android.text.TextUtils;
import android.util.Log;
+import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.personalization.ContextualDictionary;
+import com.android.inputmethod.latin.personalization.PersonalizationDataChunk;
import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
+import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.CollectionUtils;
+import com.android.inputmethod.latin.utils.DistracterFilter;
import com.android.inputmethod.latin.utils.ExecutorUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.SuggestionResults;
@@ -37,6 +41,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@@ -57,6 +62,7 @@
private volatile CountDownLatch mLatchForWaitingLoadingMainDictionary = new CountDownLatch(0);
// To synchronize assigning mDictionaries to ensure closing dictionaries.
private final Object mLock = new Object();
+ private final DistracterFilter mDistracterFilter;
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTION =
new String[] {
@@ -162,7 +168,17 @@
public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
}
- public DictionaryFacilitator() {}
+ public DictionaryFacilitator() {
+ mDistracterFilter = new DistracterFilter.EmptyDistracterFilter();
+ }
+
+ public DictionaryFacilitator(final DistracterFilter distracterFilter) {
+ mDistracterFilter = distracterFilter;
+ }
+
+ public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
+ mDistracterFilter.updateEnabledSubtypes(enabledSubtypes);
+ }
public Locale getLocale() {
return mDictionaries.mLocale;
@@ -321,6 +337,7 @@
for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTION) {
dictionaries.closeDict(dictType);
}
+ mDistracterFilter.close();
}
// The main dictionary could have been loaded asynchronously. Don't cache the return value
@@ -537,9 +554,16 @@
personalizationDict.clear();
}
- public void addMultipleDictionaryEntriesToPersonalizationDictionary(
- final ArrayList<LanguageModelParam> languageModelParams,
+ public void addEntriesToPersonalizationDictionary(
+ final PersonalizationDataChunk personalizationDataChunk,
+ final SpacingAndPunctuations spacingAndPunctuations,
final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
+ final ArrayList<LanguageModelParam> languageModelParams =
+ LanguageModelParam.createLanguageModelParamsFrom(
+ personalizationDataChunk.mTokens,
+ personalizationDataChunk.mTimestampInSeconds,
+ this /* dictionaryFacilitator */, spacingAndPunctuations,
+ mDistracterFilter);
final ExpandableBinaryDictionary personalizationDict =
mDictionaries.getSubDict(Dictionary.TYPE_PERSONALIZATION);
if (personalizationDict == null || languageModelParams == null
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index d67253c..2cbce04 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -114,7 +114,8 @@
private boolean needsToMigrateDictionary(final int formatVersion) {
// When we bump up the dictionary format version, the old version should be added to here
// for supporting migration. Note that native code has to support reading such formats.
- return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING;
+ return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
+ || formatVersion == FormatSpec.VERSION401;
}
public boolean isValidDictionaryLocked() {
@@ -292,7 +293,7 @@
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
- isNotAWord, isBlacklisted, timestamp);
+ false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp);
}
/**
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 4863326..34d5f71 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -81,9 +81,10 @@
import com.android.inputmethod.latin.suggestions.SuggestionStripViewAccessor;
import com.android.inputmethod.latin.utils.ApplicationUtils;
import com.android.inputmethod.latin.utils.CapsModeUtils;
+import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.CoordinateUtils;
import com.android.inputmethod.latin.utils.DialogUtils;
-import com.android.inputmethod.latin.utils.DistracterFilter;
+import com.android.inputmethod.latin.utils.DistracterFilterUsingSuggestion;
import com.android.inputmethod.latin.utils.ImportantNoticeUtils;
import com.android.inputmethod.latin.utils.IntentUtils;
import com.android.inputmethod.latin.utils.JniUtils;
@@ -95,6 +96,7 @@
import java.io.FileDescriptor;
import java.io.PrintWriter;
import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
@@ -122,7 +124,8 @@
private static final String SCHEME_PACKAGE = "package";
private final Settings mSettings;
- private final DictionaryFacilitator mDictionaryFacilitator = new DictionaryFacilitator();
+ private final DictionaryFacilitator mDictionaryFacilitator =
+ new DictionaryFacilitator(new DistracterFilterUsingSuggestion(this /* context */));
private final InputLogic mInputLogic = new InputLogic(this /* LatinIME */,
this /* SuggestionStripViewAccessor */, mDictionaryFacilitator);
// We expect to have only one decoder in almost all cases, hence the default capacity of 1.
@@ -538,6 +541,8 @@
if (!mHandler.hasPendingReopenDictionaries()) {
resetSuggestForLocale(locale);
}
+ mDictionaryFacilitator.updateEnabledSubtypes(mRichImm.getMyEnabledInputMethodSubtypeList(
+ true /* allowsImplicitlySelectedSubtypes */));
refreshPersonalizationDictionarySession();
StatsUtils.onLoadSettings(currentSettingsValues);
}
@@ -564,9 +569,7 @@
PersonalizationHelper.removeAllPersonalizationDictionaries(this);
PersonalizationDictionarySessionRegistrar.resetAll(this);
} else {
- final DistracterFilter distracterFilter = createDistracterFilter();
- PersonalizationDictionarySessionRegistrar.init(
- this, mDictionaryFacilitator, distracterFilter);
+ PersonalizationDictionarySessionRegistrar.init(this, mDictionaryFacilitator);
}
}
@@ -660,9 +663,8 @@
mInputLogic.mConnection.finishComposingText();
mInputLogic.mConnection.endBatchEdit();
}
- final DistracterFilter distracterFilter = createDistracterFilter();
PersonalizationDictionarySessionRegistrar.onConfigurationChanged(this, conf,
- mDictionaryFacilitator, distracterFilter);
+ mDictionaryFacilitator);
super.onConfigurationChanged(conf);
}
@@ -1739,11 +1741,9 @@
}
@UsedForTesting
- /* package for test */ DistracterFilter createDistracterFilter() {
- // Return an empty distracter filter when this method is called before onCreate().
- return (mRichImm != null) ? new DistracterFilter(this /* Context */,
- mRichImm.getMyEnabledInputMethodSubtypeList(
- true /* allowsImplicitlySelectedSubtypes */)) : new DistracterFilter();
+ /* package for test */ List<InputMethodSubtype> getEnabledSubtypesForTest() {
+ return (mRichImm != null) ? mRichImm.getMyEnabledInputMethodSubtypeList(
+ true /* allowsImplicitlySelectedSubtypes */) : new ArrayList<InputMethodSubtype>();
}
public void dumpDictionaryForDebug(final String dictName) {
diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
index ecc8947..e44239f 100644
--- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
+++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
@@ -16,23 +16,32 @@
package com.android.inputmethod.latin;
-import android.util.Log;
-
+/**
+ * Class to represent information of previous words. This class is used to add n-gram entries
+ * into binary dictionaries, to get predictions, and to get suggestions.
+ */
// TODO: Support multiple previous words for n-gram.
public class PrevWordsInfo {
- // The previous word. May be null after resetting and before starting a new composing word, or
- // when there is no context like at the start of text for example. It can also be set to null
- // externally when the user enters a separator that does not let bigrams across, like a period
- // or a comma.
+ public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO = new PrevWordsInfo(null);
+ public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
+
+ // The word immediately before the considered word. null means we don't have any context
+ // including the "beginning of sentence context" - we just don't know what to predict.
+ // An example of that is after a comma.
+ // For simplicity of implementation, this may also be null transiently after the WordComposer
+ // was reset and before starting a new composing word, but we should never be calling
+ // getSuggetions* in this situation.
+ // This is an empty string when mIsBeginningOfSentence is true.
public final String mPrevWord;
// TODO: Have sentence separator.
- // Whether the current context is beginning of sentence or not.
+ // Whether the current context is beginning of sentence or not. This is true when composing at
+ // the beginning of an input field or composing a word after a sentence separator.
public final boolean mIsBeginningOfSentence;
// Beginning of sentence.
public PrevWordsInfo() {
- mPrevWord = null;
+ mPrevWord = "";
mIsBeginningOfSentence = true;
}
@@ -40,4 +49,8 @@
mPrevWord = prevWord;
mIsBeginningOfSentence = false;
}
+
+ public boolean isValid() {
+ return mPrevWord != null;
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 2c54e10..e7c1636 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -542,7 +542,7 @@
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
mIC = mParent.getCurrentInputConnection();
if (null == mIC) {
- return new PrevWordsInfo(null);
+ return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
}
final CharSequence prev = getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
if (DEBUG_PREVIOUS_TEXT && null != prev) {
@@ -588,30 +588,30 @@
// (n = 2) "abc. def|" -> beginning-of-sentence
public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev,
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
- if (prev == null) return new PrevWordsInfo(null);
+ if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
final String[] w = spaceRegex.split(prev);
// If we can't find n words, or we found an empty word, the context is
// beginning-of-sentence.
if (w.length < n) {
- return new PrevWordsInfo();
+ return PrevWordsInfo.BEGINNING_OF_SENTENCE;
}
final String nthPrevWord = w[w.length - n];
final int length = nthPrevWord.length();
if (length <= 0) {
- return new PrevWordsInfo();
+ return PrevWordsInfo.BEGINNING_OF_SENTENCE;
}
// If ends in a sentence separator, the context is beginning-of-sentence.
final char lastChar = nthPrevWord.charAt(length - 1);
if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
- new PrevWordsInfo();
+ return PrevWordsInfo.BEGINNING_OF_SENTENCE;
}
// If ends in a word separator or connector, the context is unclear.
// TODO: Return meaningful context for this case.
if (spacingAndPunctuations.isWordSeparator(lastChar)
|| spacingAndPunctuations.isWordConnector(lastChar)) {
- return new PrevWordsInfo(null);
+ return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
}
return new PrevWordsInfo(nthPrevWord);
}
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 1eccf2c..daa7f4b 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -18,6 +18,7 @@
import android.text.TextUtils;
+import com.android.inputmethod.event.Event;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;
diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java
index 6ecb373..c53a8fd 100644
--- a/java/src/com/android/inputmethod/latin/WordComposer.java
+++ b/java/src/com/android/inputmethod/latin/WordComposer.java
@@ -85,7 +85,7 @@
mIsBatchMode = false;
mCursorPositionWithinWord = 0;
mRejectedBatchModeSuggestion = null;
- mPrevWordsInfo = new PrevWordsInfo(null);
+ mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
refreshTypedWordCache();
}
@@ -117,7 +117,7 @@
mIsBatchMode = false;
mCursorPositionWithinWord = 0;
mRejectedBatchModeSuggestion = null;
- mPrevWordsInfo = new PrevWordsInfo(null);
+ mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
refreshTypedWordCache();
}
@@ -445,7 +445,7 @@
// when the user inputs a separator that's not whitespace (including the case of the
// double-space-to-period feature).
public void discardPreviousWordForSuggestion() {
- mPrevWordsInfo = new PrevWordsInfo(null);
+ mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
}
public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord,
diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
index 78d4bc8..237b43b 100644
--- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
+++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
@@ -809,11 +809,10 @@
final int codePoint = inputTransaction.mEvent.mCodePoint;
final SettingsValues settingsValues = inputTransaction.mSettingsValues;
boolean didAutoCorrect = false;
- final boolean wasComposingWord = mWordComposer.isComposingWord();
// We avoid sending spaces in languages without spaces if we were composing.
final boolean shouldAvoidSendingCode = Constants.CODE_SPACE == codePoint
&& !settingsValues.mSpacingAndPunctuations.mCurrentLanguageHasSpaces
- && wasComposingWord;
+ && mWordComposer.isComposingWord();
if (mWordComposer.isCursorFrontOrMiddleOfComposingWord()) {
// If we are in the middle of a recorrection, we need to commit the recorrection
// first so that we can insert the separator at the current cursor position.
@@ -857,7 +856,7 @@
promotePhantomSpace(settingsValues);
}
if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) {
- ResearchLogger.latinIME_handleSeparator(codePoint, wasComposingWord);
+ ResearchLogger.latinIME_handleSeparator(codePoint, mWordComposer.isComposingWord());
}
if (!shouldAvoidSendingCode) {
@@ -873,9 +872,7 @@
}
startDoubleSpacePeriodCountdown(inputTransaction);
- if (wasComposingWord) {
- inputTransaction.setRequiresUpdateSuggestions();
- }
+ inputTransaction.setRequiresUpdateSuggestions();
} else {
if (swapWeakSpace) {
swapSwapperAndSpace(inputTransaction);
@@ -1612,8 +1609,9 @@
return mConnection.getPrevWordsInfoFromNthPreviousWord(
spacingAndPunctuations, nthPreviousWord);
} else {
- return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ? new PrevWordsInfo()
- : new PrevWordsInfo(mLastComposedWord.mCommittedWord.toString());
+ return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ?
+ PrevWordsInfo.BEGINNING_OF_SENTENCE :
+ new PrevWordsInfo(mLastComposedWord.mCommittedWord.toString());
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index f5f072b..a2ae74b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -192,8 +192,9 @@
public static final int VERSION2 = 2;
// Dictionary version used for testing.
public static final int VERSION4_ONLY_FOR_TESTING = 399;
- public static final int VERSION4 = 401;
- public static final int VERSION4_DEV = 402;
+ public static final int VERSION401 = 401;
+ public static final int VERSION4 = 402;
+ public static final int VERSION4_DEV = 403;
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4_DEV;
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index 8533922..ed83251 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -35,6 +35,8 @@
public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams;
+ // TODO: Support mIsBeginningOfSentence.
+ public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts;
@@ -51,6 +53,7 @@
mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
+ mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = bigrams != null && !bigrams.isEmpty();
@@ -77,6 +80,7 @@
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = CollectionUtils.newArrayList();
mBigrams = CollectionUtils.newArrayList();
+ mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts;
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java
new file mode 100644
index 0000000..9d72de8
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.personalization;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+public class PersonalizationDataChunk {
+ public final boolean mInputByUser;
+ public final List<String> mTokens;
+ public final int mTimestampInSeconds;
+ public final String mPackageName;
+ public final Locale mlocale = null;
+
+ public PersonalizationDataChunk(boolean inputByUser, final List<String> tokens,
+ final int timestampInSeconds, final String packageName) {
+ mInputByUser = inputByUser;
+ mTokens = Collections.unmodifiableList(tokens);
+ mTimestampInSeconds = timestampInSeconds;
+ mPackageName = packageName;
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
index 805f422..4506440 100644
--- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
@@ -20,17 +20,14 @@
import android.content.res.Configuration;
import com.android.inputmethod.latin.DictionaryFacilitator;
-import com.android.inputmethod.latin.utils.DistracterFilter;
public class PersonalizationDictionarySessionRegistrar {
public static void init(final Context context,
- final DictionaryFacilitator dictionaryFacilitator,
- final DistracterFilter distracterFilter) {
+ final DictionaryFacilitator dictionaryFacilitator) {
}
public static void onConfigurationChanged(final Context context, final Configuration conf,
- final DictionaryFacilitator dictionaryFacilitator,
- final DistracterFilter distracterFilter) {
+ final DictionaryFacilitator dictionaryFacilitator) {
}
public static void onUpdateData(final Context context, final String type) {
diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
index 702688f..9362193 100644
--- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
@@ -62,6 +62,22 @@
}
/**
+ * Helper method to find out if a code point is starting punctuation.
+ *
+ * This include the Unicode START_PUNCTUATION category, but also some other symbols that are
+ * starting, like the inverted question mark or the double quote.
+ *
+ * @param codePoint the code point
+ * @return true if it's starting punctuation, false otherwise.
+ */
+ private static boolean isStartPunctuation(final int codePoint) {
+ return (codePoint == Constants.CODE_DOUBLE_QUOTE || codePoint == Constants.CODE_SINGLE_QUOTE
+ || codePoint == Constants.CODE_INVERTED_QUESTION_MARK
+ || codePoint == Constants.CODE_INVERTED_EXCLAMATION_MARK
+ || Character.getType(codePoint) == Character.START_PUNCTUATION);
+ }
+
+ /**
* Determine what caps mode should be in effect at the current offset in
* the text. Only the mode bits set in <var>reqModes</var> will be
* checked. Note that the caps mode flags here are explicitly defined
@@ -115,8 +131,7 @@
} else {
for (i = cs.length(); i > 0; i--) {
final char c = cs.charAt(i - 1);
- if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
- && Character.getType(c) != Character.START_PUNCTUATION) {
+ if (!isStartPunctuation(c)) {
break;
}
}
@@ -210,11 +225,14 @@
// We found out that we have a period. We need to determine if this is a full stop or
// otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
- // looks like (\w\.){2,}
+ // looks like (\w\.){2,}. Moreover, in German, you put periods after digits for dates
+ // and some other things, and in German specifically we need to not go into autocaps after
+ // a whitespace-digits-period sequence.
// To find out, we will have a simple state machine with the following states :
- // START, WORD, PERIOD, ABBREVIATION
+ // START, WORD, PERIOD, ABBREVIATION, NUMBER
// On START : (just before the first period)
// letter => WORD
+ // digit => NUMBER if German; end with caps otherwise
// whitespace => end with no caps (it was a stand-alone period)
// otherwise => end with caps (several periods/symbols in a row)
// On WORD : (within the word just before the first period)
@@ -228,6 +246,11 @@
// letter => LETTER
// period => PERIOD
// otherwise => end with no caps (it was an abbreviation)
+ // On NUMBER : (period immediately preceded by one or more digits)
+ // digit => NUMBER
+ // letter => LETTER (promote to word)
+ // otherwise => end with no caps (it was a whitespace-digits-period sequence,
+ // or a punctuation-digits-period sequence like "11.11.")
// "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
// should capitalize.
@@ -235,6 +258,7 @@
final int WORD = 1;
final int PERIOD = 2;
final int LETTER = 3;
+ final int NUMBER = 4;
final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
| TextUtils.CAP_MODE_SENTENCES) & reqModes;
final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
@@ -247,6 +271,8 @@
state = WORD;
} else if (Character.isWhitespace(c)) {
return noCaps;
+ } else if (Character.isDigit(c) && spacingAndPunctuations.mUsesGermanRules) {
+ state = NUMBER;
} else {
return caps;
}
@@ -275,6 +301,15 @@
} else {
return noCaps;
}
+ break;
+ case NUMBER:
+ if (Character.isLetter(c)) {
+ state = WORD;
+ } else if (Character.isDigit(c)) {
+ state = NUMBER;
+ } else {
+ return noCaps;
+ }
}
}
// Here we arrived at the start of the line. This should behave exactly like whitespace.
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
index 19a01ee..6e0fab3 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
@@ -16,132 +16,14 @@
package com.android.inputmethod.latin.utils;
-import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-import android.content.Context;
-import android.content.res.Resources;
-import android.text.InputType;
-import android.util.Log;
-import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputMethodSubtype;
-import com.android.inputmethod.keyboard.Keyboard;
-import com.android.inputmethod.keyboard.KeyboardId;
-import com.android.inputmethod.keyboard.KeyboardLayoutSet;
-import com.android.inputmethod.latin.Constants;
-import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
-import com.android.inputmethod.latin.Suggest;
-import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
-import com.android.inputmethod.latin.SuggestedWords;
-import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
-import com.android.inputmethod.latin.WordComposer;
-/**
- * This class is used to prevent distracters being added to personalization
- * or user history dictionaries
- */
-public class DistracterFilter {
- private static final String TAG = DistracterFilter.class.getSimpleName();
-
- private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
-
- private final Context mContext;
- private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
- private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
- private final DictionaryFacilitator mDictionaryFacilitator;
- private final Suggest mSuggest;
- private Keyboard mKeyboard;
-
- // If the score of the top suggestion exceeds this value, the tested word (e.g.,
- // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
- // words in dictionary. The greater the threshold is, the less likely the tested word would
- // become a distracter, which means the tested word will be more likely to be added to
- // the dictionary.
- private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
-
- // Create empty distracter filter.
- public DistracterFilter() {
- this(null, new ArrayList<InputMethodSubtype>());
- }
-
- /**
- * Create a DistracterFilter instance.
- *
- * @param context the context.
- * @param enabledSubtypes the enabled subtypes.
- */
- public DistracterFilter(final Context context, final List<InputMethodSubtype> enabledSubtypes) {
- mContext = context;
- mLocaleToSubtypeMap = new HashMap<>();
- if (enabledSubtypes != null) {
- for (final InputMethodSubtype subtype : enabledSubtypes) {
- final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
- if (mLocaleToSubtypeMap.containsKey(locale)) {
- // Multiple subtypes are enabled for one locale.
- // TODO: Investigate what we should do for this case.
- continue;
- }
- mLocaleToSubtypeMap.put(locale, subtype);
- }
- }
- mLocaleToKeyboardMap = new HashMap<>();
- mDictionaryFacilitator = new DictionaryFacilitator();
- mSuggest = new Suggest(mDictionaryFacilitator);
- mKeyboard = null;
- }
-
- private static boolean suggestionExceedsDistracterThreshold(
- final SuggestedWordInfo suggestion, final String consideredWord,
- final float distracterThreshold) {
- if (null != suggestion) {
- final int suggestionScore = suggestion.mScore;
- final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
- consideredWord, suggestion.mWord, suggestionScore);
- if (normalizedScore > distracterThreshold) {
- return true;
- }
- }
- return false;
- }
-
- private void loadKeyboardForLocale(final Locale newLocale) {
- final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
- if (cachedKeyboard != null) {
- mKeyboard = cachedKeyboard;
- return;
- }
- final InputMethodSubtype subtype = mLocaleToSubtypeMap.get(newLocale);
- if (subtype == null) {
- return;
- }
- final EditorInfo editorInfo = new EditorInfo();
- editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
- final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
- mContext, editorInfo);
- final Resources res = mContext.getResources();
- final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
- final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
- builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
- builder.setSubtype(subtype);
- builder.setIsSpellChecker(false /* isSpellChecker */);
- final KeyboardLayoutSet layoutSet = builder.build();
- mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
- }
-
- private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
- mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
- false /* useContactsDict */, false /* usePersonalizedDicts */,
- false /* forceReloadMainDictionary */, null /* listener */);
- mDictionaryFacilitator.waitForLoadingMainDictionary(
- TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
- }
-
+public interface DistracterFilter {
/**
* Determine whether a word is a distracter to words in dictionaries.
*
@@ -152,56 +34,25 @@
* @return true if testedWord is a distracter, otherwise false.
*/
public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
- final String testedWord, final Locale locale) {
- if (locale == null) {
+ final String testedWord, final Locale locale);
+
+ public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
+
+ public void close();
+
+ public static final class EmptyDistracterFilter implements DistracterFilter {
+ @Override
+ public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
+ String testedWord, Locale locale) {
return false;
}
- if (!locale.equals(mDictionaryFacilitator.getLocale())) {
- if (!mLocaleToSubtypeMap.containsKey(locale)) {
- Log.e(TAG, "Locale " + locale + " is not enabled.");
- // TODO: Investigate what we should do for disabled locales.
- return false;
- }
- loadKeyboardForLocale(locale);
- // Reset dictionaries for the locale.
- try {
- loadDictionariesForLocale(locale);
- } catch (final InterruptedException e) {
- Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter", e);
- return false;
- }
- }
- if (mKeyboard == null) {
- return false;
- }
- final WordComposer composer = new WordComposer();
- final int[] codePoints = StringUtils.toCodePointArray(testedWord);
- final int[] coordinates = mKeyboard.getCoordinates(codePoints);
- composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
- final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
- final String consideredWord = trailingSingleQuotesCount > 0 ?
- testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
- testedWord;
- final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
- final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
- @Override
- public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
- if (suggestedWords != null && suggestedWords.size() > 1) {
- // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
- // the decoder is at index 1.
- final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
- final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
- firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
- holder.set(hasStrongDistractor);
- }
- }
- };
- mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(),
- true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
- null /* additionalFeaturesOptions */, 0 /* sessionId */,
- SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
+ @Override
+ public void close() {
+ }
- return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
+ @Override
+ public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
+ }
}
}
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java
new file mode 100644
index 0000000..92033b7
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.text.InputType;
+import android.util.Log;
+import android.view.inputmethod.EditorInfo;
+import android.view.inputmethod.InputMethodSubtype;
+
+import com.android.inputmethod.keyboard.Keyboard;
+import com.android.inputmethod.keyboard.KeyboardId;
+import com.android.inputmethod.keyboard.KeyboardLayoutSet;
+import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.DictionaryFacilitator;
+import com.android.inputmethod.latin.PrevWordsInfo;
+import com.android.inputmethod.latin.Suggest;
+import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
+import com.android.inputmethod.latin.SuggestedWords;
+import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.WordComposer;
+
+/**
+ * This class is used to prevent distracters being added to personalization
+ * or user history dictionaries
+ */
+public class DistracterFilterUsingSuggestion implements DistracterFilter {
+ private static final String TAG = DistracterFilterUsingSuggestion.class.getSimpleName();
+
+ private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
+
+ private final Context mContext;
+ private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
+ private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
+ private final DictionaryFacilitator mDictionaryFacilitator;
+ private final Suggest mSuggest;
+ private Keyboard mKeyboard;
+ private final Object mLock = new Object();
+
+ // If the score of the top suggestion exceeds this value, the tested word (e.g.,
+ // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
+ // words in dictionary. The greater the threshold is, the less likely the tested word would
+ // become a distracter, which means the tested word will be more likely to be added to
+ // the dictionary.
+ private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
+
+ /**
+ * Create a DistracterFilter instance.
+ *
+ * @param context the context.
+ */
+ public DistracterFilterUsingSuggestion(final Context context) {
+ mContext = context;
+ mLocaleToSubtypeMap = new HashMap<>();
+ mLocaleToKeyboardMap = new HashMap<>();
+ mDictionaryFacilitator = new DictionaryFacilitator();
+ mSuggest = new Suggest(mDictionaryFacilitator);
+ mKeyboard = null;
+ }
+
+ @Override
+ public void close() {
+ mDictionaryFacilitator.closeDictionaries();
+ }
+
+ @Override
+ public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
+ final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
+ if (enabledSubtypes != null) {
+ for (final InputMethodSubtype subtype : enabledSubtypes) {
+ final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
+ if (newLocaleToSubtypeMap.containsKey(locale)) {
+ // Multiple subtypes are enabled for one locale.
+ // TODO: Investigate what we should do for this case.
+ continue;
+ }
+ newLocaleToSubtypeMap.put(locale, subtype);
+ }
+ }
+ if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
+ // Enabled subtypes have not been changed.
+ return;
+ }
+ synchronized (mLock) {
+ mLocaleToSubtypeMap.clear();
+ mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
+ mLocaleToKeyboardMap.clear();
+ }
+ }
+
+ private static boolean suggestionExceedsDistracterThreshold(
+ final SuggestedWordInfo suggestion, final String consideredWord,
+ final float distracterThreshold) {
+ if (null != suggestion) {
+ final int suggestionScore = suggestion.mScore;
+ final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
+ consideredWord, suggestion.mWord, suggestionScore);
+ if (normalizedScore > distracterThreshold) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private void loadKeyboardForLocale(final Locale newLocale) {
+ final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
+ if (cachedKeyboard != null) {
+ mKeyboard = cachedKeyboard;
+ return;
+ }
+ final InputMethodSubtype subtype;
+ synchronized (mLock) {
+ subtype = mLocaleToSubtypeMap.get(newLocale);
+ }
+ if (subtype == null) {
+ return;
+ }
+ final EditorInfo editorInfo = new EditorInfo();
+ editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
+ final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
+ mContext, editorInfo);
+ final Resources res = mContext.getResources();
+ final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
+ final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
+ builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
+ builder.setSubtype(subtype);
+ builder.setIsSpellChecker(false /* isSpellChecker */);
+ final KeyboardLayoutSet layoutSet = builder.build();
+ mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
+ }
+
+ private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
+ mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
+ false /* useContactsDict */, false /* usePersonalizedDicts */,
+ false /* forceReloadMainDictionary */, null /* listener */);
+ mDictionaryFacilitator.waitForLoadingMainDictionary(
+ TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
+ }
+
+ /**
+ * Determine whether a word is a distracter to words in dictionaries.
+ *
+ * @param prevWordsInfo the information of previous words.
+ * @param testedWord the word that will be tested to see whether it is a distracter to words
+ * in dictionaries.
+ * @param locale the locale of word.
+ * @return true if testedWord is a distracter, otherwise false.
+ */
+ @Override
+ public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
+ final String testedWord, final Locale locale) {
+ if (locale == null) {
+ return false;
+ }
+ if (!locale.equals(mDictionaryFacilitator.getLocale())) {
+ synchronized (mLock) {
+ if (!mLocaleToSubtypeMap.containsKey(locale)) {
+ Log.e(TAG, "Locale " + locale + " is not enabled.");
+ // TODO: Investigate what we should do for disabled locales.
+ return false;
+ }
+ loadKeyboardForLocale(locale);
+ // Reset dictionaries for the locale.
+ try {
+ loadDictionariesForLocale(locale);
+ } catch (final InterruptedException e) {
+ Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter",
+ e);
+ return false;
+ }
+ }
+ }
+ if (mKeyboard == null) {
+ return false;
+ }
+ final WordComposer composer = new WordComposer();
+ final int[] codePoints = StringUtils.toCodePointArray(testedWord);
+ final int[] coordinates = mKeyboard.getCoordinates(codePoints);
+ composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
+
+ final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
+ final String consideredWord = trailingSingleQuotesCount > 0 ?
+ testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
+ testedWord;
+ final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
+ final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
+ @Override
+ public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
+ if (suggestedWords != null && suggestedWords.size() > 1) {
+ // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
+ // the decoder is at index 1.
+ final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
+ final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
+ firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
+ holder.set(hasStrongDistractor);
+ }
+ }
+ };
+ mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(),
+ true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
+ null /* additionalFeaturesOptions */, 0 /* sessionId */,
+ SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
+
+ return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
index 36543cc..9c759ed 100644
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -24,6 +24,7 @@
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
// Note: this class is used as a parameter type of a native method. You should be careful when you
@@ -79,14 +80,14 @@
// Process a list of words and return a list of {@link LanguageModelParam} objects.
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
- final ArrayList<String> tokens, final int timestamp,
+ final List<String> tokens, final int timestamp,
final DictionaryFacilitator dictionaryFacilitator,
final SpacingAndPunctuations spacingAndPunctuations,
final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams =
CollectionUtils.newArrayList();
final int N = tokens.size();
- PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+ PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
for (int i = 0; i < N; ++i) {
final String tempWord = tokens.get(i);
if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
@@ -103,7 +104,7 @@
+ tempWord + "\"");
}
// Sentence terminator found. Split.
- prevWordsInfo = new PrevWordsInfo(null);
+ prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
continue;
}
if (DEBUG_TOKEN) {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 5ad2114..e41fe1d 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -178,10 +178,10 @@
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
- jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount,
- jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
- jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
- jfloatArray inOutLanguageWeight) {
+ jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
+ jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
+ jintArray outSpaceIndicesArray, jintArray outTypesArray,
+ jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@@ -274,7 +274,7 @@
}
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
- jlong dict, jintArray word0, jintArray word1) {
+ jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
@@ -283,7 +283,7 @@
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
+ const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
}
@@ -326,7 +326,8 @@
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
- jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
+ jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
+ jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -341,13 +342,14 @@
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
- const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+ const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1, jint probability, jint timestamp) {
+ jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
+ jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -363,13 +365,12 @@
// Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
timestamp, 0 /* level */, 1 /* count */);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
- false /* isBeginningOfSentence */);
+ const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1) {
+ jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -380,8 +381,7 @@
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
- false /* isBeginningOfSentence */);
+ const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
}
@@ -625,7 +625,7 @@
},
{
const_cast<char *>("getSuggestionsNative"),
- const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"),
+ const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
@@ -635,7 +635,7 @@
},
{
const_cast<char *>("getBigramProbabilityNative"),
- const_cast<char *>("(J[I[I)I"),
+ const_cast<char *>("(J[IZ[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
@@ -651,17 +651,17 @@
},
{
const_cast<char *>("addUnigramWordNative"),
- const_cast<char *>("(J[II[IIZZI)V"),
+ const_cast<char *>("(J[II[IIZZZI)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
- const_cast<char *>("(J[I[III)V"),
+ const_cast<char *>("(J[IZ[III)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
const_cast<char *>("removeBigramWordsNative"),
- const_cast<char *>("(J[I[I)V"),
+ const_cast<char *>("(J[IZ[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
},
{
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index e4de1f4..a58000a 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -20,11 +20,11 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
namespace latinime {
// TODO: Support n-gram.
-// TODO: Support beginning of sentence.
// This class does not take ownership of any code point buffers.
class PrevWordsInfo {
public:
@@ -52,8 +52,7 @@
void getPrevWordsTerminalPtNodePos(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
- int *const outPrevWordsTerminalPtNodePos,
- const bool tryLowerCaseSearch) const {
+ int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
@@ -63,17 +62,11 @@
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
- int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
- mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
- // dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) {
- // If no bigrams for this exact word, search again in lower case.
- pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
- mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
- }
- return BinaryDictionaryBigramsIterator(
- dictStructurePolicy->getBigramsStructurePolicy(), pos);
+ const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
+ dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
+ mIsBeginningOfSentence[0]);
+ return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
+ bigramListPos);
}
// n is 1-indexed.
@@ -102,8 +95,18 @@
if (!dictStructurePolicy || !wordCodePoints) {
return NOT_A_DICT_POS;
}
+ int codePoints[MAX_WORD_LENGTH];
+ int codePointCount = wordCodePointCount;
+ memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+ codePointCount, MAX_WORD_LENGTH);
+ if (codePointCount <= 0) {
+ return NOT_A_DICT_POS;
+ }
+ }
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
- wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
+ codePoints, codePointCount, false /* forceLowerCaseSearch */);
if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
// Return the position when when the word was found or doesn't try lower case
// search.
@@ -112,7 +115,33 @@
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
- wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
+ codePoints, codePointCount, true /* forceLowerCaseSearch */);
+ }
+
+ static int getBigramListPositionForWordWithTryingLowerCaseSearch(
+ const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+ const int *const wordCodePoints, const int wordCodePointCount,
+ const bool isBeginningOfSentence) {
+ int codePoints[MAX_WORD_LENGTH];
+ int codePointCount = wordCodePointCount;
+ memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+ codePointCount, MAX_WORD_LENGTH);
+ if (codePointCount <= 0) {
+ return NOT_A_DICT_POS;
+ }
+ }
+ int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+ codePointCount, false /* forceLowerCaseSearch */);
+ // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
+ // dictionary or has no bigrams
+ if (NOT_A_DICT_POS == pos) {
+ // If no bigrams for this exact word, search again in lower case.
+ pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+ codePointCount, true /* forceLowerCaseSearch */);
+ }
+ return pos;
}
static int getBigramListPositionForWord(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 281c5a8..75f4fef 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -139,6 +139,8 @@
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_401:
+ return FormatUtils::VERSION_401;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
@@ -247,7 +249,7 @@
}
bool supportsBeginningOfSentence() const {
- return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+ return mDictFormatVersion > FormatUtils::VERSION_401;
}
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index a8f8f28..b13ad18 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -98,6 +98,7 @@
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
+ case FormatUtils::VERSION_401:
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index f93d289..93e330a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -57,13 +57,14 @@
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
switch (dictFormatVersion) {
- case FormatUtils::VERSION_4: {
+ case FormatUtils::VERSION_401: {
return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants,
backward::v401::Ver4DictBuffers,
backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
backward::v401::Ver4PatriciaTriePolicy>(
dictFormatVersion, locale, attributeMap);
}
+ case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV: {
return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -115,13 +116,14 @@
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
break;
- case FormatUtils::VERSION_4: {
+ case FormatUtils::VERSION_401: {
return newPolicyForV4Dict<backward::v401::Ver4DictConstants,
backward::v401::Ver4DictBuffers,
backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
backward::v401::Ver4PatriciaTriePolicy>(
headerFilePath, formatVersion, std::move(mmappedBuffer));
}
+ case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV: {
return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -177,6 +179,7 @@
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
+ case FormatUtils::VERSION_401:
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
index 028e9ec..1f00fc6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -56,7 +56,7 @@
}
} else {
mValueStack.back() += 1;
- if (ptNodeParams->isTerminal()) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
mValidUnigramCount += 1;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 33e60e2..b2e60a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,7 @@
}
AK_FORCE_INLINE bool representsNonWordInfo() const {
- return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
&& isNotAWord();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 4495def..0247870 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
readingHelper.readNextSiblingNode(ptNodeParams);
- if (!ptNodeParams.representsNonWordInfo()) {
+ if (ptNodeParams.representsNonWordInfo()) {
// Skip PtNodes that represent non-word information.
continue;
}
@@ -181,9 +181,19 @@
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
- if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
+ int codePointsToAdd[MAX_WORD_LENGTH];
+ int codePointCountToAdd = length;
+ memmove(codePointsToAdd, word, sizeof(int) * length);
+ if (unigramProperty->representsBeginningOfSentence()) {
+ codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+ codePointCountToAdd, MAX_WORD_LENGTH);
+ }
+ if (codePointCountToAdd <= 0) {
+ return false;
+ }
+ if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
unigramProperty, &addedNewUnigram)) {
- if (addedNewUnigram) {
+ if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
mUnigramCount++;
}
if (unigramProperty->getShortcuts().size() > 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 105363d..a04551a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -41,11 +41,12 @@
TimeKeeper::setCurrentTime();
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
switch (formatVersion) {
- case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_401:
return createEmptyV4DictFile<backward::v401::Ver4DictConstants,
backward::v401::Ver4DictBuffers,
backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);
+ case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV:
return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index ba405b0..18f5580 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -29,6 +29,8 @@
switch (formatVersion) {
case VERSION_2:
return VERSION_2;
+ case VERSION_401:
+ return VERSION_401;
case VERSION_4_ONLY_FOR_TESTING:
return VERSION_4_ONLY_FOR_TESTING;
case VERSION_4:
@@ -60,6 +62,8 @@
// same so we use them for both here.
if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_401) {
+ return VERSION_401;
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
return VERSION_4_ONLY_FOR_TESTING;
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index c47f30c..b05cb2f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,8 +32,9 @@
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
VERSION_4_ONLY_FOR_TESTING = 399,
- VERSION_4 = 401,
- VERSION_4_DEV = 402,
+ VERSION_401 = 401,
+ VERSION_4 = 402,
+ VERSION_4_DEV = 403,
UNKNOWN_VERSION = -1
};
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 634c45b..f28ed56 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -18,6 +18,7 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
+#include <cstring>
#include <vector>
#include "defines.h"
@@ -93,6 +94,19 @@
static unsigned short latin_tolower(const unsigned short c);
static const std::vector<int> EMPTY_STRING;
+ // Returns updated code point count. Returns 0 when the code points cannot be marked as a
+ // Beginning-of-Sentence.
+ static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+ const int codePointCount, const int maxCodePoint) {
+ if (codePointCount >= maxCodePoint) {
+ // the code points cannot be marked as a Beginning-of-Sentence.
+ return 0;
+ }
+ memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
+ codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+ return codePointCount + 1;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 2c2fed3..3349a32 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -63,12 +63,16 @@
super.tearDown();
}
+ private static boolean supportsBeginningOfSentence(final int formatVersion) {
+ return formatVersion > FormatSpec.VERSION401;
+ }
+
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
- false /* isNotAWord */, false /* isBlacklisted */,
- mCurrentTime /* timestamp */);
+ false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, mCurrentTime /* timestamp */);
}
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -631,4 +635,57 @@
binaryDictionary.close();
dictFile.delete();
}
+
+ public void testBeginningOfSentence() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ if (supportsBeginningOfSentence(formatVersion)) {
+ testBeginningOfSentence(formatVersion);
+ }
+ }
+ }
+
+ private void testBeginningOfSentence(final int formatVersion) {
+ setCurrentTimeForTestMode(mCurrentTime);
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
+ BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+ true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+ mCurrentTime);
+ final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+ addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+ mCurrentTime);
+ assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+ mCurrentTime);
+ addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
+ mCurrentTime);
+ assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+ assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+
+ forcePassingLongTime(binaryDictionary);
+ assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+ assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+
+ addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+ mCurrentTime);
+ addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
+ mCurrentTime);
+ assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+ assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+ binaryDictionary.close();
+ dictFile.delete();
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 2b82e54..7938409 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -47,7 +47,11 @@
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
private static boolean canCheckBigramProbability(final int formatVersion) {
- return formatVersion >= FormatSpec.VERSION4_DEV;
+ return formatVersion > FormatSpec.VERSION401;
+ }
+
+ private static boolean supportsBeginningOfSentence(final int formatVersion) {
+ return formatVersion > FormatSpec.VERSION401;
}
private File createEmptyDictionaryAndGetFile(final String dictId,
@@ -171,7 +175,8 @@
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
- 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
+ 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200;
@@ -192,8 +197,8 @@
final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
- false /* isNotAWord */, false /* isBlacklisted */,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -1010,7 +1015,8 @@
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
- isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
@@ -1188,24 +1194,24 @@
final int unigramProbability = 100;
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ shortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
- updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ updatedShortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
@@ -1275,8 +1281,8 @@
final String word = words.get(random.nextInt(words.size()));
final int unigramProbability = unigramProbabilities.get(word);
binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@@ -1331,10 +1337,11 @@
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
- false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
+ false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
- Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */,
- true /* isBlacklisted */, 0 /* timestamp */);
+ Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
+ true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
@@ -1434,4 +1441,46 @@
assertEquals(bigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
}
+
+ public void testBeginningOfSentence() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ if (supportsBeginningOfSentence(formatVersion)) {
+ testBeginningOfSentence(formatVersion);
+ }
+ }
+ }
+
+ private void testBeginningOfSentence(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final int dummyProbability = 0;
+ binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */,
+ BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+ true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+ final int bigramProbability = 200;
+ addUnigramWord(binaryDictionary, "aaa", dummyProbability);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ addUnigramWord(binaryDictionary, "bbb", dummyProbability);
+ binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ binaryDictionary.flushWithGC();
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
index e98f9ea..33f3794 100644
--- a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
+++ b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
@@ -20,23 +20,24 @@
import android.test.suitebuilder.annotation.LargeTest;
-import com.android.inputmethod.latin.utils.DistracterFilter;
+import com.android.inputmethod.latin.utils.DistracterFilterUsingSuggestion;
/**
* Unit test for DistracterFilter
*/
@LargeTest
public class DistracterFilterTest extends InputTestsBase {
- private DistracterFilter mDistracterFilter;
+ private DistracterFilterUsingSuggestion mDistracterFilter;
@Override
protected void setUp() throws Exception {
super.setUp();
- mDistracterFilter = mLatinIME.createDistracterFilter();
+ mDistracterFilter = new DistracterFilterUsingSuggestion(getContext());
+ mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest());
}
public void testIsDistractorToWordsInDictionaries() {
- final PrevWordsInfo EMPTY_PREV_WORDS_INFO = new PrevWordsInfo(null);
+ final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
final Locale localeEnUs = new Locale("en", "US");
String typedWord = "alot";
diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
index f3351ff..c471eca 100644
--- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
+++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
@@ -157,10 +157,10 @@
// If one of the following cases breaks, the bigram suggestions won't work.
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 2).mPrevWord, "abc");
- assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
- "abc", mSpacingAndPunctuations, 2).mPrevWord);
- assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
- "abc. def", mSpacingAndPunctuations, 2).mPrevWord);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc. def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertFalse(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 2).mIsBeginningOfSentence);
@@ -180,16 +180,22 @@
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
"abc def .", mSpacingAndPunctuations, 2).mPrevWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
- "abc ", mSpacingAndPunctuations, 2).mPrevWord, null);
+ "abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 1).mPrevWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
"abc def ", mSpacingAndPunctuations, 1).mPrevWord, "def");
- assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
- "abc def.", mSpacingAndPunctuations, 1).mPrevWord);
- assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
- "abc def .", mSpacingAndPunctuations, 1).mPrevWord);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
+ assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+ "abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
}
/**
diff --git a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
index 6fc9df7..f3756c2 100644
--- a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
+++ b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
@@ -78,4 +78,48 @@
runMessages();
assertTrue("Caps after a while after repeating Backspace a lot", isCapsModeAutoShifted());
}
+
+ public void testAutoCapsAfterDigitsPeriod() {
+ changeLanguage("en");
+ type("On 22.11.");
+ assertFalse("(English) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ assertTrue("(English) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ mEditText.setText("");
+ changeLanguage("fr");
+ type("Le 22.");
+ assertFalse("(French) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ assertTrue("(French) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ mEditText.setText("");
+ changeLanguage("de");
+ type("Am 22.");
+ assertFalse("(German) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ // For German, no auto-caps in this case
+ assertFalse("(German) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ }
+
+ public void testAutoCapsAfterInvertedMarks() {
+ changeLanguage("es");
+ assertTrue("(Spanish) Auto caps at start", isCapsModeAutoShifted());
+ type("Hey. ¿");
+ assertTrue("(Spanish) Auto caps after inverted what", isCapsModeAutoShifted());
+ mEditText.setText("");
+ type("¡");
+ assertTrue("(Spanish) Auto caps after inverted bang", isCapsModeAutoShifted());
+ }
+
+ public void testOtherSentenceSeparators() {
+ changeLanguage("hy-AM");
+ assertTrue("(Armenian) Auto caps at start", isCapsModeAutoShifted());
+ type("Hey. ");
+ assertFalse("(Armenian) No auto-caps after latin period", isCapsModeAutoShifted());
+ type("Hey\u0589");
+ assertFalse("(Armenian) No auto-caps directly after armenian period",
+ isCapsModeAutoShifted());
+ type(" ");
+ assertTrue("(Armenian) Auto-caps after armenian period-whitespace",
+ isCapsModeAutoShifted());
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/WordComposerTests.java b/tests/src/com/android/inputmethod/latin/WordComposerTests.java
index 17e7185..274555a 100644
--- a/tests/src/com/android/inputmethod/latin/WordComposerTests.java
+++ b/tests/src/com/android/inputmethod/latin/WordComposerTests.java
@@ -74,7 +74,7 @@
CoordinateUtils.newCoordinateArray(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR.length,
Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
wc.setComposingWord(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR, COORDINATES_WITH_SUPPLEMENTARY_CHAR,
- new PrevWordsInfo(null));
+ PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
assertEquals(wc.size(), CODEPOINTS_WITH_SUPPLEMENTARY_CHAR.length);
assertFalse(wc.isCursorFrontOrMiddleOfComposingWord());
wc.setCursorPositionWithinWord(3);
@@ -109,7 +109,7 @@
assertEquals(PREV_WORDS_INFO_STR_WITHIN_BMP, wc.getPrevWordsInfoForSuggestion());
- final PrevWordsInfo PREV_WORDS_INFO_NULL = new PrevWordsInfo(null);
+ final PrevWordsInfo PREV_WORDS_INFO_NULL = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
wc.setComposingWord(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR, COORDINATES_WITH_SUPPLEMENTARY_CHAR,
PREV_WORDS_INFO_NULL);
wc.setCursorPositionWithinWord(3);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index a04b810..0528e34 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -77,14 +77,14 @@
if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */,
- wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
- 0 /* timestamp */);
+ wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+ wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
} else {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(),
- wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
- 0 /* timestamp */);
+ wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+ wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
}
}
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
index bc86864..7d3214a 100644
--- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
@@ -110,7 +110,7 @@
}
private static void addToDict(final UserHistoryDictionary dict, final List<String> words) {
- PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+ PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
for (String word : words) {
UserHistoryDictionary.addToDictionary(dict, prevWordsInfo, word, true,
(int)TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()));
diff --git a/tools/dicttool/compat/android/test/AndroidTestCase.java b/tools/dicttool/compat/android/test/AndroidTestCase.java
index d01b7ad..f765ce0 100644
--- a/tools/dicttool/compat/android/test/AndroidTestCase.java
+++ b/tools/dicttool/compat/android/test/AndroidTestCase.java
@@ -16,6 +16,8 @@
package android.test;
+import com.android.inputmethod.latin.dicttool.Test;
+
import junit.framework.TestCase;
import java.io.File;
@@ -27,7 +29,11 @@
*/
public class AndroidTestCase extends TestCase {
public File getCacheDir() {
- return new File(".");
+ final File dir = Test.TEST_TMP_DIR;
+ if (!dir.isDirectory()) {
+ dir.mkdirs();
+ }
+ return dir;
}
public AndroidTestCase getContext() {
return this;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index 48817b1..33661c8 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -19,16 +19,29 @@
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
+import com.android.inputmethod.latin.utils.FileUtils;
+import java.io.File;
+import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.nio.file.Files;
import java.util.ArrayList;
/**
* Dicttool command implementing self-tests.
*/
public class Test extends Dicttool.Command {
+ private static final String getTmpDir() {
+ try {
+ return Files.createTempDirectory("dicttool").toString();
+ } catch (IOException e) {
+ throw new RuntimeException("Can't get temporary directory", e);
+ }
+ }
+ private static final String TEST_TMP_DIR_BASE = getTmpDir();
+ public static final File TEST_TMP_DIR = new File(TEST_TMP_DIR_BASE);
public static final String COMMAND = "test";
private static final int DEFAULT_MAX_UNIGRAMS = 1500;
private long mSeed = System.currentTimeMillis();
@@ -56,8 +69,12 @@
@Override
public String getHelp() {
- final StringBuilder s = new StringBuilder("test [-s seed] [-m maxUnigrams] [testName...]\n"
- + "If seed is not specified, the current time is used.\nTest list is:\n");
+ final StringBuilder s = new StringBuilder(
+ "test [-s seed] [-m maxUnigrams] [-n] [testName...]\n"
+ + "If seed is not specified, the current time is used.\n"
+ + "If -n option is provided, do not delete temporary files in "
+ + TEST_TMP_DIR_BASE + "/*.\n"
+ + "Test list is:\n");
for (final Method m : mAllTestMethods) {
s.append(" ");
s.append(m.getName());
@@ -70,17 +87,26 @@
public void run() throws IllegalAccessException, InstantiationException,
InvocationTargetException {
int i = 0;
+ boolean deleteTmpDir = true;
while (i < mArgs.length) {
final String arg = mArgs[i++];
if ("-s".equals(arg)) {
mSeed = Long.parseLong(mArgs[i++]);
} else if ("-m".equals(arg)) {
mMaxUnigrams = Integer.parseInt(mArgs[i++]);
+ } else if ("-n".equals(arg)) {
+ deleteTmpDir = false;
} else {
mUsedTestMethods.add(arg);
}
}
- runChosenTests();
+ try {
+ runChosenTests();
+ } finally {
+ if (deleteTmpDir) {
+ FileUtils.deleteRecursively(TEST_TMP_DIR);
+ }
+ }
}
private void runChosenTests() throws IllegalAccessException, InstantiationException,