Merge "Consolidate attributes to EmojiPaletteView to control indicators"
diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
index 668eb92..743bc80 100644
--- a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
+++ b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
@@ -47,7 +47,7 @@
     // used to identify the versions for upgrades. This should never change going forward.
     private static final int METADATA_DATABASE_VERSION_WITH_CLIENTID = 6;
     // The current database version.
-    private static final int CURRENT_METADATA_DATABASE_VERSION = 8;
+    private static final int CURRENT_METADATA_DATABASE_VERSION = 9;
 
     private final static long NOT_A_DOWNLOAD_ID = -1;
 
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index e7ab02a..b775406 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -191,7 +191,8 @@
     private static native void closeNative(long dict);
     private static native int getFormatVersionNative(long dict);
     private static native int getProbabilityNative(long dict, int[] word);
-    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+    private static native int getBigramProbabilityNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1);
     private static native void getWordPropertyNative(long dict, int[] word,
             int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
             ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
@@ -200,15 +201,17 @@
     private static native void getSuggestionsNative(long dict, long proximityInfo,
             long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
             int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
-            int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints,
-            int[] outputScores, int[] outputIndices, int[] outputTypes,
-            int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight);
+            int[] prevWordCodePointArray, boolean isBeginningOfSentence,
+            int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
+            int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
+            float[] inOutLanguageWeight);
     private static native void addUnigramWordNative(long dict, int[] word, int probability,
-            int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
-            boolean isBlacklisted, int timestamp);
-    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
-            int probability, int timestamp);
-    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
+            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
+            boolean isNotAWord, boolean isBlacklisted, int timestamp);
+    private static native void addBigramWordsNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
+    private static native void removeBigramWordsNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1);
     private static native int addMultipleDictionaryEntriesNative(long dict,
             LanguageModelParam[] languageModelParams, int startIndex);
     private static native String getPropertyNative(long dict, String query);
@@ -301,7 +304,8 @@
                 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
                 inputPointers.getYCoordinates(), inputPointers.getTimes(),
                 inputPointers.getPointerIds(), mInputCodePoints, inputSize,
-                mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount,
+                mNativeSuggestOptions.getOptions(), prevWordCodePointArray,
+                prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount,
                 mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes,
                 mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight);
         if (inOutLanguageWeight != null) {
@@ -364,12 +368,13 @@
     }
 
     public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
             return NOT_A_PROBABILITY;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
+        return getBigramProbabilityNative(mNativeDict, codePoints0,
+                prevWordsInfo.mIsBeginningOfSentence, codePoints1);
     }
 
     public WordProperty getWordProperty(final String word) {
@@ -420,16 +425,17 @@
 
     // Add a unigram entry to binary dictionary with unigram attributes in native code.
     public void addUnigramEntry(final String word, final int probability,
-            final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
+            final String shortcutTarget, final int shortcutProbability,
+            final boolean isBeginningOfSentence, final boolean isNotAWord,
             final boolean isBlacklisted, final int timestamp) {
-        if (TextUtils.isEmpty(word)) {
+        if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
             return;
         }
         final int[] codePoints = StringUtils.toCodePointArray(word);
         final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
                 StringUtils.toCodePointArray(shortcutTarget) : null;
         addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
-                shortcutProbability, isNotAWord, isBlacklisted, timestamp);
+                shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp);
         mHasUpdated = true;
     }
 
@@ -437,23 +443,25 @@
     public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
             final int probability,
             final int timestamp) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
             return;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
+        addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+                codePoints1, probability, timestamp);
         mHasUpdated = true;
     }
 
     // Remove an n-gram entry from the binary dictionary in native code.
     public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
             return;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
+        removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+                codePoints1);
         mHasUpdated = true;
     }
 
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index 67ca595..efc5a61 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -192,7 +192,6 @@
     public static final int CODE_SPACE = ' ';
     public static final int CODE_PERIOD = '.';
     public static final int CODE_COMMA = ',';
-    public static final int CODE_ARMENIAN_PERIOD = 0x0589;
     public static final int CODE_DASH = '-';
     public static final int CODE_SINGLE_QUOTE = '\'';
     public static final int CODE_DOUBLE_QUOTE = '"';
@@ -208,6 +207,8 @@
     public static final int CODE_CLOSING_SQUARE_BRACKET = ']';
     public static final int CODE_CLOSING_CURLY_BRACKET = '}';
     public static final int CODE_CLOSING_ANGLE_BRACKET = '>';
+    public static final int CODE_INVERTED_QUESTION_MARK = 0xBF; // ¿
+    public static final int CODE_INVERTED_EXCLAMATION_MARK = 0xA1; // ¡
 
     /**
      * Special keys code. Must be negative.
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
index 3fb76b1..538bfc0 100644
--- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
@@ -224,7 +224,7 @@
      */
     private void addNameLocked(final String name) {
         int len = StringUtils.codePointCount(name);
-        PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+        PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         // TODO: Better tokenization for non-Latin writing systems
         for (int i = 0; i < len; i++) {
             if (Character.isLetter(name.codePointAt(i))) {
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
index a979167..2123638 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
@@ -19,14 +19,18 @@
 import android.content.Context;
 import android.text.TextUtils;
 import android.util.Log;
+import android.view.inputmethod.InputMethodSubtype;
 
 import com.android.inputmethod.annotations.UsedForTesting;
 import com.android.inputmethod.keyboard.ProximityInfo;
 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
 import com.android.inputmethod.latin.personalization.ContextualDictionary;
+import com.android.inputmethod.latin.personalization.PersonalizationDataChunk;
 import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
 import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
+import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 import com.android.inputmethod.latin.utils.CollectionUtils;
+import com.android.inputmethod.latin.utils.DistracterFilter;
 import com.android.inputmethod.latin.utils.ExecutorUtils;
 import com.android.inputmethod.latin.utils.LanguageModelParam;
 import com.android.inputmethod.latin.utils.SuggestionResults;
@@ -37,6 +41,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
@@ -57,6 +62,7 @@
     private volatile CountDownLatch mLatchForWaitingLoadingMainDictionary = new CountDownLatch(0);
     // To synchronize assigning mDictionaries to ensure closing dictionaries.
     private final Object mLock = new Object();
+    private final DistracterFilter mDistracterFilter;
 
     private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTION =
             new String[] {
@@ -162,7 +168,17 @@
         public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
     }
 
-    public DictionaryFacilitator() {}
+    public DictionaryFacilitator() {
+        mDistracterFilter = new DistracterFilter.EmptyDistracterFilter();
+    }
+
+    public DictionaryFacilitator(final DistracterFilter distracterFilter) {
+        mDistracterFilter = distracterFilter;
+    }
+
+    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
+        mDistracterFilter.updateEnabledSubtypes(enabledSubtypes);
+    }
 
     public Locale getLocale() {
         return mDictionaries.mLocale;
@@ -321,6 +337,7 @@
         for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTION) {
             dictionaries.closeDict(dictType);
         }
+        mDistracterFilter.close();
     }
 
     // The main dictionary could have been loaded asynchronously.  Don't cache the return value
@@ -537,9 +554,16 @@
         personalizationDict.clear();
     }
 
-    public void addMultipleDictionaryEntriesToPersonalizationDictionary(
-            final ArrayList<LanguageModelParam> languageModelParams,
+    public void addEntriesToPersonalizationDictionary(
+            final PersonalizationDataChunk personalizationDataChunk,
+            final SpacingAndPunctuations spacingAndPunctuations,
             final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
+        final ArrayList<LanguageModelParam> languageModelParams =
+                LanguageModelParam.createLanguageModelParamsFrom(
+                        personalizationDataChunk.mTokens,
+                        personalizationDataChunk.mTimestampInSeconds,
+                        this /* dictionaryFacilitator */, spacingAndPunctuations,
+                        mDistracterFilter);
         final ExpandableBinaryDictionary personalizationDict =
                 mDictionaries.getSubDict(Dictionary.TYPE_PERSONALIZATION);
         if (personalizationDict == null || languageModelParams == null
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index d67253c..2cbce04 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -114,7 +114,8 @@
     private boolean needsToMigrateDictionary(final int formatVersion) {
         // When we bump up the dictionary format version, the old version should be added to here
         // for supporting migration. Note that native code has to support reading such formats.
-        return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING;
+        return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
+                || formatVersion == FormatSpec.VERSION401;
     }
 
     public boolean isValidDictionaryLocked() {
@@ -292,7 +293,7 @@
             final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
             final boolean isBlacklisted, final int timestamp) {
         mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
-                isNotAWord, isBlacklisted, timestamp);
+                false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp);
     }
 
     /**
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 4863326..34d5f71 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -81,9 +81,10 @@
 import com.android.inputmethod.latin.suggestions.SuggestionStripViewAccessor;
 import com.android.inputmethod.latin.utils.ApplicationUtils;
 import com.android.inputmethod.latin.utils.CapsModeUtils;
+import com.android.inputmethod.latin.utils.CollectionUtils;
 import com.android.inputmethod.latin.utils.CoordinateUtils;
 import com.android.inputmethod.latin.utils.DialogUtils;
-import com.android.inputmethod.latin.utils.DistracterFilter;
+import com.android.inputmethod.latin.utils.DistracterFilterUsingSuggestion;
 import com.android.inputmethod.latin.utils.ImportantNoticeUtils;
 import com.android.inputmethod.latin.utils.IntentUtils;
 import com.android.inputmethod.latin.utils.JniUtils;
@@ -95,6 +96,7 @@
 import java.io.FileDescriptor;
 import java.io.PrintWriter;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
 import java.util.concurrent.TimeUnit;
 
@@ -122,7 +124,8 @@
     private static final String SCHEME_PACKAGE = "package";
 
     private final Settings mSettings;
-    private final DictionaryFacilitator mDictionaryFacilitator = new DictionaryFacilitator();
+    private final DictionaryFacilitator mDictionaryFacilitator =
+            new DictionaryFacilitator(new DistracterFilterUsingSuggestion(this /* context */));
     private final InputLogic mInputLogic = new InputLogic(this /* LatinIME */,
             this /* SuggestionStripViewAccessor */, mDictionaryFacilitator);
     // We expect to have only one decoder in almost all cases, hence the default capacity of 1.
@@ -538,6 +541,8 @@
         if (!mHandler.hasPendingReopenDictionaries()) {
             resetSuggestForLocale(locale);
         }
+        mDictionaryFacilitator.updateEnabledSubtypes(mRichImm.getMyEnabledInputMethodSubtypeList(
+                true /* allowsImplicitlySelectedSubtypes */));
         refreshPersonalizationDictionarySession();
         StatsUtils.onLoadSettings(currentSettingsValues);
     }
@@ -564,9 +569,7 @@
             PersonalizationHelper.removeAllPersonalizationDictionaries(this);
             PersonalizationDictionarySessionRegistrar.resetAll(this);
         } else {
-            final DistracterFilter distracterFilter = createDistracterFilter();
-            PersonalizationDictionarySessionRegistrar.init(
-                    this, mDictionaryFacilitator, distracterFilter);
+            PersonalizationDictionarySessionRegistrar.init(this, mDictionaryFacilitator);
         }
     }
 
@@ -660,9 +663,8 @@
             mInputLogic.mConnection.finishComposingText();
             mInputLogic.mConnection.endBatchEdit();
         }
-        final DistracterFilter distracterFilter = createDistracterFilter();
         PersonalizationDictionarySessionRegistrar.onConfigurationChanged(this, conf,
-                mDictionaryFacilitator, distracterFilter);
+                mDictionaryFacilitator);
         super.onConfigurationChanged(conf);
     }
 
@@ -1739,11 +1741,9 @@
     }
 
     @UsedForTesting
-    /* package for test */ DistracterFilter createDistracterFilter() {
-        // Return an empty distracter filter when this method is called before onCreate().
-        return (mRichImm != null) ? new DistracterFilter(this /* Context */,
-                mRichImm.getMyEnabledInputMethodSubtypeList(
-                        true /* allowsImplicitlySelectedSubtypes */)) : new DistracterFilter();
+    /* package for test */ List<InputMethodSubtype> getEnabledSubtypesForTest() {
+        return (mRichImm != null) ? mRichImm.getMyEnabledInputMethodSubtypeList(
+                true /* allowsImplicitlySelectedSubtypes */) : new ArrayList<InputMethodSubtype>();
     }
 
     public void dumpDictionaryForDebug(final String dictName) {
diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
index ecc8947..e44239f 100644
--- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
+++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
@@ -16,23 +16,32 @@
 
 package com.android.inputmethod.latin;
 
-import android.util.Log;
-
+/**
+ * Class to represent information of previous words. This class is used to add n-gram entries
+ * into binary dictionaries, to get predictions, and to get suggestions.
+ */
 // TODO: Support multiple previous words for n-gram.
 public class PrevWordsInfo {
-    // The previous word. May be null after resetting and before starting a new composing word, or
-    // when there is no context like at the start of text for example. It can also be set to null
-    // externally when the user enters a separator that does not let bigrams across, like a period
-    // or a comma.
+    public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO = new PrevWordsInfo(null);
+    public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
+
+    // The word immediately before the considered word. null means we don't have any context
+    // including the "beginning of sentence context" - we just don't know what to predict.
+    // An example of that is after a comma.
+    // For simplicity of implementation, this may also be null transiently after the WordComposer
+    // was reset and before starting a new composing word, but we should never be calling
+    // getSuggestions* in this situation.
+    // This is an empty string when mIsBeginningOfSentence is true.
     public final String mPrevWord;
 
     // TODO: Have sentence separator.
-    // Whether the current context is beginning of sentence or not.
+    // Whether the current context is beginning of sentence or not. This is true when composing at
+    // the beginning of an input field or composing a word after a sentence separator.
     public final boolean mIsBeginningOfSentence;
 
     // Beginning of sentence.
     public PrevWordsInfo() {
-        mPrevWord = null;
+        mPrevWord = "";
         mIsBeginningOfSentence = true;
     }
 
@@ -40,4 +49,8 @@
         mPrevWord = prevWord;
         mIsBeginningOfSentence = false;
     }
+
+    public boolean isValid() {
+        return mPrevWord != null;
+    }
 }
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 2c54e10..e7c1636 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -542,7 +542,7 @@
             final SpacingAndPunctuations spacingAndPunctuations, final int n) {
         mIC = mParent.getCurrentInputConnection();
         if (null == mIC) {
-            return new PrevWordsInfo(null);
+            return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         }
         final CharSequence prev = getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
         if (DEBUG_PREVIOUS_TEXT && null != prev) {
@@ -588,30 +588,30 @@
     // (n = 2) "abc. def|" -> beginning-of-sentence
     public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev,
             final SpacingAndPunctuations spacingAndPunctuations, final int n) {
-        if (prev == null) return new PrevWordsInfo(null);
+        if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         final String[] w = spaceRegex.split(prev);
 
         // If we can't find n words, or we found an empty word, the context is
         // beginning-of-sentence.
         if (w.length < n) {
-            return new PrevWordsInfo();
+            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
         }
         final String nthPrevWord = w[w.length - n];
         final int length = nthPrevWord.length();
         if (length <= 0) {
-            return  new PrevWordsInfo();
+            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
         }
 
         // If ends in a sentence separator, the context is beginning-of-sentence.
         final char lastChar = nthPrevWord.charAt(length - 1);
         if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
-            new PrevWordsInfo();
+            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
         }
         // If ends in a word separator or connector, the context is unclear.
         // TODO: Return meaningful context for this case.
         if (spacingAndPunctuations.isWordSeparator(lastChar)
                 || spacingAndPunctuations.isWordConnector(lastChar)) {
-            return new PrevWordsInfo(null);
+            return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         }
         return new PrevWordsInfo(nthPrevWord);
     }
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 1eccf2c..daa7f4b 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -18,6 +18,7 @@
 
 import android.text.TextUtils;
 
+import com.android.inputmethod.event.Event;
 import com.android.inputmethod.keyboard.ProximityInfo;
 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
 import com.android.inputmethod.latin.define.ProductionFlag;
diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java
index 6ecb373..c53a8fd 100644
--- a/java/src/com/android/inputmethod/latin/WordComposer.java
+++ b/java/src/com/android/inputmethod/latin/WordComposer.java
@@ -85,7 +85,7 @@
         mIsBatchMode = false;
         mCursorPositionWithinWord = 0;
         mRejectedBatchModeSuggestion = null;
-        mPrevWordsInfo = new PrevWordsInfo(null);
+        mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         refreshTypedWordCache();
     }
 
@@ -117,7 +117,7 @@
         mIsBatchMode = false;
         mCursorPositionWithinWord = 0;
         mRejectedBatchModeSuggestion = null;
-        mPrevWordsInfo = new PrevWordsInfo(null);
+        mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         refreshTypedWordCache();
     }
 
@@ -445,7 +445,7 @@
     // when the user inputs a separator that's not whitespace (including the case of the
     // double-space-to-period feature).
     public void discardPreviousWordForSuggestion() {
-        mPrevWordsInfo = new PrevWordsInfo(null);
+        mPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
     }
 
     public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord,
diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
index 78d4bc8..237b43b 100644
--- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
+++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
@@ -809,11 +809,10 @@
         final int codePoint = inputTransaction.mEvent.mCodePoint;
         final SettingsValues settingsValues = inputTransaction.mSettingsValues;
         boolean didAutoCorrect = false;
-        final boolean wasComposingWord = mWordComposer.isComposingWord();
         // We avoid sending spaces in languages without spaces if we were composing.
         final boolean shouldAvoidSendingCode = Constants.CODE_SPACE == codePoint
                 && !settingsValues.mSpacingAndPunctuations.mCurrentLanguageHasSpaces
-                && wasComposingWord;
+                && mWordComposer.isComposingWord();
         if (mWordComposer.isCursorFrontOrMiddleOfComposingWord()) {
             // If we are in the middle of a recorrection, we need to commit the recorrection
             // first so that we can insert the separator at the current cursor position.
@@ -857,7 +856,7 @@
             promotePhantomSpace(settingsValues);
         }
         if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) {
-            ResearchLogger.latinIME_handleSeparator(codePoint, wasComposingWord);
+            ResearchLogger.latinIME_handleSeparator(codePoint, mWordComposer.isComposingWord());
         }
 
         if (!shouldAvoidSendingCode) {
@@ -873,9 +872,7 @@
             }
 
             startDoubleSpacePeriodCountdown(inputTransaction);
-            if (wasComposingWord) {
-                inputTransaction.setRequiresUpdateSuggestions();
-            }
+            inputTransaction.setRequiresUpdateSuggestions();
         } else {
             if (swapWeakSpace) {
                 swapSwapperAndSpace(inputTransaction);
@@ -1612,8 +1609,9 @@
             return mConnection.getPrevWordsInfoFromNthPreviousWord(
                     spacingAndPunctuations, nthPreviousWord);
         } else {
-            return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ? new PrevWordsInfo()
-                    : new PrevWordsInfo(mLastComposedWord.mCommittedWord.toString());
+            return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ?
+                    PrevWordsInfo.BEGINNING_OF_SENTENCE :
+                            new PrevWordsInfo(mLastComposedWord.mCommittedWord.toString());
         }
     }
 
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index f5f072b..a2ae74b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -192,8 +192,9 @@
     public static final int VERSION2 = 2;
     // Dictionary version used for testing.
     public static final int VERSION4_ONLY_FOR_TESTING = 399;
-    public static final int VERSION4 = 401;
-    public static final int VERSION4_DEV = 402;
+    public static final int VERSION401 = 401;
+    public static final int VERSION4 = 402;
+    public static final int VERSION4_DEV = 403;
     static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
     static final int MAXIMUM_SUPPORTED_VERSION = VERSION4_DEV;
 
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index 8533922..ed83251 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -35,6 +35,8 @@
     public final ProbabilityInfo mProbabilityInfo;
     public final ArrayList<WeightedString> mShortcutTargets;
     public final ArrayList<WeightedString> mBigrams;
+    // TODO: Support mIsBeginningOfSentence.
+    public final boolean mIsBeginningOfSentence;
     public final boolean mIsNotAWord;
     public final boolean mIsBlacklistEntry;
     public final boolean mHasShortcuts;
@@ -51,6 +53,7 @@
         mProbabilityInfo = probabilityInfo;
         mShortcutTargets = shortcutTargets;
         mBigrams = bigrams;
+        mIsBeginningOfSentence = false;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklistEntry;
         mHasBigrams = bigrams != null && !bigrams.isEmpty();
@@ -77,6 +80,7 @@
         mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
         mShortcutTargets = CollectionUtils.newArrayList();
         mBigrams = CollectionUtils.newArrayList();
+        mIsBeginningOfSentence = false;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklisted;
         mHasShortcuts = hasShortcuts;
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java
new file mode 100644
index 0000000..9d72de8
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.personalization;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+public class PersonalizationDataChunk {
+    public final boolean mInputByUser;
+    public final List<String> mTokens;
+    public final int mTimestampInSeconds;
+    public final String mPackageName;
+    public final Locale mlocale = null;
+
+    public PersonalizationDataChunk(boolean inputByUser, final List<String> tokens,
+            final int timestampInSeconds, final String packageName) {
+        mInputByUser = inputByUser;
+        mTokens = Collections.unmodifiableList(tokens);
+        mTimestampInSeconds = timestampInSeconds;
+        mPackageName = packageName;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
index 805f422..4506440 100644
--- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionarySessionRegistrar.java
@@ -20,17 +20,14 @@
 import android.content.res.Configuration;
 
 import com.android.inputmethod.latin.DictionaryFacilitator;
-import com.android.inputmethod.latin.utils.DistracterFilter;
 
 public class PersonalizationDictionarySessionRegistrar {
     public static void init(final Context context,
-            final DictionaryFacilitator dictionaryFacilitator,
-            final DistracterFilter distracterFilter) {
+            final DictionaryFacilitator dictionaryFacilitator) {
     }
 
     public static void onConfigurationChanged(final Context context, final Configuration conf,
-            final DictionaryFacilitator dictionaryFacilitator,
-            final DistracterFilter distracterFilter) {
+            final DictionaryFacilitator dictionaryFacilitator) {
     }
 
     public static void onUpdateData(final Context context, final String type) {
diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
index 702688f..9362193 100644
--- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
@@ -62,6 +62,22 @@
     }
 
     /**
+     * Helper method to find out if a code point is starting punctuation.
+     *
+     * This includes the Unicode START_PUNCTUATION category, but also some other symbols that are
+     * starting, like the inverted question mark or the double quote.
+     *
+     * @param codePoint the code point
+     * @return true if it's starting punctuation, false otherwise.
+     */
+    private static boolean isStartPunctuation(final int codePoint) {
+        return (codePoint == Constants.CODE_DOUBLE_QUOTE || codePoint == Constants.CODE_SINGLE_QUOTE
+                || codePoint == Constants.CODE_INVERTED_QUESTION_MARK
+                || codePoint == Constants.CODE_INVERTED_EXCLAMATION_MARK
+                || Character.getType(codePoint) == Character.START_PUNCTUATION);
+    }
+
+    /**
      * Determine what caps mode should be in effect at the current offset in
      * the text. Only the mode bits set in <var>reqModes</var> will be
      * checked. Note that the caps mode flags here are explicitly defined
@@ -115,8 +131,7 @@
         } else {
             for (i = cs.length(); i > 0; i--) {
                 final char c = cs.charAt(i - 1);
-                if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
-                        && Character.getType(c) != Character.START_PUNCTUATION) {
+                if (!isStartPunctuation(c)) {
                     break;
                 }
             }
@@ -210,11 +225,14 @@
 
         // We found out that we have a period. We need to determine if this is a full stop or
         // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
-        // looks like (\w\.){2,}
+        // looks like (\w\.){2,}. Moreover, in German, you put periods after digits for dates
+        // and some other things, and in German specifically we need to not go into autocaps after
+        // a whitespace-digits-period sequence.
         // To find out, we will have a simple state machine with the following states :
-        // START, WORD, PERIOD, ABBREVIATION
+        // START, WORD, PERIOD, ABBREVIATION, NUMBER
         // On START : (just before the first period)
         //           letter => WORD
+        //           digit => NUMBER if German; end with caps otherwise
         //           whitespace => end with no caps (it was a stand-alone period)
         //           otherwise => end with caps (several periods/symbols in a row)
         // On WORD : (within the word just before the first period)
@@ -228,6 +246,11 @@
         //           letter => LETTER
         //           period => PERIOD
         //           otherwise => end with no caps (it was an abbreviation)
+        // On NUMBER : (period immediately preceded by one or more digits)
+        //           digit => NUMBER
+        //           letter => WORD (promote to word)
+        //           otherwise => end with no caps (it was a whitespace-digits-period sequence,
+        //            or a punctuation-digits-period sequence like "11.11.")
         // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
         // should capitalize.
 
@@ -235,6 +258,7 @@
         final int WORD = 1;
         final int PERIOD = 2;
         final int LETTER = 3;
+        final int NUMBER = 4;
         final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
                 | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
@@ -247,6 +271,8 @@
                     state = WORD;
                 } else if (Character.isWhitespace(c)) {
                     return noCaps;
+                } else if (Character.isDigit(c) && spacingAndPunctuations.mUsesGermanRules) {
+                    state = NUMBER;
                 } else {
                     return caps;
                 }
@@ -275,6 +301,15 @@
                 } else {
                     return noCaps;
                 }
+                break;
+            case NUMBER:
+                if (Character.isLetter(c)) {
+                    state = WORD;
+                } else if (Character.isDigit(c)) {
+                    state = NUMBER;
+                } else {
+                    return noCaps;
+                }
             }
         }
         // Here we arrived at the start of the line. This should behave exactly like whitespace.
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
index 19a01ee..6e0fab3 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
@@ -16,132 +16,14 @@
 
 package com.android.inputmethod.latin.utils;
 
-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
 
-import android.content.Context;
-import android.content.res.Resources;
-import android.text.InputType;
-import android.util.Log;
-import android.view.inputmethod.EditorInfo;
 import android.view.inputmethod.InputMethodSubtype;
 
-import com.android.inputmethod.keyboard.Keyboard;
-import com.android.inputmethod.keyboard.KeyboardId;
-import com.android.inputmethod.keyboard.KeyboardLayoutSet;
-import com.android.inputmethod.latin.Constants;
-import com.android.inputmethod.latin.DictionaryFacilitator;
 import com.android.inputmethod.latin.PrevWordsInfo;
-import com.android.inputmethod.latin.Suggest;
-import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
-import com.android.inputmethod.latin.SuggestedWords;
-import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
-import com.android.inputmethod.latin.WordComposer;
 
-/**
- * This class is used to prevent distracters being added to personalization
- * or user history dictionaries
- */
-public class DistracterFilter {
-    private static final String TAG = DistracterFilter.class.getSimpleName();
-
-    private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
-
-    private final Context mContext;
-    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
-    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
-    private final DictionaryFacilitator mDictionaryFacilitator;
-    private final Suggest mSuggest;
-    private Keyboard mKeyboard;
-
-    // If the score of the top suggestion exceeds this value, the tested word (e.g.,
-    // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
-    // words in dictionary. The greater the threshold is, the less likely the tested word would
-    // become a distracter, which means the tested word will be more likely to be added to
-    // the dictionary.
-    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
-
-    // Create empty distracter filter.
-    public DistracterFilter() {
-        this(null, new ArrayList<InputMethodSubtype>());
-    }
-
-    /**
-     * Create a DistracterFilter instance.
-     *
-     * @param context the context.
-     * @param enabledSubtypes the enabled subtypes.
-     */
-    public DistracterFilter(final Context context, final List<InputMethodSubtype> enabledSubtypes) {
-        mContext = context;
-        mLocaleToSubtypeMap = new HashMap<>();
-        if (enabledSubtypes != null) {
-            for (final InputMethodSubtype subtype : enabledSubtypes) {
-                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
-                if (mLocaleToSubtypeMap.containsKey(locale)) {
-                    // Multiple subtypes are enabled for one locale.
-                    // TODO: Investigate what we should do for this case.
-                    continue;
-                }
-                mLocaleToSubtypeMap.put(locale, subtype);
-            }
-        }
-        mLocaleToKeyboardMap = new HashMap<>();
-        mDictionaryFacilitator = new DictionaryFacilitator();
-        mSuggest = new Suggest(mDictionaryFacilitator);
-        mKeyboard = null;
-    }
-
-    private static boolean suggestionExceedsDistracterThreshold(
-            final SuggestedWordInfo suggestion, final String consideredWord,
-            final float distracterThreshold) {
-        if (null != suggestion) {
-            final int suggestionScore = suggestion.mScore;
-            final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
-                    consideredWord, suggestion.mWord, suggestionScore);
-            if (normalizedScore > distracterThreshold) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    private void loadKeyboardForLocale(final Locale newLocale) {
-        final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
-        if (cachedKeyboard != null) {
-            mKeyboard = cachedKeyboard;
-            return;
-        }
-        final InputMethodSubtype subtype = mLocaleToSubtypeMap.get(newLocale);
-        if (subtype == null) {
-            return;
-        }
-        final EditorInfo editorInfo = new EditorInfo();
-        editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
-        final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
-                mContext, editorInfo);
-        final Resources res = mContext.getResources();
-        final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
-        final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
-        builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
-        builder.setSubtype(subtype);
-        builder.setIsSpellChecker(false /* isSpellChecker */);
-        final KeyboardLayoutSet layoutSet = builder.build();
-        mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
-    }
-
-    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
-        mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
-                false /* useContactsDict */, false /* usePersonalizedDicts */,
-                false /* forceReloadMainDictionary */, null /* listener */);
-        mDictionaryFacilitator.waitForLoadingMainDictionary(
-                TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
-    }
-
+public interface DistracterFilter {
     /**
      * Determine whether a word is a distracter to words in dictionaries.
      *
@@ -152,56 +34,25 @@
      * @return true if testedWord is a distracter, otherwise false.
      */
     public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
-            final String testedWord, final Locale locale) {
-        if (locale == null) {
+            final String testedWord, final Locale locale);
+
+    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
+
+    public void close();
+
+    public static final class EmptyDistracterFilter implements DistracterFilter {
+        @Override
+        public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
+                String testedWord, Locale locale) {
             return false;
         }
-        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
-            if (!mLocaleToSubtypeMap.containsKey(locale)) {
-                Log.e(TAG, "Locale " + locale + " is not enabled.");
-                // TODO: Investigate what we should do for disabled locales.
-                return false;
-            }
-            loadKeyboardForLocale(locale);
-            // Reset dictionaries for the locale.
-            try {
-                loadDictionariesForLocale(locale);
-            } catch (final InterruptedException e) {
-                Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter", e);
-                return false;
-            }
-        }
-        if (mKeyboard == null) {
-            return false;
-        }
-        final WordComposer composer = new WordComposer();
-        final int[] codePoints = StringUtils.toCodePointArray(testedWord);
-        final int[] coordinates = mKeyboard.getCoordinates(codePoints);
-        composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
 
-        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
-        final String consideredWord = trailingSingleQuotesCount > 0 ?
-                testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
-                testedWord;
-        final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
-        final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
-            @Override
-            public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
-                if (suggestedWords != null && suggestedWords.size() > 1) {
-                    // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
-                    // the decoder is at index 1.
-                    final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
-                    final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
-                            firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
-                    holder.set(hasStrongDistractor);
-                }
-            }
-        };
-        mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(),
-                true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
-                null /* additionalFeaturesOptions */, 0 /* sessionId */,
-                SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
+        @Override
+        public void close() {
+        }
 
-        return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
+        @Override
+        public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
+        }
     }
 }
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java
new file mode 100644
index 0000000..92033b7
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.text.InputType;
+import android.util.Log;
+import android.view.inputmethod.EditorInfo;
+import android.view.inputmethod.InputMethodSubtype;
+
+import com.android.inputmethod.keyboard.Keyboard;
+import com.android.inputmethod.keyboard.KeyboardId;
+import com.android.inputmethod.keyboard.KeyboardLayoutSet;
+import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.DictionaryFacilitator;
+import com.android.inputmethod.latin.PrevWordsInfo;
+import com.android.inputmethod.latin.Suggest;
+import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
+import com.android.inputmethod.latin.SuggestedWords;
+import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.WordComposer;
+
+/**
+ * This class is used to prevent distracters being added to personalization
+ * or user history dictionaries.
+ *
+ * A word is tested by requesting suggestions for it from {@link Suggest}, with the keyboard
+ * and dictionaries of the word's locale, and looking at the score of the first suggestion.
+ */
+public class DistracterFilterUsingSuggestion implements DistracterFilter {
+    private static final String TAG = DistracterFilterUsingSuggestion.class.getSimpleName();
+
+    private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
+
+    private final Context mContext;
+    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
+    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
+    private final DictionaryFacilitator mDictionaryFacilitator;
+    private final Suggest mSuggest;
+    private Keyboard mKeyboard;
+    // Guards mLocaleToSubtypeMap and mLocaleToKeyboardMap.
+    private final Object mLock = new Object();
+
+    // If the score of the top suggestion exceeds this value, the tested word (e.g.,
+    // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
+    // words in dictionary. The greater the threshold is, the less likely the tested word would
+    // become a distracter, which means the tested word will be more likely to be added to
+    // the dictionary.
+    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
+
+    /**
+     * Create a DistracterFilter instance.
+     *
+     * @param context the context.
+     */
+    public DistracterFilterUsingSuggestion(final Context context) {
+        mContext = context;
+        mLocaleToSubtypeMap = new HashMap<>();
+        mLocaleToKeyboardMap = new HashMap<>();
+        mDictionaryFacilitator = new DictionaryFacilitator();
+        mSuggest = new Suggest(mDictionaryFacilitator);
+        mKeyboard = null;
+    }
+
+    /**
+     * Close the dictionaries held by this filter.
+     */
+    @Override
+    public void close() {
+        mDictionaryFacilitator.closeDictionaries();
+    }
+
+    /**
+     * Replace the enabled subtypes. Only the first subtype found for a locale is kept. If the
+     * resulting locale to subtype mapping differs from the current one, the cached keyboards
+     * are dropped as well.
+     *
+     * @param enabledSubtypes the enabled subtypes. null is handled like an empty list.
+     */
+    @Override
+    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
+        final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
+        if (enabledSubtypes != null) {
+            for (final InputMethodSubtype subtype : enabledSubtypes) {
+                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
+                if (newLocaleToSubtypeMap.containsKey(locale)) {
+                    // Multiple subtypes are enabled for one locale.
+                    // TODO: Investigate what we should do for this case.
+                    continue;
+                }
+                newLocaleToSubtypeMap.put(locale, subtype);
+            }
+        }
+        synchronized (mLock) {
+            // The comparison reads the shared map, so it has to happen under the lock too.
+            if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
+                // Enabled subtypes have not been changed.
+                return;
+            }
+            mLocaleToSubtypeMap.clear();
+            mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
+            mLocaleToKeyboardMap.clear();
+        }
+    }
+
+    /**
+     * Tell whether a suggestion is strong enough to make the considered word a distracter.
+     *
+     * @param suggestion the suggestion to check. May be null.
+     * @param consideredWord the word the suggestion is compared with.
+     * @param distracterThreshold the normalized score the suggestion has to exceed.
+     * @return true if the normalized score of the suggestion exceeds the threshold.
+     */
+    private static boolean suggestionExceedsDistracterThreshold(
+            final SuggestedWordInfo suggestion, final String consideredWord,
+            final float distracterThreshold) {
+        if (null == suggestion) {
+            return false;
+        }
+        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
+                consideredWord, suggestion.mWord, suggestion.mScore);
+        return normalizedScore > distracterThreshold;
+    }
+
+    /**
+     * Make mKeyboard the alphabet keyboard of the given locale, building and caching it when
+     * it is not cached yet. mKeyboard is left untouched when no subtype is known for the
+     * locale.
+     *
+     * @param newLocale the locale to load the keyboard for.
+     */
+    private void loadKeyboardForLocale(final Locale newLocale) {
+        final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
+        if (cachedKeyboard != null) {
+            mKeyboard = cachedKeyboard;
+            return;
+        }
+        final InputMethodSubtype subtype;
+        synchronized (mLock) {
+            subtype = mLocaleToSubtypeMap.get(newLocale);
+        }
+        if (subtype == null) {
+            return;
+        }
+        final EditorInfo editorInfo = new EditorInfo();
+        editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
+        final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
+                mContext, editorInfo);
+        final Resources res = mContext.getResources();
+        final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
+        final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
+        builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
+        builder.setSubtype(subtype);
+        builder.setIsSpellChecker(false /* isSpellChecker */);
+        final KeyboardLayoutSet layoutSet = builder.build();
+        mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
+        // Remember the keyboard; without this the cache lookup above could never hit.
+        mLocaleToKeyboardMap.put(newLocale, mKeyboard);
+    }
+
+    /**
+     * Switch the dictionaries to the given locale and wait for the main dictionary to load.
+     *
+     * @param newlocale the locale to load the dictionaries for.
+     * @throws InterruptedException if interrupted while waiting for the main dictionary.
+     */
+    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
+        mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
+                false /* useContactsDict */, false /* usePersonalizedDicts */,
+                false /* forceReloadMainDictionary */, null /* listener */);
+        mDictionaryFacilitator.waitForLoadingMainDictionary(
+                TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
+    }
+
+    /**
+     * Determine whether a word is a distracter to words in dictionaries.
+     *
+     * @param prevWordsInfo the information of previous words.
+     * @param testedWord the word that will be tested to see whether it is a distracter to words
+     *                   in dictionaries.
+     * @param locale the locale of word.
+     * @return true if testedWord is a distracter, otherwise false.
+     */
+    @Override
+    public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
+            final String testedWord, final Locale locale) {
+        if (locale == null) {
+            return false;
+        }
+        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
+            synchronized (mLock) {
+                if (!mLocaleToSubtypeMap.containsKey(locale)) {
+                    Log.e(TAG, "Locale " + locale + " is not enabled.");
+                    // TODO: Investigate what we should do for disabled locales.
+                    return false;
+                }
+                loadKeyboardForLocale(locale);
+                // Reset dictionaries for the locale.
+                try {
+                    loadDictionariesForLocale(locale);
+                } catch (final InterruptedException e) {
+                    Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter",
+                            e);
+                    // Keep the interruption visible to the code that owns this thread.
+                    Thread.currentThread().interrupt();
+                    return false;
+                }
+            }
+        }
+        // Work on one snapshot so the null check and the uses below see the same keyboard.
+        final Keyboard keyboard = mKeyboard;
+        if (keyboard == null) {
+            return false;
+        }
+        final WordComposer composer = new WordComposer();
+        final int[] codePoints = StringUtils.toCodePointArray(testedWord);
+        final int[] coordinates = keyboard.getCoordinates(codePoints);
+        composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
+
+        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
+        final String consideredWord = trailingSingleQuotesCount > 0 ?
+                testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
+                testedWord;
+        final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
+        final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
+            @Override
+            public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
+                // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
+                // the decoder is at index 1.
+                final boolean hasStrongDistractor = suggestedWords != null
+                        && suggestedWords.size() > 1
+                        && suggestionExceedsDistracterThreshold(suggestedWords.getInfo(1),
+                                consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
+                // Always publish a result, also when there is no suggestion to look at.
+                // NOTE(review): presumably holder.get() below would otherwise wait for its
+                // whole timeout before returning the default - confirm in AsyncResultHolder.
+                holder.set(hasStrongDistractor);
+            }
+        };
+        mSuggest.getSuggestedWords(composer, prevWordsInfo, keyboard.getProximityInfo(),
+                true /* blockOffensiveWords */, true /* isCorrectionEnabled */,
+                null /* additionalFeaturesOptions */, 0 /* sessionId */,
+                SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
+
+        return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
index 36543cc..9c759ed 100644
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -24,6 +24,7 @@
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
 
 // Note: this class is used as a parameter type of a native method. You should be careful when you
@@ -79,14 +80,14 @@
 
     // Process a list of words and return a list of {@link LanguageModelParam} objects.
     public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
-            final ArrayList<String> tokens, final int timestamp,
+            final List<String> tokens, final int timestamp,
             final DictionaryFacilitator dictionaryFacilitator,
             final SpacingAndPunctuations spacingAndPunctuations,
             final DistracterFilter distracterFilter) {
         final ArrayList<LanguageModelParam> languageModelParams =
                 CollectionUtils.newArrayList();
         final int N = tokens.size();
-        PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+        PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         for (int i = 0; i < N; ++i) {
             final String tempWord = tokens.get(i);
             if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
@@ -103,7 +104,7 @@
                             + tempWord + "\"");
                 }
                 // Sentence terminator found. Split.
-                prevWordsInfo = new PrevWordsInfo(null);
+                prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
                 continue;
             }
             if (DEBUG_TOKEN) {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 5ad2114..e41fe1d 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -178,10 +178,10 @@
         jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
         jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
         jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
-        jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount,
-        jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
-        jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
-        jfloatArray inOutLanguageWeight) {
+        jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
+        jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
+        jintArray outSpaceIndicesArray, jintArray outTypesArray,
+        jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
     JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@@ -274,7 +274,7 @@
 }
 
 static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
-        jlong dict, jintArray word0, jintArray word1) {
+        jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return JNI_FALSE;
     const jsize word0Length = env->GetArrayLength(word0);
@@ -283,7 +283,7 @@
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
 }
 
@@ -326,7 +326,8 @@
 
 static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
         jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
-        jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
+        jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
+        jint timestamp) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -341,13 +342,14 @@
         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
     }
     // Use 1 for count to indicate the word has inputted.
-    const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+    const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
             isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
     dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
 }
 
 static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
-        jintArray word0, jintArray word1, jint probability, jint timestamp) {
+        jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
+        jint timestamp) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -363,13 +365,12 @@
     // Use 1 for count to indicate the bigram has inputted.
     const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
             timestamp, 0 /* level */, 1 /* count */);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
-            false /* isBeginningOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
 }
 
 static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
-        jintArray word0, jintArray word1) {
+        jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -380,8 +381,7 @@
     jsize word1Length = env->GetArrayLength(word1);
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
-            false /* isBeginningOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
 }
 
@@ -625,7 +625,7 @@
     },
     {
         const_cast<char *>("getSuggestionsNative"),
-        const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"),
+        const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
     },
     {
@@ -635,7 +635,7 @@
     },
     {
         const_cast<char *>("getBigramProbabilityNative"),
-        const_cast<char *>("(J[I[I)I"),
+        const_cast<char *>("(J[IZ[I)I"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
     },
     {
@@ -651,17 +651,17 @@
     },
     {
         const_cast<char *>("addUnigramWordNative"),
-        const_cast<char *>("(J[II[IIZZI)V"),
+        const_cast<char *>("(J[II[IIZZZI)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
     },
     {
         const_cast<char *>("addBigramWordsNative"),
-        const_cast<char *>("(J[I[III)V"),
+        const_cast<char *>("(J[IZ[III)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
     },
     {
         const_cast<char *>("removeBigramWordsNative"),
-        const_cast<char *>("(J[I[I)V"),
+        const_cast<char *>("(J[IZ[I)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
     },
     {
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index e4de1f4..a58000a 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -20,11 +20,11 @@
 #include "defines.h"
 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
 
 namespace latinime {
 
 // TODO: Support n-gram.
-// TODO: Support beginning of sentence.
 // This class does not take ownership of any code point buffers.
 class PrevWordsInfo {
  public:
@@ -52,8 +52,7 @@
 
     void getPrevWordsTerminalPtNodePos(
             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
-            int *const outPrevWordsTerminalPtNodePos,
-            const bool tryLowerCaseSearch) const {
+            int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
         for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
             outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
                     mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
@@ -63,17 +62,11 @@
 
     BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
-        int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
-                mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
-        // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
-        // dictionary or has no bigrams
-        if (NOT_A_DICT_POS == pos) {
-            // If no bigrams for this exact word, search again in lower case.
-            pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
-                    mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
-        }
-        return BinaryDictionaryBigramsIterator(
-                dictStructurePolicy->getBigramsStructurePolicy(), pos);
+        const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
+                dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
+                mIsBeginningOfSentence[0]);
+        return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
+                bigramListPos);
     }
 
     // n is 1-indexed.
@@ -102,8 +95,18 @@
         if (!dictStructurePolicy || !wordCodePoints) {
             return NOT_A_DICT_POS;
         }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
         const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
-                wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
+                codePoints, codePointCount, false /* forceLowerCaseSearch */);
         if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
             // Return the position when when the word was found or doesn't try lower case
             // search.
@@ -112,7 +115,33 @@
         // Check bigrams for lower-cased previous word if original was not found. Useful for
         // auto-capitalized words like "The [current_word]".
         return dictStructurePolicy->getTerminalPtNodePositionOfWord(
-                wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
+                codePoints, codePointCount, true /* forceLowerCaseSearch */);
+    }
+
+    // Returns the bigram list position of the given previous word, trying a lower-case search
+    // when the exact word has none. Returns NOT_A_DICT_POS when the word cannot be looked up.
+    static int getBigramListPositionForWordWithTryingLowerCaseSearch(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence) {
+        if (wordCodePointCount < 0 || wordCodePointCount > MAX_WORD_LENGTH) {
+            return NOT_A_DICT_POS; // The copy below would overrun the local buffer.
+        }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        const int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                codePointCount, false /* forceLowerCaseSearch */);
+        // Word missing or without bigrams (NOT_A_DICT_POS): search again in lower case.
+        return (NOT_A_DICT_POS != pos) ? pos : getBigramListPositionForWord(
+                dictStructurePolicy, codePoints, codePointCount, true /* forceLowerCaseSearch */);
     }
 
     static int getBigramListPositionForWord(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 281c5a8..75f4fef 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -139,6 +139,8 @@
         switch (mDictFormatVersion) {
             case FormatUtils::VERSION_2:
                 return FormatUtils::VERSION_2;
+            case FormatUtils::VERSION_401:
+                return FormatUtils::VERSION_401;
             case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                 return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
             case FormatUtils::VERSION_4:
@@ -247,7 +249,7 @@
     }
 
     bool supportsBeginningOfSentence() const {
-        return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+        return mDictFormatVersion > FormatUtils::VERSION_401;
     }
 
  private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index a8f8f28..b13ad18 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -98,6 +98,7 @@
         case FormatUtils::VERSION_2:
             // Version 2 dictionary writing is not supported.
             return false;
+        case FormatUtils::VERSION_401:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index f93d289..93e330a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -57,13 +57,14 @@
                 const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
     FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
     switch (dictFormatVersion) {
-        case FormatUtils::VERSION_4: {
+        case FormatUtils::VERSION_401: {
             return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
                     backward::v401::Ver4PatriciaTriePolicy>(
                             dictFormatVersion, locale, attributeMap);
         }
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV: {
             return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -115,13 +116,14 @@
         case FormatUtils::VERSION_2:
             AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
             break;
-        case FormatUtils::VERSION_4: {
+        case FormatUtils::VERSION_401: {
             return newPolicyForV4Dict<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
                     backward::v401::Ver4PatriciaTriePolicy>(
                             headerFilePath, formatVersion, std::move(mmappedBuffer));
         }
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV: {
             return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -177,6 +179,7 @@
         case FormatUtils::VERSION_2:
             return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                     new PatriciaTriePolicy(std::move(mmappedBuffer)));
+        case FormatUtils::VERSION_401:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
index 028e9ec..1f00fc6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -56,7 +56,7 @@
         }
     } else {
         mValueStack.back() += 1;
-        if (ptNodeParams->isTerminal()) {
+        if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
             mValidUnigramCount += 1;
         }
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 33e60e2..b2e60a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,7 @@
     }
 
     AK_FORCE_INLINE bool representsNonWordInfo() const {
-        return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+        return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
                 && isNotAWord();
     }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 4495def..0247870 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@
             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
         }
         readingHelper.readNextSiblingNode(ptNodeParams);
-        if (!ptNodeParams.representsNonWordInfo()) {
+        if (ptNodeParams.representsNonWordInfo()) {
             // Skip PtNodes that represent non-word information.
             continue;
         }
@@ -181,9 +181,19 @@
     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     readingHelper.initWithPtNodeArrayPos(getRootPosition());
     bool addedNewUnigram = false;
-    if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
             unigramProperty, &addedNewUnigram)) {
-        if (addedNewUnigram) {
+        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
             mUnigramCount++;
         }
         if (unigramProperty->getShortcuts().size() > 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 105363d..a04551a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -41,11 +41,12 @@
     TimeKeeper::setCurrentTime();
     const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
     switch (formatVersion) {
-        case FormatUtils::VERSION_4:
+        case FormatUtils::VERSION_401:
             return createEmptyV4DictFile<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>(
                             filePath, localeAsCodePointVector, attributeMap, formatVersion);
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV:
             return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index ba405b0..18f5580 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -29,6 +29,8 @@
     switch (formatVersion) {
         case VERSION_2:
             return VERSION_2;
+        case VERSION_401:
+            return VERSION_401;
         case VERSION_4_ONLY_FOR_TESTING:
             return VERSION_4_ONLY_FOR_TESTING;
         case VERSION_4:
@@ -60,6 +62,8 @@
             // same so we use them for both here.
             if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
                 return VERSION_2;
+            } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_401) {
+                return VERSION_401;
             } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
                 return VERSION_4_ONLY_FOR_TESTING;
             } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index c47f30c..b05cb2f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,8 +32,9 @@
         // These MUST have the same values as the relevant constants in FormatSpec.java.
         VERSION_2 = 2,
         VERSION_4_ONLY_FOR_TESTING = 399,
-        VERSION_4 = 401,
-        VERSION_4_DEV = 402,
+        VERSION_401 = 401,
+        VERSION_4 = 402,
+        VERSION_4_DEV = 403,
         UNKNOWN_VERSION = -1
     };
 
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 634c45b..f28ed56 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -18,6 +18,7 @@
 #define LATINIME_CHAR_UTILS_H
 
 #include <cctype>
+#include <cstring>
 #include <vector>
 
 #include "defines.h"
@@ -93,6 +94,19 @@
     static unsigned short latin_tolower(const unsigned short c);
     static const std::vector<int> EMPTY_STRING;
 
+    // Prepends CODE_POINT_BEGINNING_OF_SENTENCE to codePoints in place and returns the updated
+    // count, or 0 when codePointCount >= maxCodePoint so the marker cannot be attached.
+    static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+            const int codePointCount, const int maxCodePoint) {
+        if (codePointCount >= maxCodePoint) {
+            // maxCodePoint bounds the code point count, so there is no slot left for the marker.
+            return 0;
+        }
+        memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
+        codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+        return codePointCount + 1;
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
 
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 2c2fed3..3349a32 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -63,12 +63,16 @@
         super.tearDown();
     }
 
+    private static boolean supportsBeginningOfSentence(final int formatVersion) {
+        return formatVersion > FormatSpec.VERSION401;
+    }
+
     private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
             final int probability) {
         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
-                false /* isNotAWord */, false /* isBlacklisted */,
-                mCurrentTime /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, mCurrentTime /* timestamp */);
     }
 
     private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -631,4 +635,57 @@
         binaryDictionary.close();
         dictFile.delete();
     }
+
+    public void testBeginningOfSentence() {
+        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+            if (supportsBeginningOfSentence(formatVersion)) {
+                testBeginningOfSentence(formatVersion);
+            }
+        }
+    }
+
+    private void testBeginningOfSentence(final int formatVersion) {
+        setCurrentTimeForTestMode(mCurrentTime);
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+        // Register the beginning-of-sentence entry itself before attaching n-grams to it.
+        binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
+                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+                true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+                mCurrentTime);
+        final PrevWordsInfo startOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+        addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(startOfSentence, "aaa", DUMMY_PROBABILITY, mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(startOfSentence, "aaa"));
+        // Add the same n-gram a second time, then a second target word.
+        binaryDictionary.addNgramEntry(startOfSentence, "aaa", DUMMY_PROBABILITY, mCurrentTime);
+        addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(startOfSentence, "bbb", DUMMY_PROBABILITY, mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(startOfSentence, "aaa"));
+        assertTrue(binaryDictionary.isValidNgram(startOfSentence, "bbb"));
+
+        // After a long time has passed, neither n-gram is expected to be valid any more.
+        forcePassingLongTime(binaryDictionary);
+        assertFalse(binaryDictionary.isValidNgram(startOfSentence, "aaa"));
+        assertFalse(binaryDictionary.isValidNgram(startOfSentence, "bbb"));
+
+        // Re-adding the words and their n-grams makes them valid again.
+        addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(startOfSentence, "aaa", DUMMY_PROBABILITY, mCurrentTime);
+        addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(startOfSentence, "bbb", DUMMY_PROBABILITY, mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(startOfSentence, "aaa"));
+        assertTrue(binaryDictionary.isValidNgram(startOfSentence, "bbb"));
+        // Release the dictionary and remove the temporary dictionary file.
+        binaryDictionary.close();
+        dictFile.delete();
+    }
 }
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 2b82e54..7938409 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -47,7 +47,11 @@
             new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
 
     private static boolean canCheckBigramProbability(final int formatVersion) {
-        return formatVersion >= FormatSpec.VERSION4_DEV;
+        return formatVersion > FormatSpec.VERSION401;
+    }
+
+    private static boolean supportsBeginningOfSentence(final int formatVersion) {
+        return formatVersion > FormatSpec.VERSION401;
     }
 
     private File createEmptyDictionaryAndGetFile(final String dictId,
@@ -171,7 +175,8 @@
         addUnigramWord(binaryDictionary, invalidLongWord, probability);
         // Too long short cut.
         binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
-                10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
+                10 /* shortcutProbability */, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */,
                 BinaryDictionary.NOT_A_VALID_TIMESTAMP);
         addUnigramWord(binaryDictionary, "abc", probability);
         final int updatedProbability = 200;
@@ -192,8 +197,8 @@
             final int probability) {
         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
-                false /* isNotAWord */, false /* isBlacklisted */,
-                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
     }
 
     private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -1010,7 +1015,8 @@
             // TODO: Add tests for historical info.
             binaryDictionary.addUnigramEntry(word, unigramProbability,
                     null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
-                    isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+                    false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
+                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                 binaryDictionary.flushWithGC();
             }
@@ -1188,24 +1194,24 @@
         final int unigramProbability = 100;
         final int shortcutProbability = 10;
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
-                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                shortcutProbability, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
         WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
         assertEquals(1, wordProperty.mShortcutTargets.size());
         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
         assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
         final int updatedShortcutProbability = 2;
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
-                updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                updatedShortcutProbability, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
         wordProperty = binaryDictionary.getWordProperty("aaa");
         assertEquals(1, wordProperty.mShortcutTargets.size());
         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
         assertEquals(updatedShortcutProbability,
                 wordProperty.mShortcutTargets.get(0).getProbability());
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
-                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, 0 /* timestamp */);
         final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
         shortcutTargets.put("zzz", updatedShortcutProbability);
         shortcutTargets.put("yyy", shortcutProbability);
@@ -1275,8 +1281,8 @@
             final String word = words.get(random.nextInt(words.size()));
             final int unigramProbability = unigramProbabilities.get(word);
             binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
-                    shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                    0 /* timestamp */);
+                    shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+                    false /* isBlacklisted */, 0 /* timestamp */);
             if (shortcutTargets.containsKey(word)) {
                 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@@ -1331,10 +1337,11 @@
         addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
         final int shortcutProbability = 10;
         binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
-                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, 0 /* timestamp */);
         binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
-                Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */,
-                true /* isBlacklisted */, 0 /* timestamp */);
+                Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
+                true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
@@ -1434,4 +1441,46 @@
         assertEquals(bigramProbabilities.size(), Integer.parseInt(
                 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
     }
+
+    public void testBeginningOfSentence() {
+        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+            if (supportsBeginningOfSentence(formatVersion)) {
+                testBeginningOfSentence(formatVersion);
+            }
+        }
+    }
+
+    private void testBeginningOfSentence(final int formatVersion) {
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+        final int dummyProbability = 0;
+        final int noTimestamp = BinaryDictionary.NOT_A_VALID_TIMESTAMP;
+        // Register the beginning-of-sentence entry itself before attaching n-grams to it.
+        binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */,
+                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+                true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+                noTimestamp /* timestamp */);
+        final PrevWordsInfo sentenceStart = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+        final int bigramProbability = 200;
+        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
+        binaryDictionary.addNgramEntry(sentenceStart, "aaa", bigramProbability, noTimestamp);
+        assertEquals(bigramProbability, binaryDictionary.getNgramProbability(sentenceStart, "aaa"));
+        binaryDictionary.addNgramEntry(sentenceStart, "aaa", bigramProbability, noTimestamp);
+        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
+        binaryDictionary.addNgramEntry(sentenceStart, "bbb", bigramProbability, noTimestamp);
+        // The n-gram probabilities must still be readable after flushing with GC.
+        binaryDictionary.flushWithGC();
+        assertEquals(bigramProbability, binaryDictionary.getNgramProbability(sentenceStart, "aaa"));
+        assertEquals(bigramProbability, binaryDictionary.getNgramProbability(sentenceStart, "bbb"));
+        // Release the dictionary and remove the temporary dictionary file.
+        binaryDictionary.close();
+        dictFile.delete();
+    }
 }
diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
index e98f9ea..33f3794 100644
--- a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
+++ b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
@@ -20,23 +20,24 @@
 
 import android.test.suitebuilder.annotation.LargeTest;
 
-import com.android.inputmethod.latin.utils.DistracterFilter;
+import com.android.inputmethod.latin.utils.DistracterFilterUsingSuggestion;
 
 /**
  * Unit test for DistracterFilter
  */
 @LargeTest
 public class DistracterFilterTest extends InputTestsBase {
-    private DistracterFilter mDistracterFilter;
+    private DistracterFilterUsingSuggestion mDistracterFilter;
 
     @Override
     protected void setUp() throws Exception {
         super.setUp();
-        mDistracterFilter = mLatinIME.createDistracterFilter();
+        mDistracterFilter = new DistracterFilterUsingSuggestion(getContext());
+        mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest());
     }
 
     public void testIsDistractorToWordsInDictionaries() {
-        final PrevWordsInfo EMPTY_PREV_WORDS_INFO = new PrevWordsInfo(null);
+        final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
 
         final Locale localeEnUs = new Locale("en", "US");
         String typedWord = "alot";
diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
index f3351ff..c471eca 100644
--- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
+++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
@@ -157,10 +157,10 @@
         // If one of the following cases breaks, the bigram suggestions won't work.
         assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                 "abc def", mSpacingAndPunctuations, 2).mPrevWord, "abc");
-        assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
-                "abc", mSpacingAndPunctuations, 2).mPrevWord);
-        assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
-                "abc. def", mSpacingAndPunctuations, 2).mPrevWord);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc. def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
 
         assertFalse(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                 "abc def", mSpacingAndPunctuations, 2).mIsBeginningOfSentence);
@@ -180,16 +180,22 @@
         assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                 "abc def .", mSpacingAndPunctuations, 2).mPrevWord, "def");
         assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
-                "abc ", mSpacingAndPunctuations, 2).mPrevWord, null);
+                "abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
 
         assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                 "abc def", mSpacingAndPunctuations, 1).mPrevWord, "def");
         assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                 "abc def ", mSpacingAndPunctuations, 1).mPrevWord, "def");
-        assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
-                "abc def.", mSpacingAndPunctuations, 1).mPrevWord);
-        assertNull(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
-                "abc def .", mSpacingAndPunctuations, 1).mPrevWord);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
+        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
+                "abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
     }
 
     /**
diff --git a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
index 6fc9df7..f3756c2 100644
--- a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
+++ b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
@@ -78,4 +78,48 @@
         runMessages();
         assertTrue("Caps after a while after repeating Backspace a lot", isCapsModeAutoShifted());
     }
+
+    public void testAutoCapsAfterDigitsPeriod() { // digits + '.', checked for en, fr and de
+        changeLanguage("en");
+        type("On 22.11."); // no whitespace typed yet, so auto-caps must still be off
+        assertFalse("(English) Auto caps after digits-period", isCapsModeAutoShifted());
+        type(" "); // the whitespace after the period is what should switch auto-caps on
+        assertTrue("(English) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+        mEditText.setText(""); // start the French scenario from an empty field
+        changeLanguage("fr");
+        type("Le 22.");
+        assertFalse("(French) Auto caps after digits-period", isCapsModeAutoShifted());
+        type(" "); // French is expected to behave like English here
+        assertTrue("(French) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+        mEditText.setText(""); // start the German scenario from an empty field
+        changeLanguage("de");
+        type("Am 22.");
+        assertFalse("(German) Auto caps after digits-period", isCapsModeAutoShifted());
+        type(" ");
+        // German is the exception: even digits-period-whitespace must not trigger auto-caps.
+        assertFalse("(German) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+    }
+
+    public void testAutoCapsAfterInvertedMarks() { // Spanish: ¿ and ¡ must leave auto-caps on
+        changeLanguage("es");
+        assertTrue("(Spanish) Auto caps at start", isCapsModeAutoShifted());
+        type("Hey. ¿"); // inverted question mark typed after a finished sentence
+        assertTrue("(Spanish) Auto caps after inverted what", isCapsModeAutoShifted());
+        mEditText.setText(""); // second scenario: the mark is the very first character
+        type("¡"); // inverted exclamation mark
+        assertTrue("(Spanish) Auto caps after inverted bang", isCapsModeAutoShifted());
+    }
+
+    public void testOtherSentenceSeparators() { // Armenian uses its own sentence terminator
+        changeLanguage("hy-AM");
+        assertTrue("(Armenian) Auto caps at start", isCapsModeAutoShifted());
+        type("Hey. "); // a Latin '.' must not count as a sentence separator in this locale
+        assertFalse("(Armenian) No auto-caps after latin period", isCapsModeAutoShifted());
+        type("Hey\u0589"); // U+0589 is the Armenian full stop; no whitespace typed yet
+        assertFalse("(Armenian) No auto-caps directly after armenian period",
+                isCapsModeAutoShifted());
+        type(" "); // Armenian full stop followed by whitespace should switch auto-caps on
+        assertTrue("(Armenian) Auto-caps after armenian period-whitespace",
+                isCapsModeAutoShifted());
+    }
 }
diff --git a/tests/src/com/android/inputmethod/latin/WordComposerTests.java b/tests/src/com/android/inputmethod/latin/WordComposerTests.java
index 17e7185..274555a 100644
--- a/tests/src/com/android/inputmethod/latin/WordComposerTests.java
+++ b/tests/src/com/android/inputmethod/latin/WordComposerTests.java
@@ -74,7 +74,7 @@
                 CoordinateUtils.newCoordinateArray(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR.length,
                         Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
         wc.setComposingWord(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR, COORDINATES_WITH_SUPPLEMENTARY_CHAR,
-                new PrevWordsInfo(null));
+                PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
         assertEquals(wc.size(), CODEPOINTS_WITH_SUPPLEMENTARY_CHAR.length);
         assertFalse(wc.isCursorFrontOrMiddleOfComposingWord());
         wc.setCursorPositionWithinWord(3);
@@ -109,7 +109,7 @@
         assertEquals(PREV_WORDS_INFO_STR_WITHIN_BMP, wc.getPrevWordsInfoForSuggestion());
 
 
-        final PrevWordsInfo PREV_WORDS_INFO_NULL = new PrevWordsInfo(null);
+        final PrevWordsInfo PREV_WORDS_INFO_NULL = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         wc.setComposingWord(CODEPOINTS_WITH_SUPPLEMENTARY_CHAR, COORDINATES_WITH_SUPPLEMENTARY_CHAR,
                 PREV_WORDS_INFO_NULL);
         wc.setCursorPositionWithinWord(3);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index a04b810..0528e34 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -77,14 +77,14 @@
             if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
                 binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
                         null /* shortcutTarget */, 0 /* shortcutProbability */,
-                        wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
-                        0 /* timestamp */);
+                        wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+                        wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
             } else {
                 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                     binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
                             shortcutTarget.mWord, shortcutTarget.getProbability(),
-                            wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
-                            0 /* timestamp */);
+                            wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+                            wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
                 }
             }
             if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
index bc86864..7d3214a 100644
--- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
@@ -110,7 +110,7 @@
     }
 
     private static void addToDict(final UserHistoryDictionary dict, final List<String> words) {
-        PrevWordsInfo prevWordsInfo = new PrevWordsInfo(null);
+        PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
         for (String word : words) {
             UserHistoryDictionary.addToDictionary(dict, prevWordsInfo, word, true,
                     (int)TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()));
diff --git a/tools/dicttool/compat/android/test/AndroidTestCase.java b/tools/dicttool/compat/android/test/AndroidTestCase.java
index d01b7ad..f765ce0 100644
--- a/tools/dicttool/compat/android/test/AndroidTestCase.java
+++ b/tools/dicttool/compat/android/test/AndroidTestCase.java
@@ -16,6 +16,8 @@
 
 package android.test;
 
+import com.android.inputmethod.latin.dicttool.Test;
+
 import junit.framework.TestCase;
 
 import java.io.File;
@@ -27,7 +29,11 @@
  */
 public class AndroidTestCase extends TestCase {
     public File getCacheDir() {
-        return new File(".");
+        final File dir = Test.TEST_TMP_DIR; // scratch directory declared by the dicttool Test
+        if (!dir.isDirectory() && !dir.mkdirs()) { // (re)create on demand; fail if impossible
+            throw new IllegalStateException("Can't create temporary directory " + dir);
+        }
+        return dir;
     }
     public AndroidTestCase getContext() {
         return this;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index 48817b1..33661c8 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -19,16 +19,29 @@
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
 import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
 import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
+import com.android.inputmethod.latin.utils.FileUtils;
 
+import java.io.File;
+import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.nio.file.Files;
 import java.util.ArrayList;
 
 /**
  * Dicttool command implementing self-tests.
  */
 public class Test extends Dicttool.Command {
+    private static String getTmpDir() { // supplies the value of TEST_TMP_DIR_BASE
+        try {
+            return Files.createTempDirectory("dicttool").toString(); // new, uniquely named dir
+        } catch (IOException e) {
+            throw new RuntimeException("Can't get temporary directory", e); // keep the cause
+        }
+    }
+    private static final String TEST_TMP_DIR_BASE = getTmpDir();
+    public static final File TEST_TMP_DIR = new File(TEST_TMP_DIR_BASE);
     public static final String COMMAND = "test";
     private static final int DEFAULT_MAX_UNIGRAMS = 1500;
     private long mSeed = System.currentTimeMillis();
@@ -56,8 +69,12 @@
 
     @Override
     public String getHelp() {
-        final StringBuilder s = new StringBuilder("test [-s seed] [-m maxUnigrams] [testName...]\n"
-                + "If seed is not specified, the current time is used.\nTest list is:\n");
+        final StringBuilder s = new StringBuilder(
+                "test [-s seed] [-m maxUnigrams] [-n] [testName...]\n"
+                + "If seed is not specified, the current time is used.\n"
+                + "If -n option is provided, do not delete temporary files in "
+                + TEST_TMP_DIR_BASE + "/*.\n"
+                + "Test list is:\n");
         for (final Method m : mAllTestMethods) {
             s.append("  ");
             s.append(m.getName());
@@ -70,17 +87,26 @@
     public void run() throws IllegalAccessException, InstantiationException,
             InvocationTargetException {
         int i = 0;
+        boolean deleteTmpDir = true; // default: remove TEST_TMP_DIR once the tests are done
         while (i < mArgs.length) {
             final String arg = mArgs[i++];
             if ("-s".equals(arg)) {
                 mSeed = Long.parseLong(mArgs[i++]);
             } else if ("-m".equals(arg)) {
                 mMaxUnigrams = Integer.parseInt(mArgs[i++]);
+            } else if ("-n".equals(arg)) { // -n: keep the temporary files after the run
+                deleteTmpDir = false;
             } else {
                 mUsedTestMethods.add(arg);
             }
         }
-        runChosenTests();
+        try {
+            runChosenTests();
+        } finally { // clean up whether the tests returned normally or threw
+            if (deleteTmpDir) {
+                FileUtils.deleteRecursively(TEST_TMP_DIR);
+            }
+        }
     }
 
     private void runChosenTests() throws IllegalAccessException, InstantiationException,