[Rlog81a] Determine correction type of words

Add a heuristic to determine whether a changed word was a typo correction or
a complete replacement: the change counts as a typo correction if the new word
appears in the list of words suggested for the original input.

Change-Id: Ieec4861a811e96aef0d14622e662b589ef8b4772
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index a48778a..6085cb4 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1647,7 +1647,7 @@
         mExpectingUpdateSelection = true;
         mConnection.endBatchEdit();
         if (ProductionFlag.IS_EXPERIMENTAL) {
-            ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0);
+            ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0, suggestedWords);
         }
         // Space state must be updated before calling updateShiftState
         mSpaceState = SPACE_STATE_PHANTOM;
@@ -2123,8 +2123,9 @@
                 Stats.onAutoCorrection(typedWord, autoCorrection, separatorString, mWordComposer);
             }
             if (ProductionFlag.IS_EXPERIMENTAL) {
+                final SuggestedWords suggestedWords = mSuggestionStripView.getSuggestions();
                 ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection,
-                        separatorString, mWordComposer.isBatchMode());
+                        separatorString, mWordComposer.isBatchMode(), suggestedWords);
             }
             mExpectingUpdateSelection = true;
             commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD,
diff --git a/java/src/com/android/inputmethod/research/LogUnit.java b/java/src/com/android/inputmethod/research/LogUnit.java
index 715000d..0234bbc 100644
--- a/java/src/com/android/inputmethod/research/LogUnit.java
+++ b/java/src/com/android/inputmethod/research/LogUnit.java
@@ -17,6 +17,7 @@
 package com.android.inputmethod.research;
 
 import android.content.SharedPreferences;
+import android.text.TextUtils;
 import android.util.JsonWriter;
 import android.util.Log;
 import android.view.MotionEvent;
@@ -57,16 +58,32 @@
     // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
     // mTimeList.
     private final ArrayList<Long> mTimeList;
+    // Word that this LogUnit generates.  Should be null if the LogUnit does not generate a genuine
+    // word (i.e. separators alone do not count as a word).  Should never be empty.
     private String mWord;
     private boolean mMayContainDigit;
     private boolean mIsPartOfMegaword;
     private boolean mContainsCorrection;
 
+    // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
+    // to a different word or just a "typo" correction.  It is considered a "typo" if the final
+    // word was listed in the suggestions available the first time the word was gestured or
+    // tapped.
+    private int mCorrectionType;
+    public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
+    public static final int CORRECTIONTYPE_CORRECTION = 1;
+    public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
+    public static final int CORRECTIONTYPE_TYPO = 3;
+
+    private SuggestedWords mSuggestedWords;
+
     public LogUnit() {
         mLogStatementList = new ArrayList<LogStatement>();
         mValuesList = new ArrayList<Object[]>();
         mTimeList = new ArrayList<Long>();
         mIsPartOfMegaword = false;
+        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
+        mSuggestedWords = null;
     }
 
     private LogUnit(final ArrayList<LogStatement> logStatementList,
@@ -77,6 +94,8 @@
         mValuesList = valuesList;
         mTimeList = timeList;
         mIsPartOfMegaword = isPartOfMegaword;
+        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
+        mSuggestedWords = null;
     }
 
     private static final Object[] NULL_VALUES = new Object[0];
@@ -167,6 +186,7 @@
     private static final String UPTIME_KEY = "_ut";
     private static final String EVENT_TYPE_KEY = "_ty";
     private static final String WORD_KEY = "_wo";
+    private static final String CORRECTION_TYPE_KEY = "_corType";
     private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
     private static final String LOG_UNIT_END_KEY = "logUnitEnd";
 
@@ -177,6 +197,7 @@
             jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
             if (canIncludePrivateData) {
                 jsonWriter.name(WORD_KEY).value(getWord());
+                jsonWriter.name(CORRECTION_TYPE_KEY).value(getCorrectionType());
             }
             jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
             jsonWriter.endObject();
@@ -254,7 +275,33 @@
         return true;
     }
 
-    public void setWord(String word) {
+    /**
+     * Mark the current logUnit as containing data to generate {@code word}.
+     *
+     * If {@code setWord()} was previously called for this LogUnit, then the method will try to
+     * determine what kind of correction it is, and update its internal state of the correctionType
+     * accordingly.
+     *
+     * @param word The word this LogUnit generates.  Caller should not pass null or the empty
+     * string.
+     */
+    public void setWord(final String word) {
+        if (mWord != null) {
+            // The word was already set once, and it is now being changed.  If the new word is one
+            // of the suggestions recorded for the original input, the change is probably a typo
+            // correction.  If not, the user may have chosen a different word, so flag it.
+            if (mSuggestedWords != null) {
+                if (isInSuggestedWords(word, mSuggestedWords)) {
+                    mCorrectionType = CORRECTIONTYPE_TYPO;
+                } else {
+                    mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
+                }
+            } else {
+                // No suggested words, so it's not clear whether it's a typo or different word.
+                // Mark it as a generic correction.
+                mCorrectionType = CORRECTIONTYPE_CORRECTION;
+            }
+        }
         mWord = word;
     }
 
@@ -282,6 +329,14 @@
         return mContainsCorrection;
     }
 
+    public void setCorrectionType(final int correctionType) {
+        mCorrectionType = correctionType;
+    }
+
+    public int getCorrectionType() {
+        return mCorrectionType;
+    }
+
     public boolean isEmpty() {
         return mLogStatementList.isEmpty();
     }
@@ -328,8 +383,43 @@
         mValuesList.addAll(logUnit.mValuesList);
         mTimeList.addAll(logUnit.mTimeList);
         mWord = null;
+        if (logUnit.mWord != null) {
+            setWord(logUnit.mWord);
+        }
         mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
         mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
         mIsPartOfMegaword = false;
     }
+
+    public SuggestedWords getSuggestions() {
+        return mSuggestedWords;
+    }
+
+    /**
+     * Initialize the suggestions.
+     *
+     * Once set to a non-null value, the suggestions may not be changed again.  This is to keep
+     * track of the list of words that are close to the user's initial effort to type the word.
+     * Only words that are close to the initial effort are considered typo corrections.
+     */
+    public void initializeSuggestions(final SuggestedWords suggestedWords) {
+        if (mSuggestedWords == null) {
+            mSuggestedWords = suggestedWords;
+        }
+    }
+
+    private static boolean isInSuggestedWords(final String queryWord,
+            final SuggestedWords suggestedWords) {
+        if (TextUtils.isEmpty(queryWord)) {
+            return false;
+        }
+        final int size = suggestedWords.size();
+        for (int i = 0; i < size; i++) {
+            final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i);
+            if (queryWord.equals(wordInfo.mWord)) {
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index 0a24af6..29bc708 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -745,6 +745,10 @@
         mCurrentLogUnit.setContainsCorrection();
     }
 
+    private void setCurrentLogUnitCorrectionType(final int correctionType) {
+        mCurrentLogUnit.setCorrectionType(correctionType);
+    }
+
     /* package for test */ void commitCurrentLogUnit() {
         if (DEBUG) {
             Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
@@ -1194,13 +1198,17 @@
                     "suggestion", "x", "y");
     public static void latinIME_pickSuggestionManually(final String replacedWord,
             final int index, final String suggestion, final boolean isBatchMode) {
-        final String scrubbedWord = scrubDigitsFromString(suggestion);
         final ResearchLogger researchLogger = getInstance();
+        if (!replacedWord.equals(suggestion.toString())) {
+            // The user chose something other than what was already there.
+            researchLogger.setCurrentLogUnitContainsCorrection();
+            researchLogger.setCurrentLogUnitCorrectionType(LogUnit.CORRECTIONTYPE_TYPO);
+        }
+        final String scrubbedWord = scrubDigitsFromString(suggestion);
         researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_PICKSUGGESTIONMANUALLY,
                 scrubDigitsFromString(replacedWord), index,
                 suggestion == null ? null : scrubbedWord, Constants.SUGGESTION_STRIP_COORDINATE,
                 Constants.SUGGESTION_STRIP_COORDINATE);
-        researchLogger.setCurrentLogUnitContainsCorrection();
         researchLogger.commitCurrentLogUnitAsWord(scrubbedWord, Long.MAX_VALUE, isBatchMode);
         researchLogger.mStatistics.recordManualSuggestion(SystemClock.uptimeMillis());
     }
@@ -1490,10 +1498,12 @@
             new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
                     "autoCorrection", "separatorString");
     public static void latinIme_commitCurrentAutoCorrection(final String typedWord,
-            final String autoCorrection, final String separatorString, final boolean isBatchMode) {
+            final String autoCorrection, final String separatorString, final boolean isBatchMode,
+            final SuggestedWords suggestedWords) {
         final String scrubbedTypedWord = scrubDigitsFromString(typedWord);
         final String scrubbedAutoCorrection = scrubDigitsFromString(autoCorrection);
         final ResearchLogger researchLogger = getInstance();
+        researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords);
         researchLogger.commitCurrentLogUnitAsWord(scrubbedAutoCorrection, Long.MAX_VALUE,
                 isBatchMode);
 
@@ -1691,10 +1701,11 @@
             new LogStatement("LatinIMEOnEndBatchInput", true, false, "enteredText",
                     "enteredWordPos");
     public static void latinIME_onEndBatchInput(final CharSequence enteredText,
-            final int enteredWordPos) {
+            final int enteredWordPos, final SuggestedWords suggestedWords) {
         final ResearchLogger researchLogger = getInstance();
         researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
                 enteredWordPos);
+        researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords);
         researchLogger.mStatistics.recordGestureInput(enteredText.length(),
                 SystemClock.uptimeMillis());
     }