[Rlog78b] Make log privacy filtering decisions on n-grams

Previously, words were pushed out of a LogBuffer one at a time.  The receiving code had to keep
state to know whether an n-gram was safe to log.  This patch looks at the entire n-gram and makes a
single decision based on it alone.

multi-project commit with I3c40d7e02c77943d2668094ddb1d03efb942c74f

Change-Id: Id7d90bbd551b1a2f4e0e35f38852652f68f273f8
diff --git a/java/src/com/android/inputmethod/research/FixedLogBuffer.java b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
index 7771119..5270730 100644
--- a/java/src/com/android/inputmethod/research/FixedLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
@@ -16,6 +16,7 @@
 
 package com.android.inputmethod.research;
 
+import java.util.ArrayList;
 import java.util.LinkedList;
 
 /**
@@ -65,8 +66,13 @@
             super.shiftIn(newLogUnit);
             return;
         }
-        if (mNumActualWords == mWordCapacity) {
-            shiftOutThroughFirstWord();
+        if (mNumActualWords >= mWordCapacity) {
+            // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
+            onBufferFull();
+            // If still full, evict.
+            if (mNumActualWords >= mWordCapacity) {
+                shiftOutWords(1);
+            }
         }
         super.shiftIn(newLogUnit);
         mNumActualWords++; // Must be a word, or we wouldn't be here.
@@ -81,18 +87,8 @@
         return logUnit;
     }
 
-    public void shiftOutThroughFirstWord() {
-        final LinkedList<LogUnit> logUnits = getLogUnits();
-        while (!logUnits.isEmpty()) {
-            final LogUnit logUnit = logUnits.removeFirst();
-            onShiftOut(logUnit);
-            if (logUnit.hasWord()) {
-                // Successfully shifted out a word-containing LogUnit and made space for the new
-                // LogUnit.
-                mNumActualWords--;
-                break;
-            }
-        }
+    public int getNumWords() {
+        return mNumActualWords;
     }
 
     /**
@@ -105,28 +101,63 @@
     }
 
     /**
-     * Called when a LogUnit is removed from the LogBuffer as a result of a shiftIn.  LogUnits are
-     * removed in the order entered.  This method is not called when shiftOut is called directly.
+     * Called when a word is about to be shifted into a buffer that already holds its maximum
+     * number of words, just before LogUnits are shifted out to make room.
      *
      * Base class does nothing; subclasses may override if they want to record non-privacy sensitive
      * events that fall off the end.
      */
-    protected void onShiftOut(final LogUnit logUnit) {
+    protected void onBufferFull() {
     }
 
-    /**
-     * Called to deliberately remove the oldest LogUnit.  Usually called when draining the
-     * LogBuffer.
-     */
     @Override
     public LogUnit shiftOut() {
-        if (isEmpty()) {
-            return null;
-        }
         final LogUnit logUnit = super.shiftOut();
-        if (logUnit.hasWord()) {
+        if (logUnit != null && logUnit.hasWord()) {
             mNumActualWords--;
         }
         return logUnit;
     }
+
+    protected void shiftOutWords(final int numWords) {
+        final int targetNumWords = mNumActualWords - numWords;
+        final LinkedList<LogUnit> logUnits = getLogUnits();
+        while (mNumActualWords > targetNumWords && !logUnits.isEmpty()) {
+            shiftOut();
+        }
+    }
+
+    public void shiftOutAll() {
+        final LinkedList<LogUnit> logUnits = getLogUnits();
+        while (!logUnits.isEmpty()) {
+            shiftOut();
+        }
+        mNumActualWords = 0;
+    }
+
+    /**
+     * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words.  No
+     * more than {@code n} LogUnits will have words associated with them.  If there are not enough
+     * LogUnits in the buffer to meet the word requirement, returns all of the LogUnits.
+     *
+     * @param n The maximum number of {@link LogUnit}s with words to return.
+     * @return The list of the {@link LogUnit}s containing the first n words
+     */
+    public ArrayList<LogUnit> peekAtFirstNWords(int n) {
+        final LinkedList<LogUnit> logUnits = getLogUnits();
+        final int length = logUnits.size();
+        // Allocate space for n*2 logUnits.  There will be at least n, one for each word, and
+        // there may be additional for punctuation, between-word commands, etc.  This should be
+        // enough that reallocation won't be necessary.
+        final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
+        for (int i = 0; i < length && n > 0; i++) {
+            final LogUnit logUnit = logUnits.get(i);
+            list.add(logUnit);
+            final String word = logUnit.getWord();
+            if (word != null) {
+                n--;
+            }
+        }
+        return list;
+    }
 }
diff --git a/java/src/com/android/inputmethod/research/LogUnit.java b/java/src/com/android/inputmethod/research/LogUnit.java
index 70bbf9d..3534ef9 100644
--- a/java/src/com/android/inputmethod/research/LogUnit.java
+++ b/java/src/com/android/inputmethod/research/LogUnit.java
@@ -98,7 +98,7 @@
      * Publish the contents of this LogUnit to researchLog.
      */
     public synchronized void publishTo(final ResearchLog researchLog,
-            final boolean isIncludingPrivateData) {
+            final boolean canIncludePrivateData) {
         // Prepare debugging output if necessary
         final StringWriter debugStringWriter;
         final JsonWriter debugJsonWriter;
@@ -123,7 +123,7 @@
             JsonWriter jsonWriter = null;
             for (int i = 0; i < size; i++) {
                 final LogStatement logStatement = mLogStatementList.get(i);
-                if (!isIncludingPrivateData && logStatement.mIsPotentiallyPrivate) {
+                if (!canIncludePrivateData && logStatement.mIsPotentiallyPrivate) {
                     continue;
                 }
                 if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) {
@@ -134,7 +134,7 @@
                 // will not have been opened for writing.
                 if (jsonWriter == null) {
                     jsonWriter = researchLog.getValidJsonWriterLocked();
-                    outputLogUnitStart(jsonWriter, isIncludingPrivateData);
+                    outputLogUnitStart(jsonWriter, canIncludePrivateData);
                 }
                 outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i),
                         mTimeList.get(i));
@@ -145,7 +145,7 @@
             }
             if (jsonWriter != null) {
                 // We must have called logUnitStart earlier, so emit a logUnitStop.
-                outputLogUnitStop(jsonWriter, isIncludingPrivateData);
+                outputLogUnitStop(jsonWriter);
             }
         }
         if (DEBUG) {
@@ -171,11 +171,11 @@
     private static final String LOG_UNIT_END_KEY = "logUnitEnd";
 
     private void outputLogUnitStart(final JsonWriter jsonWriter,
-            final boolean isIncludingPrivateData) {
+            final boolean canIncludePrivateData) {
         try {
             jsonWriter.beginObject();
             jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
-            if (isIncludingPrivateData) {
+            if (canIncludePrivateData) {
                 jsonWriter.name(WORD_KEY).value(getWord());
             }
             jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
@@ -186,8 +186,7 @@
         }
     }
 
-    private void outputLogUnitStop(final JsonWriter jsonWriter,
-            final boolean isIncludingPrivateData) {
+    private void outputLogUnitStop(final JsonWriter jsonWriter) {
         try {
             jsonWriter.beginObject();
             jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index a8f255a..c187e33 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -22,6 +22,7 @@
 import com.android.inputmethod.latin.Suggest;
 import com.android.inputmethod.latin.define.ProductionFlag;
 
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.Random;
 
@@ -56,19 +57,24 @@
  * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
  * n-gram containing dictionary words.
  */
-public class MainLogBuffer extends FixedLogBuffer {
+public abstract class MainLogBuffer extends FixedLogBuffer {
     private static final String TAG = MainLogBuffer.class.getSimpleName();
     private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
 
     // The size of the n-grams logged.  E.g. N_GRAM_SIZE = 2 means to sample bigrams.
     public static final int N_GRAM_SIZE = 2;
-    // The number of words between n-grams to omit from the log.  If debugging, record 50% of all
-    // words.  Otherwise, only record 10%.
-    private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
-            ProductionFlag.IS_EXPERIMENTAL_DEBUG ? 2 : 18;
 
-    private final ResearchLog mResearchLog;
+    // Whether all words should be recorded, leaving no unsampled words between bigrams.  Useful for
+    // testing.
+    /* package for test */ static final boolean IS_LOGGING_EVERYTHING = false
+            && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
+
+    // The number of words between n-grams to omit from the log.
+    private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
+            IS_LOGGING_EVERYTHING ? 0 : (DEBUG ? 2 : 18);
+
     private Suggest mSuggest;
+    private boolean mIsStopping = false;
 
     /* package for test */ int mNumWordsBetweenNGrams;
 
@@ -76,9 +82,8 @@
     // after a sample is taken.
     /* package for test */ int mNumWordsUntilSafeToSample;
 
-    public MainLogBuffer(final ResearchLog researchLog) {
+    public MainLogBuffer() {
         super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
-        mResearchLog = researchLog;
         mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
         final Random random = new Random();
         mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
@@ -92,6 +97,10 @@
         mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
     }
 
+    public void setIsStopping() {
+        mIsStopping = true;
+    }
+
     /**
      * Determines whether uploading the n words at the front the MainLogBuffer will not violate
      * user privacy.
@@ -103,16 +112,36 @@
      * the screen orientation and other characteristics about the device can be uploaded without
      * revealing much about the user.
      */
-    public boolean isNGramSafe() {
+    private boolean isSafeNGram(final ArrayList<LogUnit> logUnits, final int minNGramSize) {
+        // Bypass privacy checks when debugging.
+        if (IS_LOGGING_EVERYTHING) {
+            if (mIsStopping) {
+                return true;
+            } else {
+                // Only check that it is the right length.  If not, wait for later words to make
+                // complete n-grams.
+                int numWordsInLogUnitList = 0;
+                final int length = logUnits.size();
+                for (int i = 0; i < length; i++) {
+                    final LogUnit logUnit = logUnits.get(i);
+                    final String word = logUnit.getWord();
+                    if (word != null) {
+                        numWordsInLogUnitList++;
+                    }
+                }
+                return numWordsInLogUnitList >= minNGramSize;
+            }
+        }
+
         // Check that we are not sampling too frequently.  Having sampled recently might disclose
         // too much of the user's intended meaning.
         if (mNumWordsUntilSafeToSample > 0) {
             return false;
         }
         if (mSuggest == null || !mSuggest.hasMainDictionary()) {
-            // Main dictionary is unavailable.  Since we cannot check it, we cannot tell if a word
-            // is out-of-vocabulary or not.  Therefore, we must judge the entire buffer contents to
-            // potentially pose a privacy risk.
+            // Main dictionary is unavailable.  Since we cannot check it, we cannot tell if a
+            // word is out-of-vocabulary or not.  Therefore, we must judge the entire buffer
+            // contents to potentially pose a privacy risk.
             return false;
         }
         // Reload the dictionary in case it has changed (e.g., because the user has changed
@@ -121,12 +150,12 @@
         if (dictionary == null) {
             return false;
         }
-        // Check each word in the buffer.  If any word poses a privacy threat, we cannot upload the
-        // complete buffer contents in detail.
-        final LinkedList<LogUnit> logUnits = getLogUnits();
+
+        // Check each word in the buffer.  If any word poses a privacy threat, we cannot upload
+        // the complete buffer contents in detail.
+        int numWordsInLogUnitList = 0;
         final int length = logUnits.size();
-        int wordsNeeded = N_GRAM_SIZE;
-        for (int i = 0; i < length && wordsNeeded > 0; i++) {
+        for (int i = 0; i < length; i++) {
             final LogUnit logUnit = logUnits.get(i);
             final String word = logUnit.getWord();
             if (word == null) {
@@ -135,6 +164,7 @@
                     return false;
                 }
             } else {
+                numWordsInLogUnitList++;
                 // Words not in the dictionary are a privacy threat.
                 if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
                     if (DEBUG) {
@@ -145,38 +175,59 @@
                 }
             }
         }
-        // All checks have passed; this buffer's content can be safely uploaded.
-        return true;
+
+        // Finally, only return true if the minNGramSize is met.
+        return numWordsInLogUnitList >= minNGramSize;
     }
 
-    public boolean isNGramComplete() {
+    public void shiftAndPublishAll() {
         final LinkedList<LogUnit> logUnits = getLogUnits();
-        final int length = logUnits.size();
-        int wordsNeeded = N_GRAM_SIZE;
-        for (int i = 0; i < length && wordsNeeded > 0; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            final String word = logUnit.getWord();
-            if (word != null) {
-                wordsNeeded--;
-            }
+        while (!logUnits.isEmpty()) {
+            publishLogUnitsAtFrontOfBuffer();
         }
-        return wordsNeeded == 0;
     }
 
     @Override
-    protected void onShiftOut(final LogUnit logUnit) {
-        if (mResearchLog != null) {
-            mResearchLog.publish(logUnit,
-                    ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
+    protected final void onBufferFull() {
+        publishLogUnitsAtFrontOfBuffer();
+    }
+
+    protected final void publishLogUnitsAtFrontOfBuffer() {
+        ArrayList<LogUnit> logUnits = peekAtFirstNWords(N_GRAM_SIZE);
+        if (isSafeNGram(logUnits, N_GRAM_SIZE)) {
+            // Good n-gram at the front of the buffer.  Publish it, disclosing details.
+            publish(logUnits, true /* canIncludePrivateData */);
+            shiftOutWords(N_GRAM_SIZE);
+            resetWordCounter();
+        } else {
+            // No good n-gram at front, and buffer is full.  Shift out the first word (or if there
+            // is none, the existing logUnits).
+            logUnits = peekAtFirstNWords(1);
+            publish(logUnits, false /* canIncludePrivateData */);
+            shiftOutWords(1);
         }
-        if (logUnit.hasWord()) {
-            if (mNumWordsUntilSafeToSample > 0) {
-                mNumWordsUntilSafeToSample--;
-                Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
-            }
-        }
+    }
+
+    /**
+     * Called when a list of logUnits should be published.
+     *
+     * It is the subclass's responsibility to implement the publication.
+     *
+     * @param logUnits The list of logUnits to be published.
+     * @param canIncludePrivateData Whether the private data in the logUnits can be included in
+     * publication.
+     */
+    protected abstract void publish(final ArrayList<LogUnit> logUnits,
+            final boolean canIncludePrivateData);
+
+    @Override
+    protected void shiftOutWords(int numWords) {
+        int oldNumActualWords = getNumActualWords();
+        super.shiftOutWords(numWords);
+        int numWordsShifted = oldNumActualWords - getNumActualWords();
+        mNumWordsUntilSafeToSample -= numWordsShifted;
         if (DEBUG) {
-            Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : ""));
+            Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
         }
     }
 }
diff --git a/java/src/com/android/inputmethod/research/ResearchLog.java b/java/src/com/android/inputmethod/research/ResearchLog.java
index 5edb46e..64ee830 100644
--- a/java/src/com/android/inputmethod/research/ResearchLog.java
+++ b/java/src/com/android/inputmethod/research/ResearchLog.java
@@ -185,12 +185,12 @@
         mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS);
     }
 
-    public synchronized void publish(final LogUnit logUnit, final boolean isIncludingPrivateData) {
+    public synchronized void publish(final LogUnit logUnit, final boolean canIncludePrivateData) {
         try {
             mExecutor.submit(new Callable<Object>() {
                 @Override
                 public Object call() throws Exception {
-                    logUnit.publishTo(ResearchLog.this, isIncludingPrivateData);
+                    logUnit.publishTo(ResearchLog.this, canIncludePrivateData);
                     scheduleFlush();
                     return null;
                 }
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index a46216c..cc57842 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -69,7 +69,9 @@
 
 import java.io.File;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Date;
+import java.util.List;
 import java.util.Locale;
 import java.util.UUID;
 
@@ -84,9 +86,6 @@
 public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChangeListener {
     private static final String TAG = ResearchLogger.class.getSimpleName();
     private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
-    // Whether all n-grams should be logged.  true will disclose private info.
-    public static final boolean IS_LOGGING_EVERYTHING = false
-            && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
     // Whether the TextView contents are logged at the end of the session.  true will disclose
     // private info.
     private static final boolean LOG_FULL_TEXTVIEW_CONTENTS = false
@@ -105,7 +104,8 @@
     private static final boolean IS_SHOWING_INDICATOR = true;
     // Change the default indicator to something very visible.  Currently two red vertical bars on
     // either side of they keyboard.
-    private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false || IS_LOGGING_EVERYTHING;
+    private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false ||
+            (MainLogBuffer.IS_LOGGING_EVERYTHING && ProductionFlag.IS_EXPERIMENTAL_DEBUG);
     // FEEDBACK_WORD_BUFFER_SIZE should add 1 because it must also hold the feedback LogUnit itself.
     public static final int FEEDBACK_WORD_BUFFER_SIZE = (Integer.MAX_VALUE - 1) + 1;
 
@@ -387,15 +387,41 @@
         }
         if (mMainLogBuffer == null) {
             mMainResearchLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
-            mMainLogBuffer = new MainLogBuffer(mMainResearchLog);
+            mMainLogBuffer = new MainLogBuffer() {
+                @Override
+                protected void publish(final ArrayList<LogUnit> logUnits,
+                        boolean canIncludePrivateData) {
+                    canIncludePrivateData |= MainLogBuffer.IS_LOGGING_EVERYTHING;
+                    final int length = logUnits.size();
+                    for (int i = 0; i < length; i++) {
+                        final LogUnit logUnit = logUnits.get(i);
+                        final String word = logUnit.getWord();
+                        if (word != null && word.length() > 0 && hasLetters(word)) {
+                            Log.d(TAG, "onPublish: " + word + ", hc: "
+                                    + logUnit.containsCorrection());
+                            final Dictionary dictionary = getDictionary();
+                            mStatistics.recordWordEntered(
+                                    dictionary != null && dictionary.isValidWord(word),
+                                    logUnit.containsCorrection());
+                        }
+                    }
+                    if (mMainResearchLog != null) {
+                        publishLogUnits(logUnits, mMainResearchLog, canIncludePrivateData);
+                    }
+                }
+            };
             mMainLogBuffer.setSuggest(mSuggest);
         }
         if (mFeedbackLogBuffer == null) {
-            mFeedbackLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
-            mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
+            resetFeedbackLogging();
         }
     }
 
+    private void resetFeedbackLogging() {
+        mFeedbackLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
+        mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
+    }
+
     /* package */ void stop() {
         if (DEBUG) {
             Log.d(TAG, "stop called");
@@ -404,16 +430,11 @@
         commitCurrentLogUnit();
 
         if (mMainLogBuffer != null) {
-            while (!mMainLogBuffer.isEmpty()) {
-                if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
-                        mMainResearchLog != null) {
-                    publishLogBuffer(mMainLogBuffer, mMainResearchLog,
-                            true /* isIncludingPrivateData */);
-                    mMainLogBuffer.resetWordCounter();
-                } else {
-                    mMainLogBuffer.shiftOutThroughFirstWord();
-                }
-            }
+            mMainLogBuffer.shiftAndPublishAll();
+            logStatistics();
+            commitCurrentLogUnit();
+            mMainLogBuffer.setIsStopping();
+            mMainLogBuffer.shiftAndPublishAll();
             mMainResearchLog.close(null /* callback */);
             mMainLogBuffer = null;
         }
@@ -731,13 +752,6 @@
         }
         if (!mCurrentLogUnit.isEmpty()) {
             if (mMainLogBuffer != null) {
-                if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
-                        mMainLogBuffer.isNGramComplete() &&
-                        mMainResearchLog != null) {
-                    publishLogBuffer(mMainLogBuffer, mMainResearchLog,
-                            true /* isIncludingPrivateData */);
-                    mMainLogBuffer.resetWordCounter();
-                }
                 mMainLogBuffer.shiftIn(mCurrentLogUnit);
             }
             if (mFeedbackLogBuffer != null) {
@@ -798,33 +812,39 @@
         }
     }
 
+    /* package for test */ void publishLogBuffer(final LogBuffer logBuffer,
+            final ResearchLog researchLog, final boolean isIncludingPrivateData) {
+        publishLogUnits(logBuffer.getLogUnits(), researchLog, isIncludingPrivateData);
+    }
+
     private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_OPENING =
             new LogStatement("logSegmentStart", false, false, "isIncludingPrivateData");
     private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_CLOSING =
             new LogStatement("logSegmentEnd", false, false);
-    /* package for test */ void publishLogBuffer(final LogBuffer logBuffer,
-            final ResearchLog researchLog, final boolean isIncludingPrivateData) {
+    /* package for test */ void publishLogUnits(final List<LogUnit> logUnits,
+            final ResearchLog researchLog, final boolean canIncludePrivateData) {
         final LogUnit openingLogUnit = new LogUnit();
-        if (logBuffer.isEmpty()) return;
-        openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING, SystemClock.uptimeMillis(),
-                isIncludingPrivateData);
-        researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
-        LogUnit logUnit;
-        int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE;
-        while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) {
+        if (logUnits.isEmpty()) return;
+        // LogUnits not containing private data, such as contextual data for the log, do not require
+        // logSegment boundary statements.
+        if (canIncludePrivateData) {
+            openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING,
+                    SystemClock.uptimeMillis(), canIncludePrivateData);
+            researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
+        }
+        for (LogUnit logUnit : logUnits) {
             if (DEBUG) {
                 Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
-                        : "<wordless>"));
+                        : "<wordless>") + ", correction?: " + logUnit.containsCorrection());
             }
-            researchLog.publish(logUnit, isIncludingPrivateData);
-            if (logUnit.getWord() != null) {
-                numWordsToPublish--;
-            }
+            researchLog.publish(logUnit, canIncludePrivateData);
         }
-        final LogUnit closingLogUnit = new LogUnit();
-        closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
-                SystemClock.uptimeMillis());
-        researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
+        if (canIncludePrivateData) {
+            final LogUnit closingLogUnit = new LogUnit();
+            closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
+                    SystemClock.uptimeMillis());
+            researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
+        }
     }
 
     public static boolean hasLetters(final String word) {
@@ -849,12 +869,8 @@
         if (word == null) {
             return;
         }
-        final Dictionary dictionary = getDictionary();
         if (word.length() > 0 && hasLetters(word)) {
             mCurrentLogUnit.setWord(word);
-            final boolean isDictionaryWord = dictionary != null
-                    && dictionary.isValidWord(word);
-            mStatistics.recordWordEntered(isDictionaryWord, mCurrentLogUnit.containsCorrection());
         }
         final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
         enqueueCommitText(word, isBatchMode);
@@ -967,7 +983,7 @@
                         Integer.toHexString(editorInfo.inputType),
                         Integer.toHexString(editorInfo.imeOptions), editorInfo.fieldId,
                         Build.DISPLAY, Build.MODEL, prefs, versionCode, versionName,
-                        OUTPUT_FORMAT_VERSION, IS_LOGGING_EVERYTHING,
+                        OUTPUT_FORMAT_VERSION, MainLogBuffer.IS_LOGGING_EVERYTHING,
                         ProductionFlag.IS_EXPERIMENTAL_DEBUG);
             } catch (NameNotFoundException e) {
                 e.printStackTrace();
@@ -976,7 +992,6 @@
     }
 
     public void latinIME_onFinishInputViewInternal() {
-        logStatistics();
         stop();
     }
 
@@ -1524,6 +1539,7 @@
     public static void richInputConnection_commitText(final String committedWord,
             final int newCursorPosition, final boolean isBatchMode) {
         final ResearchLogger researchLogger = getInstance();
+        // Only include opening and closing logSegments if private data is included
         final String scrubbedWord = scrubDigitsFromString(committedWord);
         if (!researchLogger.isExpectingCommitText) {
             researchLogger.enqueueEvent(LOGSTATEMENT_RICHINPUTCONNECTIONCOMMITTEXT,