Optimize the store of bigram list

Bug: 4192129

Change-Id: Idcc62e4f9696b56b1d7013891b2da37b1784423e
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
index fa3d1be..d5163f2 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
@@ -30,8 +30,6 @@
 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
 
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 
 /**
  * Locally gathers stats about the words user types and various other signals like auto-correction
@@ -39,6 +37,7 @@
  */
 public class UserHistoryDictionary extends ExpandableDictionary {
     private static final String TAG = "UserHistoryDictionary";
+    public static final boolean DBG_SAVE_RESTORE = false;
 
     /** Any pair being typed or picked */
     private static final int FREQUENCY_FOR_TYPED = 2;
@@ -78,7 +77,8 @@
     /** Locale for which this auto dictionary is storing words */
     private String mLocale;
 
-    private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
+    private UserHistoryDictionaryBigramList mBigramList =
+            new UserHistoryDictionaryBigramList();
     private final Object mPendingWritesLock = new Object();
     private static volatile boolean sUpdatingDB = false;
     private final SharedPreferences mPrefs;
@@ -99,35 +99,6 @@
 
     private static DatabaseHelper sOpenHelper = null;
 
-    private static class Bigram {
-        public final String mWord1;
-        public final String mWord2;
-
-        Bigram(String word1, String word2) {
-            this.mWord1 = word1;
-            this.mWord2 = word2;
-        }
-
-        @Override
-        public boolean equals(Object bigram) {
-            if (!(bigram instanceof Bigram)) {
-                return false;
-            }
-            final Bigram bigram2 = (Bigram) bigram;
-            final boolean eq1 =
-                    mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1);
-            if (!eq1) {
-                return false;
-            }
-            return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2);
-        }
-
-        @Override
-        public int hashCode() {
-            return (mWord1 + " " + mWord2).hashCode();
-        }
-    }
-
     public void setDatabaseMax(int maxHistoryBigram) {
         sMaxHistoryBigrams = maxHistoryBigram;
     }
@@ -190,20 +161,17 @@
             freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
         }
         synchronized (mPendingWritesLock) {
-            final Bigram bi = new Bigram(word1, word2);
-            if (!mPendingWrites.contains(bi)) {
-                mPendingWrites.add(bi);
-            }
+            mBigramList.addBigram(word1, word2);
         }
 
         return freq;
     }
 
     public boolean cancelAddingUserHistory(String word1, String word2) {
-        final Bigram bi = new Bigram(word1, word2);
-        if (mPendingWrites.contains(bi)) {
-            mPendingWrites.remove(bi);
-            return super.removeBigram(word1, word2);
+        synchronized (mPendingWritesLock) {
+            if (mBigramList.removeBigram(word1, word2)) {
+                return super.removeBigram(word1, word2);
+            }
         }
         return false;
     }
@@ -214,11 +182,11 @@
     private void flushPendingWrites() {
         synchronized (mPendingWritesLock) {
             // Nothing pending? Return
-            if (mPendingWrites.isEmpty()) return;
+            if (mBigramList.isEmpty()) return;
             // Create a background thread to write the pending entries
-            new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute();
+            new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute();
             // Create a new map for writing new entries into while the old one is written to db
-            mPendingWrites = new HashSet<Bigram>();
+            mBigramList = new UserHistoryDictionaryBigramList();
         }
     }
 
@@ -251,6 +219,9 @@
                     final String word1 = cursor.getString(word1Index);
                     final String word2 = cursor.getString(word2Index);
                     final int frequency = cursor.getInt(frequencyIndex);
+                    if (DBG_SAVE_RESTORE) {
+                        Log.d(TAG, "--- Load user history: " + word1 + ", " + word2);
+                    }
                     // Safeguard against adding really long words. Stack may overflow due
                     // to recursive lookup
                     if (null == word1) {
@@ -259,8 +230,9 @@
                             && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
                         super.setBigramAndGetFrequency(
                                 word1, word2, new ForgettingCurveParams(frequency, now, last));
-                        // TODO: optimize
-                        mPendingWrites.add(new Bigram(word1, word2));
+                    }
+                    synchronized(mPendingWritesLock) {
+                        mBigramList.addBigram(word1, word2);
                     }
                     cursor.moveToNext();
                 }
@@ -339,14 +311,15 @@
      * the in-memory trie.
      */
     private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
-        private final HashSet<Bigram> mMap;
+        private final UserHistoryDictionaryBigramList mBigramList;
         private final DatabaseHelper mDbHelper;
         private final String mLocale;
         private final UserHistoryDictionary mUserHistoryDictionary;
 
-        public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites,
+        public UpdateDbTask(
+                DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites,
                 String locale, UserHistoryDictionary dict) {
-            mMap = pendingWrites;
+            mBigramList = pendingWrites;
             mLocale = locale;
             mDbHelper = openHelper;
             mUserHistoryDictionary = dict;
@@ -401,67 +374,71 @@
                 return null;
             }
             db.execSQL("PRAGMA foreign_keys = ON;");
+            final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams;
+
             // Write all the entries to the db
-            final Iterator<Bigram> iterator = mMap.iterator();
-            while (iterator.hasNext()) {
-                // TODO: this process of making a text search for each pair each time
-                // is terribly inefficient. Optimize this.
-                final Bigram bi = iterator.next();
+            for (String word1 : mBigramList.keySet()) {
+                for (String word2 : mBigramList.getBigrams(word1)) {
+                    // TODO: this process of making a text search for each pair each time
+                    // is terribly inefficient. Optimize this.
+                    // find pair id
+                    Cursor c = null;
+                    try {
+                        if (null != word1) {
+                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
+                                    MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
+                                            + MAIN_COLUMN_LOCALE + "=?",
+                                            new String[] { word1, word2, mLocale }, null, null,
+                                            null);
+                        } else {
+                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
+                                    MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2
+                                            + "=? AND " + MAIN_COLUMN_LOCALE + "=?",
+                                            new String[] { word2, mLocale }, null, null, null);
+                        }
 
-                // find pair id
-                Cursor c = null;
-                try {
-                    if (null != bi.mWord1) {
-                        c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
-                                MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
-                                        + MAIN_COLUMN_LOCALE + "=?",
-                                        new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null,
-                                        null);
-                    } else {
-                        c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
-                                MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND "
-                                        + MAIN_COLUMN_LOCALE + "=?",
-                                        new String[] { bi.mWord2, mLocale }, null, null, null);
-                    }
-
-                    final int pairId;
-                    if (c.moveToFirst()) {
-                        // existing pair
-                        pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
-                        db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
-                                new String[] { Integer.toString(pairId) });
-                    } else {
-                        // new pair
-                        Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
-                                getContentValues(bi.mWord1, bi.mWord2, mLocale));
-                        pairId = pairIdLong.intValue();
-                    }
-                    // insert new frequency
-                    final int freq;
-                    if (bi.mWord1 == null) {
-                        freq = FREQUENCY_FOR_TYPED;
-                    } else {
-                        final NextWord nw = mUserHistoryDictionary.getBigramWord(
-                                bi.mWord1, bi.mWord2);
-                        if (nw != null) {
-                            final int tempFreq = nw.getFcValue();
-                            // TODO: Check whether the word is valid or not
-                            if (UserHistoryForgettingCurveUtils.needsToSave(
-                                    (byte)tempFreq, false)) {
-                                freq = tempFreq;
+                        final int pairId;
+                        if (c.moveToFirst()) {
+                            // existing pair
+                            pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
+                            db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
+                                    new String[] { Integer.toString(pairId) });
+                        } else {
+                            // new pair
+                            Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
+                                    getContentValues(word1, word2, mLocale));
+                            pairId = pairIdLong.intValue();
+                        }
+                        // insert new frequency
+                        final int freq;
+                        if (word1 == null) {
+                            freq = FREQUENCY_FOR_TYPED;
+                        } else {
+                            final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
+                            if (nw != null) {
+                                final int tempFreq = nw.getFcValue();
+                                // TODO: Check whether the word is valid or not
+                                if (UserHistoryForgettingCurveUtils.needsToSave(
+                                        (byte)tempFreq, false, addLevel0Bigram)) {
+                                    freq = tempFreq;
+                                } else {
+                                    freq = -1;
+                                }
                             } else {
                                 freq = -1;
                             }
-                        } else {
-                            freq = -1;
                         }
-                    }
-                    if (freq > 0) {
-                        db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq));
-                    }
-                } finally {
-                    if (c != null) {
-                        c.close();
+                        if (freq > 0) {
+                            if (DBG_SAVE_RESTORE) {
+                                Log.d(TAG, "--- Save user history: " + word1 + ", " + word2);
+                            }
+                            db.insert(FREQ_TABLE_NAME, null,
+                                    getFrequencyContentValues(pairId, freq));
+                        }
+                    } finally {
+                        if (c != null) {
+                            c.close();
+                        }
                     }
                 }
             }
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java
new file mode 100644
index 0000000..409f921
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.util.Log;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A store of bigrams which will be updated when the user history dictionary is closed
+ * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
+ * bigrams when we write to the SQL DB.
+ */
+public class UserHistoryDictionaryBigramList {
+    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
+    private static final HashSet<String> EMPTY_STRING_SET = new HashSet<String>();
+    private final HashMap<String, HashSet<String>> mBigramMap =
+            new HashMap<String, HashSet<String>>();
+    private int mSize = 0;
+
+    public void evictAll() {
+        mSize = 0;
+        mBigramMap.clear();
+    }
+
+    public void addBigram(String word1, String word2) {
+        if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
+            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2);
+        }
+        final HashSet<String> set;
+        if (mBigramMap.containsKey(word1)) {
+            set = mBigramMap.get(word1);
+        } else {
+            set = new HashSet<String>();
+            mBigramMap.put(word1, set);
+        }
+        if (!set.contains(word2)) {
+            ++mSize;
+            set.add(word2);
+        }
+    }
+
+    public int size() {
+        return mSize;
+    }
+
+    public boolean isEmpty() {
+        return mBigramMap.isEmpty();
+    }
+
+    public Set<String> keySet() {
+        return mBigramMap.keySet();
+    }
+
+    public HashSet<String> getBigrams(String word1) {
+        if (!mBigramMap.containsKey(word1)) {
+            return EMPTY_STRING_SET;
+        } else {
+            return mBigramMap.get(word1);
+        }
+    }
+
+    public boolean removeBigram(String word1, String word2) {
+        final HashSet<String> set = getBigrams(word1);
+        if (set.isEmpty()) {
+            return false;
+        }
+        if (set.contains(word2)) {
+            set.remove(word2);
+            --mSize;
+            return true;
+        }
+        return false;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
index f30fee2..feb1d00 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
@@ -162,10 +162,15 @@
 
     // TODO: isValid should be false for a word whose frequency is 0,
     // or that is not in the dictionary.
-    public static boolean needsToSave(byte fc, boolean isValid) {
+    /**
+     * Check wheather we should save the bigram to the SQL DB or not
+     */
+    public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) {
         int level = fcToLevel(fc);
-        if (isValid && level == 0) {
-            return false;
+        if (level == 0) {
+            if (isValid || !addLevel0Bigram) {
+                return false;
+            }
         }
         final int elapsedTime = fcToElapsedTime(fc);
         return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);