Contacts dictionary rebuilds only when contact names have changed.

Bug: 6396600
Change-Id: Iad693ec4bab6351793d624e5c5b0a9f5c12a60e3
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index a644ec0..cc20f42 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -17,6 +17,7 @@
 package com.android.inputmethod.latin;
 
 import android.content.Context;
+import android.text.TextUtils;
 
 import com.android.inputmethod.keyboard.ProximityInfo;
 
@@ -84,6 +85,7 @@
             int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
     private native void closeNative(long dict);
     private native boolean isValidWordNative(long dict, int[] word, int wordLength);
+    private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
     private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
             int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
             boolean useFullEditDistance, char[] outputChars, int[] scores);
@@ -204,6 +206,15 @@
         return isValidWordNative(mNativeDict, chars, chars.length);
     }
 
+    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
+    // calls when checking for changes in an entire dictionary.
+    public boolean isValidBigram(CharSequence word1, CharSequence word2) {
+        if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
+        int[] chars1 = StringUtils.toCodePointArray(word1.toString());
+        int[] chars2 = StringUtils.toCodePointArray(word2.toString());
+        return isValidBigramNative(mNativeDict, chars1, chars2);
+    }
+
     @Override
     public synchronized void close() {
         closeInternal();
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
index 65f97e9..22787c2 100644
--- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
@@ -18,6 +18,7 @@
 import android.content.Context;
 import android.database.ContentObserver;
 import android.database.Cursor;
+import android.os.SystemClock;
 import android.provider.BaseColumns;
 import android.provider.ContactsContract.Contacts;
 import android.text.TextUtils;
@@ -30,18 +31,27 @@
 public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
 
     private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
+    private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
 
     private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
     private static final String NAME = "contacts";
 
+    private static boolean DEBUG = false;
+
     /**
      * Frequency for contacts information into the dictionary
      */
     private static final int FREQUENCY_FOR_CONTACTS = 40;
     private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
 
+    /** The maximum number of contacts that this dictionary supports. */
+    private static final int MAX_CONTACT_COUNT = 10000;
+
     private static final int INDEX_NAME = 1;
 
+    /** The number of contacts in the most recent dictionary rebuild. */
+    static private int sContactCountAtLastRebuild = 0;
+
     private ContentObserver mObserver;
 
     /**
@@ -98,6 +108,7 @@
             if (cursor != null) {
                 try {
                     if (cursor.moveToFirst()) {
+                        sContactCountAtLastRebuild = getContactCount();
                         addWords(cursor);
                     }
                 } finally {
@@ -125,15 +136,28 @@
 
     private void addWords(Cursor cursor) {
         clearFusionDictionary();
-        while (!cursor.isAfterLast()) {
+        int count = 0;
+        while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
             String name = cursor.getString(INDEX_NAME);
-            if (name != null && -1 == name.indexOf('@')) {
+            if (isValidName(name)) {
                 addName(name);
+                ++count;
             }
             cursor.moveToNext();
         }
     }
 
+    private int getContactCount() {
+        // TODO: consider switching to a rawQuery("select count(*)...") on the database if
+        // performance is a bottleneck.
+        final Cursor cursor = mContext.getContentResolver().query(
+                Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
+        if (cursor != null) {
+            return cursor.getCount();
+        }
+        return 0;
+    }
+
     /**
      * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
      * bigrams depending on locale.
@@ -144,16 +168,9 @@
         // TODO: Better tokenization for non-Latin writing systems
         for (int i = 0; i < len; i++) {
             if (Character.isLetter(name.codePointAt(i))) {
-                int j;
-                for (j = i + 1; j < len; j++) {
-                    final int codePoint = name.codePointAt(j);
-                    if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE
-                            || Character.isLetter(codePoint))) {
-                        break;
-                    }
-                }
-                String word = name.substring(i, j);
-                i = j - 1;
+                int end = getWordEndPosition(name, len, i);
+                String word = name.substring(i, end);
+                i = end - 1;
                 // Don't add single letter words, possibly confuses
                 // capitalization of i.
                 final int wordLen = word.codePointCount(0, word.length());
@@ -169,4 +186,100 @@
             }
         }
     }
+
+    /**
+     * Returns the index of the last letter in the word, starting from position startIndex.
+     */
+    private static int getWordEndPosition(String string, int len, int startIndex) {
+        int end;
+        int cp = 0;
+        for (end = startIndex + 1; end < len; end += Character.charCount(cp)) {
+            cp = string.codePointAt(end);
+            if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE
+                    || Character.isLetter(cp))) {
+                break;
+            }
+        }
+        return end;
+    }
+
+    @Override
+    protected boolean hasContentChanged() {
+        final long startTime = SystemClock.uptimeMillis();
+        final int contactCount = getContactCount();
+        if (contactCount > MAX_CONTACT_COUNT) {
+            // If there are too many contacts then return false. In this rare case it is impossible
+            // to include all of them anyways and the cost of rebuilding the dictionary is too high.
+            // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
+            return false;
+        }
+        if (contactCount != sContactCountAtLastRebuild) {
+            return true;
+        }
+        // Check all contacts since it's not possible to find out which names have changed.
+        // This is needed because it's possible to receive extraneous onChange events even when no
+        // name has changed.
+        Cursor cursor = mContext.getContentResolver().query(
+                Contacts.CONTENT_URI, PROJECTION, null, null, null);
+        if (cursor != null) {
+            try {
+                if (cursor.moveToFirst()) {
+                    while (!cursor.isAfterLast()) {
+                        String name = cursor.getString(INDEX_NAME);
+                        if (isValidName(name) && !isNameInDictionary(name)) {
+                            if (DEBUG) {
+                                Log.d(TAG, "Contact name missing: " + name + " (runtime = "
+                                        + (SystemClock.uptimeMillis() - startTime) + " ms)");
+                            }
+                            return true;
+                        }
+                        cursor.moveToNext();
+                    }
+                }
+            } finally {
+                cursor.close();
+            }
+        }
+        if (DEBUG) {
+            Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
+                    + " ms)");
+        }
+        return false;
+    }
+
+    private static boolean isValidName(String name) {
+        if (name != null && -1 == name.indexOf('@')) {
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Checks if the words in a name are in the current binary dictionary.
+     */
+    private boolean isNameInDictionary(String name) {
+        int len = name.codePointCount(0, name.length());
+        String prevWord = null;
+        for (int i = 0; i < len; i++) {
+            if (Character.isLetter(name.codePointAt(i))) {
+                int end = getWordEndPosition(name, len, i);
+                String word = name.substring(i, end);
+                i = end - 1;
+                final int wordLen = word.codePointCount(0, word.length());
+                if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
+                    if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
+                        if (!super.isValidBigramLocked(prevWord, word)) {
+                            return false;
+                        }
+                    } else {
+                        if (!super.isValidWordLocked(word)) {
+                            return false;
+                        }
+                    }
+                    prevWord = word;
+                }
+            }
+        }
+        return true;
+    }
 }
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 3d89226..22d8f24 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -96,6 +96,13 @@
     protected abstract void loadDictionaryAsync();
 
     /**
+     * Indicates that the source dictionary content has changed and a rebuild of the binary file is
+     * required. If it returns false, the next reload will only read the current binary dictionary
+     * from file. Note that the shared binary dictionary is locked when this is called.
+     */
+    protected abstract boolean hasContentChanged();
+
+    /**
      * Gets the shared dictionary controller for the given filename.
      */
     private static synchronized DictionaryController getSharedDictionaryController(
@@ -148,8 +155,9 @@
      * the native side.
      */
     public void clearFusionDictionary() {
-        mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions(
-                new HashMap<String, String>(), false, false));
+        mFusionDictionary = new FusionDictionary(new Node(),
+                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, 
+                        false));
     }
 
     /**
@@ -224,9 +232,7 @@
     protected boolean isValidWordInner(final CharSequence word) {
         if (mLocalDictionaryController.tryLock()) {
             try {
-                if (mBinaryDictionary != null) {
-                    return mBinaryDictionary.isValidWord(word);
-                }
+                return isValidWordLocked(word);
             } finally {
                 mLocalDictionaryController.unlock();
             }
@@ -234,6 +240,32 @@
         return false;
     }
 
+    protected boolean isValidWordLocked(final CharSequence word) {
+        if (mBinaryDictionary == null) return false;
+        return mBinaryDictionary.isValidWord(word);
+    }
+
+    protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) {
+        if (mBinaryDictionary == null) return false;
+        return mBinaryDictionary.isValidBigram(word1, word2);
+    }
+
+    protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) {
+        if (mLocalDictionaryController.tryLock()) {
+            try {
+                return isValidBigramLocked(word1, word2);
+            } finally {
+                mLocalDictionaryController.unlock();
+            }
+        }
+        return false;
+    }
+
+    protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) {
+        if (mBinaryDictionary == null) return false;
+        return mBinaryDictionary.isValidBigram(word1, word2);
+    }
+
     /**
      * Load the current binary dictionary from internal storage in a background thread. If no binary
      * dictionary exists, this method will generate one.
@@ -315,12 +347,16 @@
     }
 
     /**
-     * Sets whether or not the dictionary is out of date and requires a reload.
+     * Marks that the dictionary is out of date and requires a reload.
+     *
+     * @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild
+     *        of the binary file is required. If not true, the next reload process will only read
+     *        the current binary dictionary from file.
      */
-    protected void setRequiresReload(final boolean reload) {
-        final long time = reload ? SystemClock.uptimeMillis() : 0;
-        mSharedDictionaryController.mLastUpdateRequestTime = time;
+    protected void setRequiresReload(final boolean requiresRebuild) {
+        final long time = SystemClock.uptimeMillis();
         mLocalDictionaryController.mLastUpdateRequestTime = time;
+        mSharedDictionaryController.mLastUpdateRequestTime = time;
         if (DEBUG) {
             Log.d(TAG, "Reload request: request=" + time + " update="
                     + mSharedDictionaryController.mLastUpdateTime);
@@ -351,21 +387,30 @@
             if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
                 // If the shared dictionary file does not exist or is out of date, the first
                 // instance that acquires the lock will generate a new one.
-                mSharedDictionaryController.mLastUpdateTime = time;
-                mLocalDictionaryController.mLastUpdateTime = time;
-                generateBinaryDictionary();
-                loadBinaryDictionary();
-            } else if (mLocalDictionaryController.isOutOfDate()) {
-                // Otherwise, if only the local dictionary for this instance is out of date, load
-                // the shared dictionary from file.
-                mLocalDictionaryController.mLastUpdateTime = time;
+                if (hasContentChanged()) {
+                    // If the source content has changed, rebuild the binary dictionary.
+                    mSharedDictionaryController.mLastUpdateTime = time;
+                    generateBinaryDictionary();
+                    loadBinaryDictionary();
+                } else {
+                    // If not, the reload request was unnecessary so revert LastUpdateRequestTime
+                    // to LastUpdateTime.
+                    mSharedDictionaryController.mLastUpdateRequestTime =
+                            mSharedDictionaryController.mLastUpdateTime;
+                }
+            } else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime
+                    < mSharedDictionaryController.mLastUpdateTime) {
+                // Otherwise, if the local dictionary is older than the shared dictionary, load the
+                // shared dictionary.
                 loadBinaryDictionary();
             }
+            mLocalDictionaryController.mLastUpdateTime = time;
         } finally {
             mSharedDictionaryController.unlock();
         }
     }
 
+    // TODO: cache the file's existence so that we avoid doing a disk access each time.
     private boolean dictionaryFileExists() {
         final File file = new File(mContext.getFilesDir(), mFilename);
         return file.exists();
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index de9dbf9..b8f4ec7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -182,6 +182,20 @@
     return result;
 }
 
+static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
+        jintArray wordArray1, jintArray wordArray2) {
+    Dictionary *dictionary = (Dictionary*)dict;
+    if (!dictionary) return (jboolean) false;
+    jint *word1 = env->GetIntArrayElements(wordArray1, 0);
+    jint *word2 = env->GetIntArrayElements(wordArray2, 0);
+    jsize length1 = word1 ? env->GetArrayLength(wordArray1) : 0;
+    jsize length2 = word2 ? env->GetArrayLength(wordArray2) : 0;
+    jboolean result = dictionary->isValidBigram(word1, length1, word2, length2);
+    env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT);
+    env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
+    return result;
+}
+
 static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
         jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
     jchar *beforeChars = env->GetCharArrayElements(before, 0);
@@ -239,6 +253,7 @@
     {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
             (void*)latinime_BinaryDictionary_getSuggestions},
     {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+    {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
     {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
     {"calcNormalizedScoreNative", "([CI[CII)D",
             (void*)latinime_BinaryDictionary_calcNormalizedScore},
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 0703108..7ed4dc4 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -128,7 +128,7 @@
                 ++bigramCount;
             }
         }
-    } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+    } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
     return bigramCount;
 }
 
@@ -189,5 +189,25 @@
     return false;
 }
 
+bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
+        int length2) {
+    const uint8_t* const root = DICT;
+    int pos = getBigramListPositionForWord(word1, length1);
+    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
+    if (0 == pos) return false;
+    int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
+    if (NOT_VALID_WORD == nextWordPos) return false;
+    int bigramFlags;
+    do {
+        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+                &pos);
+        if (bigramPos == nextWordPos) {
+            return true;
+        }
+    } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+    return false;
+}
+
 // TODO: Move functions related to bigram to here
 } // namespace latinime
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 7328d58..b8763a5 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -33,6 +33,7 @@
     int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
     void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
             std::map<int, int> *map, uint8_t *filter);
+    bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 9dc2072..8ea7c49 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -58,4 +58,9 @@
     return mUnigramDictionary->isValidWord(word, length);
 }
 
+bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
+        int length2) {
+    return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
+}
+
 } // namespace latinime
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index bce86d1..87891ee 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -53,6 +53,7 @@
     }
 
     bool isValidWord(const int32_t *word, int length);
+    bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
     void *getDict() { return (void *)mDict; }
     int getDictSize() { return mDictSize; }
     int getMmapFd() { return mMmapFd; }