Merge "Move code point constants from Keyboard to Constants class"
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 05f2d93..ee0e9cd 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -565,6 +565,7 @@
         return size;
     }
 
+    @SuppressWarnings("unused")
     private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
             final int nodeOriginAddress, final int newNodeAddress,
             final FormatOptions formatOptions) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 031306e..d1a3c7b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -77,12 +77,12 @@
 
         @Override
         public int readUnsignedByte() {
-            return ((int)mBuffer.get()) & 0xFF;
+            return mBuffer.get() & 0xFF;
         }
 
         @Override
         public int readUnsignedShort() {
-            return ((int)mBuffer.getShort()) & 0xFFFF;
+            return mBuffer.getShort() & 0xFFFF;
         }
 
         @Override
@@ -474,11 +474,8 @@
         }
     }
 
-    private static final int SINT8_MAX = 0x7F;
-    private static final int SINT16_MAX = 0x7FFF;
     private static final int SINT24_MAX = 0x7FFFFF;
     private static final int MSB8 = 0x80;
-    private static final int MSB16 = 0x8000;
     private static final int MSB24 = 0x800000;
 
     // End utility methods.
@@ -1711,7 +1708,7 @@
      *
      * Concretely this only tests the magic number.
      *
-     * @param filename The name of the file to test.
+     * @param file The file to test.
      * @return true if it's a binary dictionary, false otherwise
      */
     public static boolean isBinaryDictionary(final File file) {
@@ -1751,8 +1748,7 @@
             final int bigramFrequency) {
         final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
                 / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
-        final float resultFreqFloat = (float)unigramFrequency
-                + stepSize * (bigramFrequency + 1.0f);
+        final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
         return (int)resultFreqFloat;
     }
 }
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 4abed9f..c588824 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -16,7 +16,6 @@
 
 package com.android.inputmethod.latin.makedict;
 
-import com.android.inputmethod.annotations.UsedForTesting;
 import com.android.inputmethod.latin.Constants;
 
 import java.util.ArrayList;
@@ -143,11 +142,33 @@
             return NOT_A_TERMINAL != mFrequency;
         }
 
-        @UsedForTesting
         public int getFrequency() {
             return mFrequency;
         }
 
+        public boolean getIsNotAWord() {
+            return mIsNotAWord;
+        }
+
+        public boolean getIsBlacklistEntry() {
+            return mIsBlacklistEntry;
+        }
+
+        public ArrayList<WeightedString> getShortcutTargets() {
+            // Hand back a separate list so that code outside the package cannot modify ours.
+            if (null == mShortcutTargets) {
+                return null;
+            }
+            return new ArrayList<WeightedString>(mShortcutTargets);
+        }
+
+        public ArrayList<WeightedString> getBigrams() {
+            // Callers receive a fresh list, or null when mBigrams is null; adding to or removing
+            // from the returned list never affects the list held by this object.
+            if (null == mBigrams) return null;
+            return new ArrayList<WeightedString>(mBigrams);
+        }
+
         public boolean hasSeveralChars() {
             assert(mChars.length > 0);
             return 1 < mChars.length;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 4573fa6..0803b08 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -17,7 +17,6 @@
 package com.android.inputmethod.latin.dicttool;
 
 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 
@@ -33,7 +32,6 @@
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
 import java.util.ArrayList;
 
 import javax.xml.parsers.ParserConfigurationException;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 60ba938..9548f25 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -17,8 +17,13 @@
 package com.android.inputmethod.latin.dicttool;
 
 import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.makedict.Word;
 
 import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
 
 public class Diff extends Dicttool.Command {
     public static final String COMMAND = "diff";
@@ -57,22 +62,20 @@
             System.out.println("Header :");
             diffHeaders(dict0, dict1);
             if (languageDiffers(dict0, dict1)) {
-                // diffHeaders returns whether the language is different. If it is, we should bail
-                // to avoid flooding the output with thousands of useless word-level diffs.
+                // We only check for the language here. The rationale is that one may meaningfully
+                // diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
+                // pt_BR dict is almost certainly only interested in header-level diff, and the word
+                // diff would be very large, meaningless, and annoying.
                 return;
             }
             System.out.println("Body :");
         }
-        // TODO: implement the word-level diff
+        diffWords(dict0, dict1);
     }
 
     private static boolean languageDiffers(final FusionDictionary dict0,
             final FusionDictionary dict1) {
         // If either of the dictionaries have no locale, assume it's okay
-        // We only check for the language here. The rationale is that one may meaningfully diff
-        // a en_US with a en_GB dictionary, but someone who diffs a de dict with a pt_BR dict
-        // is almost certainly only interested in header-level diff, and the word diff would be very
-        // large, meaningless, and annoying.
         if (null == dict0.mOptions.mAttributes.get("locale")) return true;
         if (null == dict1.mOptions.mAttributes.get("locale")) return true;
         final String dict0Lang = dict0.mOptions.mAttributes.get("locale").split("_", 3)[0];
@@ -91,6 +94,8 @@
                     + dict0.mOptions.mGermanUmlautProcessing + " <=> "
                     + dict1.mOptions.mGermanUmlautProcessing);
         }
+        final HashMap<String, String> options1 =
+                new HashMap<String, String>(dict1.mOptions.mAttributes);
         for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
             if (!dict0.mOptions.mAttributes.get(optionKey).equals(
                     dict1.mOptions.mAttributes.get(optionKey))) {
@@ -98,11 +103,74 @@
                         + dict0.mOptions.mAttributes.get(optionKey) + " <=> "
                         + dict1.mOptions.mAttributes.get(optionKey));
             }
-            dict1.mOptions.mAttributes.remove(optionKey);
+            options1.remove(optionKey);
         }
-        for (final String optionKey : dict1.mOptions.mAttributes.keySet()) {
-            System.out.println("  " + optionKey + " : null <=> "
-                    + dict1.mOptions.mAttributes.get(optionKey));
+        for (final String optionKey : options1.keySet()) {
+            System.out.println("  " + optionKey + " : null <=> " + options1.get(optionKey));
+        }
+    }
+
+    private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
+        for (final Word word0 : dict0) {
+            final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord);
+            if (null == word1) {
+                // Not in dict1. NOTE(review): words only in dict1 are never reported.
+                System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+            } else {
+                // The word is in both: compare frequency, flags, bigrams, then shortcut targets
+                if (word0.mFrequency != word1.getFrequency()) {
+                    System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
+                            + " -> " + word1.getFrequency());
+                }
+                if (word0.mIsNotAWord != word1.getIsNotAWord()) {
+                    System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
+                            + " -> " + word1.getIsNotAWord());
+                }
+                if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
+                    System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
+                            + " -> " + word1.getIsBlacklistEntry());
+                }
+                diffAttributes(word0.mWord, word0.mBigrams, word1.getBigrams());
+                diffAttributes(word0.mWord, word0.mShortcutTargets, word1.getShortcutTargets());
+            }
+        }
+    }
+
+    // Prints how the attributes of word changed from list0 (old) to list1 (new); either may be
+    // null. list1 is modified: matched entries are removed, so pass a list that can be altered.
+    private static void diffAttributes(final String word, final ArrayList<WeightedString> list0,
+            final ArrayList<WeightedString> list1) {
+        if (null == list1) {
+            if (null == list0) return;
+            for (final WeightedString attribute0 : list0) {
+                System.out.println("Bigram removed: " + word + " " + attribute0.mWord + " "
+                        + attribute0.mFrequency);
+            }
+            return; // Nothing was added, and list1 is null so it must not be iterated below
+        } else if (null != list0) {
+            for (final WeightedString attribute0 : list0) {
+                // WeightedString#equals() is true when both the string and the frequency match.
+                if (list1.remove(attribute0)) continue;
+                WeightedString sameWord = null; // Same string, different frequency
+                for (final WeightedString attribute1 : list1) {
+                    if (attribute0.mWord.equals(attribute1.mWord)) {
+                        sameWord = attribute1;
+                        break;
+                    }
+                }
+                if (null == sameWord) {
+                    System.out.println("Bigram removed: " + word + " " + attribute0.mWord);
+                } else {
+                    System.out.println("Bigram freq changed: " + word + " " + attribute0.mWord
+                            + " " + attribute0.mFrequency + " -> " + sameWord.mFrequency);
+                    list1.remove(sameWord);
+                }
+            }
+        }
+        // Matched entries were removed above, so list1 now only holds entries missing from list0.
+        for (final WeightedString attribute1 : list1) {
+            System.out.println("Bigram added: " + word + " " + attribute1.mWord + " "
+                    + attribute1.mFrequency);
+        }
+    }
 }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 98a4e8f..7f25818 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -18,9 +18,12 @@
 
 import com.android.inputmethod.latin.makedict.FormatSpec;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 import com.android.inputmethod.latin.makedict.Word;
 
+import java.util.ArrayList;
+
 public class Info extends Dicttool.Command {
     public static final String COMMAND = "info";
 
@@ -59,14 +62,55 @@
                 + " whitelist entries)");
     }
 
+    private static void showWordInfo(final FusionDictionary dict, final String word) {
+        final CharGroup charGroup = FusionDictionary.findWordInTree(dict.mRoot, word);
+        if (null == charGroup) { // The lookup found no entry for this word
+            System.out.println(word + " is not in the dictionary");
+            return;
+        }
+        System.out.println("Word: " + word);
+        System.out.println("  Freq: " + charGroup.getFrequency());
+        if (charGroup.getIsNotAWord()) {
+            System.out.println("  Is not a word");
+        }
+        if (charGroup.getIsBlacklistEntry()) {
+            System.out.println("  Is a blacklist entry");
+        }
+        final ArrayList<WeightedString> shortcuts = charGroup.getShortcutTargets();
+        if (null != shortcuts && !shortcuts.isEmpty()) {
+            for (final WeightedString target : shortcuts) { // Whitelist entries are shown by name
+                System.out.println("  Shortcut target: " + target.mWord + " ("
+                        + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == target.mFrequency
+                                ? "whitelist" : target.mFrequency) + ")");
+            }
+        } else {
+            System.out.println("  No shortcuts");
+        }
+        final ArrayList<WeightedString> bigramList = charGroup.getBigrams();
+        if (null != bigramList && !bigramList.isEmpty()) {
+            for (final WeightedString entry : bigramList) {
+                System.out.println("  Bigram: " + entry.mWord + " (" + entry.mFrequency + ")");
+            }
+        } else {
+            System.out.println("  No bigrams");
+        }
+    }
+
     @Override
     public void run() {
         if (mArgs.length < 1) {
             throw new RuntimeException("Not enough arguments for command " + COMMAND);
         }
         final String filename = mArgs[0];
+        final boolean hasWordArguments = (1 < mArgs.length); // mArgs[0] is the file name
         final FusionDictionary dict = BinaryDictOffdeviceUtils.getDictionary(filename,
-                true /* report */);
-        showInfo(dict);
+                !hasWordArguments /* report */);
+        if (hasWordArguments) {
+            for (int i = 1; i < mArgs.length; ++i) { // Every remaining argument is a word
+                showWordInfo(dict, mArgs[i]);
+            }
+        } else {
+            showInfo(dict);
+        }
     }
 }