Merge "Move code point constants from Keyboard to Constants class"
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 05f2d93..ee0e9cd 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -565,6 +565,7 @@
return size;
}
+ @SuppressWarnings("unused")
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
final int nodeOriginAddress, final int newNodeAddress,
final FormatOptions formatOptions) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 031306e..d1a3c7b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -77,12 +77,12 @@
@Override
public int readUnsignedByte() {
- return ((int)mBuffer.get()) & 0xFF;
+ return mBuffer.get() & 0xFF;
}
@Override
public int readUnsignedShort() {
- return ((int)mBuffer.getShort()) & 0xFFFF;
+ return mBuffer.getShort() & 0xFFFF;
}
@Override
@@ -474,11 +474,8 @@
}
}
- private static final int SINT8_MAX = 0x7F;
- private static final int SINT16_MAX = 0x7FFF;
private static final int SINT24_MAX = 0x7FFFFF;
private static final int MSB8 = 0x80;
- private static final int MSB16 = 0x8000;
private static final int MSB24 = 0x800000;
// End utility methods.
@@ -1711,7 +1708,7 @@
*
* Concretely this only tests the magic number.
*
- * @param filename The name of the file to test.
+ * @param file The file to test.
* @return true if it's a binary dictionary, false otherwise
*/
public static boolean isBinaryDictionary(final File file) {
@@ -1751,8 +1748,7 @@
final int bigramFrequency) {
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
- final float resultFreqFloat = (float)unigramFrequency
- + stepSize * (bigramFrequency + 1.0f);
+ final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
return (int)resultFreqFloat;
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 4abed9f..c588824 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -16,7 +16,6 @@
package com.android.inputmethod.latin.makedict;
-import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import java.util.ArrayList;
@@ -143,11 +142,33 @@
return NOT_A_TERMINAL != mFrequency;
}
- @UsedForTesting
public int getFrequency() {
return mFrequency;
}
+        /**
+         * Returns the value of the not-a-word flag stored in this group.
+         */
+        public boolean getIsNotAWord() {
+            return this.mIsNotAWord;
+        }
+
+        /**
+         * Returns the value of the blacklist-entry flag stored in this group.
+         */
+        public boolean getIsBlacklistEntry() {
+            return this.mIsBlacklistEntry;
+        }
+
+        /**
+         * Returns the shortcut targets of this group.
+         *
+         * @return a new list holding the same elements as the internal one, or null when this
+         *         group has no shortcut target list.
+         */
+        public ArrayList<WeightedString> getShortcutTargets() {
+            // Callers outside the package must not be able to modify our list, so they only
+            // ever get a copy of it.
+            return null == mShortcutTargets
+                    ? null : new ArrayList<WeightedString>(mShortcutTargets);
+        }
+
+        /**
+         * Returns the bigrams of this group.
+         *
+         * @return a new list holding the same elements as the internal one, or null when this
+         *         group has no bigram list.
+         */
+        public ArrayList<WeightedString> getBigrams() {
+            // Callers outside the package must not be able to modify our list, so they only
+            // ever get a copy of it.
+            return null == mBigrams ? null : new ArrayList<WeightedString>(mBigrams);
+        }
+
public boolean hasSeveralChars() {
assert(mChars.length > 0);
return 1 < mChars.length;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 4573fa6..0803b08 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -17,7 +17,6 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@@ -33,7 +32,6 @@
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 60ba938..9548f25 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -17,8 +17,13 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.makedict.Word;
import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
public class Diff extends Dicttool.Command {
public static final String COMMAND = "diff";
@@ -57,22 +62,20 @@
System.out.println("Header :");
diffHeaders(dict0, dict1);
if (languageDiffers(dict0, dict1)) {
- // diffHeaders returns whether the language is different. If it is, we should bail
- // to avoid flooding the output with thousands of useless word-level diffs.
+ // We only check for the language here. The rationale is that one may meaningfully
+ // diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
+ // pt_BR dict is almost certainly only interested in header-level diff, and the word
+ // diff would be very large, meaningless, and annoying.
return;
}
System.out.println("Body :");
}
- // TODO: implement the word-level diff
+ diffWords(dict0, dict1);
}
private static boolean languageDiffers(final FusionDictionary dict0,
final FusionDictionary dict1) {
// If either of the dictionaries have no locale, assume it's okay
- // We only check for the language here. The rationale is that one may meaningfully diff
- // a en_US with a en_GB dictionary, but someone who diffs a de dict with a pt_BR dict
- // is almost certainly only interested in header-level diff, and the word diff would be very
- // large, meaningless, and annoying.
if (null == dict0.mOptions.mAttributes.get("locale")) return true;
if (null == dict1.mOptions.mAttributes.get("locale")) return true;
final String dict0Lang = dict0.mOptions.mAttributes.get("locale").split("_", 3)[0];
@@ -91,6 +94,8 @@
+ dict0.mOptions.mGermanUmlautProcessing + " <=> "
+ dict1.mOptions.mGermanUmlautProcessing);
}
+ final HashMap<String, String> options1 =
+ new HashMap<String, String>(dict1.mOptions.mAttributes);
for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
if (!dict0.mOptions.mAttributes.get(optionKey).equals(
dict1.mOptions.mAttributes.get(optionKey))) {
@@ -98,11 +103,74 @@
+ dict0.mOptions.mAttributes.get(optionKey) + " <=> "
+ dict1.mOptions.mAttributes.get(optionKey));
}
- dict1.mOptions.mAttributes.remove(optionKey);
+ options1.remove(optionKey);
}
- for (final String optionKey : dict1.mOptions.mAttributes.keySet()) {
- System.out.println(" " + optionKey + " : null <=> "
- + dict1.mOptions.mAttributes.get(optionKey));
+ for (final String optionKey : options1.keySet()) {
+ System.out.println(" " + optionKey + " : null <=> " + options1.get(optionKey));
+ }
+ }
+
+    /**
+     * Prints the word-level differences between two dictionaries to standard output.
+     *
+     * Words of dict0 that cannot be found in dict1 are reported as deleted, and words of dict1
+     * that cannot be found in dict0 are reported as added. For words found in both, changes of
+     * frequency, of the not-a-word flag and of the blacklist flag are printed, then bigrams and
+     * shortcut targets are compared with diffAttributes().
+     *
+     * @param dict0 the dictionary taken as the old state.
+     * @param dict1 the dictionary taken as the new state.
+     */
+    private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
+        for (final Word word0 : dict0) {
+            final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord);
+            if (null == word1) {
+                // This word is not in dict1
+                System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+                continue;
+            }
+            // We found the word. Compare frequencies, flags, bigrams and shortcuts
+            if (word0.mFrequency != word1.getFrequency()) {
+                System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
+                        + " -> " + word1.getFrequency());
+            }
+            if (word0.mIsNotAWord != word1.getIsNotAWord()) {
+                System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
+                        + " -> " + word1.getIsNotAWord());
+            }
+            if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
+                System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
+                        + " -> " + word1.getIsBlacklistEntry());
+            }
+            diffAttributes(word0.mWord, word0.mBigrams, word1.getBigrams());
+            diffAttributes(word0.mWord, word0.mShortcutTargets, word1.getShortcutTargets());
+        }
+        // The loop above only visits the words of dict0, so a word that exists only in dict1
+        // would go unreported. Make the symmetric pass to catch additions.
+        for (final Word word1 : dict1) {
+            if (null == FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord)) {
+                // This word is not in dict0
+                System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
+            }
+        }
+    }
+
+    /**
+     * Prints the differences between two attribute lists attached to the same word.
+     *
+     * An entry of list0 that has an equal entry in list1 produces no output. One that only
+     * shares its string with an entry of list1 is reported as a frequency change, and one with
+     * no counterpart at all is reported as removed. Entries of list1 that were matched by
+     * nothing in list0 are reported as added. The output is always labelled "Bigram", including
+     * when the caller passes shortcut targets.
+     *
+     * @param word the word both lists belong to; only used in the output.
+     * @param list0 the attributes on the old side. May be null, which is treated as empty.
+     * @param list1 the attributes on the new side. May be null, which is treated as empty.
+     *        This list is not modified.
+     */
+    private static void diffAttributes(final String word, final ArrayList<WeightedString> list0,
+            final ArrayList<WeightedString> list1) {
+        // Track the not-yet-matched entries of list1 in a private list. Starting from an empty
+        // list when list1 is null keeps the final "added" loop from dereferencing null.
+        final ArrayList<WeightedString> unmatched = new ArrayList<WeightedString>();
+        if (null != list1) unmatched.addAll(list1);
+        if (null != list0) {
+            for (final WeightedString attribute0 : list0) {
+                // ArrayList#remove(Object) tests with #equals(), which for WeightedString is
+                // expected to be true when both the string and the frequency are the same.
+                if (unmatched.remove(attribute0)) continue;
+                // Search for an entry with the same string but a different frequency
+                WeightedString sameString = null;
+                for (final WeightedString attribute1 : unmatched) {
+                    if (attribute0.mWord.equals(attribute1.mWord)) {
+                        sameString = attribute1;
+                        break;
+                    }
+                }
+                // Decide only once the whole list has been searched, so that "removed" is
+                // printed exactly once, and also when there was nothing to search at all.
+                if (null == sameString) {
+                    System.out.println("Bigram removed: " + word + " " + attribute0.mWord + " "
+                            + attribute0.mFrequency);
+                } else {
+                    System.out.println("Bigram freq changed: " + word + " "
+                            + attribute0.mWord + " " + attribute0.mFrequency + " -> "
+                            + sameString.mFrequency);
+                    unmatched.remove(sameString);
+                }
+            }
+        }
+        // We removed any matching entry that we found, so what is left was not in list0.
+        for (final WeightedString attribute1 : unmatched) {
+            System.out.println("Bigram added: " + word + " " + attribute1.mWord + " "
+                    + attribute1.mFrequency);
+        }
+    }
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 98a4e8f..7f25818 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -18,9 +18,12 @@
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word;
+import java.util.ArrayList;
+
public class Info extends Dicttool.Command {
public static final String COMMAND = "info";
@@ -59,14 +62,55 @@
+ " whitelist entries)");
}
+    /**
+     * Prints what the dictionary holds about one word: its frequency, the not-a-word and
+     * blacklist flags when set, its shortcut targets and its bigrams.
+     *
+     * @param dict the dictionary to look the word up in.
+     * @param word the word to describe. If it cannot be found, a single line saying so is
+     *        printed instead.
+     */
+    private static void showWordInfo(final FusionDictionary dict, final String word) {
+        final CharGroup entry = FusionDictionary.findWordInTree(dict.mRoot, word);
+        if (null == entry) {
+            System.out.println(word + " is not in the dictionary");
+            return;
+        }
+
+        System.out.println("Word: " + word);
+        System.out.println("  Freq: " + entry.getFrequency());
+        if (entry.getIsNotAWord()) {
+            System.out.println("  Is not a word");
+        }
+        if (entry.getIsBlacklistEntry()) {
+            System.out.println("  Is a blacklist entry");
+        }
+
+        final ArrayList<WeightedString> targets = entry.getShortcutTargets();
+        if (null != targets && !targets.isEmpty()) {
+            for (final WeightedString target : targets) {
+                // The whitelist marker frequency is shown by name rather than as a number.
+                final String weight;
+                if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == target.mFrequency) {
+                    weight = "whitelist";
+                } else {
+                    weight = String.valueOf(target.mFrequency);
+                }
+                System.out.println("  Shortcut target: " + target.mWord + " (" + weight + ")");
+            }
+        } else {
+            System.out.println("  No shortcuts");
+        }
+
+        final ArrayList<WeightedString> nextWords = entry.getBigrams();
+        if (null != nextWords && !nextWords.isEmpty()) {
+            for (final WeightedString nextWord : nextWords) {
+                System.out.println(
+                        "  Bigram: " + nextWord.mWord + " (" + nextWord.mFrequency + ")");
+            }
+        } else {
+            System.out.println("  No bigrams");
+        }
+    }
+
@Override
public void run() {
if (mArgs.length < 1) {
throw new RuntimeException("Not enough arguments for command " + COMMAND);
}
final String filename = mArgs[0];
+ // With only a file name on the command line, print the dictionary-wide summary. Any further
+ // argument is a word to look up; in that case getDictionary's report flag is off and the
+ // summary is skipped.
+ final boolean showSummary = (1 == mArgs.length);
final FusionDictionary dict = BinaryDictOffdeviceUtils.getDictionary(filename,
- true /* report */);
- showInfo(dict);
+ showSummary /* report */);
+ if (showSummary) {
+ showInfo(dict);
+ } else {
+ for (int i = 1; i < mArgs.length; ++i) {
+ showWordInfo(dict, mArgs[i]);
+ }
+ }
}
}