Merge "Rename BigramProperty to NgramProperty."

commit: a69c170a3f934dbeec9e1ea5e25c7b737b299c24 [log] [tgz]
author: Keisuke Kuroyanagi <ksk@google.com> Mon Sep 29 10:26:38 2014 +0000
committer: Android (Google) Code Review <android-gerrit@google.com> Mon Sep 29 10:26:39 2014 +0000
tree: 8083ab47a42b97e04e7687d1c2ed338802078186
parent: 4936d259d7eaa00017b73fdb3a1ae0fadb0531e2 [diff]
parent: 79bb37d499ed6fcabe981153d5ff0b5b69509933 [diff]
diff --git a/java/res/values/strings-talkback-descriptions.xml b/java/res/values/strings-talkback-descriptions.xml
index 80c7bdb..36fa7b3 100644
--- a/java/res/values/strings-talkback-descriptions.xml
+++ b/java/res/values/strings-talkback-descriptions.xml

@@ -139,6 +139,18 @@
     <string name="spoken_symbol_unknown">Unknown symbol</string>
     <!-- Spoken description for unknown emoji code point. -->
     <string name="spoken_emoji_unknown">Unknown emoji</string>
+    <!-- Spoken description for emoticons ":-!". -->
+    <string name="spoken_emoticon_3A_2D_21_20">Bored face</string>
+    <!-- Spoken description for emoticons ":-$". -->
+    <string name="spoken_emoticon_3A_2D_24_20">Embarrassed face</string>
+    <!-- Spoken description for emoticons "B-)". -->
+    <string name="spoken_emoticon_42_2D_29_20">Face wearing sunglasses</string>
+    <!-- Spoken description for emoticons ":O". -->
+    <string name="spoken_emoticon_3A_4F_20">Surprised face</string>
+    <!-- Spoken description for emoticons ":-*". -->
+    <string name="spoken_emoticon_3A_2D_2A_20">Kissing face</string>
+    <!-- Spoken description for emoticons ":-[". -->
+    <string name="spoken_emoticon_3A_2D_5B_20">Frowning face</string>
 
     <!-- Spoken descriptions when opening a more keys keyboard that has alternative characters. -->
     <string name="spoken_open_more_keys_keyboard">Alternative characters are available</string>

diff --git a/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java b/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java
index 7a3510e..edcdd4c 100644
--- a/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java
+++ b/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java

@@ -37,6 +37,8 @@
     private static final String SPOKEN_LETTER_RESOURCE_NAME_FORMAT = "spoken_accented_letter_%04X";
     private static final String SPOKEN_SYMBOL_RESOURCE_NAME_FORMAT = "spoken_symbol_%04X";
     private static final String SPOKEN_EMOJI_RESOURCE_NAME_FORMAT = "spoken_emoji_%04X";
+    private static final String SPOKEN_EMOTICON_RESOURCE_NAME_PREFIX = "spoken_emoticon";
+    private static final String SPOKEN_EMOTICON_CODE_POINT_FORMAT = "_%02X";
 
     // The resource ID of the string spoken for obscured keys
     private static final int OBSCURED_KEY_RES_ID = R.string.spoken_description_dot;
@@ -109,7 +111,9 @@
         }
 
         if (code == Constants.CODE_OUTPUT_TEXT) {
-            return key.getOutputText();
+            final String outputText = key.getOutputText();
+            final String description = getSpokenEmoticonDescription(context, outputText);
+            return TextUtils.isEmpty(description) ? outputText : description;
         }
 
         // Just attempt to speak the description.
@@ -340,4 +344,21 @@
         }
         return resId;
     }
+
+    // Returns the spoken_emoticon_* string named after outputText's code points, or null.
+    // TODO: Remove this method once TTS supports emoticon verbalization.
+    private String getSpokenEmoticonDescription(final Context context, final String outputText) {
+        final StringBuilder name = new StringBuilder(SPOKEN_EMOTICON_RESOURCE_NAME_PREFIX);
+        int offset = 0;
+        while (offset < outputText.length()) {
+            final int codePoint = outputText.codePointAt(offset);
+            name.append(String.format(Locale.ROOT, SPOKEN_EMOTICON_CODE_POINT_FORMAT, codePoint));
+            offset += Character.charCount(codePoint);
+        }
+        final Resources res = context.getResources();
+        // Note that the resource package name may differ from the context package name.
+        final String packageName = res.getResourcePackageName(R.string.spoken_description_unknown);
+        final int resId = res.getIdentifier(name.toString(), "string", packageName);
+        return (resId != 0) ? res.getString(resId) : null;
+    }
 }

diff --git a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java
index df447fd..3f9ffd2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java

@@ -47,6 +47,7 @@
     public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
     public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
     public static final String ATTRIBUTE_VALUE_TRUE = "1";
+    public static final String CODE_POINT_TABLE_KEY = "codePointTable";
 
     public DictionaryHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
             final FormatOptions formatOptions) throws UnsupportedFormatException {

diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index ec3c6e2..2661d5d 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java

@@ -237,6 +237,8 @@
     static final int UINT16_MAX = 0xFFFF;
     static final int UINT24_MAX = 0xFFFFFF;
     static final int MSB8 = 0x80;
+    static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+    static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
 
     /**
      * Options about file format.

diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 2b3fd89..12290e6 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java

@@ -27,6 +27,8 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map.Entry;
 
 /**
  * Encodes binary files for a FusionDictionary.
@@ -791,10 +793,12 @@
      * @param destination the stream to write the file header to.
      * @param dict the dictionary to write.
      * @param formatOptions file format options.
+     * @param codePointOccurrenceArray code points ordered by occurrence count.
      * @return the size of the header.
      */
     /* package */ static int writeDictionaryHeader(final OutputStream destination,
-            final FusionDictionary dict, final FormatOptions formatOptions)
+            final FusionDictionary dict, final FormatOptions formatOptions,
+            final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray)
                     throws IOException, UnsupportedFormatException {
         final int version = formatOptions.mVersion;
         if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
@@ -833,6 +837,9 @@
             CharEncoding.writeString(headerBuffer, key);
             CharEncoding.writeString(headerBuffer, value);
         }
+
+        // TODO: Write out the code point table.
+
         final int size = headerBuffer.size();
         final byte[] bytes = headerBuffer.toByteArray();
         // Write out the header size.
@@ -845,4 +852,15 @@
         headerBuffer.close();
         return size;
     }
+
+    /** Pairs the code point to one-byte code map with the code point occurrence entries. */
+    static final class CodePointTable {
+        final HashMap<Integer, Integer> mCodePointToOneByteCodeMap;
+        final ArrayList<Entry<Integer, Integer>> mCodePointOccurrenceArray;
+        CodePointTable(final HashMap<Integer, Integer> oneByteCodeMap,
+                final ArrayList<Entry<Integer, Integer>> occurrenceArray) {
+            this.mCodePointToOneByteCodeMap = oneByteCodeMap;
+            this.mCodePointOccurrenceArray = occurrenceArray;
+        }
+    }
 }

diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index 0fa75e8..012fd81 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java

@@ -18,6 +18,7 @@
 
 import com.android.inputmethod.annotations.UsedForTesting;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@@ -28,7 +29,11 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map.Entry;
 
 /**
  * An implementation of DictEncoder for version 2 binary dictionary.
@@ -73,6 +78,46 @@
         }
     }
 
+    /**
+     * Counts the code points used by the words of {@code dict} and assigns one-byte codes.
+     * Package-private for testing.
+     *
+     * @param dict the dictionary whose words are scanned.
+     * @return the one-byte code map plus the occurrence entries sorted by descending count.
+     */
+    static CodePointTable makeCodePointTable(final FusionDictionary dict) {
+        final HashMap<Integer, Integer> codePointOccurrenceCounts = new HashMap<>();
+        for (final WordProperty word : dict) {
+            final String wordString = word.mWord;
+            // Step by whole code points: stepping by char would also count the trailing
+            // surrogate of every supplementary character as a code point of its own.
+            for (int i = 0; i < wordString.length(); i = wordString.offsetByCodePoints(i, 1)) {
+                final int codePoint = wordString.codePointAt(i);
+                final Integer count = codePointOccurrenceCounts.get(codePoint);
+                codePointOccurrenceCounts.put(codePoint, (count == null) ? 1 : count + 1);
+            }
+        }
+        final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray =
+                new ArrayList<>(codePointOccurrenceCounts.entrySet());
+        // Descending order sort by occurrence (value side)
+        Collections.sort(codePointOccurrenceArray, new Comparator<Entry<Integer, Integer>>() {
+            @Override
+            public int compare(final Entry<Integer, Integer> a, final Entry<Integer, Integer> b) {
+                return b.getValue().compareTo(a.getValue());
+            }
+        });
+        int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE;
+        final HashMap<Integer, Integer> codePointToOneByteCodeMap = new HashMap<>();
+        for (final Entry<Integer, Integer> entry : codePointOccurrenceArray) {
+            // Map code points to one-byte codes, most frequent first, until the range is used up.
+            codePointToOneByteCodeMap.put(entry.getKey(), currentCodePointTableIndex);
+            if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) {
+                break;
+            }
+        }
+        return new CodePointTable(codePointToOneByteCodeMap, codePointOccurrenceArray);
+    }
+
     @Override
     public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
             throws IOException, UnsupportedFormatException {
@@ -85,7 +130,12 @@
         if (mOutStream == null) {
             openStream();
         }
-        BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);
+
+        // Make code point conversion table ordered by occurrence of code points
+        final CodePointTable codePointTable = makeCodePointTable(dict);
+
+        BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions,
+                codePointTable.mCodePointOccurrenceArray);
 
         // Addresses are limited to 3 bytes, but since addresses can be relative to each node
         // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding

diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java
new file mode 100644
index 0000000..7c0b92d
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java

@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Log;
+
+/**
+ * Unit tests for Ver2DictEncoder
+ */
+@LargeTest
+public class Ver2DictEncoderTests extends AndroidTestCase {
+    private static final String TAG = Ver2DictEncoderTests.class.getSimpleName();
+    private static final int UNIGRAM_FREQ = 10;
+
+    /** Verifies the occurrence array and one-byte code map built by makeCodePointTable(). */
+    public void testCodePointTable() {
+        final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"};
+        final List<String> words = Arrays.asList(wordSource);
+        final String correctCodePointTable = "eotdsanirfg bclwup";
+        final String correctCodePointOccurrenceArrayString =
+                "10141164111411531003110297210521142103111911171108198199132111211021";
+        final String correctCodePointExpectedMapString = "323433363538373940494147454644424348";
+        final String dictName = "codePointTableTest";
+        final String dictVersion = Long.toString(System.currentTimeMillis());
+
+        final FormatSpec.FormatOptions formatOptions =
+                new FormatSpec.FormatOptions(FormatSpec.VERSION2);
+        final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
+                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
+        addUnigrams(sourcedict, words, null /* shortcutMap */);
+        final CodePointTable codePointTable = Ver2DictEncoder.makeCodePointTable(sourcedict);
+
+        // Check if mCodePointOccurrenceArray is correct (expected value goes first)
+        final StringBuilder codePointOccurrenceArrayString = new StringBuilder();
+        for (Entry<Integer, Integer> entry : codePointTable.mCodePointOccurrenceArray) {
+            codePointOccurrenceArrayString.append(entry.getKey()).append(entry.getValue());
+        }
+        assertEquals(correctCodePointOccurrenceArrayString,
+                codePointOccurrenceArrayString.toString());
+
+        // Check if mCodePointToOneByteCodeMap is correct (expected value goes first)
+        final StringBuilder codePointExpectedMapString = new StringBuilder();
+        for (int i = 0; i < correctCodePointTable.length(); ++i) {
+            codePointExpectedMapString.append(codePointTable.mCodePointToOneByteCodeMap.get(
+                    correctCodePointTable.codePointAt(i)));
+        }
+        assertEquals(correctCodePointExpectedMapString, codePointExpectedMapString.toString());
+    }
+
+    /**
+     * Adds unigrams to the dictionary.
+     */
+    private void addUnigrams(final FusionDictionary dict, final List<String> words,
+            final HashMap<String, List<String>> shortcutMap) {
+        for (final String word : words) {
+            final ArrayList<WeightedString> shortcuts = new ArrayList<>();
+            if (shortcutMap != null && shortcutMap.containsKey(word)) {
+                for (final String shortcut : shortcutMap.get(word)) {
+                    shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
+                }
+            }
+            dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
+                    (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
+        }
+    }
+}
commit	a69c170a3f934dbeec9e1ea5e25c7b737b299c24	[log] [tgz]
author	Keisuke Kuroyanagi <ksk@google.com>	Mon Sep 29 10:26:38 2014 +0000
committer	Android (Google) Code Review <android-gerrit@google.com>	Mon Sep 29 10:26:39 2014 +0000
tree	8083ab47a42b97e04e7687d1c2ed338802078186
parent	4936d259d7eaa00017b73fdb3a1ae0fadb0531e2 [diff]
parent	79bb37d499ed6fcabe981153d5ff0b5b69509933 [diff]