Merge "Add more KeyboardState unit test cases"

commit: 1f79c87675d3f03691ae949860cdfc5c2f3195ce [log] [tgz]
author: Tadashi G. Takaoka <takaoka@google.com> Mon Jan 16 00:59:48 2012 -0800
committer: Android (Google) Code Review <android-gerrit@google.com> Mon Jan 16 00:59:48 2012 -0800
tree: 2b95078ee67703911d1516c81d20aebfdb9dda9a
parent: 67e13976b78619ec7bc45b5377a9f5da2534cfd5 [diff]
parent: 6d5c9c9edceddb70b03728aca21fed48d01bd973 [diff]
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index c40f94b..43ab3f5 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp

@@ -215,20 +215,10 @@
 }
 
 // TODO: remove
-int Correction::getOutputIndex() {
-    return mOutputIndex;
-}
-
-// TODO: remove
 int Correction::getInputIndex() {
     return mInputIndex;
 }
 
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
-    return mNeedsToTraverseAllNodes;
-}
-
 void Correction::incrementInputIndex() {
     ++mInputIndex;
 }
@@ -278,13 +268,12 @@
             mWord, mOutputIndex + 1);
 }
 
-// TODO: inline?
 Correction::CorrectionType Correction::processSkipChar(
         const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
     addCharToCurrentWord(c);
-    if (needsToTraverseAllNodes() && isTerminal) {
-        mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
-        mTerminalOutputIndex = mOutputIndex;
+    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+    mTerminalOutputIndex = mOutputIndex;
+    if (mNeedsToTraverseAllNodes && isTerminal) {
         incrementOutputIndex();
         return TRAVERSE_ALL_ON_TERMINAL;
     } else {
@@ -293,6 +282,13 @@
     }
 }
 
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+    // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+    mTerminalInputIndex = mInputIndex;
+    mTerminalOutputIndex = mOutputIndex;
+    return UNRELATED;
+}
+
 inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
     return type == ProximityInfo::EQUIVALENT_CHAR;
 }
@@ -301,7 +297,7 @@
         const int32_t c, const bool isTerminal) {
     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
     if (correctionCount > mMaxErrors) {
-        return UNRELATED;
+        return processUnrelatedCorrectionType();
     }
 
     // TODO: Change the limit if we'll allow two or more corrections
@@ -381,7 +377,7 @@
                 AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                         mTransposedCount, mExcessiveCount, c);
             }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
         }
     }
 
@@ -484,7 +480,7 @@
                 AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                         mTransposedCount, mExcessiveCount, c);
             }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
         }
     } else if (secondTransposing) {
         // If inputIndex is greater than mInputLength, that means there is no
@@ -539,6 +535,8 @@
         }
         return ON_TERMINAL;
     } else {
+        mTerminalInputIndex = mInputIndex - 1;
+        mTerminalOutputIndex = mOutputIndex - 1;
         return NOT_ON_TERMINAL;
     }
 }

diff --git a/native/src/correction.h b/native/src/correction.h
index 4012e7e..a0fd55f 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h

@@ -48,7 +48,6 @@
     void checkState();
     bool initProcessState(const int index);
 
-    int getOutputIndex();
     int getInputIndex();
 
     virtual ~Correction();
@@ -115,11 +114,11 @@
  private:
     inline void incrementInputIndex();
     inline void incrementOutputIndex();
-    inline bool needsToTraverseAllNodes();
     inline void startToTraverseAllNodes();
     inline bool isQuote(const unsigned short c);
     inline CorrectionType processSkipChar(
             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+    inline CorrectionType processUnrelatedCorrectionType();
     inline void addCharToCurrentWord(const int32_t c);
 
     const int TYPED_LETTER_MULTIPLIER;

diff --git a/native/src/defines.h b/native/src/defines.h
index 01ef656..48f92d8 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h

@@ -22,9 +22,23 @@
 #include <cutils/log.h>
 #define AKLOGE ALOGE
 #define AKLOGI ALOGI
+
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+    for (int i = 0; i < length; ++i) {
+        charBuf[i] = word[i];
+    }
+    charBuf[length] = 0;
+    AKLOGI("[ %s ]", charBuf);
+}
+
 #else
 #define AKLOGE(fmt, ...)
 #define AKLOGI(fmt, ...)
+#define DUMP_WORD(word, length)
 #endif
 
 #ifdef FLAG_DO_PROFILE
@@ -106,18 +120,6 @@
 #define DEBUG_CORRECTION_FREQ true
 #define DEBUG_WORDS_PRIORITY_QUEUE true
 
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-
-static char charBuf[50];
-
-static void dumpWord(const unsigned short* word, const int length) {
-    for (int i = 0; i < length; ++i) {
-        charBuf[i] = word[i];
-    }
-    charBuf[length] = 0;
-    AKLOGI("[ %s ]", charBuf);
-}
-
 #else // FLAG_DBG
 
 #define DEBUG_DICT false
@@ -131,7 +133,6 @@
 #define DEBUG_CORRECTION_FREQ false
 #define DEBUG_WORDS_PRIORITY_QUEUE false
 
-#define DUMP_WORD(word, length)
 
 #endif // FLAG_DBG
 
@@ -207,7 +208,8 @@
 
 // Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
 // for better performance.
-#define SUB_QUEUE_MAX_WORDS 5
+// Holds up to 1 candidate for each word
+#define SUB_QUEUE_MAX_WORDS 1
 #define SUB_QUEUE_MAX_COUNT 10
 
 #define MAX_DEPTH_MULTIPLIER 3

diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index ca7f0be..85e7e9b 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp

@@ -186,7 +186,7 @@
 
     PROF_OPEN;
     PROF_START(0);
-    // Note: This line is intentionally left blank
+    queuePool->clearAll();
     PROF_END(0);
 
     PROF_START(1);
@@ -241,18 +241,17 @@
         }
     }
     PROF_END(6);
+    if (DEBUG_WORDS_PRIORITY_QUEUE) {
+        queuePool->dumpSubQueue1TopSuggestions();
+    }
 }
 
 void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
-        const int *yCoordinates, const int *codes, const int inputLength,
-        WordsPriorityQueue *queue, Correction *correction) {
+        const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
     if (DEBUG_DICT) {
         AKLOGI("initSuggest");
     }
     proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
-    if (queue) {
-        queue->clear();
-    }
     const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
     correction->initCorrection(proximityInfo, inputLength, maxDepth);
 }
@@ -264,15 +263,13 @@
         const int *xcoordinates, const int *ycoordinates, const int *codes,
         const bool useFullEditDistance, const int inputLength, Correction *correction,
         WordsPriorityQueuePool *queuePool) {
-    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-    initSuggestions(
-            proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
-    getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+    getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
             true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
 }
 
 void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
-        const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+        const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
         const bool doAutoCompletion, const int maxErrors) {
     // TODO: Remove setCorrectionParams
     correction->setCorrectionParams(0, 0, 0,
@@ -292,7 +289,7 @@
             int firstChildPos;
 
             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
-                    correction, &childCount, &firstChildPos, &siblingPos, queue);
+                    correction, &childCount, &firstChildPos, &siblingPos, queuePool);
             // Update next sibling pos
             correction->setTreeSiblingPos(outputIndex, siblingPos);
 
@@ -327,14 +324,34 @@
 
 inline void UnigramDictionary::onTerminal(const int freq,
         const TerminalAttributes& terminalAttributes, Correction *correction,
-        WordsPriorityQueue *queue) {
+        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
+    const int inputIndex = correction->getInputIndex();
+    const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
+    if (!addToMasterQueue && !addToSubQueue) {
+        return;
+    }
+    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+    WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
     int wordLength;
     unsigned short* wordPointer;
     const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
     if (finalFreq >= 0) {
         if (!terminalAttributes.isShortcutOnly()) {
-            addWord(wordPointer, wordLength, finalFreq, queue);
+            if (addToMasterQueue) {
+                addWord(wordPointer, wordLength, finalFreq, masterQueue);
+            }
+            // TODO: Check the validity of "inputIndex == wordLength"
+            //if (addToSubQueue && inputIndex == wordLength) {
+            if (addToSubQueue) {
+                addWord(wordPointer, wordLength, finalFreq, subQueue);
+            }
         }
+        // Please note that the shortcut candidates will be added to the master queue only.
+        if (!addToMasterQueue) {
+            return;
+        }
+
+        // From here, below is the code to add shortcut candidates.
         TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
         while (iterator.hasNextShortcutTarget()) {
             // TODO: addWord only supports weak ordering, meaning we have no means to control the
@@ -345,7 +362,7 @@
             uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
             const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                     MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
-            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
         }
     }
 }
@@ -411,8 +428,7 @@
     }
 
     // TODO: Remove initSuggestions and correction->setCorrectionParams
-    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
-            0 /* do not clear queue */, correction);
+    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
 
     correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
             -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@@ -584,7 +600,7 @@
 // given level, as output into newCount when traversing this level's parent.
 inline bool UnigramDictionary::processCurrentNode(const int initialPos,
         Correction *correction, int *newCount,
-        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
+        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
     if (DEBUG_DICT) {
         correction->checkState();
     }
@@ -659,15 +675,13 @@
     } while (NOT_A_CHARACTER != c);
 
     if (isTerminalNode) {
-        if (needsToInvokeOnTerminal) {
-            // The frequency should be here, because we come here only if this is actually
-            // a terminal node, and we are on its last char.
-            const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-            const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
-            const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
-            TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
-            onTerminal(freq, terminalAttributes, correction, queue);
-        }
+        // The frequency should be here, because we come here only if this is actually
+        // a terminal node, and we are on its last char.
+        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+        const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
+        TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+        onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
 
         // If there are more chars in this node, then this virtual node has children.
         // If we are on the last char, this virtual node has children if this node has.

diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 2358142..5e7a758 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h

@@ -93,14 +93,13 @@
         const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
         WordsPriorityQueuePool* queuePool);
     void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
-            const int *ycoordinates, const int *codes, const int codesSize,
-            WordsPriorityQueue *queue, Correction *correction);
+            const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
     void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const bool useFullEditDistance,
             const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
     void getSuggestionCandidates(
             const bool useFullEditDistance, const int inputLength, Correction *correction,
-            WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+            WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
     void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
             const int *xcoordinates, const int *ycoordinates, const int *codes,
             const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@@ -114,12 +113,12 @@
             const int inputLength, const int spaceProximityPos, Correction *correction,
             WordsPriorityQueuePool* queuePool);
     void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
-            Correction *correction, WordsPriorityQueue *queue);
+            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
     // Process a node by considering proximity, missing and excessive character
     bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
-            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
+            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
     int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
             ProximityInfo *proximityInfo, unsigned short *word);
     int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,

diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index ce5d2ce..54bf27a 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h

@@ -128,6 +128,13 @@
         }
     }
 
+    void dumpTopWord() {
+        if (size() <= 0) {
+            return;
+        }
+        DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
+    }
+
  private:
     struct wordComparator {
         bool operator ()(SuggestedWord * left, SuggestedWord * right) {

diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index bf9619e..5fa2548 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h

@@ -58,6 +58,21 @@
         return mSubQueues2[id];
     }
 
+    inline void clearAll() {
+        mMasterQueue->clear();
+        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+            mSubQueues1[i]->clear();
+            mSubQueues2[i]->clear();
+        }
+    }
+
+    void dumpSubQueue1TopSuggestions() {
+        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
+        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+            mSubQueues1[i]->dumpTopWord();
+        }
+    }
+
  private:
     WordsPriorityQueue* mMasterQueue;
     WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];

diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index fcbb645..ae54c5d 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java

@@ -144,7 +144,6 @@
 
     private static final int GROUP_CHARACTERS_TERMINATOR = 0x1F;
 
-    private static final int GROUP_COUNT_SIZE = 1;
     private static final int GROUP_TERMINATOR_SIZE = 1;
     private static final int GROUP_FLAGS_SIZE = 1;
     private static final int GROUP_FREQUENCY_SIZE = 1;
@@ -155,9 +154,8 @@
     private static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
     private static final int INVALID_CHARACTER = -1;
 
-    // Limiting to 127 for upward compatibility
-    // TODO: implement a scheme to be able to shoot 256 chargroups in a node
-    private static final int MAX_CHARGROUPS_IN_A_NODE = 127;
+    private static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
+    private static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
 
     private static final int MAX_TERMINAL_FREQUENCY = 255;
 
@@ -267,6 +265,22 @@
     }
 
     /**
+     * Compute the binary size of the group count for a node
+     * @param node the node
+     * @return the size of the group count, either 1 or 2 bytes.
+     */
+    private static int getGroupCountSize(final Node node) {
+        if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= node.mData.size()) {
+            return 1;
+        } else if (MAX_CHARGROUPS_IN_A_NODE >= node.mData.size()) {
+            return 2;
+        } else {
+            throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE
+                    + " groups in a node (found " + node.mData.size() +")");
+        }
+    }
+
+    /**
      * Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything.
      *
      * @param group the CharGroup to compute the size of.
@@ -295,7 +309,7 @@
      * @param node the node to compute the maximum size of.
      */
     private static void setNodeMaximumSize(Node node) {
-        int size = GROUP_COUNT_SIZE;
+        int size = getGroupCountSize(node);
         for (CharGroup g : node.mData) {
             final int groupSize = getCharGroupMaximumSize(g);
             g.mCachedSize = groupSize;
@@ -394,7 +408,7 @@
      * @param dict the dictionary in which the word/attributes are to be found.
      */
     private static void computeActualNodeSize(Node node, FusionDictionary dict) {
-        int size = GROUP_COUNT_SIZE;
+        int size = getGroupCountSize(node);
         for (CharGroup group : node.mData) {
             int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
             if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE;
@@ -437,12 +451,13 @@
         int nodeOffset = 0;
         for (Node n : flatNodes) {
             n.mCachedAddress = nodeOffset;
+            int groupCountSize = getGroupCountSize(n);
             int groupOffset = 0;
             for (CharGroup g : n.mData) {
-                g.mCachedAddress = GROUP_COUNT_SIZE + nodeOffset + groupOffset;
+                g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
                 groupOffset += g.mCachedSize;
             }
-            if (groupOffset + GROUP_COUNT_SIZE != n.mCachedSize) {
+            if (groupOffset + groupCountSize != n.mCachedSize) {
                 throw new RuntimeException("Bug : Stored and computed node size differ");
             }
             nodeOffset += n.mCachedSize;
@@ -629,13 +644,20 @@
     private static int writePlacedNode(FusionDictionary dict, byte[] buffer, Node node) {
         int index = node.mCachedAddress;
 
-        final int size = node.mData.size();
-        if (size > MAX_CHARGROUPS_IN_A_NODE)
-            throw new RuntimeException("A node has a group count over 127 (" + size + ").");
-
-        buffer[index++] = (byte)size;
+        final int groupCount = node.mData.size();
+        final int countSize = getGroupCountSize(node);
+        if (1 == countSize) {
+            buffer[index++] = (byte)groupCount;
+        } else if (2 == countSize) {
+            // We need to signal 2-byte size by setting the top bit of the MSB to 1, so
+            // we | 0x80 to do this.
+            buffer[index++] = (byte)((groupCount >> 8) | 0x80);
+            buffer[index++] = (byte)(groupCount & 0xFF);
+        } else {
+            throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
+        }
         int groupAddress = index;
-        for (int i = 0; i < size; ++i) {
+        for (int i = 0; i < groupCount; ++i) {
             CharGroup group = node.mData.get(i);
             if (index != group.mCachedAddress) throw new RuntimeException("Bug: write index is not "
                     + "the same as the cached address of the group");
commit	1f79c87675d3f03691ae949860cdfc5c2f3195ce	[log] [tgz]
author	Tadashi G. Takaoka <takaoka@google.com>	Mon Jan 16 00:59:48 2012 -0800
committer	Android (Google) Code Review <android-gerrit@google.com>	Mon Jan 16 00:59:48 2012 -0800
tree	2b95078ee67703911d1516c81d20aebfdb9dda9a
parent	67e13976b78619ec7bc45b5377a9f5da2534cfd5 [diff]
parent	6d5c9c9edceddb70b03728aca21fed48d01bd973 [diff]