Cosmetic fixes and a bug fix in UnigramDictionary::testCharGroupForContinuedLikeness(). This change has actually been extracted from a change work in progress I4fe423834b8131fb122251892c98228a6e08ba25 Change-Id: I52568fa09da2ea22be7f8bfe9676b7cd73c31fa4

commit: f2789819bd005b5b0581e8439601b5501306327d [log] [tgz]
author: Ken Wakasa <kwakasa@google.com> Tue Sep 04 12:49:46 2012 +0900
committer: Ken Wakasa <kwakasa@google.com> Tue Sep 04 14:23:37 2012 +0900
tree: a293e9aab8c0d097996e681cc86172f84caf37eb
parent: 1a397ececf88f2daffa103f8411aa9cbaabb50da [diff] [blame]
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index d4c51df..b7e245a 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp

@@ -58,12 +58,12 @@
 }
 
 static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
-    return sizeof(*codes) * codesSize;
+    return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
 }
 
 // TODO: This needs to take a const unsigned short* and not tinker with its contents
-static inline void addWord(
-        unsigned short *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
+static inline void addWord(unsigned short *word, int length, int frequency,
+        WordsPriorityQueue *queue, int type) {
     queue->push(frequency, word, length, type);
 }
 
@@ -106,7 +106,7 @@
         WordsPriorityQueuePool *queuePool,
         const digraph_t *const digraphs, const unsigned int digraphsSize) const {
 
-    const int startIndex = codesDest - codesBuffer;
+    const int startIndex = static_cast<int>(codesDest - codesBuffer);
     if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
         for (int i = 0; i < codesRemain; ++i) {
             xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
@@ -170,8 +170,7 @@
 // bigramMap contains the association <bigram address> -> <bigram frequency>
 // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
 // in bigram_dictionary.cpp
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
-        const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
         const int *ycoordinates, const int *codes, const int codesSize,
         const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
         const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
@@ -597,11 +596,10 @@
 
 void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int *ycoordinates, const int *codes,
-        const bool useFullEditDistance, const int inputSize,
-        Correction *correction, WordsPriorityQueuePool *queuePool,
-        const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
-        const int outputWordLength, int *freqArray, int *wordLengthArray,
-        unsigned short *outputWord) const {
+        const bool useFullEditDistance, const int inputSize, Correction *correction,
+        WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
+        const int startInputPos, const int startWordIndex, const int outputWordLength,
+        int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
     if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
         // Return if the last word index
         return;
@@ -641,7 +639,7 @@
         // Missing space
         inputWordStartPos = i;
         inputWordLength = inputSize - i;
-        if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+        if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                 useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
                 startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
                 false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
@@ -724,13 +722,13 @@
 // In and out parameters may point to the same location. This function takes care
 // not to use any input parameters after it wrote into its outputs.
 static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
-        const uint8_t *const root, const int startPos,
-        const uint16_t *const inWord, const int startInputIndex,
-        int32_t *outNewWord, int *outInputIndex, int *outPos) {
+        const uint8_t *const root, const int startPos, const uint16_t *const inWord,
+        const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
+        int *outPos) {
     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
     int pos = startPos;
-    int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-    int32_t baseChar = toBaseLowerCase(character);
+    int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+    int32_t baseChar = toBaseLowerCase(codePoint);
     const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
 
     if (baseChar != wChar) {
@@ -739,18 +737,18 @@
         return false;
     }
     int inputIndex = startInputIndex;
-    outNewWord[inputIndex] = character;
+    outNewWord[inputIndex] = codePoint;
     if (hasMultipleChars) {
-        character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-        while (NOT_A_CHARACTER != character) {
-            baseChar = toBaseLowerCase(character);
-            if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+        codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+        while (NOT_A_CODE_POINT != codePoint) {
+            baseChar = toBaseLowerCase(codePoint);
+            if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
                 *outPos = BinaryFormat::skipOtherCharacters(root, pos);
                 *outInputIndex = startInputIndex;
                 return false;
             }
-            outNewWord[inputIndex] = character;
-            character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+            outNewWord[inputIndex] = codePoint;
+            codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
         }
     }
     *outInputIndex = inputIndex + 1;
@@ -765,8 +763,9 @@
 static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
         short unsigned int *outWord, int *maxFreq) {
     if (freq > *maxFreq) {
-        for (int q = 0; q < length; ++q)
+        for (int q = 0; q < length; ++q) {
             outWord[q] = newWord[q];
+        }
         outWord[length] = 0;
         *maxFreq = freq;
     }
@@ -775,7 +774,7 @@
 // Will find the highest frequency of the words like the one passed as an argument,
 // that is, everything that only differs by case/accents.
 int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
-        const int length, short unsigned int *outWord) const {
+        const int inputSize, short unsigned int *outWord) const {
     int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
     int depth = 0;
     int maxFreq = -1;
@@ -795,12 +794,12 @@
             int inputIndex = stackInputIndex[depth];
             const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
             // Test whether all chars in this group match with the word we are searching for. If so,
-            // we want to traverse its children (or if the length match, evaluate its frequency).
+            // we want to traverse its children (or if the inputSize match, evaluate its frequency).
             // Note that this function will output the position regardless, but will only write
             // into inputIndex if there is a match.
             const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
-                    inputIndex, newWord, &inputIndex, &pos);
-            if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
+                    inputIndex, inputSize, newWord, &inputIndex, &pos);
+            if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
                 const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
                 onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
             }
@@ -809,8 +808,8 @@
             const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
             // If we had a match and the word has children, we want to traverse them. We don't have
             // to traverse words longer than the one we are searching for, since they will not match
-            // anyway, so don't traverse unless inputIndex < length.
-            if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) {
+            // anyway, so don't traverse unless inputIndex < inputSize.
+            if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
                 // Save position for this depth, to get back to this once children are done
                 stackChildCount[depth] = charGroupIndex;
                 stackSiblingPos[depth] = siblingPos;
@@ -853,7 +852,7 @@
     if (hasMultipleChars) {
         pos = BinaryFormat::skipOtherCharacters(root, pos);
     } else {
-        BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
+        BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
     }
     const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
     return unigramFreq;
@@ -907,23 +906,23 @@
     // else if FLAG_IS_TERMINAL: the frequency
     // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
     // Note that you can't have a node that both is not a terminal and has no children.
-    int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
-    assert(NOT_A_CHARACTER != c);
+    int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+    assert(NOT_A_CODE_POINT != c);
 
     // We are going to loop through each character and make it look like it's a different
     // node each time. To do that, we will process characters in this node in order until
-    // we find the character terminator. This is signalled by getCharCode* returning
-    // NOT_A_CHARACTER.
+    // we find the character terminator. This is signalled by getCodePoint* returning
+    // NOT_A_CODE_POINT.
     // As a special case, if there is only one character in this node, we must not read the
-    // next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags.
+    // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
     // This way, each loop run will look like a "virtual node".
     do {
         // We prefetch the next char. If 'c' is the last char of this node, we will have
-        // NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node
+        // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
         // should behave as a terminal or not and whether we have children.
         const int32_t nextc = hasMultipleChars
-                ? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER;
-        const bool isLastChar = (NOT_A_CHARACTER == nextc);
+                ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
+        const bool isLastChar = (NOT_A_CODE_POINT == nextc);
         // If there are more chars in this nodes, then this virtual node is not a terminal.
         // If we are on the last char, this virtual node is a terminal if this node is.
         const bool isTerminal = isLastChar && isTerminalNode;
@@ -952,9 +951,9 @@
 
         // Prepare for the next character. Promote the prefetched char to current char - the loop
         // will take care of prefetching the next. If we finally found our last char, nextc will
-        // contain NOT_A_CHARACTER.
+        // contain NOT_A_CODE_POINT.
         c = nextc;
-    } while (NOT_A_CHARACTER != c);
+    } while (NOT_A_CODE_POINT != c);
 
     if (isTerminalNode) {
         // The frequency should be here, because we come here only if this is actually
commit	f2789819bd005b5b0581e8439601b5501306327d	[log] [tgz]
author	Ken Wakasa <kwakasa@google.com>	Tue Sep 04 12:49:46 2012 +0900
committer	Ken Wakasa <kwakasa@google.com>	Tue Sep 04 14:23:37 2012 +0900
tree	a293e9aab8c0d097996e681cc86172f84caf37eb
parent	1a397ececf88f2daffa103f8411aa9cbaabb50da [diff] [blame]