Remove checkFirstCharacter from BigramDictionary. Bug: 10028452 Change-Id: I27b147e83b312d73e975a0b2bc8074b33906e56e

commit: 2a2aac2568e3f2da3efc8aeaa392696471d63417 [log] [tgz]
author: Keisuke Kuroynagi <ksk@google.com> Wed Jul 31 13:47:23 2013 +0900
committer: Keisuke Kuroynagi <ksk@google.com> Wed Jul 31 13:47:23 2013 +0900
tree: 6df61a0f10689fc2fbcd965da5bc38910638832a
parent: ab2d273133a8fe2e1e0a66952d74127629394a4c [diff]
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6e1b80e..8b46c26 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp

@@ -186,7 +186,7 @@
                 scores, spaceIndices, outputTypes);
     } else {
         count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
-                inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
+                outputCodePoints, scores, outputTypes);
     }
 
     // Copy back the output values

diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 09eecd3..3248d21 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp

@@ -88,21 +88,14 @@
 /* Parameters :
  * prevWord: the word before, the one for which we need to look up bigrams.
  * prevWordLength: its length.
- * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
- * inputSize: the size of the codes array.
- * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
- * bigramProbability: an array to output frequencies.
+ * outBigramCodePoints: an array for output, in the same format as outWords for getSuggestions.
+ * outBigramProbability: an array to output frequencies.
  * outputTypes: an array to output types.
  * This method returns the number of bigrams this word has, for backward compatibility.
- * Note: this is not the number of bigrams output in the array, which is the number of
- * bigrams this word has WHOSE first letter also matches the letter the user typed.
- * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
- * used to match the first letter of the second word, but once the user has typed more
- * and the bigrams are used to boost unigram result scores, it makes little sense to
- * reduce their scope to the ones that match the first letter.
  */
-int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
-        int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
+int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
+        int *const outBigramCodePoints, int *const outBigramProbability,
+        int *const outputTypes) const {
     // TODO: remove unused arguments, and refrain from storing stuff in members of this class
     // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
 
@@ -127,21 +120,16 @@
                 getCodePointsAndProbabilityAndReturnCodePointCount(
                         mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
                         bigramBuffer, &unigramProbability);
-
-        // inputSize == 0 means we are trying to find bigram predictions.
-        if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
-            const int bigramProbabilityTemp = bigramsIt.getProbability();
-            // Due to space constraints, the probability for bigrams is approximate - the lower the
-            // unigram probability, the worse the precision. The theoritical maximum error in
-            // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
-            // in very bad cases. This means that sometimes, we'll see some bigrams interverted
-            // here, but it can't get too bad.
-            const int probability = ProbabilityUtils::computeProbabilityForBigram(
-                    unigramProbability, bigramProbabilityTemp);
-            addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
-                    outputTypes);
-            ++bigramCount;
-        }
+        // Due to space constraints, the probability for bigrams is approximate - the lower the
+        // unigram probability, the worse the precision. The theoretical maximum error in
+        // resulting probability is 8 - although in practice it's never bigger than 3 or 4
+        // in very bad cases. This means that sometimes, we'll see some bigrams swapped
+        // here, but it can't get too bad.
+        const int probability = ProbabilityUtils::computeProbabilityForBigram(
+                unigramProbability, bigramsIt.getProbability());
+        addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
+                outputTypes);
+        ++bigramCount;
     }
     return min(bigramCount, MAX_RESULTS);
 }
@@ -158,22 +146,6 @@
             mBinaryDictionaryInfo, pos);
 }
 
-bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
-    // Checks whether this word starts with same character or neighboring characters of
-    // what user typed.
-
-    int maxAlt = MAX_ALTERNATIVES;
-    const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
-    while (maxAlt > 0) {
-        if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
-            return true;
-        }
-        inputCodePoints++;
-        maxAlt--;
-    }
-    return false;
-}
-
 bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
         int length1) const {
     int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);

diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 7706a2c..438c34c 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h

@@ -27,8 +27,8 @@
  public:
     BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
 
-    int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
-            int *outWords, int *frequencies, int *outputTypes) const;
+    int getPredictions(const int *word, int length, int *outBigramCodePoints,
+            int *outBigramProbability, int *outputTypes) const;
     bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
     ~BigramDictionary();
 
@@ -37,13 +37,10 @@
 
     void addWordBigram(int *word, int length, int probability, int *bigramProbability,
             int *bigramCodePoints, int *outputTypes) const;
-    bool checkFirstCharacter(int *word, int *inputCodePoints) const;
     int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
             const bool forceLowerCaseSearch) const;
 
     const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
-    // TODO: Re-implement proximity correction for bigram correction
-    static const int MAX_ALTERNATIVES = 1;
 };
 } // namespace latinime
 #endif // LATINIME_BIGRAM_DICTIONARY_H

diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 52e6359..f597f99 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp

@@ -77,11 +77,10 @@
     }
 }
 
-int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
-        int *outWords, int *frequencies, int *outputTypes) const {
+int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
+        int *outputTypes) const {
     if (length <= 0) return 0;
-    return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
-            frequencies, outputTypes);
+    return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
 }
 
 int Dictionary::getProbability(const int *word, int length) const {

diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 1bf24a8..9f1e072 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h

@@ -62,8 +62,8 @@
             const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
             int *spaceIndices, int *outputTypes) const;
 
-    int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
-            int *frequencies, int *outputTypes) const;
+    int getBigrams(const int *word, int length, int *outWords, int *frequencies,
+            int *outputTypes) const;
 
     int getProbability(const int *word, int length) const;
commit	2a2aac2568e3f2da3efc8aeaa392696471d63417	[log] [tgz]
author	Keisuke Kuroynagi <ksk@google.com>	Wed Jul 31 13:47:23 2013 +0900
committer	Keisuke Kuroynagi <ksk@google.com>	Wed Jul 31 13:47:23 2013 +0900
tree	6df61a0f10689fc2fbcd965da5bc38910638832a
parent	ab2d273133a8fe2e1e0a66952d74127629394a4c [diff]