Dump binary dictionary information when opening
Bug: 9459517
Change-Id: I122583e734936ae0284e1c7500c6c9242bc7973b
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index a93bbeb..2b8dbbc 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -93,8 +93,8 @@
AKLOGE("DICT: dictionary format is unknown, bad magic number");
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
} else {
- dictionary = new Dictionary(
- dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap);
+ dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
+ updatableMmap);
}
PROF_END(66);
PROF_CLOSE;
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index cb66814..607a744 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -35,6 +35,56 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
+
+AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
+ char *dest, const int destSize) {
+ // We want to always terminate with a 0 char, so stop one short of the length to make
+ // sure there is room.
+ const int destLimit = destSize - 1;
+ int si = 0;
+ int di = 0;
+ while (si < sourceSize && di < destLimit && 0 != source[si]) {
+ const int codePoint = source[si++];
+ if (codePoint < 0x7F) { // One byte
+ dest[di++] = codePoint;
+ } else if (codePoint < 0x7FF) { // Two bytes
+ if (di + 1 >= destLimit) break;
+ dest[di++] = 0xC0 + (codePoint >> 6);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint < 0xFFFF) { // Three bytes
+ if (di + 2 >= destLimit) break;
+ dest[di++] = 0xE0 + (codePoint >> 12);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x1FFFFF) { // Four bytes
+ if (di + 3 >= destLimit) break;
+ dest[di++] = 0xF0 + (codePoint >> 18);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x3FFFFFF) { // Five bytes
+ if (di + 4 >= destLimit) break;
+ dest[di++] = 0xF8 + (codePoint >> 24);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = codePoint & 0x3F;
+ } else if (codePoint <= 0x7FFFFFFF) { // Six bytes
+ if (di + 5 >= destLimit) break;
+ dest[di++] = 0xFC + (codePoint >> 30);
+ dest[di++] = 0x80 + ((codePoint >> 24) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = codePoint & 0x3F;
+ } else {
+ // Not a code point... skip.
+ }
+ }
+ dest[di] = 0;
+ return di;
+}
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h>
@@ -46,35 +96,13 @@
#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
-#define INTS_TO_CHARS(input, length, output) do { \
- intArrayToCharArray(input, length, output); } while (0)
-
-// TODO: Support full UTF-8 conversion
-AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
- char *dest) {
- int si = 0;
- int di = 0;
- while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) {
- const int codePoint = source[si++];
- if (codePoint < 0x7F) {
- dest[di++] = codePoint;
- } else if (codePoint < 0x7FF) {
- dest[di++] = 0xC0 + (codePoint >> 6);
- dest[di++] = 0x80 + (codePoint & 0x3F);
- } else if (codePoint < 0xFFFF) {
- dest[di++] = 0xE0 + (codePoint >> 12);
- dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6);
- dest[di++] = 0x80 + (codePoint & 0x3F);
- }
- }
- dest[di] = 0;
- return di;
-}
+#define INTS_TO_CHARS(input, length, output, outlength) do { \
+ intArrayToCharArray(input, length, output, outlength); } while (0)
static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int probability) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
}
@@ -90,7 +118,7 @@
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("[ %s ]", charBuf);
}
@@ -304,8 +332,6 @@
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
-#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
-
// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 52db8e9..017df34 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -28,15 +28,16 @@
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
+ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
+ NELEMS(prevWordCharBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
index 6dba0b2..240512b 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
@@ -53,6 +53,20 @@
return mMultiWordCostMultiplier;
}
+ AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
+ int *outValue, int outValueSize) const {
+ if (outValueSize <= 0) return;
+ if (outValueSize == 1) {
+ outValue[0] = '\0';
+ return;
+ }
+ if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
+ key, outValue, outValueSize)) {
+ outValue[0] = '?';
+ outValue[1] = '\0';
+ }
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index 6e1b15c..c4c4bed 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -82,8 +82,10 @@
if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
// The key was found.
- ByteArrayUtils::readStringAndAdvancePosition(
+ const int length = ByteArrayUtils::readStringAndAdvancePosition(
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
+ // Add a 0 terminator to the string.
+ outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
return true;
}
ByteArrayUtils::advancePositionToBehindString(
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index 7cb3144..0fd4f69 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -20,23 +20,27 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
+#include "utils/log_utils.h"
namespace latinime {
class BinaryDictionaryInfo {
public:
- BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
- const int dictBufOffset, const bool isUpdatable)
+ BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
+ const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
- mDictionaryFormat)) {}
+ mDictionaryFormat)) {
+ logDictionaryInfo(env);
+ }
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -88,6 +92,33 @@
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy;
+
+ AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
+ const int BUFFER_SIZE = 16;
+ int dictionaryIdCodePointBuffer[BUFFER_SIZE];
+ int versionStringCodePointBuffer[BUFFER_SIZE];
+ int dateStringCodePointBuffer[BUFFER_SIZE];
+ mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary",
+ dictionaryIdCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("version",
+ versionStringCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("date",
+ dateStringCodePointBuffer, BUFFER_SIZE);
+
+ char dictionaryIdCharBuffer[BUFFER_SIZE];
+ char versionStringCharBuffer[BUFFER_SIZE];
+ char dateStringCharBuffer[BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
+ dictionaryIdCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
+ versionStringCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
+ dateStringCharBuffer, BUFFER_SIZE);
+
+ LogUtils::logToJava(env,
+ "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
+ dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize);
+ }
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 675b549..f520a75 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -22,6 +22,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -32,8 +33,9 @@
namespace latinime {
-Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable)
- : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
+Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
+ int dictBufOffset, bool isUpdatable)
+ : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable),
mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 94579c2..1bf24a8 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime {
@@ -52,7 +53,8 @@
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable);
+ Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
+ bool isUpdatable);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
diff --git a/native/jni/src/utils/log_utils.h b/native/jni/src/utils/log_utils.h
new file mode 100644
index 0000000..ccbecfc
--- /dev/null
+++ b/native/jni/src/utils/log_utils.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LOG_UTILS_H
+#define LATINIME_LOG_UTILS_H
+
+#include <cstdio>
+#include <stdarg.h>
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+class LogUtils {
+ public:
+ static void logToJava(JNIEnv *const env, const char *const format, ...)
+#ifdef __GNUC__
+ __attribute__ ((format (printf, 2, 3)))
+#endif // __GNUC__
+ {
+ static const char *TAG = "LatinIME:LogUtils";
+ const jclass androidUtilLogClass = env->FindClass("android/util/Log");
+ const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i",
+ "(Ljava/lang/String;Ljava/lang/String;)I");
+ const jstring javaTag = env->NewStringUTF(TAG);
+
+ va_list argList;
+ va_start(argList, format);
+ // Get the necessary size. Add 1 for the 0 terminator.
+ const int size = vsnprintf(0, 0, format, argList) + 1;
+ va_end(argList);
+ char cString[size];
+ va_start(argList, format);
+ vsnprintf(cString, size, format, argList);
+ va_end(argList);
+
+ jstring javaString = env->NewStringUTF(cString);
+ env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString);
+ env->DeleteLocalRef(javaString);
+ env->DeleteLocalRef(javaTag);
+ env->DeleteLocalRef(androidUtilLogClass);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LogUtils);
+};
+} // namespace latinime
+#endif // LATINIME_LOG_UTILS_H