satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 1 | /* |
Ken Wakasa | a10b1a8 | 2013-01-08 17:23:43 +0900 | [diff] [blame] | 2 | * Copyright (C) 2010 The Android Open Source Project |
Ken Wakasa | 0bbb917 | 2012-07-25 17:51:43 +0900 | [diff] [blame] | 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
Ken Wakasa | a10b1a8 | 2013-01-08 17:23:43 +0900 | [diff] [blame] | 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
Ken Wakasa | 0bbb917 | 2012-07-25 17:51:43 +0900 | [diff] [blame] | 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 16 | |
| 17 | #ifndef LATINIME_DEFINES_H |
| 18 | #define LATINIME_DEFINES_H |
| 19 | |
Ken Wakasa | 1ce96fe | 2012-11-15 19:09:11 +0900 | [diff] [blame] | 20 | #ifdef __GNUC__ |
| 21 | #define AK_FORCE_INLINE __attribute__((always_inline)) __inline__ |
| 22 | #else // __GNUC__ |
| 23 | #define AK_FORCE_INLINE inline |
| 24 | #endif // __GNUC__ |
| 25 | |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 26 | #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) |
| 27 | #undef AK_FORCE_INLINE |
| 28 | #define AK_FORCE_INLINE inline |
| 29 | #endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) |
| 30 | |
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 31 | // Must be identical to Constants.Dictionary.MAX_WORD_LENGTH in Java |
| 32 | #define MAX_WORD_LENGTH 48 |
| 33 | // Must be identical to BinaryDictionary.MAX_RESULTS in Java |
| 34 | #define MAX_RESULTS 18 |
Satoshi Kataoka | 4221738 | 2012-12-17 23:28:17 +0900 | [diff] [blame] | 35 | |
satok | 827ced8 | 2011-07-14 09:01:09 +0900 | [diff] [blame] | 36 | #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) |
Ken Wakasa | e3f26dd | 2012-07-27 18:06:06 +0900 | [diff] [blame] | 37 | #include <android/log.h> |
| 38 | #ifndef LOG_TAG |
| 39 | #define LOG_TAG "LatinIME: " |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 40 | #endif // LOG_TAG |
// Debug/profile-build logging helpers. Both forward a printf-style format and its arguments to
// __android_log_print() under LOG_TAG: AKLOGE at ANDROID_LOG_ERROR, AKLOGI at ANDROID_LOG_INFO.
Ken Wakasa | e3f26dd | 2012-07-27 18:06:06 +0900 | [diff] [blame] | 41 | #define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) |
| 42 | #define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) |
satok | 6ad15fc | 2012-01-16 16:21:21 +0900 | [diff] [blame] | 43 | |
// Statement-like wrappers (do { ... } while (0)) around dumpResult(), dumpWord() and
// intArrayToCharArray(). The return value of intArrayToCharArray() is discarded by INTS_TO_CHARS.
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 44 | #define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) |
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 45 | #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) |
Ken Wakasa | 1e61493 | 2012-10-29 18:06:22 +0900 | [diff] [blame] | 46 | #define INTS_TO_CHARS(input, length, output) do { \ |
| 47 | intArrayToCharArray(input, length, output); } while (0) |
satok | 6ad15fc | 2012-01-16 16:21:21 +0900 | [diff] [blame] | 48 | |
Satoshi Kataoka | 4221738 | 2012-12-17 23:28:17 +0900 | [diff] [blame] | 49 | // TODO: Support full UTF-8 conversion |
| 50 | AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize, |
| 51 | char *dest) { |
| 52 | int si = 0; |
| 53 | int di = 0; |
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 54 | while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) { |
Satoshi Kataoka | 4221738 | 2012-12-17 23:28:17 +0900 | [diff] [blame] | 55 | const int codePoint = source[si++]; |
| 56 | if (codePoint < 0x7F) { |
| 57 | dest[di++] = codePoint; |
| 58 | } else if (codePoint < 0x7FF) { |
| 59 | dest[di++] = 0xC0 + (codePoint >> 6); |
| 60 | dest[di++] = 0x80 + (codePoint & 0x3F); |
| 61 | } else if (codePoint < 0xFFFF) { |
| 62 | dest[di++] = 0xE0 + (codePoint >> 12); |
| 63 | dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6); |
| 64 | dest[di++] = 0x80 + (codePoint & 0x3F); |
| 65 | } |
| 66 | } |
| 67 | dest[di] = 0; |
| 68 | return di; |
| 69 | } |
| 70 | |
Ken Wakasa | 1e61493 | 2012-10-29 18:06:22 +0900 | [diff] [blame] | 71 | static inline void dumpWordInfo(const int *word, const int length, const int rank, |
| 72 | const int frequency) { |
Satoshi Kataoka | 586b0ca | 2012-08-06 11:20:54 +0900 | [diff] [blame] | 73 | static char charBuf[50]; |
Satoshi Kataoka | 4221738 | 2012-12-17 23:28:17 +0900 | [diff] [blame] | 74 | const int N = intArrayToCharArray(word, length, charBuf); |
| 75 | if (N > 1) { |
Satoshi Kataoka | 586b0ca | 2012-08-06 11:20:54 +0900 | [diff] [blame] | 76 | AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency); |
| 77 | } |
| 78 | } |
| 79 | |
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 80 | static inline void dumpResult(const int *outWords, const int *frequencies) { |
Satoshi Kataoka | 586b0ca | 2012-08-06 11:20:54 +0900 | [diff] [blame] | 81 | AKLOGI("--- DUMP RESULT ---------"); |
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 82 | for (int i = 0; i < MAX_RESULTS; ++i) { |
| 83 | dumpWordInfo(&outWords[i * MAX_WORD_LENGTH], MAX_WORD_LENGTH, i, frequencies[i]); |
Satoshi Kataoka | 586b0ca | 2012-08-06 11:20:54 +0900 | [diff] [blame] | 84 | } |
| 85 | AKLOGI("-------------------------"); |
| 86 | } |
| 87 | |
Ken Wakasa | 1ce96fe | 2012-11-15 19:09:11 +0900 | [diff] [blame] | 88 | static AK_FORCE_INLINE void dumpWord(const int *word, const int length) { |
Tadashi G. Takaoka | d1dbdb6 | 2012-03-06 15:35:46 +0900 | [diff] [blame] | 89 | static char charBuf[50]; |
Satoshi Kataoka | 4221738 | 2012-12-17 23:28:17 +0900 | [diff] [blame] | 90 | const int N = intArrayToCharArray(word, length, charBuf); |
| 91 | if (N > 1) { |
Satoshi Kataoka | 586b0ca | 2012-08-06 11:20:54 +0900 | [diff] [blame] | 92 | AKLOGI("[ %s ]", charBuf); |
| 93 | } |
satok | 6ad15fc | 2012-01-16 16:21:21 +0900 | [diff] [blame] | 94 | } |
| 95 | |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 96 | #ifndef __ANDROID__ |
Satoshi Kataoka | 1c8fc83 | 2012-09-06 21:31:54 +0900 | [diff] [blame] | 97 | #include <cassert> |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 98 | #include <execinfo.h> |
| 99 | #include <stdlib.h> |
Satoshi Kataoka | 1c8fc83 | 2012-09-06 21:31:54 +0900 | [diff] [blame] | 100 | |
// Non-Android debug/profile builds: a failing ASSERT first logs a stack trace through
// showStackTrace() and then hands the condition to assert().
// NOTE: when the first evaluation of `success` is false the expression is evaluated again
// inside assert(), so it should be free of side effects.
Satoshi Kataoka | dd4d938 | 2013-01-09 12:54:39 +0900 | [diff] [blame] | 101 | #define DO_ASSERT_TEST |
Satoshi Kataoka | 1c8fc83 | 2012-09-06 21:31:54 +0900 | [diff] [blame] | 102 | #define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0) |
// Logs the current stack trace unconditionally.
| 103 | #define SHOW_STACK_TRACE do { showStackTrace(); } while (0) |
| 104 | |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 105 | static inline void showStackTrace() { |
| 106 | void *callstack[128]; |
| 107 | int i, frames = backtrace(callstack, 128); |
| 108 | char **strs = backtrace_symbols(callstack, frames); |
| 109 | for (i = 0; i < frames; ++i) { |
| 110 | if (i == 0) { |
| 111 | AKLOGI("=== Trace ==="); |
| 112 | continue; |
| 113 | } |
| 114 | AKLOGI("%s", strs[i]); |
| 115 | } |
| 116 | free(strs); |
| 117 | } |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 118 | #else // __ANDROID__ |
Satoshi Kataoka | 1c8fc83 | 2012-09-06 21:31:54 +0900 | [diff] [blame] | 119 | #include <cassert> |
Satoshi Kataoka | dd4d938 | 2013-01-09 12:54:39 +0900 | [diff] [blame] | 120 | #define DO_ASSERT_TEST |
Satoshi Kataoka | 1c8fc83 | 2012-09-06 21:31:54 +0900 | [diff] [blame] | 121 | #define ASSERT(success) assert(success) |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 122 | #define SHOW_STACK_TRACE |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 123 | #endif // __ANDROID__ |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 124 | |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 125 | #else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 126 | #define AKLOGE(fmt, ...) |
| 127 | #define AKLOGI(fmt, ...) |
Ken Wakasa | 5db594a | 2013-01-12 01:18:00 +0900 | [diff] [blame] | 128 | #define DUMP_RESULT(words, frequencies) |
satok | 6ad15fc | 2012-01-16 16:21:21 +0900 | [diff] [blame] | 129 | #define DUMP_WORD(word, length) |
Satoshi Kataoka | dd4d938 | 2013-01-09 12:54:39 +0900 | [diff] [blame] | 130 | #undef DO_ASSERT_TEST |
Satoshi Kataoka | 5540acb | 2012-09-03 18:35:32 +0900 | [diff] [blame] | 131 | #define ASSERT(success) |
| 132 | #define SHOW_STACK_TRACE |
Ken Wakasa | 1e61493 | 2012-10-29 18:06:22 +0900 | [diff] [blame] | 133 | #define INTS_TO_CHARS(input, length, output) |
Ken Wakasa | 6cee61d | 2013-01-15 16:15:48 +0900 | [diff] [blame] | 134 | #endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) |
satok | 827ced8 | 2011-07-14 09:01:09 +0900 | [diff] [blame] | 135 | |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 136 | #ifdef FLAG_DO_PROFILE |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 137 | // Profiler |
| 138 | #include <time.h> |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 139 | |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 140 | #define PROF_BUF_SIZE 100 |
satok | 0028ed3 | 2012-05-16 20:42:12 +0900 | [diff] [blame] | 141 | static float profile_buf[PROF_BUF_SIZE]; |
| 142 | static float profile_old[PROF_BUF_SIZE]; |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 143 | static unsigned int profile_counter[PROF_BUF_SIZE]; |
| 144 | |
// Profiler entry points. prof_buf_id indexes profile_buf / profile_old / profile_counter.
// The last slot (PROF_BUF_SIZE - 1) is the one PROF_OPEN starts and PROF_CLOSE ends.
// PROF_RESET clears every slot; PROF_COUNT bumps the call counter of one slot.
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 145 | #define PROF_RESET prof_reset() |
| 146 | #define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] |
// PROF_OPEN resets all slots and starts the last slot.
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 147 | #define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0) |
// PROF_START counts the call and stores the current clock() value in profile_old for the slot.
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 148 | #define PROF_START(prof_buf_id) do { \ |
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 149 | PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) |
// PROF_CLOSE ends the last slot and prints the report via PROF_OUTALL.
| 150 | #define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) |
// PROF_END adds the clock() ticks elapsed since the slot's stored start value to profile_buf.
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 151 | #define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) |
// PROF_CLOCKOUT logs the ticks elapsed since the slot's stored start value without accumulating.
| 152 | #define PROF_CLOCKOUT(prof_buf_id) \ |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 153 | AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) |
// PROF_OUTALL logs the calling function's name and then the report produced by prof_out().
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 154 | #define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 155 | |
Tadashi G. Takaoka | d1dbdb6 | 2012-03-06 15:35:46 +0900 | [diff] [blame] | 156 | static inline void prof_reset(void) { |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 157 | for (int i = 0; i < PROF_BUF_SIZE; ++i) { |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 158 | profile_buf[i] = 0; |
| 159 | profile_old[i] = 0; |
| 160 | profile_counter[i] = 0; |
| 161 | } |
| 162 | } |
| 163 | |
Tadashi G. Takaoka | d1dbdb6 | 2012-03-06 15:35:46 +0900 | [diff] [blame] | 164 | static inline void prof_out(void) { |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 165 | if (profile_counter[PROF_BUF_SIZE - 1] != 1) { |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 166 | AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 167 | } |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 168 | AKLOGI("Total time is %6.3f ms.", |
Ken Wakasa | 77e8e81 | 2012-08-02 19:48:08 +0900 | [diff] [blame] | 169 | profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); |
Ken Wakasa | 2a6f58d | 2012-11-27 19:40:38 +0900 | [diff] [blame] | 170 | float all = 0.0f; |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 171 | for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 172 | all += profile_buf[i]; |
| 173 | } |
Ken Wakasa | 2a6f58d | 2012-11-27 19:40:38 +0900 | [diff] [blame] | 174 | if (all < 1.0f) all = 1.0f; |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 175 | for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { |
Ken Wakasa | 2a6f58d | 2012-11-27 19:40:38 +0900 | [diff] [blame] | 176 | if (profile_buf[i] > 0.0f) { |
satok | 9fb6f47 | 2012-01-13 18:01:22 +0900 | [diff] [blame] | 177 | AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", |
Ken Wakasa | 2a6f58d | 2012-11-27 19:40:38 +0900 | [diff] [blame] | 178 | i, (profile_buf[i] * 100.0f / all), |
Ken Wakasa | 77e8e81 | 2012-08-02 19:48:08 +0900 | [diff] [blame] | 179 | profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), |
| 180 | profile_counter[i]); |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 181 | } |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 182 | } |
| 183 | } |
| 184 | |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 185 | #else // FLAG_DO_PROFILE |
satok | 61e2f85 | 2011-01-05 14:13:07 +0900 | [diff] [blame] | 186 | #define PROF_BUF_SIZE 0 |
| 187 | #define PROF_RESET |
| 188 | #define PROF_COUNT(prof_buf_id) |
| 189 | #define PROF_OPEN |
| 190 | #define PROF_START(prof_buf_id) |
| 191 | #define PROF_CLOSE |
| 192 | #define PROF_END(prof_buf_id) |
| 193 | #define PROF_CLOCK_OUT(prof_buf_id) |
| 194 | #define PROF_CLOCKOUT(prof_buf_id) |
| 195 | #define PROF_OUTALL |
| 196 | |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 197 | #endif // FLAG_DO_PROFILE |
| 198 | |
| 199 | #ifdef FLAG_DBG |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 200 | #define DEBUG_DICT true |
| 201 | #define DEBUG_DICT_FULL false |
satok | 0cedd2b | 2011-08-12 01:05:27 +0900 | [diff] [blame] | 202 | #define DEBUG_EDIT_DISTANCE false |
satok | 10266c0 | 2011-08-19 22:05:59 +0900 | [diff] [blame] | 203 | #define DEBUG_SHOW_FOUND_WORD false |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 204 | #define DEBUG_NODE DEBUG_DICT_FULL |
| 205 | #define DEBUG_TRACE DEBUG_DICT_FULL |
satok | 1a6da63 | 2011-12-16 23:15:06 +0900 | [diff] [blame] | 206 | #define DEBUG_PROXIMITY_INFO false |
satok | 0cb2097 | 2012-03-13 22:07:56 +0900 | [diff] [blame] | 207 | #define DEBUG_PROXIMITY_CHARS false |
satok | 10266c0 | 2011-08-19 22:05:59 +0900 | [diff] [blame] | 208 | #define DEBUG_CORRECTION false |
satok | 29dc806 | 2012-01-17 15:59:15 +0900 | [diff] [blame] | 209 | #define DEBUG_CORRECTION_FREQ false |
| 210 | #define DEBUG_WORDS_PRIORITY_QUEUE false |
Satoshi Kataoka | 0ed8c6e | 2012-12-04 16:28:06 +0900 | [diff] [blame] | 211 | #define DEBUG_SAMPLING_POINTS false |
| 212 | #define DEBUG_POINTS_PROBABILITY false |
| 213 | #define DEBUG_DOUBLE_LETTER false |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 214 | |
Satoshi Kataoka | 952ec49 | 2012-09-11 15:51:38 +0900 | [diff] [blame] | 215 | #ifdef FLAG_FULL_DBG |
| 216 | #define DEBUG_GEO_FULL true |
| 217 | #else |
Satoshi Kataoka | 23a57ea | 2012-09-10 17:59:17 +0900 | [diff] [blame] | 218 | #define DEBUG_GEO_FULL false |
Satoshi Kataoka | 952ec49 | 2012-09-11 15:51:38 +0900 | [diff] [blame] | 219 | #endif |
Satoshi Kataoka | 23a57ea | 2012-09-10 17:59:17 +0900 | [diff] [blame] | 220 | |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 221 | #else // FLAG_DBG |
satok | 827ced8 | 2011-07-14 09:01:09 +0900 | [diff] [blame] | 222 | |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 223 | #define DEBUG_DICT false |
| 224 | #define DEBUG_DICT_FULL false |
satok | 0cedd2b | 2011-08-12 01:05:27 +0900 | [diff] [blame] | 225 | #define DEBUG_EDIT_DISTANCE false |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 226 | #define DEBUG_SHOW_FOUND_WORD false |
| 227 | #define DEBUG_NODE false |
| 228 | #define DEBUG_TRACE false |
| 229 | #define DEBUG_PROXIMITY_INFO false |
satok | 0cb2097 | 2012-03-13 22:07:56 +0900 | [diff] [blame] | 230 | #define DEBUG_PROXIMITY_CHARS false |
satok | 10266c0 | 2011-08-19 22:05:59 +0900 | [diff] [blame] | 231 | #define DEBUG_CORRECTION false |
| 232 | #define DEBUG_CORRECTION_FREQ false |
satok | 16379df | 2011-12-12 20:53:22 +0900 | [diff] [blame] | 233 | #define DEBUG_WORDS_PRIORITY_QUEUE false |
Keisuke Kuroyanagi | 806eba4 | 2012-10-09 19:57:08 +0900 | [diff] [blame] | 234 | #define DEBUG_SAMPLING_POINTS false |
| 235 | #define DEBUG_POINTS_PROBABILITY false |
Satoshi Kataoka | 9af5335 | 2012-11-16 23:06:41 +0900 | [diff] [blame] | 236 | #define DEBUG_DOUBLE_LETTER false |
satok | 20d9fda | 2011-07-13 14:40:30 +0900 | [diff] [blame] | 237 | |
Satoshi Kataoka | 23a57ea | 2012-09-10 17:59:17 +0900 | [diff] [blame] | 238 | #define DEBUG_GEO_FULL false |
| 239 | |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 240 | #endif // FLAG_DBG |
| 241 | |
Jean Chalard | a5d5849 | 2011-02-18 17:50:58 +0900 | [diff] [blame] | 242 | #ifndef S_INT_MAX |
satok | 3c4bb77 | 2011-03-04 22:50:19 -0800 | [diff] [blame] | 243 | #define S_INT_MAX 2147483647 // ((1 << 31) - 1) |
Jean Chalard | a5d5849 | 2011-02-18 17:50:58 +0900 | [diff] [blame] | 244 | #endif |
Jean Chalard | 592f2b3 | 2012-11-20 21:19:26 +0900 | [diff] [blame] | 245 | #ifndef S_INT_MIN |
| 246 | // The literal constant -2147483648 does not work in C before C99, because |
| 247 | // the compiler tries to fit the positive number into an int and then negate it. |
| 248 | // GCC warns about this. |
| 249 | #define S_INT_MIN (-2147483647 - 1) // -(1 << 31) |
| 250 | #endif |
Satoshi Kataoka | ee62b78 | 2013-01-21 18:29:27 +0900 | [diff] [blame^] | 251 | |
| 252 | #define MAX_PERCENTILE 100 |
| 253 | |
Jean Chalard | 22025c6 | 2012-11-29 17:33:53 +0900 | [diff] [blame] | 254 | // Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator. |
| 255 | // As such, this is the maximum number of characters that will be needed to represent an int as a |
| 256 | // string, including the terminator; this is used as the size of a string buffer large enough to |
| 257 | // hold any value that is intended to fit in an integer, e.g. in the code that reads the header |
| 258 | // of the binary dictionary where a {key,value} string pair scheme is used. |
| 259 | #define LARGEST_INT_DIGIT_COUNT 11 |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 260 | |
Ken Wakasa | e90b333 | 2011-01-07 15:01:51 +0900 | [diff] [blame] | 261 | // Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap(). |
| 262 | // We measured and compared performance of both, and found mmap() is fairly good in terms of |
| 263 | // loading time, and acceptable even for several initial lookups which involve page faults. |
| 264 | #define USE_MMAP_FOR_DICTIONARY |
| 265 | |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 266 | // 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words |
| 267 | #define ADDRESS_MASK 0x3FFFFF |
| 268 | |
| 269 | // The bit that decides if an address follows in the next 22 bits |
| 270 | #define FLAG_ADDRESS_MASK 0x40 |
| 271 | // The bit that decides if this is a terminal node for a word. The node could still have children, |
| 272 | // if the word has other endings. |
| 273 | #define FLAG_TERMINAL_MASK 0x80 |
| 274 | |
| 275 | #define FLAG_BIGRAM_READ 0x80 |
| 276 | #define FLAG_BIGRAM_CHILDEXIST 0x40 |
| 277 | #define FLAG_BIGRAM_CONTINUED 0x80 |
| 278 | #define FLAG_BIGRAM_FREQ 0x7F |
| 279 | |
| 280 | #define DICTIONARY_VERSION_MIN 200 |
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 281 | #define NOT_VALID_WORD (-99) |
| 282 | #define NOT_A_CODE_POINT (-1) |
| 283 | #define NOT_A_DISTANCE (-1) |
| 284 | #define NOT_A_COORDINATE (-1) |
| 285 | #define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2) |
| 286 | #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3) |
| 287 | #define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4) |
| 288 | #define NOT_AN_INDEX (-1) |
| 289 | #define NOT_A_PROBABILITY (-1) |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 290 | |
satok | 817e517 | 2011-03-04 06:06:45 -0800 | [diff] [blame] | 291 | #define KEYCODE_SPACE ' ' |
Ken Wakasa | 1e61493 | 2012-10-29 18:06:22 +0900 | [diff] [blame] | 292 | #define KEYCODE_SINGLE_QUOTE '\'' |
| 293 | #define KEYCODE_HYPHEN_MINUS '-' |
satok | 817e517 | 2011-03-04 06:06:45 -0800 | [diff] [blame] | 294 | |
Yusuke Nojima | afb9076 | 2011-10-05 18:11:42 +0900 | [diff] [blame] | 295 | #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true |
Yusuke Nojima | 258bfe6 | 2011-09-28 12:59:43 +0900 | [diff] [blame] | 296 | |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 297 | #define SUGGEST_WORDS_WITH_MISSING_CHARACTER true |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 298 | #define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true |
satok | a3d78f6 | 2010-12-09 22:08:33 +0900 | [diff] [blame] | 299 | #define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true |
satok | 9955716 | 2012-01-26 22:49:13 +0900 | [diff] [blame] | 300 | #define SUGGEST_MULTIPLE_WORDS true |
satok | a3d78f6 | 2010-12-09 22:08:33 +0900 | [diff] [blame] | 301 | |
Jean Chalard | 8dc754a | 2011-01-27 14:20:22 +0900 | [diff] [blame] | 302 | // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. |
satok | 72bc17e | 2011-04-13 17:23:27 +0900 | [diff] [blame] | 303 | #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 |
satok | dc5301e | 2011-04-11 16:14:45 +0900 | [diff] [blame] | 304 | #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 |
satok | 54af64a | 2012-01-17 15:58:23 +0900 | [diff] [blame] | 305 | #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58 |
satok | 8330b48 | 2012-01-23 16:52:37 +0900 | [diff] [blame] | 306 | #define WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE 50 |
satok | a3d78f6 | 2010-12-09 22:08:33 +0900 | [diff] [blame] | 307 | #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 |
satok | 54fe9e0 | 2010-12-13 14:42:35 +0900 | [diff] [blame] | 308 | #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 |
satok | a161a4a | 2012-01-16 18:38:32 +0900 | [diff] [blame] | 309 | #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 |
satok | 58c49b9 | 2011-01-27 03:23:39 +0900 | [diff] [blame] | 310 | #define FULL_MATCHED_WORDS_PROMOTION_RATE 120 |
satok | 9d2a302 | 2011-04-14 19:13:34 +0900 | [diff] [blame] | 311 | #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90 |
satok | 1b9fa94 | 2012-02-02 18:49:22 +0900 | [diff] [blame] | 312 | #define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 70 |
satok | 635f68e | 2011-08-10 22:19:33 +0900 | [diff] [blame] | 313 | #define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105 |
satok | 1b9fa94 | 2012-02-02 18:49:22 +0900 | [diff] [blame] | 314 | #define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 148 |
| 315 | #define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER 3 |
satok | 10266c0 | 2011-08-19 22:05:59 +0900 | [diff] [blame] | 316 | #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 |
| 317 | #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 |
| 318 | #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 |
satok | eb050fc | 2011-10-03 19:21:13 +0900 | [diff] [blame] | 319 | #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 |
satok | 54af64a | 2012-01-17 15:58:23 +0900 | [diff] [blame] | 320 | #define TWO_WORDS_CORRECTION_DEMOTION_BASE 80 |
| 321 | #define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1 |
Ken Wakasa | f25e705 | 2013-01-16 01:29:43 +0900 | [diff] [blame] | 322 | #define ZERO_DISTANCE_PROMOTION_RATE 110.0f |
Yusuke Nojima | a4c1f1c | 2011-10-06 19:12:20 +0900 | [diff] [blame] | 323 | #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f |
| 324 | #define HALF_SCORE_SQUARED_RADIUS 32.0f |
satok | a85f492 | 2012-01-30 18:18:30 +0900 | [diff] [blame] | 325 | #define MAX_FREQ 255 |
Jean Chalard | 9416c81 | 2012-05-15 19:24:47 +0900 | [diff] [blame] | 326 | #define MAX_BIGRAM_FREQ 15 |
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 327 | |
Satoshi Kataoka | 3e8c58f | 2012-06-05 17:55:52 +0900 | [diff] [blame] | 328 | // This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16. |
| 329 | #define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16 |
| 330 | |
satok | e05b3f4 | 2012-01-31 17:15:43 +0900 | [diff] [blame] | 331 | // This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java |
| 332 | #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 |
| 333 | |
Ken Wakasa | 9e0c711 | 2012-08-09 22:26:58 +0900 | [diff] [blame] | 334 | // Assuming locale strings such as en_US, sr-Latn etc. |
| 335 | #define MAX_LOCALE_STRING_LENGTH 10 |
| 336 | |
satok | a7e5a5a | 2011-12-15 16:49:12 +0900 | [diff] [blame] | 337 | // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used |
| 338 | // for better performance. |
satok | 6ad15fc | 2012-01-16 16:21:21 +0900 | [diff] [blame] | 339 | // Holds up to 1 candidate for each word |
| 340 | #define SUB_QUEUE_MAX_WORDS 1 |
satok | b960477 | 2012-01-13 15:41:17 +0900 | [diff] [blame] | 341 | #define SUB_QUEUE_MAX_COUNT 10 |
satok | 54af64a | 2012-01-17 15:58:23 +0900 | [diff] [blame] | 342 | #define SUB_QUEUE_MIN_WORD_LENGTH 4 |
Satoshi Kataoka | 67e3cc8 | 2012-05-31 15:04:58 +0900 | [diff] [blame] | 343 | // TODO: Extend this limitation |
| 344 | #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 5 |
Satoshi Kataoka | 6cbe204 | 2012-05-30 17:28:34 +0900 | [diff] [blame] | 345 | // TODO: Remove this limitation |
| 346 | #define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12 |
| 347 | // TODO: Remove this limitation |
Satoshi Kataoka | 67e3cc8 | 2012-05-31 15:04:58 +0900 | [diff] [blame] | 348 | #define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 45 |
satok | a85f492 | 2012-01-30 18:18:30 +0900 | [diff] [blame] | 349 | #define MULTIPLE_WORDS_DEMOTION_RATE 80 |
| 350 | #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 |
satok | a7e5a5a | 2011-12-15 16:49:12 +0900 | [diff] [blame] | 351 | |
Ken Wakasa | b02ee3d | 2012-10-08 11:46:14 +0900 | [diff] [blame] | 352 | #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f |
| 353 | #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f |
satok | a0ac31f | 2012-05-23 19:55:27 +0900 | [diff] [blame] | 354 | /* heuristic... This should be changed if we change the unit of the frequency. */ |
| 355 | #define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100) |
satok | 29dc806 | 2012-01-17 15:59:15 +0900 | [diff] [blame] | 356 | |
satok | 6831926 | 2010-12-03 19:38:08 +0900 | [diff] [blame] | 357 | #define MAX_DEPTH_MULTIPLIER 3 |
satok | 1f6b52e | 2012-01-30 13:53:58 +0900 | [diff] [blame] | 358 | #define FIRST_WORD_INDEX 0 |
Satoshi Kataoka | 5817b6b | 2013-01-09 20:05:26 +0900 | [diff] [blame] | 359 | |
Satoshi Kataoka | 687a244 | 2012-08-23 15:46:43 +0900 | [diff] [blame] | 360 | // Max Distance between point to key |
| 361 | #define MAX_POINT_TO_KEY_LENGTH 10000000 |
| 362 | |
Keisuke Kuroyanagi | 95a49a5 | 2012-09-04 17:00:24 +0900 | [diff] [blame] | 363 | // The max number of the keys in one keyboard layout |
| 364 | #define MAX_KEY_COUNT_IN_A_KEYBOARD 64 |
| 365 | |
Jean Chalard | 6c30061 | 2012-03-06 19:54:03 +0900 | [diff] [blame] | 366 | // TODO: Reduce this constant if possible; check the maximum number of digraphs in the same |
| 367 | // word in the dictionary for languages with digraphs, like German and French |
| 368 | #define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5 |
Jean Chalard | a787dba | 2011-03-04 12:17:48 +0900 | [diff] [blame] | 369 | |
satok | 9955716 | 2012-01-26 22:49:13 +0900 | [diff] [blame] | 370 | #define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3 |
satok | 54fe9e0 | 2010-12-13 14:42:35 +0900 | [diff] [blame] | 371 | #define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3 |
satok | 662fe69 | 2010-12-08 17:05:39 +0900 | [diff] [blame] | 372 | |
Ken Wakasa | 5150e15 | 2012-09-27 19:21:25 +0900 | [diff] [blame] | 373 | // TODO: Remove |
Satoshi Kataoka | fe4f1ce | 2012-12-10 17:17:52 +0900 | [diff] [blame] | 374 | #define MAX_POINTER_COUNT 1 |
Ken Wakasa | 5150e15 | 2012-09-27 19:21:25 +0900 | [diff] [blame] | 375 | #define MAX_POINTER_COUNT_FOR_G 2 |
| 376 | |
Jean Chalard | f1634c8 | 2012-05-02 19:05:27 +0900 | [diff] [blame] | 377 | // Size, in bytes, of the bloom filter index for bigrams |
| 378 | // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, |
| 379 | // where k is the number of hash functions, n the number of bigrams, and m the number of |
| 380 | // bits we can test. |
| 381 | // At the moment 100 is the maximum number of bigrams for a word with the current |
| 382 | // dictionaries, so n = 100. 1024 buckets give us m = 1024. |
| 383 | // With 1 hash function, our false positive rate is about 9.3%, which should be enough for |
| 384 | // our uses since we are only using this to increase average performance. For the record, |
| 385 | // k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%, |
| 386 | // and m = 4096 gives 2.4%. |
| 387 | #define BIGRAM_FILTER_BYTE_SIZE 128 |
| 388 | // Must be smaller than BIGRAM_FILTER_BYTE_SIZE * 8, and preferably prime. 1021 is the largest |
| 389 | // prime under 128 * 8. |
| 390 | #define BIGRAM_FILTER_MODULO 1021 |
| 391 | #if BIGRAM_FILTER_BYTE_SIZE * 8 < BIGRAM_FILTER_MODULO |
| 392 | #error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE" |
| 393 | #endif |
| 394 | |
// Returns the smaller of a and b as compared by operator<; b is returned when neither is smaller.
template<typename T> inline T min(T a, T b) {
    if (a < b) {
        return a;
    }
    return b;
}
// Returns the larger of a and b as compared by operator>; b is returned when neither is larger.
template<typename T> inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
satok | f5cded1 | 2010-12-06 21:28:24 +0900 | [diff] [blame] | 397 | |
Ken Wakasa | 0c2227a | 2013-01-21 11:37:54 +0900 | [diff] [blame] | 398 | #define M_PI_F 3.14159265f |
| 399 | |
Ken Wakasa | b02ee3d | 2012-10-08 11:46:14 +0900 | [diff] [blame] | 400 | #define NELEMS(x) (sizeof(x) / sizeof((x)[0])) |
| 401 | |
Yusuke Nojima | 258bfe6 | 2011-09-28 12:59:43 +0900 | [diff] [blame] | 402 | // The ratio of neutral area radius to sweet spot radius. |
| 403 | #define NEUTRAL_AREA_RADIUS_RATIO 1.3f |
| 404 | |
satok | e05b3f4 | 2012-01-31 17:15:43 +0900 | [diff] [blame] | 405 | // DEBUG |
Ken Wakasa | f278981 | 2012-09-04 12:49:46 +0900 | [diff] [blame] | 406 | #define INPUTLENGTH_FOR_DEBUG (-1) |
| 407 | #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) |
satok | e05b3f4 | 2012-01-31 17:15:43 +0900 | [diff] [blame] | 408 | |
// Expands to declarations (no definitions) of TypeName's copy constructor and copy-assignment
// operator. The expansion has no trailing semicolon, so the use site supplies it.
// NOTE(review): presumably meant to be placed in a class's private section — confirm at use sites.
satok | 1bc038c | 2012-06-14 11:25:50 -0700 | [diff] [blame] | 409 | #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ |
| 410 | TypeName(const TypeName&); \ |
| 411 | void operator=(const TypeName&) |
| 412 | |
// Same as DISALLOW_COPY_AND_ASSIGN, and additionally declares the default constructor.
| 413 | #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ |
| 414 | TypeName(); \ |
| 415 | DISALLOW_COPY_AND_ASSIGN(TypeName) |
| 416 | |
// Result of comparing a typed character against a candidate character.
typedef enum {
    EQUIVALENT_CHAR = 0,            // Same char, possibly with different case or accent
    NEAR_PROXIMITY_CHAR = 1,        // A char located nearby on the keyboard
    UNRELATED_CHAR = 2,             // An unrelated char
    ADDITIONAL_PROXIMITY_CHAR = 3   // Additional proximity char which can differ by language
} ProximityType;
Satoshi Kataoka | 6ae8dd4 | 2012-11-22 20:15:40 +0900 | [diff] [blame] | 428 | |
// Double-letter classification levels; the enumerators are numbered 0, 1, 2 in this order.
typedef enum {
    NOT_A_DOUBLE_LETTER = 0,
    A_DOUBLE_LETTER = 1,
    A_STRONG_DOUBLE_LETTER = 2
} DoubleLetterLevel;
satok | e808e43 | 2010-12-02 14:53:24 +0900 | [diff] [blame] | 434 | #endif // LATINIME_DEFINES_H |