Merge "Fix unit tests."
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 87cf0cd..daf40d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -65,7 +65,8 @@
               mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                       &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
               mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
-                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
 
     // Constructs header information using an attribute map.
     HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -97,7 +98,8 @@
               mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                       &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
               mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
-                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
 
     // Copy header information
     HeaderPolicy(const HeaderPolicy *const headerPolicy)
@@ -118,7 +120,8 @@
               mForgettingCurveDurationToLevelDown(
                       headerPolicy->mForgettingCurveDurationToLevelDown),
               mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
-              mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
+              mMaxBigramCount(headerPolicy->mMaxBigramCount),
+              mCodePointTable(headerPolicy->mCodePointTable) {}
 
     // Temporary dummy header.
     HeaderPolicy()
@@ -128,7 +131,8 @@
               mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
               mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
               mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
-              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
+              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0),
+              mCodePointTable(nullptr) {}
 
     ~HeaderPolicy() {}
 
@@ -139,6 +143,8 @@
         switch (mDictFormatVersion) {
             case FormatUtils::VERSION_2:
                 return FormatUtils::VERSION_2;
+            case FormatUtils::VERSION_201:
+                return FormatUtils::VERSION_201;
             case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                 return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
             case FormatUtils::VERSION_4:
@@ -250,6 +256,10 @@
         return mDictFormatVersion >= FormatUtils::VERSION_4;
     }
 
+    const int *getCodePointTable() const {
+        return mCodePointTable;
+    }
+
  private:
     DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
 
@@ -295,6 +305,7 @@
     const int mForgettingCurveDurationToLevelDown;
     const int mMaxUnigramCount;
     const int mMaxBigramCount;
+    const int *const mCodePointTable;
 
     const std::vector<int> readLocale() const;
     float readMultipleWordCostMultiplier() const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index d2c3d2f..41a8b13 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -18,6 +18,7 @@
 
 #include <cctype>
 #include <cstdio>
+#include <memory>
 #include <vector>
 
 #include "defines.h"
@@ -34,12 +35,13 @@
 const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
 
 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
-const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;
 
 const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
 const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
 const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
 const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
+const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";
 
 const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
 
@@ -73,20 +75,32 @@
         return;
     }
     int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
-    int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
+    std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
     while (pos < headerSize) {
+        // The values in the header don't use the code point table for their encoding.
         const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
-                MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
+                MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
         std::vector<int> key;
         key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
         const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
-                MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
+                MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
         std::vector<int> value;
-        value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
+        value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
         headerAttributes->insert(AttributeMap::value_type(key, value));
     }
 }
 
+/* static */ const int *HeaderReadWriteUtils::readCodePointTable(
+        AttributeMap *const headerAttributes) {
+    AttributeMap::key_type keyVector;
+    insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
+    AttributeMap::const_iterator it = headerAttributes->find(keyVector);
+    if (it == headerAttributes->end()) {
+        return nullptr;
+    }
+    return it->second.data();
+}
+
 /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
         BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
         int *const writingPos) {
@@ -96,7 +110,8 @@
     }
     switch (version) {
         case FormatUtils::VERSION_2:
-            // Version 2 dictionary writing is not supported.
+        case FormatUtils::VERSION_201:
+            // Version 2 or 201 dictionary writing is not supported.
             return false;
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 1ab2eec..5dd91b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -46,6 +46,9 @@
     static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
             DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
 
+    static const int *readCodePointTable(
+            DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
+
     static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
             const FormatUtils::FORMAT_VERSION version, int *const writingPos);
 
@@ -101,6 +104,8 @@
     static const int HEADER_FLAG_SIZE;
     static const int HEADER_SIZE_FIELD_SIZE;
 
+    static const char *const CODE_POINT_TABLE_KEY;
+
     // Value for the "flags" field. It's unused at the moment.
     static const DictionaryFlags NO_FLAGS;
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
index 82399f1..5c639b1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
@@ -23,6 +23,7 @@
 
 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
 
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
 #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
 #include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
@@ -59,8 +60,8 @@
     const int parentPos =
             DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
     int codePoints[MAX_WORD_LENGTH];
-    const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
-            dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+            dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
     int terminalIdFieldPos = NOT_A_DICT_POS;
     int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
     int probability = NOT_A_PROBABILITY;
@@ -98,7 +99,7 @@
         // The destination position is stored at the same place as the parent position.
         return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
     } else {
-        return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+        return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
                 terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
                 newSiblingNodePos);
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 9fa93ef..372c9e3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -114,7 +114,8 @@
             mmappedBuffer->getReadOnlyByteArrayView());
     switch (formatVersion) {
         case FormatUtils::VERSION_2:
-            AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
+        case FormatUtils::VERSION_201:
+            AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
             break;
         case FormatUtils::VERSION_4: {
             return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -175,6 +176,7 @@
     }
     switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
         case FormatUtils::VERSION_2:
+        case FormatUtils::VERSION_201:
             return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                     new PatriciaTriePolicy(std::move(mmappedBuffer)));
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
index e64a13c..6a498b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -61,19 +61,20 @@
 }
 
 /* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer,
-        int *const pos) {
-    return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+        const int *const codePointTable, int *const pos) {
+    return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, codePointTable, pos);
 }
 
 // Returns the number of read characters.
 /* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer,
-        const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+        const NodeFlags flags, const int maxLength, const int *const codePointTable,
+        int *const outBuffer, int *const pos) {
     int length = 0;
     if (hasMultipleChars(flags)) {
-        length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
-                pos);
+        length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, codePointTable,
+                outBuffer, pos);
     } else {
-        const int codePoint = getCodePointAndAdvancePosition(buffer, pos);
+        const int codePoint = getCodePointAndAdvancePosition(buffer, codePointTable, pos);
         if (codePoint == NOT_A_CODE_POINT) {
             // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is
             // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR
@@ -92,12 +93,12 @@
 
 // Returns the number of skipped characters.
 /* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
-        const int maxLength, int *const pos) {
+        const int maxLength, const int *const codePointTable, int *const pos) {
     if (hasMultipleChars(flags)) {
         return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
     } else {
         if (maxLength > 0) {
-            getCodePointAndAdvancePosition(buffer, pos);
+            getCodePointAndAdvancePosition(buffer, codePointTable, pos);
             return 1;
         } else {
             return 0;
@@ -134,7 +135,7 @@
 
 /* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
         const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
-        const DictionaryBigramsStructurePolicy *const bigramPolicy,
+        const DictionaryBigramsStructurePolicy *const bigramPolicy, const int *const codePointTable,
         NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
         int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
         int *const outBigramPos, int *const outSiblingPos) {
@@ -142,7 +143,7 @@
     const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
     *outFlags = flags;
     *outCodePointCount = getCharsAndAdvancePosition(
-            dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
+            dictBuf, flags, MAX_WORD_LENGTH, codePointTable, outCodePoint, &readingPos);
     *outProbability = isTerminal(flags) ?
             readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
     *outChildrenPos = hasChildrenInFlags(flags) ?
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
index c3f09c3..a69ec44 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
@@ -34,15 +34,17 @@
 
     static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
 
-    static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+    static int getCodePointAndAdvancePosition(const uint8_t *const buffer,
+            const int *const codePointTable, int *const pos);
 
     // Returns the number of read characters.
     static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
-            const int maxLength, int *const outBuffer, int *const pos);
+            const int maxLength, const int *const codePointTable, int *const outBuffer,
+            int *const pos);
 
     // Returns the number of skipped characters.
     static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
-            const int maxLength, int *const pos);
+            const int maxLength, const int *const codePointTable, int *const pos);
 
     static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
 
@@ -106,9 +108,10 @@
     static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
             const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
             const DictionaryBigramsStructurePolicy *const bigramPolicy,
-            NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
-            int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
-            int *const outBigramPos, int *const outSiblingPos);
+            const int *const codePointTable, NodeFlags *const outFlags,
+            int *const outCodePointCount, int *const outCodePoint, int *const outProbability,
+            int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos,
+            int *const outSiblingPos);
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
index 7cb7dff..40b8720 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -45,7 +45,9 @@
 
 /* static */ int ShortcutListReadingUtils::readShortcutTarget(const ReadOnlyByteArrayView buffer,
         const int maxLength, int *const outWord, int *const pos) {
-    return ByteArrayUtils::readStringAndAdvancePosition(buffer.data(), maxLength, outWord, pos);
+    // TODO: Use codePointTable for shortcuts.
+    return ByteArrayUtils::readStringAndAdvancePosition(buffer.data(), maxLength,
+            nullptr /* codePointTable */, outWord, pos);
 }
 
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 64b767d..6e7dba9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -81,6 +81,7 @@
     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
     int pos = getRootPosition();
     int wordPos = 0;
+    const int *const codePointTable = mHeaderPolicy.getCodePointTable();
     // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
     // only traverse PtNodes that are actually a part of the terminal we are searching, so each
     // time we enter this loop we are one depth level further than last time.
@@ -112,21 +113,21 @@
             const PatriciaTrieReadingUtils::NodeFlags flags =
                     PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mBuffer.data(), &pos);
             const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                    mBuffer.data(), &pos);
+                    mBuffer.data(), codePointTable, &pos);
             if (ptNodePos == startPos) {
                 // We found the position. Copy the rest of the code points in the buffer and return
                 // the length.
                 outCodePoints[wordPos] = character;
                 if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                     int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                            mBuffer.data(), &pos);
+                            mBuffer.data(), codePointTable, &pos);
                     // We count code points in order to avoid infinite loops if the file is broken
                     // or if there is some other bug
                     int charCount = maxCodePointCount;
                     while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
                         outCodePoints[++wordPos] = nextChar;
                         nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                                mBuffer.data(), &pos);
+                                mBuffer.data(), codePointTable, &pos);
                     }
                 }
                 *outUnigramProbability =
@@ -138,7 +139,7 @@
             // first and possibly the probability.
             if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                 PatriciaTrieReadingUtils::skipCharacters(mBuffer.data(), flags, MAX_WORD_LENGTH,
-                        &pos);
+                        codePointTable, &pos);
             }
             if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                 PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), &pos);
@@ -189,17 +190,17 @@
                             PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
                                     mBuffer.data(), &lastCandidatePtNodePos);
                     const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                            mBuffer.data(), &lastCandidatePtNodePos);
+                            mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
                     // We copy all the characters in this PtNode to the buffer
                     outCodePoints[wordPos] = lastChar;
                     if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
                         int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                                mBuffer.data(), &lastCandidatePtNodePos);
+                                mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
                         int charCount = maxCodePointCount;
                         while (-1 != nextChar && --charCount > 0) {
                             outCodePoints[++wordPos] = nextChar;
                             nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
-                                    mBuffer.data(), &lastCandidatePtNodePos);
+                                    mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
                         }
                     }
                     ++wordPos;
@@ -404,9 +405,11 @@
     int shortcutPos = NOT_A_DICT_POS;
     int bigramPos = NOT_A_DICT_POS;
     int siblingPos = NOT_A_DICT_POS;
+    const int *const codePointTable = mHeaderPolicy.getCodePointTable();
     PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, &mShortcutListPolicy,
-            &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
-            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+            &mBigramListPolicy, codePointTable, &flags, &mergedNodeCodePointCount,
+            mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos,
+            &siblingPos);
     // Skip PtNodes don't start with Unicode code point because they represent non-word information.
     if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
         const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 70e8d84..5f17951 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -43,10 +43,11 @@
     PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
             : mMmappedBuffer(std::move(mmappedBuffer)),
               mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
-                      FormatUtils::VERSION_2),
+                      FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())),
               mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
               mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
-              mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy),
+              mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
+                      mHeaderPolicy.getCodePointTable()),
               mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
               mIsCorrupted(false) {}
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
index 74cdf79..dc0ed96 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -38,8 +38,8 @@
     int bigramPos = NOT_A_DICT_POS;
     int siblingPos = NOT_A_DICT_POS;
     PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortuctPolicy,
-            mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability,
-            &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+            mBigramPolicy, mCodePointTable, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
+            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
     if (mergedNodeCodePointCount <= 0) {
         AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
         ASSERT(false);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
index 0f6769d..24ec5bc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -33,8 +33,10 @@
  public:
     Ver2ParticiaTrieNodeReader(const ReadOnlyByteArrayView buffer,
             const DictionaryBigramsStructurePolicy *const bigramPolicy,
-            const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
-            : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy) {}
+            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+            const int *const codePointTable)
+            : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy),
+              mCodePointTable(codePointTable) {}
 
     virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
 
@@ -44,6 +46,7 @@
     const ReadOnlyByteArrayView mBuffer;
     const DictionaryBigramsStructurePolicy *const mBigramPolicy;
     const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+    const int *const mCodePointTable;
 };
 } // namespace latinime
 #endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 731092e..d795239 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -16,6 +16,7 @@
 
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
 
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
 #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
@@ -51,7 +52,7 @@
             DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
     int codePoints[MAX_WORD_LENGTH];
     const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
-            dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+            dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
     int terminalIdFieldPos = NOT_A_DICT_POS;
     int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
     int probability = NOT_A_PROBABILITY;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index ecbe792..da2c30c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -42,8 +42,10 @@
     if (readingPosIsInAdditionalBuffer) {
         *pos -= mOriginalBuffer.size();
     }
+    // Code point table is not used for dynamic format.
     *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
-            getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
+            getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount,
+            nullptr /* codePointTable */, outCodePoints, pos);
     if (readingPosIsInAdditionalBuffer) {
         *pos += mOriginalBuffer.size();
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 4b3c989..abb9790 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -147,11 +147,18 @@
      */
     static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
         int p = pos;
-        return readCodePointAndAdvancePosition(buffer, &p);
+        return readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, &p);
     }
 
     static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
-            const uint8_t *const buffer, int *const pos) {
+            const uint8_t *const buffer, const int *const codePointTable, int *const pos) {
+        /*
+         * codePointTable is an array to convert the most frequent characters in this dictionary to
+         * 1 byte code points. It is only made of the original code points of the most frequent
+         * characters used in this dictionary. 0x20 - 0xFF is used for the 1 byte characters.
+         * The original code points are restored by picking the code points at the indices of the
+         * codePointTable. The indices are calculated by subtracting 0x20 from the firstByte.
+         */
         const uint8_t firstByte = readUint8(buffer, *pos);
         if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
             if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
@@ -162,6 +169,9 @@
             }
         } else {
             *pos += 1;
+            if (codePointTable) {
+                return codePointTable[firstByte - MINIMUM_ONE_BYTE_CHARACTER_VALUE];
+            }
             return firstByte;
         }
     }
@@ -173,12 +183,13 @@
      */
     // Returns the length of the string.
     static int readStringAndAdvancePosition(const uint8_t *const buffer,
-            const int maxLength, int *const outBuffer, int *const pos) {
+            const int maxLength, const int *const codePointTable, int *const outBuffer,
+            int *const pos) {
         int length = 0;
-        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+        int codePoint = readCodePointAndAdvancePosition(buffer, codePointTable, pos);
         while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
             outBuffer[length++] = codePoint;
-            codePoint = readCodePointAndAdvancePosition(buffer, pos);
+            codePoint = readCodePointAndAdvancePosition(buffer, codePointTable, pos);
         }
         return length;
     }
@@ -187,9 +198,9 @@
     static int advancePositionToBehindString(
             const uint8_t *const buffer, const int maxLength, int *const pos) {
         int length = 0;
-        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+        int codePoint = readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, pos);
         while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
-            codePoint = readCodePointAndAdvancePosition(buffer, pos);
+            codePoint = readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, pos);
             length++;
         }
         return length;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index e6e7167..0cffe56 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -29,6 +29,8 @@
     switch (formatVersion) {
         case VERSION_2:
             return VERSION_2;
+        case VERSION_201:
+            return VERSION_201;
         case VERSION_4_ONLY_FOR_TESTING:
             return VERSION_4_ONLY_FOR_TESTING;
         case VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 51ad987..9631008 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,6 +32,7 @@
     enum FORMAT_VERSION {
         // These MUST have the same values as the relevant constants in FormatSpec.java.
         VERSION_2 = 2,
+        VERSION_201 = 201,
         VERSION_4_ONLY_FOR_TESTING = 399,
         VERSION_4 = 402,
         VERSION_4_DEV = 403,
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp
index a1c310d..c201e0d 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp
@@ -23,6 +23,19 @@
 namespace latinime {
 namespace {
 
+TEST(ByteArrayUtilsTest, TestReadCodePointTable) {
+    const int codePointTable[] = { 0x6f, 0x6b };
+    const uint8_t buffer[] = { 0x20u, 0x21u, 0x00u, 0x01u, 0x00u };
+    int pos = 0;
+    // Expect the first entry of codePointTable
+    EXPECT_EQ(0x6f, ByteArrayUtils::readCodePointAndAdvancePosition(buffer, codePointTable, &pos));
+    // Expect the second entry of codePointTable
+    EXPECT_EQ(0x6b, ByteArrayUtils::readCodePointAndAdvancePosition(buffer, codePointTable, &pos));
+    // Expect the original code point from buffer[2] to buffer[4], 0x100
+    // It isn't picked from the codePointTable, since it exceeds the range of the codePointTable.
+    EXPECT_EQ(0x100, ByteArrayUtils::readCodePointAndAdvancePosition(buffer, codePointTable, &pos));
+}
+
 TEST(ByteArrayUtilsTest, TestReadInt) {
     const uint8_t buffer[] = { 0x1u, 0x8Au, 0x0u, 0xAAu };
 
@@ -67,7 +80,7 @@
 
     int pos = 0;
     int codePointArray[3];
-    EXPECT_EQ(3, ByteArrayUtils::readStringAndAdvancePosition(buffer, MAX_WORD_LENGTH,
+    EXPECT_EQ(3, ByteArrayUtils::readStringAndAdvancePosition(buffer, MAX_WORD_LENGTH, nullptr,
             codePointArray, &pos));
     EXPECT_EQ(0x10FF00, codePointArray[0]);
     EXPECT_EQ(0x20, codePointArray[1]);