Merge "Use reentrant lock for main dictionaries."
diff --git a/dictionaries/cs_wordlist.combined.gz b/dictionaries/cs_wordlist.combined.gz
index d69ef64..7829d65 100644
--- a/dictionaries/cs_wordlist.combined.gz
+++ b/dictionaries/cs_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/da_wordlist.combined.gz b/dictionaries/da_wordlist.combined.gz
index 919d28e..e714019 100644
--- a/dictionaries/da_wordlist.combined.gz
+++ b/dictionaries/da_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz
index f5cce9d..6a4bd44 100644
--- a/dictionaries/de_wordlist.combined.gz
+++ b/dictionaries/de_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz
index afef676..839f3ef 100644
--- a/dictionaries/en_GB_wordlist.combined.gz
+++ b/dictionaries/en_GB_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz
index eafbc9d..5595c75 100644
--- a/dictionaries/en_US_wordlist.combined.gz
+++ b/dictionaries/en_US_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz
index 9cbca0b..69c39d5 100644
--- a/dictionaries/en_wordlist.combined.gz
+++ b/dictionaries/en_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz
index 53b8607..0a48b6d 100644
--- a/dictionaries/es_wordlist.combined.gz
+++ b/dictionaries/es_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fi_wordlist.combined.gz b/dictionaries/fi_wordlist.combined.gz
index 2720116..eefbfe5 100644
--- a/dictionaries/fi_wordlist.combined.gz
+++ b/dictionaries/fi_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz
index 1815e47..1a18320 100644
--- a/dictionaries/fr_wordlist.combined.gz
+++ b/dictionaries/fr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/hr_wordlist.combined.gz b/dictionaries/hr_wordlist.combined.gz
index 7694a2a..864f676 100644
--- a/dictionaries/hr_wordlist.combined.gz
+++ b/dictionaries/hr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz
index 3b84cd7..dfb1752 100644
--- a/dictionaries/it_wordlist.combined.gz
+++ b/dictionaries/it_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/lt_wordlist.combined.gz b/dictionaries/lt_wordlist.combined.gz
index 316a5af..029722d 100644
--- a/dictionaries/lt_wordlist.combined.gz
+++ b/dictionaries/lt_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/lv_wordlist.combined.gz b/dictionaries/lv_wordlist.combined.gz
index b036ac2..41e1c28 100644
--- a/dictionaries/lv_wordlist.combined.gz
+++ b/dictionaries/lv_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/nb_wordlist.combined.gz b/dictionaries/nb_wordlist.combined.gz
index b6e0d42..b699912 100644
--- a/dictionaries/nb_wordlist.combined.gz
+++ b/dictionaries/nb_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz
index 48ab0f4..89c2388 100644
--- a/dictionaries/nl_wordlist.combined.gz
+++ b/dictionaries/nl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz
index bf02298..2b53f69 100644
--- a/dictionaries/pl_wordlist.combined.gz
+++ b/dictionaries/pl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz
index 876eb71..2d22447 100644
--- a/dictionaries/pt_BR_wordlist.combined.gz
+++ b/dictionaries/pt_BR_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz
index 4068690..1504165 100644
--- a/dictionaries/pt_PT_wordlist.combined.gz
+++ b/dictionaries/pt_PT_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sl_wordlist.combined.gz b/dictionaries/sl_wordlist.combined.gz
index 41a576b..55e1bb1 100644
--- a/dictionaries/sl_wordlist.combined.gz
+++ b/dictionaries/sl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sr_wordlist.combined.gz b/dictionaries/sr_wordlist.combined.gz
index dec6ae8..8488a08 100644
--- a/dictionaries/sr_wordlist.combined.gz
+++ b/dictionaries/sr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz
index 0471772..6342520 100644
--- a/dictionaries/sv_wordlist.combined.gz
+++ b/dictionaries/sv_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz
index fae79ca..0251778 100644
--- a/dictionaries/tr_wordlist.combined.gz
+++ b/dictionaries/tr_wordlist.combined.gz
Binary files differ
diff --git a/java/res/raw/main_de.dict b/java/res/raw/main_de.dict
index 5d35e64..69796bb 100644
--- a/java/res/raw/main_de.dict
+++ b/java/res/raw/main_de.dict
Binary files differ
diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict
index 8660c28..bef6b10 100644
--- a/java/res/raw/main_en.dict
+++ b/java/res/raw/main_en.dict
Binary files differ
diff --git a/java/res/raw/main_es.dict b/java/res/raw/main_es.dict
index f5906c2..261ab8c 100644
--- a/java/res/raw/main_es.dict
+++ b/java/res/raw/main_es.dict
Binary files differ
diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict
index 0d2e518..18f5298 100644
--- a/java/res/raw/main_fr.dict
+++ b/java/res/raw/main_fr.dict
Binary files differ
diff --git a/java/res/raw/main_it.dict b/java/res/raw/main_it.dict
index 523f645..e161c24 100644
--- a/java/res/raw/main_it.dict
+++ b/java/res/raw/main_it.dict
Binary files differ
diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict
index 98a27c7..21bbe7c 100644
--- a/java/res/raw/main_pt_br.dict
+++ b/java/res/raw/main_pt_br.dict
Binary files differ
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 6ec7aee..29c6c04 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -115,7 +115,7 @@
private static native long openNative(String sourceDir, long dictOffset, long dictSize,
boolean isUpdatable);
private static native void flushNative(long dict, String filePath);
- private static native boolean needsToRunGCNative(long dict);
+ private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
private static native void flushWithGCNative(long dict, String filePath);
private static native void closeNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
@@ -270,7 +270,7 @@
}
private void runGCIfRequired() {
- if (needsToRunGCNative(mNativeDict)) {
+ if (needsToRunGC(true /* mindsBlockByGC */)) {
flushWithGC();
}
}
@@ -326,9 +326,15 @@
reopen();
}
- public boolean needsToRunGC() {
+ /**
+ * Checks whether GC needs to run.
+ * @param mindsBlockByGC Whether to take into account that GC blocks other operations. The
+ * blocking does not matter in some situations, such as during idle time or just before closing.
+ * @return whether GC needs to run.
+ */
+ public boolean needsToRunGC(final boolean mindsBlockByGC) {
if (!isValidDictionary()) return false;
- return needsToRunGCNative(mNativeDict);
+ return needsToRunGCNative(mNativeDict, mindsBlockByGC);
}
@UsedForTesting
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index cbba3f8..2d1ca51 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -505,7 +505,7 @@
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
} else {
- if (mBinaryDictionary.needsToRunGC()) {
+ if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
mBinaryDictionary.flushWithGC();
} else {
mBinaryDictionary.flush();
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 6cc0bfb..af61f29 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -385,12 +385,14 @@
nodeSize + size, ptNode.mChildren));
}
nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
- if (null != ptNode.mBigrams) {
- for (WeightedString bigram : ptNode.mBigrams) {
- final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
- nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
- nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
+ if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) {
+ if (null != ptNode.mBigrams) {
+ for (WeightedString bigram : ptNode.mBigrams) {
+ final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
+ nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
+ nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
+ }
}
}
ptNode.mCachedSize = nodeSize;
diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
index bf3d191..411e265 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
@@ -77,7 +77,7 @@
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
- public static void updateParentAddress(final DictBuffer dictBuffer,
+ private static void updateParentAddress(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
@@ -109,7 +109,7 @@
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
- public static void updateParentAddresses(final DictBuffer dictBuffer,
+ private static void updateParentAddresses(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
@@ -136,7 +136,7 @@
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
- public static void updateChildrenAddress(final DictBuffer dictBuffer,
+ private static void updateChildrenAddress(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 2765222..9481a8c 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -265,8 +265,12 @@
static final String FREQ_FILE_EXTENSION = ".freq";
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+ static final String BIGRAM_FILE_EXTENSION = ".bigram";
+ static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+ static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index";
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+ static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
index 0b9cf91..96d057a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
+++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
@@ -18,6 +18,9 @@
import com.android.inputmethod.annotations.UsedForTesting;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
@@ -147,4 +150,45 @@
BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4);
}
}
+
+ /**
+  * Writes this table to a pair of files.
+  *
+  * Opens one output stream per file, hands both streams to {@code write} and closes them
+  * afterwards, whether or not writing succeeded.
+  *
+  * @param lookupTableFile the file that receives the lookup table stream.
+  * @param contentFile the file that receives the content stream.
+  * @throws IOException if a file cannot be opened, written or closed.
+  */
+ @UsedForTesting
+ public void writeToFiles(final File lookupTableFile, final File contentFile)
+         throws IOException {
+     FileOutputStream lookupTableOutStream = null;
+     FileOutputStream contentOutStream = null;
+     try {
+         lookupTableOutStream = new FileOutputStream(lookupTableFile);
+         contentOutStream = new FileOutputStream(contentFile);
+         write(lookupTableOutStream, contentOutStream);
+     } finally {
+         // Nested try/finally: a failure while closing the first stream must not prevent
+         // the second stream from being closed.
+         try {
+             if (lookupTableOutStream != null) {
+                 lookupTableOutStream.close();
+             }
+         } finally {
+             if (contentOutStream != null) {
+                 contentOutStream.close();
+             }
+         }
+     }
+ }
+
+ /**
+  * Reads the whole content of a file into a byte array.
+  *
+  * @param file the file to read.
+  * @return a new array holding the bytes of the file; its length is the file length
+  *         reported when the method was called.
+  * @throws IOException if the file cannot be opened or read, or if it ends before the
+  *         expected number of bytes has been read.
+  */
+ private static byte[] readFileToByteArray(final File file) throws IOException {
+     final byte[] contents = new byte[(int) file.length()];
+     FileInputStream inStream = null;
+     try {
+         inStream = new FileInputStream(file);
+         // A single read() call may return fewer bytes than requested, so keep reading
+         // until the buffer is completely filled.
+         int offset = 0;
+         while (offset < contents.length) {
+             final int readCount = inStream.read(contents, offset, contents.length - offset);
+             if (readCount < 0) {
+                 throw new IOException("Unexpected end of file: " + file.getAbsolutePath());
+             }
+             offset += readCount;
+         }
+     } finally {
+         if (inStream != null) {
+             inStream.close();
+         }
+     }
+     return contents;
+ }
+
+ /**
+  * Creates a table from the raw bytes of a lookup table file and a content file.
+  *
+  * @param lookupTableFile the file whose bytes become the lookup table.
+  * @param contentFile the file whose bytes become the content table.
+  * @param blockSize the block size passed on to the {@code SparseTable} constructor.
+  * @return the table built from both files.
+  * @throws IOException if reading either file fails.
+  */
+ @UsedForTesting
+ public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile,
+         final int blockSize) throws IOException {
+     // Arguments are evaluated left to right, so the lookup table file is read first.
+     return new SparseTable(readFileToByteArray(lookupTableFile),
+             readFileToByteArray(contentFile), blockSize);
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 4c8ff8e..0aa4319 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -42,12 +42,15 @@
private static final int FILETYPE_TRIE = 1;
private static final int FILETYPE_FREQUENCY = 2;
private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
+ private static final int FILETYPE_BIGRAM = 4;
private final File mDictDirectory;
private final DictionaryBufferFactory mBufferFactory;
private DictBuffer mDictBuffer;
private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer;
+ private DictBuffer mBigramBuffer;
+ private SparseTable mBigramAddressTable;
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
@@ -82,6 +85,9 @@
} else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ } else if (fileType == FILETYPE_BIGRAM) {
+ return new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION);
} else {
throw new RuntimeException("Unsupported kind of file : " + fileType);
}
@@ -94,6 +100,8 @@
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
+ mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM));
+ loadBigramAddressSparseTable();
}
@Override
@@ -118,6 +126,15 @@
return header;
}
+ /**
+  * Loads {@code mBigramAddressTable} from the bigram lookup table file and the bigram
+  * address table file. Both files are looked up inside the dictionary directory and are
+  * named after that directory plus the respective extension.
+  *
+  * @throws IOException if {@code SparseTable.readFromFiles} fails.
+  */
+ private void loadBigramAddressSparseTable() throws IOException {
+     final String baseName = mDictDirectory.getName();
+     final File lookupTableFile = new File(mDictDirectory,
+             baseName + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
+     final File addressTableFile = new File(mDictDirectory,
+             baseName + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
+     mBigramAddressTable = SparseTable.readFromFiles(lookupTableFile, addressTableFile,
+             FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
+ }
+
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
@@ -191,8 +208,21 @@
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
- addressPointer);
+ final int posOfBigrams = mBigramAddressTable.get(terminalId);
+ mBigramBuffer.position(posOfBigrams);
+ while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
+ // remaining bigram entries are ignored.
+ final int bigramFlags = mBigramBuffer.readUnsignedByte();
+ final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
+ mTerminalAddressTableBuffer.position(
+ targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+ final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
+ bigrams.add(new PendingAttribute(
+ bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
+ targetAddress));
+ if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
MakedictLog.d("too many bigrams in a node.");
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 4fb8967..4c25faf 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -26,6 +26,7 @@
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
@@ -43,9 +44,13 @@
private byte[] mTrieBuf;
private int mTriePos;
private int mHeaderSize;
+ private SparseTable mBigramAddressTable;
private OutputStream mTrieOutStream;
private OutputStream mFreqOutStream;
private OutputStream mTerminalAddressTableOutStream;
+ private OutputStream mBigramOutStream;
+ private File mDictDir;
+ private String mBaseFilename;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
@@ -55,12 +60,14 @@
private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
throws FileNotFoundException, IOException {
final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
- final String filename = header.getId() + "." + header.getVersion();
- final File mDictDir = new File(mDictPlacedDir, filename);
- final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION);
- final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION);
+ mBaseFilename = header.getId() + "." + header.getVersion();
+ mDictDir = new File(mDictPlacedDir, mBaseFilename);
+ final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
+ final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
final File terminalAddressTableFile = new File(mDictDir,
- filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ final File bigramFile = new File(mDictDir,
+ mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION);
if (!mDictDir.isDirectory()) {
if (mDictDir.exists()) mDictDir.delete();
mDictDir.mkdirs();
@@ -71,6 +78,7 @@
mTrieOutStream = new FileOutputStream(trieFile);
mFreqOutStream = new FileOutputStream(freqFile);
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
+ mBigramOutStream = new FileOutputStream(bigramFile);
}
private void close() throws IOException {
@@ -84,10 +92,14 @@
if (mTerminalAddressTableOutStream != null) {
mTerminalAddressTableOutStream.close();
}
+ if (mBigramOutStream != null) {
+ mBigramOutStream.close();
+ }
} finally {
mTrieOutStream = null;
mFreqOutStream = null;
mTerminalAddressTableOutStream = null;
+ mBigramOutStream = null;
}
}
@@ -123,6 +135,10 @@
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
writeTerminalData(flatNodes, terminalCount);
+ mBigramAddressTable = new SparseTable(terminalCount,
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
+ writeBigrams(flatNodes, dict);
+ writeBigramAddressSparseTable();
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
@@ -230,24 +246,41 @@
shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
}
- private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) {
- if (bigrams == null) return;
+ private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
+ throws IOException {
+ final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream();
- final Iterator<WeightedString> bigramIterator = bigrams.iterator();
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int addressOfBigram = target.mCachedAddressAfterUpdate;
- final int unigramFrequencyForThisWord = target.mFrequency;
- final int offset = addressOfBigram
- - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
- offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
- mTrieBuf[mTriePos++] = (byte) bigramFlags;
- mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
- mTriePos, Math.abs(offset));
+ for (final PtNodeArray nodeArray : flatNodes) {
+ for (final PtNode ptNode : nodeArray.mData) {
+ if (ptNode.mBigrams != null) {
+ final int startPos = bigramBuffer.size();
+ mBigramAddressTable.set(ptNode.mTerminalId, startPos);
+ final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = bigramIterator.next();
+ final PtNode target =
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
+ final int unigramFrequencyForThisWord = target.mFrequency;
+ final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
+ bigramIterator.hasNext(), 0, bigram.mFrequency,
+ unigramFrequencyForThisWord, bigram.mWord);
+ BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags,
+ FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId,
+ FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
+ }
+ }
+ }
}
+ bigramBuffer.writeTo(mBigramOutStream);
+ }
+
+ /**
+  * Writes {@code mBigramAddressTable} into the dictionary directory as two files named
+  * after the base file name: the bigram lookup table and the bigram address table.
+  *
+  * @throws IOException if {@code SparseTable.writeToFiles} fails.
+  */
+ private void writeBigramAddressSparseTable() throws IOException {
+     final String lookupTableName =
+             mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+     final String addressTableName =
+             mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION;
+     mBigramAddressTable.writeToFiles(new File(mDictDir, lookupTableName),
+             new File(mDictDir, addressTableName));
}
@Override
@@ -267,7 +300,6 @@
}
writeChildrenPosition(ptNode, formatOptions);
writeShortcuts(ptNode.mShortcutTargets);
- writeBigrams(ptNode.mBigrams, dict);
}
private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 85e100e..c5ef264 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -113,10 +113,10 @@
}
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
- jlong dict) {
+ jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
- return dictionary->needsToRunGC();
+ return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
@@ -364,7 +364,7 @@
},
{
const_cast<char *>("needsToRunGCNative"),
- const_cast<char *>("(J)Z"),
+ const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 9b4c91a..b1d01ed 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -123,8 +123,8 @@
mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
-bool Dictionary::needsToRunGC() {
- return mDictionaryStructureWithBufferPolicy->needsToRunGC();
+bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
}
void Dictionary::getProperty(const char *const query, char *const outResult,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 5740508..d8a0f3e 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -81,7 +81,7 @@
void flushWithGC(const char *const filePath);
- bool needsToRunGC();
+ bool needsToRunGC(const bool mindsBlockByGC);
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 2434287..c7ffef0 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -78,7 +78,7 @@
virtual void flushWithGC(const char *const filePath) = 0;
- virtual bool needsToRunGC() const = 0;
+ virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
virtual void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 5eb4733..5f755c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -41,7 +41,7 @@
return false;
}
} else {
- valueStack.back() += 1;
+ mValueStack.back() += 1;
if (node->isTerminal()) {
mValidUnigramCount += 1;
}
@@ -49,6 +49,23 @@
return true;
}
+// Visits a PtNode during GC. For a node that is not deleted and has a bigram list, asks the
+// bigram policy to update all bigram entries and delete useless ones, then adds the reported
+// entry count to mValidBigramEntryCount. Returns false only when that update fails.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+        ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+                const int *const nodeCodePoints) {
+    if (node->isDeleted()) {
+        // Deleted nodes are left untouched.
+        return true;
+    }
+    int bigramsPos = node->getBigramsPos();
+    if (bigramsPos == NOT_A_DICT_POS) {
+        // This node has no bigram list.
+        return true;
+    }
+    int entryCount = 0;
+    if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&bigramsPos,
+            &entryCount)) {
+        return false;
+    }
+    mValidBigramEntryCount += entryCount;
+    return true;
+}
+
// Writes dummy PtNode array size when the head of PtNode array is read.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index aa6e609..3019988 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -40,22 +40,22 @@
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer)
- : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
+ : mWritingHelper(writingHelper), mBuffer(buffer), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
bool onAscend() {
- if (valueStack.empty()) {
+ if (mValueStack.empty()) {
return false;
}
- mChildrenValue = valueStack.back();
- valueStack.pop_back();
+ mChildrenValue = mValueStack.back();
+ mValueStack.pop_back();
return true;
}
bool onDescend(const int ptNodeArrayPos) {
- valueStack.push_back(0);
+ mValueStack.push_back(0);
return true;
}
@@ -74,7 +74,7 @@
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
- std::vector<int> valueStack;
+ std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
};
@@ -94,20 +94,7 @@
bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
- if (pos != NOT_A_DICT_POS) {
- int bigramEntryCount = 0;
- if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
- &bigramEntryCount)) {
- return false;
- }
- mValidBigramEntryCount += bigramEntryCount;
- }
- }
- return true;
- }
+ const int *const nodeCodePoints);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 4581ec0..8c0890e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -291,7 +291,7 @@
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
-bool DynamicPatriciaTriePolicy::needsToRunGC() const {
+bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffer->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 7f9d4d9..bdb436c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -91,7 +91,7 @@
void flushWithGC(const char *const filePath);
- bool needsToRunGC() const;
+ bool needsToRunGC(const bool mindsBlockByGC) const;
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index bae5e8c..2a2e9bc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -147,7 +147,7 @@
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
- mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize();
+ mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
unigramCount, bigramCount, extendedRegionSize)) {
return;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 78c6c04..9ce9994 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -20,7 +20,8 @@
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
-const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
+// TODO: Change attribute string to "IS_DECAYING_DICT".
+const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 93b9c6f..4261667 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -36,8 +36,8 @@
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
- mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
- USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
@@ -54,8 +54,8 @@
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
- mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
- USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
@@ -82,8 +82,8 @@
return mMultiWordCostMultiplier;
}
- AK_FORCE_INLINE bool usesForgettingCurve() const {
- return mUsesForgettingCurve;
+ AK_FORCE_INLINE bool isDecayingDict() const {
+ return mIsDecayingDict;
}
AK_FORCE_INLINE int getLastUpdatedTime() const {
@@ -113,7 +113,7 @@
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
- static const char *const USES_FORGETTING_CURVE_KEY;
+ static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
@@ -126,7 +126,7 @@
const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
- const bool mUsesForgettingCurve;
+ const bool mIsDecayingDict;
const int mLastUpdatedTime;
const int mUnigramCount;
const int mBigramCount;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 4277ff5..8d88c68 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -107,7 +107,7 @@
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
}
- bool needsToRunGC() const {
+ bool needsToRunGC(const bool mindsBlockByGC) const {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 17d2e39..9dc3482 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -42,6 +42,10 @@
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
+ AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
+ return mUsedAdditionalBufferSize;
+ }
+
/**
* For reading.
*/
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 4fae919..f22e94c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -96,7 +96,7 @@
fclose(file);
return false;
}
- const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
+ const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
additionalBufSize, 1, file) < 1) {
fclose(file);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 826c0f7..6a21522 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -606,7 +606,7 @@
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- while(!binaryDictionary.needsToRunGC()) {
+ while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 6c4cbcf..bd06e9f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -80,17 +80,17 @@
}
/**
- * Returns a decrypted/uncompressed binary dictionary.
+ * Returns a decrypted/uncompressed dictionary.
*
- * This will decrypt/uncompress any number of times as necessary until it finds the binary
+ * This will decrypt/uncompress any number of times as necessary until it finds the
* dictionary signature, and copy the decoded file to a temporary place.
- * If this is not a binary dictionary, the method returns null.
+ * If this is not a dictionary, the method returns null.
*/
- public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
- return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
+ public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
+ return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
}
- private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
+ private static DecoderChainSpec getRawDictionaryOrNullInternal(
final DecoderChainSpec spec, final File src, final int depth) {
// Unfortunately the decoding scheme we use can consider any data to be encrypted
// and will product some output, meaning it's not possible to reliably detect encrypted
@@ -98,7 +98,8 @@
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
// decoding the file.
if (depth > MAX_DECODE_DEPTH) return null;
- if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
+ if (BinaryDictDecoderUtils.isBinaryDictionary(src)
+ || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
spec.mFile = src;
return spec;
}
@@ -106,7 +107,7 @@
final File uncompressedFile = tryGetUncompressedFile(src);
if (null != uncompressedFile) {
final DecoderChainSpec newSpec =
- getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
+ getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
if (null == newSpec) return null;
return newSpec.addStep(COMPRESSION);
}
@@ -114,7 +115,7 @@
final File decryptedFile = tryGetDecryptedFile(src);
if (null != decryptedFile) {
final DecoderChainSpec newSpec =
- getRawBinaryDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
+ getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
if (null == newSpec) return null;
return newSpec.addStep(ENCRYPTION);
}
@@ -175,15 +176,16 @@
return XmlDictInputOutput.readDictionaryXml(
new BufferedInputStream(new FileInputStream(file)),
null /* shortcuts */, null /* bigrams */);
- } else if (CombinedInputOutput.isCombinedDictionary(filename)) {
- if (report) System.out.println("Format : Combined format");
- return CombinedInputOutput.readDictionaryCombined(
- new BufferedInputStream(new FileInputStream(file)));
} else {
- final DecoderChainSpec decodedSpec = getRawBinaryDictionaryOrNull(file);
+ final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
if (null == decodedSpec) {
crash(filename, new RuntimeException(
filename + " does not seem to be a dictionary file"));
+ } else if (CombinedInputOutput.isCombinedDictionary(
+ decodedSpec.mFile.getAbsolutePath())){
+ if (report) System.out.println("Format : Combined format");
+ return CombinedInputOutput.readDictionaryCombined(
+ new BufferedInputStream(new FileInputStream(decodedSpec.mFile)));
} else {
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile,
DictDecoder.USE_BYTEARRAY);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index 9274dcd..dff3387 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -79,7 +79,7 @@
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
}
final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(new File(mArgs[0]));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
if (null == decodedSpec) {
System.out.println(mArgs[0] + " does not seem to be a dictionary");
return;
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 1eff497..1baeb7a 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -64,7 +64,7 @@
// Test for an actually compressed dictionary and its contents
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
for (final String step : decodeSpec.mDecoderSpec) {
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
}
@@ -90,7 +90,7 @@
// Test that a random data file actually fails
assertNull("Wrongly identified data file",
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst));
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
gzDst.deleteOnExit();
@@ -103,6 +103,6 @@
// Test that a compressed random data file actually fails
assertNull("Wrongly identified data file",
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
}
}