Move tools/makedict from platform/development to platform/packages/inputmethods/LatinIME

The corresponding change is I559207ab

Change-Id: I01ef7084cffa72468e87147e0ec7a9b16dd19990
diff --git a/tools/Android.mk b/tools/Android.mk
new file mode 100644
index 0000000..8f1acc5
--- /dev/null
+++ b/tools/Android.mk
@@ -0,0 +1,17 @@
+# Copyright (C) 2010 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(call all-makefiles-under,$(LOCAL_PATH))
diff --git a/tools/makedict/Android.mk b/tools/makedict/Android.mk
new file mode 100644
index 0000000..b9fc553
--- /dev/null
+++ b/tools/makedict/Android.mk
@@ -0,0 +1,24 @@
+#
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(call all-java-files-under,src)
+LOCAL_JAR_MANIFEST := etc/manifest.txt
+LOCAL_MODULE := makedict
+
+include $(BUILD_HOST_JAVA_LIBRARY)
+include $(LOCAL_PATH)/etc/Android.mk
diff --git a/tools/makedict/etc/Android.mk b/tools/makedict/etc/Android.mk
new file mode 100644
index 0000000..da16286
--- /dev/null
+++ b/tools/makedict/etc/Android.mk
@@ -0,0 +1,20 @@
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_PREBUILT_EXECUTABLES := makedict
+include $(BUILD_HOST_PREBUILT)
+
diff --git a/tools/makedict/etc/makedict b/tools/makedict/etc/makedict
new file mode 100755
index 0000000..8420d6e
--- /dev/null
+++ b/tools/makedict/etc/makedict
@@ -0,0 +1,63 @@
+#!/bin/sh
+# Copyright 2009, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set up prog to be the path of this script, including following symlinks,
+# and set up progdir to be the fully-qualified pathname of its directory.
+prog="$0"
+while [ -h "${prog}" ]; do
+    newProg=`/bin/ls -ld "${prog}"`
+    newProg=`expr "${newProg}" : ".* -> \(.*\)$"`
+    if expr "x${newProg}" : 'x/' >/dev/null; then
+        prog="${newProg}"
+    else
+        progdir=`dirname "${prog}"`
+        prog="${progdir}/${newProg}"
+    fi
+done
+oldwd=`pwd`
+progdir=`dirname "${prog}"`
+cd "${progdir}"
+progdir=`pwd`
+prog="${progdir}"/`basename "${prog}"`
+cd "${oldwd}"
+
+jarfile=makedict.jar
+frameworkdir="$progdir"
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    frameworkdir=`dirname "$progdir"`/tools/lib
+    libdir=`dirname "$progdir"`/tools/lib
+fi
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    frameworkdir=`dirname "$progdir"`/framework
+    libdir=`dirname "$progdir"`/lib
+fi
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    echo `basename "$prog"`": can't find $jarfile"
+    exit 1
+fi
+
+if [ "$OSTYPE" = "cygwin" ] ; then
+    jarpath=`cygpath -w  "$frameworkdir/$jarfile"`
+    progdir=`cygpath -w  "$progdir"`
+else
+    jarpath="$frameworkdir/$jarfile"
+fi
+
+# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored
+# might need more memory, e.g. -Xmx128M
+exec java -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@"
diff --git a/tools/makedict/etc/manifest.txt b/tools/makedict/etc/manifest.txt
new file mode 100644
index 0000000..aa3a3e8
--- /dev/null
+++ b/tools/makedict/etc/manifest.txt
@@ -0,0 +1 @@
+Main-Class: com.android.tools.dict.MakeBinaryDictionary
diff --git a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java b/tools/makedict/src/com/android/tools/dict/BigramDictionary.java
new file mode 100644
index 0000000..35115bf
--- /dev/null
+++ b/tools/makedict/src/com/android/tools/dict/BigramDictionary.java
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.tools.dict;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+/**
+ * Helper for MakeBinaryDictionary
+ * Deals with all the bigram data
+ */
+public class BigramDictionary {
+
+    /*
+     * Must match the values in the client side which is located in dictionary.cpp & dictionary.h
+     * Changing these values will generate totally different structure which must be also reflected
+     * on the client side.
+     */
+    public static final int FLAG_BIGRAM_READ = 0x80;
+    public static final int FLAG_BIGRAM_CHILDEXIST = 0x40;
+    public static final int FLAG_BIGRAM_CONTINUED = 0x80;
+    public static final int FLAG_BIGRAM_FREQ = 0x7F;
+
+    public static final int FOR_REVERSE_LOOKUPALL = -99;
+
+    public ArrayList<String> mBigramToFill = new ArrayList<String>();
+    public ArrayList<Integer> mBigramToFillAddress = new ArrayList<Integer>();
+
+    public HashMap<String, Bigram> mBi;
+
+    public boolean mHasBigram;
+
+    public BigramDictionary(String bigramSrcFilename, boolean hasBigram) {
+        mHasBigram = hasBigram;
+        loadBigram(bigramSrcFilename);
+    }
+
+    private void loadBigram(String filename) {
+        mBi = new HashMap<String, Bigram>();
+        if (!mHasBigram) {
+            System.out.println("Number of bigrams = " + Bigram.sBigramNum);
+            return;
+        }
+        try {
+            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
+            parser.parse(new File(filename), new DefaultHandler() {
+                String w1 = null;
+                boolean inWord1 = false;
+                boolean inWord2 = false;
+                int freq = 0, counter = 0;
+                Bigram tempBigram = null;
+
+                @Override
+                public void startElement(String uri, String localName,
+                        String qName, Attributes attributes) {
+                    if (qName.equals("bi")) {
+                        inWord1 = true;
+                        w1 = attributes.getValue(0);
+                        int count = Integer.parseInt(attributes.getValue(1));
+                        tempBigram = new Bigram(count);
+                        counter = 0;
+                    } else if (qName.equals("w")) {
+                        inWord2 = true;
+                        String word2 = attributes.getValue(0);
+                        int freq = Integer.parseInt(attributes.getValue(1));
+                        tempBigram.setWord2(counter, word2, freq);
+                        counter++;
+                        Bigram.sBigramNum++;
+                    }
+                }
+
+                @Override
+                public void endElement(String uri, String localName,
+                        String qName) {
+                    if (inWord2) {
+                        inWord2 = false;
+                    } else if (inWord1) {
+                        inWord1 = false;
+                        mBi.put(w1, tempBigram);
+                    }
+                }
+            });
+        } catch (Exception ioe) {
+            System.err.println("Exception in parsing bigram\n" + ioe);
+            ioe.printStackTrace();
+        }
+        System.out.println("Number of bigrams = " + Bigram.sBigramNum);
+    }
+
+    byte[] writeBigrams(byte[] dict, Map<String, Integer> mDictionary) {
+        for (int i = 0; i < mBigramToFill.size(); i++) {
+            String w1 = mBigramToFill.get(i);
+            int address = mBigramToFillAddress.get(i);
+
+            Bigram temp = mBi.get(w1);
+            int word2Count = temp.count;
+            int j4;
+            for (int j = 0; j < word2Count; j++) {
+                if (!mDictionary.containsKey(temp.word2[j])) {
+                    System.out.println("Not in dictionary: " + temp.word2[j]);
+                    System.exit(0);
+                } else {
+                    j4 = (j * 4);
+                    int addressOfWord2 = mDictionary.get(temp.word2[j]);
+                    dict[address + j4 + 0] = (byte) (((addressOfWord2 & 0x3F0000) >> 16)
+                            | FLAG_BIGRAM_READ);
+                    dict[address + j4 + 1] = (byte) ((addressOfWord2 & 0x00FF00) >> 8);
+                    dict[address + j4 + 2] = (byte) ((addressOfWord2 & 0x0000FF));
+
+                    if (j == (word2Count - 1)) {
+                        dict[address + j4 + 3] = (byte) (temp.freq[j] & FLAG_BIGRAM_FREQ);
+                    } else {
+                        dict[address + j4 + 3] = (byte) ((temp.freq[j] & FLAG_BIGRAM_FREQ)
+                                | FLAG_BIGRAM_CONTINUED);
+                    }
+                }
+            }
+        }
+
+        return dict;
+    }
+
+    void reverseLookupAll(Map<String, Integer> mDictionary, byte[] dict) {
+        Set<String> st = mDictionary.keySet();
+        for (String s : st) {
+            searchForTerminalNode(mDictionary.get(s), FOR_REVERSE_LOOKUPALL, dict);
+        }
+    }
+
+    void searchForTerminalNode(int bigramAddress, int frequency, byte[] dict) {
+        StringBuilder sb = new StringBuilder(48);
+        int pos;
+        boolean found = false;
+        int followDownBranchAddress = 2;
+        char followingChar = ' ';
+        int depth = 0;
+        int totalLoopCount = 0;
+
+        while (!found) {
+            boolean followDownAddressSearchStop = false;
+            boolean firstAddress = true;
+            boolean haveToSearchAll = true;
+
+            if (depth > 0) {
+                sb.append(followingChar);
+            }
+            pos = followDownBranchAddress; // pos start at count
+            int count = dict[pos] & 0xFF;
+            pos++;
+            for (int i = 0; i < count; i++) {
+                totalLoopCount++;
+                // pos at data
+                pos++;
+                // pos now at flag
+                if (!MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // non-terminal
+                    if (!followDownAddressSearchStop) {
+                        int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
+                        if (addr > bigramAddress) {
+                            followDownAddressSearchStop = true;
+                            if (firstAddress) {
+                                firstAddress = false;
+                                haveToSearchAll = true;
+                            } else if (!haveToSearchAll) {
+                                break;
+                            }
+                        } else {
+                            followDownBranchAddress = addr;
+                            followingChar = (char) (0xFF & dict[pos-1]);
+                            if(firstAddress) {
+                                firstAddress = false;
+                                haveToSearchAll = false;
+                            }
+                        }
+                    }
+                    pos += 3;
+                } else if (MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // terminal
+                    // found !!
+                    if (bigramAddress == (pos-1)) {
+                        sb.append((char) (0xFF & dict[pos-1]));
+                        found = true;
+                        break;
+                    }
+
+                    // address + freq (4 byte)
+                    if (MakeBinaryDictionary.getSecondBitOfByte(pos, dict)) {
+                        if (!followDownAddressSearchStop) {
+                            int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
+                            if (addr > bigramAddress) {
+                                followDownAddressSearchStop = true;
+                                if (firstAddress) {
+                                    firstAddress = false;
+                                    haveToSearchAll = true;
+                                } else if (!haveToSearchAll) {
+                                    break;
+                                }
+                            } else {
+                                followDownBranchAddress = addr;
+                                followingChar = (char) (0xFF & dict[pos-1]);
+                                if(firstAddress) {
+                                    firstAddress = false;
+                                    haveToSearchAll = true;
+                                }
+                            }
+                        }
+                        pos += 4;
+                    } else { // freq only (2 byte)
+                        pos += 2;
+                    }
+                    // skipping bigram
+                    int bigramExist = (dict[pos] & FLAG_BIGRAM_READ);
+                    if (bigramExist > 0) {
+                        int nextBigramExist = 1;
+                        while (nextBigramExist > 0) {
+                            pos += 3;
+                            nextBigramExist = (dict[pos++] & FLAG_BIGRAM_CONTINUED);
+                        }
+                    } else {
+                        pos++;
+                    }
+                }
+            }
+            depth++;
+            if (followDownBranchAddress == 2) {
+                System.out.println("ERROR!!! Cannot find bigram!!");
+                System.exit(0);
+            }
+        }
+
+        if (frequency == FOR_REVERSE_LOOKUPALL) {
+            System.out.println("Reverse: " + sb.toString() + " (" + bigramAddress + ")"
+                    + "   Loop: " + totalLoopCount);
+        } else {
+            System.out.println("   bigram: " + sb.toString() + " (" + bigramAddress + ") freq: "
+                    + frequency + "   Loop: " + totalLoopCount);
+        }
+    }
+
+    static class Bigram {
+        String[] word2;
+        int[] freq;
+        int count;
+        static int sBigramNum = 0;
+
+        String getSecondWord(int i) {
+            return word2[i];
+        }
+
+        int getFrequency(int i) {
+            return (freq[i] == 0) ? 1 : freq[i];
+        }
+
+        void setWord2(int index, String word2, int freq) {
+            this.word2[index] = word2;
+            this.freq[index] = freq;
+        }
+
+        public Bigram(int word2Count) {
+            count = word2Count;
+            word2 = new String[word2Count];
+            freq = new int[word2Count];
+        }
+    }
+}
diff --git a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java b/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java
new file mode 100755
index 0000000..ca6de56
--- /dev/null
+++ b/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.tools.dict;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+/**
+ * Compresses a list of words, frequencies, and bigram data
+ * into a tree structured binary dictionary.
+ * Dictionary Version: 200 (may contain bigrams)
+ *  Version number started from 200 rather than 1 because we wanted to prevent number of roots in
+ *  any old dictionaries being mistaken as the version number. There is not a chance that there
+ *  will be more than 200 roots. Version number should be increased when there is structural change
+ *  in the data. There is no need to increase the version when only the words in the data changes.
+ */
+public class MakeBinaryDictionary {
+
+    private static final int VERSION_NUM = 200;
+
+    public static final int ALPHA_SIZE = 256;
+
+    public static final String TAG_WORD = "w";
+    public static final String ATTR_FREQ = "f";
+
+    private static final int FLAG_ADDRESS_MASK  = 0x400000;
+    private static final int FLAG_TERMINAL_MASK = 0x800000;
+    private static final int ADDRESS_MASK = 0x3FFFFF;
+
+    /**
+     * Unit for this variable is in bytes
+     * If destination file name is main.dict and file limit causes dictionary to be separated into
+     * multiple file, it will generate main0.dict, main1.dict, and so forth.
+     */
+    private static int sOutputFileSize;
+    private static boolean sSplitOutput;
+
+    public static final CharNode EMPTY_NODE = new CharNode();
+
+    List<CharNode> roots;
+    Map<String, Integer> mDictionary;
+    int mWordCount;
+
+    BigramDictionary bigramDict;
+
+    static class CharNode {
+        char data;
+        int freq;
+        boolean terminal;
+        List<CharNode> children;
+        static int sNodes;
+
+        public CharNode() {
+            sNodes++;
+        }
+    }
+
+    public static void usage() {
+        System.err.println("Usage: makedict -s <src_dict.xml> [-b <src_bigram.xml>] "
+                + "-d <dest.dict> [--size filesize]");
+        System.exit(-1);
+    }
+    
+    public static void main(String[] args) {
+        int checkSource = -1;
+        int checkBigram = -1;
+        int checkDest = -1;
+        int checkFileSize = -1;
+        for (int i = 0; i < args.length; i+=2) {
+            if (args[i].equals("-s")) checkSource = (i + 1);
+            if (args[i].equals("-b")) checkBigram = (i + 1);
+            if (args[i].equals("-d")) checkDest = (i + 1);
+            if (args[i].equals("--size")) checkFileSize = (i + 1);
+        }
+        if (checkFileSize >= 0) {
+            sSplitOutput = true;
+            sOutputFileSize = Integer.parseInt(args[checkFileSize]);
+        } else {
+            sSplitOutput = false;
+        }
+        if (checkDest >= 0 && !args[checkDest].endsWith(".dict")) {
+            System.err.println("Error: Dictionary output file extension should be \".dict\"");
+            usage();
+        } else if (checkSource >= 0 && checkBigram >= 0 && checkDest >= 0 &&
+                ((!sSplitOutput && args.length == 6) || (sSplitOutput && args.length == 8))) {
+            new MakeBinaryDictionary(args[checkSource], args[checkBigram], args[checkDest]);
+        } else if (checkSource >= 0 && checkDest >= 0 &&
+                ((!sSplitOutput && args.length == 4) || (sSplitOutput && args.length == 6))) {
+            new MakeBinaryDictionary(args[checkSource], null, args[checkDest]);
+        } else {
+            usage();
+        }
+    }
+
+    public MakeBinaryDictionary(String srcFilename, String bigramSrcFilename, String destFilename){
+        System.out.println("Generating dictionary version " + VERSION_NUM);
+        bigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null));
+        populateDictionary(srcFilename);
+        writeToDict(destFilename);
+
+        // Enable the code below to verify that the generated tree is traversable
+        // and bigram data is stored correctly.
+        if (false) {
+            bigramDict.reverseLookupAll(mDictionary, dict);
+            traverseDict(2, new char[32], 0);
+        }
+    }
+
+    private void populateDictionary(String filename) {
+        roots = new ArrayList<CharNode>();
+        mDictionary = new HashMap<String, Integer>();
+        try {
+            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
+            parser.parse(new File(filename), new DefaultHandler() {
+                boolean inWord;
+                int freq;
+                StringBuilder wordBuilder = new StringBuilder(48);
+
+                @Override
+                public void startElement(String uri, String localName,
+                        String qName, Attributes attributes) {
+                    if (qName.equals("w")) {
+                        inWord = true;
+                        freq = Integer.parseInt(attributes.getValue(0));
+                        wordBuilder.setLength(0);
+                    }
+                }
+
+                @Override
+                public void characters(char[] data, int offset, int length) {
+                    // Ignore other whitespace
+                    if (!inWord) return;
+                    wordBuilder.append(data, offset, length);
+                }
+
+                @Override
+                public void endElement(String uri, String localName,
+                        String qName) {
+                    if (qName.equals("w")) {
+                        if (wordBuilder.length() > 1) {
+                            addWordTop(wordBuilder.toString(), freq);
+                            mWordCount++;
+                        }
+                        inWord = false;
+                    }
+                }
+            });
+        } catch (Exception ioe) {
+            System.err.println("Exception in parsing\n" + ioe);
+            ioe.printStackTrace();
+        }
+        System.out.println("Nodes = " + CharNode.sNodes);
+    }
+
+    private int indexOf(List<CharNode> children, char c) {
+        if (children == null) {
+            return -1;
+        }
+        for (int i = 0; i < children.size(); i++) {
+            if (children.get(i).data == c) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    private void addWordTop(String word, int occur) {
+        if (occur > 255) occur = 255;
+        char firstChar = word.charAt(0);
+        int index = indexOf(roots, firstChar);
+        if (index == -1) {
+            CharNode newNode = new CharNode();
+            newNode.data = firstChar;
+            newNode.freq = occur;
+            index = roots.size();
+            roots.add(newNode);
+        } else {
+            roots.get(index).freq += occur;
+        }
+        if (word.length() > 1) {
+            addWordRec(roots.get(index), word, 1, occur);
+        } else {
+            roots.get(index).terminal = true;
+        }
+    }
+
+    private void addWordRec(CharNode parent, String word, int charAt, int occur) {
+        CharNode child = null;
+        char data = word.charAt(charAt);
+        if (parent.children == null) {
+            parent.children = new ArrayList<CharNode>();
+        } else {
+            for (int i = 0; i < parent.children.size(); i++) {
+                CharNode node = parent.children.get(i);
+                if (node.data == data) {
+                    child = node;
+                    break;
+                }
+            }
+        }
+        if (child == null) {
+            child = new CharNode();
+            parent.children.add(child);
+        }
+        child.data = data;
+        if (child.freq == 0) child.freq = occur;
+        if (word.length() > charAt + 1) {
+            addWordRec(child, word, charAt + 1, occur);
+        } else {
+            child.terminal = true;
+            child.freq = occur;
+        }
+    }
+
+    byte[] dict;
+    int dictSize;
+    static final int CHAR_WIDTH = 8;
+    static final int FLAGS_WIDTH = 1; // Terminal flag (word end)
+    static final int ADDR_WIDTH = 23; // Offset to children
+    static final int FREQ_WIDTH_BYTES = 1;
+    static final int COUNT_WIDTH_BYTES = 1;
+
+    private void addCount(int count) {
+        dict[dictSize++] = (byte) (0xFF & count);
+    }
+
+    private void addNode(CharNode node, String word1) {
+        if (node.terminal) { // store address of each word1
+            mDictionary.put(word1, dictSize);
+        }
+        int charData = 0xFFFF & node.data;
+        if (charData > 254) {
+            dict[dictSize++] = (byte) 255;
+            dict[dictSize++] = (byte) ((node.data >> 8) & 0xFF);
+            dict[dictSize++] = (byte) (node.data & 0xFF);
+        } else {
+            dict[dictSize++] = (byte) (0xFF & node.data);
+        }
+        if (node.children != null) {
+            dictSize += 3; // Space for children address
+        } else {
+            dictSize += 1; // Space for just the terminal/address flags
+        }
+        if ((0xFFFFFF & node.freq) > 255) {
+            node.freq = 255;
+        }
+        if (node.terminal) {
+            byte freq = (byte) (0xFF & node.freq);
+            dict[dictSize++] = freq;
+            // bigram
+            if (bigramDict.mBi.containsKey(word1)) {
+                int count = bigramDict.mBi.get(word1).count;
+                bigramDict.mBigramToFill.add(word1);
+                bigramDict.mBigramToFillAddress.add(dictSize);
+                dictSize += (4 * count);
+            } else {
+                dict[dictSize++] = (byte) (0x00);
+            }
+        }
+    }
+
+    int nullChildrenCount = 0;
+    int notTerminalCount = 0;
+
+    private void updateNodeAddress(int nodeAddress, CharNode node,
+            int childrenAddress) {
+        if ((dict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
+            nodeAddress += 2;
+        }
+        childrenAddress = ADDRESS_MASK & childrenAddress;
+        if (childrenAddress == 0) {
+            nullChildrenCount++;
+        } else {
+            childrenAddress |= FLAG_ADDRESS_MASK;
+        }
+        if (node.terminal) {
+            childrenAddress |= FLAG_TERMINAL_MASK;
+        } else {
+            notTerminalCount++;
+        }
+        dict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
+        if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
+            dict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
+            dict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
+        }
+    }
+
+    void writeWordsRec(List<CharNode> children, StringBuilder word) {
+        if (children == null || children.size() == 0) {
+            return;
+        }
+        final int childCount = children.size();
+        addCount(childCount);
+        int[] childrenAddresses = new int[childCount];
+        for (int j = 0; j < childCount; j++) {
+            CharNode node = children.get(j);
+            childrenAddresses[j] = dictSize;
+            word.append(children.get(j).data);
+            addNode(node, word.toString());
+            word.deleteCharAt(word.length()-1);
+        }
+        for (int j = 0; j < childCount; j++) {
+            CharNode node = children.get(j);
+            int nodeAddress = childrenAddresses[j];
+            int cacheDictSize = dictSize;
+            word.append(children.get(j).data);
+            writeWordsRec(node.children, word);
+            word.deleteCharAt(word.length()-1);
+            updateNodeAddress(nodeAddress, node, node.children != null
+                    ? cacheDictSize : 0);
+        }
+    }
+
+    void writeToDict(String dictFilename) {
+        // 4MB max, 22-bit offsets
+        dict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
+                                          // < 1MB in most cases, as there is a limit in the
+                                          // resource size in apks.
+        dictSize = 0;
+
+        dict[dictSize++] = (byte) (0xFF & VERSION_NUM); // version info
+        dict[dictSize++] = (byte) (0xFF & (bigramDict.mHasBigram ? 1 : 0));
+
+        StringBuilder word = new StringBuilder(48);
+        writeWordsRec(roots, word);
+        dict = bigramDict.writeBigrams(dict, mDictionary);
+        System.out.println("Dict Size = " + dictSize);
+        if (!sSplitOutput) {
+            sOutputFileSize = dictSize;
+        }
+        try {
+            int currentLoc = 0;
+            int i = 0;
+            int extension = dictFilename.indexOf(".dict");
+            String filename = dictFilename.substring(0, extension);
+            while (dictSize > 0) {
+                FileOutputStream fos;
+                if (sSplitOutput) {
+                    fos = new FileOutputStream(filename + i + ".dict");
+                } else {
+                    fos = new FileOutputStream(filename + ".dict");
+                }
+                if (dictSize > sOutputFileSize) {
+                    fos.write(dict, currentLoc, sOutputFileSize);
+                    dictSize -= sOutputFileSize;
+                    currentLoc += sOutputFileSize;
+                } else {
+                    fos.write(dict, currentLoc, dictSize);
+                    dictSize = 0;
+                }
+                fos.close();
+                i++;
+            }
+        } catch (IOException ioe) {
+            System.err.println("Error writing dict file:" + ioe);
+        }
+    }
+
+    void traverseDict(int pos, char[] word, int depth) {
+        int count = dict[pos++] & 0xFF;
+        for (int i = 0; i < count; i++) {
+            char c = (char) (dict[pos++] & 0xFF);
+            if (c == 0xFF) { // two byte character
+                c = (char) (((dict[pos] & 0xFF) << 8) | (dict[pos+1] & 0xFF));
+                pos += 2;
+            }
+            word[depth] = c;
+            boolean terminal = getFirstBitOfByte(pos, dict);
+            int address = 0;
+            if ((dict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check
+                address = get22BitAddress(pos, dict);
+                pos += 3;
+            } else {
+                pos += 1;
+            }
+            if (terminal) {
+                showWord(word, depth + 1, dict[pos] & 0xFF);
+                pos++;
+
+                int bigramExist = (dict[pos] & bigramDict.FLAG_BIGRAM_READ);
+                if (bigramExist > 0) {
+                    int nextBigramExist = 1;
+                    while (nextBigramExist > 0) {
+                        int bigramAddress = get22BitAddress(pos, dict);
+                        pos += 3;
+                        int frequency = (bigramDict.FLAG_BIGRAM_FREQ & dict[pos]);
+                        bigramDict.searchForTerminalNode(bigramAddress, frequency, dict);
+                        nextBigramExist = (dict[pos++] & bigramDict.FLAG_BIGRAM_CONTINUED);
+                    }
+                } else {
+                    pos++;
+                }
+            }
+            if (address != 0) {
+                traverseDict(address, word, depth + 1);
+            }
+        }
+    }
+
+    void showWord(char[] word, int size, int freq) {
+        System.out.print(new String(word, 0, size) + " " + freq + "\n");
+    }
+
+    static int get22BitAddress(int pos, byte[] dict) {
+        return ((dict[pos + 0] & 0x3F) << 16)
+                | ((dict[pos + 1] & 0xFF) << 8)
+                | ((dict[pos + 2] & 0xFF));
+    }
+
+    static boolean getFirstBitOfByte(int pos, byte[] dict) {
+        return (dict[pos] & 0x80) > 0;
+    }
+
+    static boolean getSecondBitOfByte(int pos, byte[] dict) {
+        return (dict[pos] & 0x40) > 0;
+    }
+}