Use sorted int[] to represent word separators

Change-Id: I4103541d99fe59bfcf12379a1298a0a690497846
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index a5b147a..325a0d9 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -35,6 +35,7 @@
 import com.android.inputmethod.latin.utils.TextRange;
 import com.android.inputmethod.research.ResearchLogger;
 
+import java.util.Arrays;
 import java.util.regex.Pattern;
 
 /**
@@ -557,8 +558,8 @@
         return getNthPreviousWord(prev, spacingAndPunctuations, n);
     }
 
-    private static boolean isSeparator(int code, String sep) {
-        return sep.indexOf(code) != -1;
+    private static boolean isSeparator(final int code, final int[] sortedSeparators) {
+        return Arrays.binarySearch(sortedSeparators, code) >= 0;
     }
 
     // Get the nth word before cursor. n = 1 retrieves the word immediately before the cursor,
@@ -597,29 +598,29 @@
     }
 
     /**
-     * @param separators characters which may separate words
+     * @param sortedSeparators a sorted array of code points which may separate words
      * @return the word that surrounds the cursor, including up to one trailing
      *   separator. For example, if the field contains "he|llo world", where |
      *   represents the cursor, then "hello " will be returned.
      */
-    public CharSequence getWordAtCursor(String separators) {
+    public CharSequence getWordAtCursor(final int[] sortedSeparators) {
         // getWordRangeAtCursor returns null if the connection is null
-        TextRange r = getWordRangeAtCursor(separators, 0);
+        final TextRange r = getWordRangeAtCursor(sortedSeparators, 0);
         return (r == null) ? null : r.mWord;
     }
 
     /**
      * Returns the text surrounding the cursor.
      *
-     * @param sep a string of characters that split words.
+     * @param sortedSeparators a sorted array of code points that split words.
      * @param additionalPrecedingWordsCount the number of words before the current word that should
      *   be included in the returned range
      * @return a range containing the text surrounding the cursor
      */
-    public TextRange getWordRangeAtCursor(final String sep,
+    public TextRange getWordRangeAtCursor(final int[] sortedSeparators,
             final int additionalPrecedingWordsCount) {
         mIC = mParent.getCurrentInputConnection();
-        if (mIC == null || sep == null) {
+        if (mIC == null) {
             return null;
         }
         final CharSequence before = mIC.getTextBeforeCursor(Constants.EDITOR_CONTENTS_CACHE_SIZE,
@@ -638,7 +639,7 @@
         while (true) { // see comments below for why this is guaranteed to halt
             while (startIndexInBefore > 0) {
                 final int codePoint = Character.codePointBefore(before, startIndexInBefore);
-                if (isStoppingAtWhitespace == isSeparator(codePoint, sep)) {
+                if (isStoppingAtWhitespace == isSeparator(codePoint, sortedSeparators)) {
                     break;  // inner loop
                 }
                 --startIndexInBefore;
@@ -659,7 +660,7 @@
         int endIndexInAfter = -1;
         while (++endIndexInAfter < after.length()) {
             final int codePoint = Character.codePointAt(after, endIndexInAfter);
-            if (isSeparator(codePoint, sep)) {
+            if (isSeparator(codePoint, sortedSeparators)) {
                 break;
             }
             if (Character.isSupplementaryCodePoint(codePoint)) {
diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
index d290daa..375a42e 100644
--- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
+++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
@@ -970,7 +970,8 @@
             if (TextUtils.isEmpty(selectedText)) return; // Race condition with the input connection
             mRecapitalizeStatus.initialize(mConnection.getExpectedSelectionStart(),
                     mConnection.getExpectedSelectionEnd(), selectedText.toString(),
-                    settingsValues.mLocale, settingsValues.mSpacingAndPunctuations.mWordSeparators);
+                    settingsValues.mLocale,
+                    settingsValues.mSpacingAndPunctuations.mSortedWordSeparators);
             // We trim leading and trailing whitespace.
             mRecapitalizeStatus.trim();
         }
@@ -1073,7 +1074,7 @@
         final int expectedCursorPosition = mConnection.getExpectedSelectionStart();
         if (!mConnection.isCursorTouchingWord(settingsValues.mSpacingAndPunctuations)) return;
         final TextRange range = mConnection.getWordRangeAtCursor(
-                settingsValues.mSpacingAndPunctuations.mWordSeparators,
+                settingsValues.mSpacingAndPunctuations.mSortedWordSeparators,
                 0 /* additionalPrecedingWordsCount */);
         if (null == range) return; // Happens if we don't have an input connection at all
         if (range.length() <= 0) return; // Race condition. No text to resume on, so bail out.
diff --git a/java/src/com/android/inputmethod/latin/settings/Settings.java b/java/src/com/android/inputmethod/latin/settings/Settings.java
index 7db1071..9bf269b 100644
--- a/java/src/com/android/inputmethod/latin/settings/Settings.java
+++ b/java/src/com/android/inputmethod/latin/settings/Settings.java
@@ -181,10 +181,6 @@
         return mSettingsValues.mIsInternal;
     }
 
-    public String getWordSeparators() {
-        return mSettingsValues.mSpacingAndPunctuations.mWordSeparators;
-    }
-
     public boolean isWordSeparator(final int code) {
         return mSettingsValues.isWordSeparator(code);
     }
diff --git a/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java b/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java
index 1a6f622..8ba32ff 100644
--- a/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java
+++ b/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java
@@ -35,8 +35,8 @@
     private final int[] mSortedSymbolsPrecededBySpace;
     private final int[] mSortedSymbolsFollowedBySpace;
     private final int[] mSortedWordConnectors;
+    public final int[] mSortedWordSeparators;
     public final SuggestedWords mSuggestPuncList;
-    public final String mWordSeparators;
     private final int mSentenceSeparator;
     public final String mSentenceSeparatorAndSpace;
     public final boolean mCurrentLanguageHasSpaces;
@@ -53,10 +53,11 @@
         // To be able to binary search the code point. See {@link #isWordConnector(int)}.
         mSortedWordConnectors = StringUtils.toSortedCodePointArray(
                 res.getString(R.string.symbols_word_connectors));
+        mSortedWordSeparators = StringUtils.toSortedCodePointArray(
+                res.getString(R.string.symbols_word_separators));
         final String[] suggestPuncsSpec = KeySpecParser.splitKeySpecs(res.getString(
                 R.string.suggested_punctuations));
         mSuggestPuncList = createSuggestPuncList(suggestPuncsSpec);
-        mWordSeparators = res.getString(R.string.symbols_word_separators);
         mSentenceSeparator = res.getInteger(R.integer.sentence_separator);
         mSentenceSeparatorAndSpace = new String(new int[] {
                 mSentenceSeparator, Constants.CODE_SPACE }, 0, 2);
@@ -91,7 +92,7 @@
     }
 
     public boolean isWordSeparator(final int code) {
-        return mWordSeparators.contains(String.valueOf((char)code));
+        return Arrays.binarySearch(mSortedWordSeparators, code) >= 0;
     }
 
     public boolean isWordConnector(final int code) {
diff --git a/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java b/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java
index 0f5cd80..4521ec5 100644
--- a/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java
+++ b/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java
@@ -37,12 +37,12 @@
         CAPS_MODE_ALL_UPPER
     };
 
-    private static final int getStringMode(final String string, final String separators) {
+    private static final int getStringMode(final String string, final int[] sortedSeparators) {
         if (StringUtils.isIdenticalAfterUpcase(string)) {
             return CAPS_MODE_ALL_UPPER;
         } else if (StringUtils.isIdenticalAfterDowncase(string)) {
             return CAPS_MODE_ALL_LOWER;
-        } else if (StringUtils.isIdenticalAfterCapitalizeEachWord(string, separators)) {
+        } else if (StringUtils.isIdenticalAfterCapitalizeEachWord(string, sortedSeparators)) {
             return CAPS_MODE_FIRST_WORD_UPPER;
         } else {
             return CAPS_MODE_ORIGINAL_MIXED_CASE;
@@ -60,26 +60,28 @@
     private int mRotationStyleCurrentIndex;
     private boolean mSkipOriginalMixedCaseMode;
     private Locale mLocale;
-    private String mSeparators;
+    private int[] mSortedSeparators;
     private String mStringAfter;
     private boolean mIsActive;
 
+    private static final int[] EMPTY_STORTED_SEPARATORS = {};
+
     public RecapitalizeStatus() {
         // By default, initialize with dummy values that won't match any real recapitalize.
-        initialize(-1, -1, "", Locale.getDefault(), "");
+        initialize(-1, -1, "", Locale.getDefault(), EMPTY_STORTED_SEPARATORS);
         deactivate();
     }
 
     public void initialize(final int cursorStart, final int cursorEnd, final String string,
-            final Locale locale, final String separators) {
+            final Locale locale, final int[] sortedSeparators) {
         mCursorStartBefore = cursorStart;
         mStringBefore = string;
         mCursorStartAfter = cursorStart;
         mCursorEndAfter = cursorEnd;
         mStringAfter = string;
-        final int initialMode = getStringMode(mStringBefore, separators);
+        final int initialMode = getStringMode(mStringBefore, sortedSeparators);
         mLocale = locale;
-        mSeparators = separators;
+        mSortedSeparators = sortedSeparators;
         if (CAPS_MODE_ORIGINAL_MIXED_CASE == initialMode) {
             mRotationStyleCurrentIndex = 0;
             mSkipOriginalMixedCaseMode = false;
@@ -131,7 +133,7 @@
                 mStringAfter = mStringBefore.toLowerCase(mLocale);
                 break;
             case CAPS_MODE_FIRST_WORD_UPPER:
-                mStringAfter = StringUtils.capitalizeEachWord(mStringBefore, mSeparators,
+                mStringAfter = StringUtils.capitalizeEachWord(mStringBefore, mSortedSeparators,
                         mLocale);
                 break;
             case CAPS_MODE_ALL_UPPER:
diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
index 699a0b8..5920c68 100644
--- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
@@ -272,39 +272,39 @@
     }
 
     public static boolean isIdenticalAfterCapitalizeEachWord(final String text,
-            final String separators) {
-        boolean needCapsNext = true;
+            final int[] sortedSeparators) {
+        boolean needsCapsNext = true;
         final int len = text.length();
         for (int i = 0; i < len; i = text.offsetByCodePoints(i, 1)) {
             final int codePoint = text.codePointAt(i);
             if (Character.isLetter(codePoint)) {
-                if ((needCapsNext && !Character.isUpperCase(codePoint))
-                        || (!needCapsNext && !Character.isLowerCase(codePoint))) {
+                if ((needsCapsNext && !Character.isUpperCase(codePoint))
+                        || (!needsCapsNext && !Character.isLowerCase(codePoint))) {
                     return false;
                 }
             }
             // We need a capital letter next if this is a separator.
-            needCapsNext = (-1 != separators.indexOf(codePoint));
+            needsCapsNext = (Arrays.binarySearch(sortedSeparators, codePoint) >= 0);
         }
         return true;
     }
 
     // TODO: like capitalizeFirst*, this does not work perfectly for Dutch because of the IJ digraph
     // which should be capitalized together in *some* cases.
-    public static String capitalizeEachWord(final String text, final String separators,
+    public static String capitalizeEachWord(final String text, final int[] sortedSeparators,
             final Locale locale) {
         final StringBuilder builder = new StringBuilder();
-        boolean needCapsNext = true;
+        boolean needsCapsNext = true;
         final int len = text.length();
         for (int i = 0; i < len; i = text.offsetByCodePoints(i, 1)) {
             final String nextChar = text.substring(i, text.offsetByCodePoints(i, 1));
-            if (needCapsNext) {
+            if (needsCapsNext) {
                 builder.append(nextChar.toUpperCase(locale));
             } else {
                 builder.append(nextChar.toLowerCase(locale));
             }
             // We need a capital letter next if this is a separator.
-            needCapsNext = (-1 != separators.indexOf(nextChar.codePointAt(0)));
+            needsCapsNext = (Arrays.binarySearch(sortedSeparators, nextChar.codePointAt(0)) >= 0);
         }
         return builder.toString();
     }
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index e7f49a6..11fb3a1 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -59,6 +59,7 @@
 import com.android.inputmethod.latin.SuggestedWords;
 import com.android.inputmethod.latin.define.ProductionFlag;
 import com.android.inputmethod.latin.utils.InputTypeUtils;
+import com.android.inputmethod.latin.utils.StringUtils;
 import com.android.inputmethod.latin.utils.TextRange;
 import com.android.inputmethod.research.MotionEventReader.ReplayData;
 import com.android.inputmethod.research.ui.SplashScreen;
@@ -131,7 +132,8 @@
     public static final String RESEARCH_KEY_OUTPUT_TEXT = ".research.";
 
     // constants related to specific log points
-    private static final String WHITESPACE_SEPARATORS = " \t\n\r";
+    private static final int[] WHITESPACE_SEPARATORS =
+            StringUtils.toSortedCodePointArray(" \t\n\r");
     private static final int MAX_INPUTVIEW_LENGTH_TO_CAPTURE = 8192; // must be >=1
     private static final String PREF_RESEARCH_SAVED_CHANNEL = "pref_research_saved_channel";
 
diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
index f19d185..7f07435 100644
--- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
+++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
@@ -32,6 +32,7 @@
 
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 import com.android.inputmethod.latin.utils.RunInLocale;
+import com.android.inputmethod.latin.utils.StringUtils;
 import com.android.inputmethod.latin.utils.TextRange;
 
 import java.util.Locale;
@@ -183,6 +184,12 @@
     /**
      * Test logic in getting the word range at the cursor.
      */
+    private static final int[] SPACE = { Constants.CODE_SPACE };
+    static final int[] TAB = { Constants.CODE_TAB };
+    private static final int[] SPACE_TAB = StringUtils.toSortedCodePointArray(" \t");
+    // A character that needs surrogate pair to represent its code point (U+2008A).
+    private static final String SUPPLEMENTARY_CHAR = "\uD840\uDC8A";
+
     public void testGetWordRangeAtCursor() {
         ExtractedText et = new ExtractedText();
         final MockInputMethodService mockInputMethodService = new MockInputMethodService();
@@ -194,48 +201,47 @@
 
         ic.beginBatchEdit();
         // basic case
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         assertTrue(TextUtils.equals("word", r.mWord));
 
         // more than one word
-        r = ic.getWordRangeAtCursor(" ", 1);
+        r = ic.getWordRangeAtCursor(SPACE, 1);
         assertTrue(TextUtils.equals("word word", r.mWord));
         ic.endBatchEdit();
 
         // tab character instead of space
         mockInputMethodService.setInputConnection(new MockConnection("one\tword\two", "rd", et));
         ic.beginBatchEdit();
-        r = ic.getWordRangeAtCursor("\t", 1);
+        r = ic.getWordRangeAtCursor(TAB, 1);
         ic.endBatchEdit();
         assertTrue(TextUtils.equals("word\tword", r.mWord));
 
         // only one word doesn't go too far
         mockInputMethodService.setInputConnection(new MockConnection("one\tword\two", "rd", et));
         ic.beginBatchEdit();
-        r = ic.getWordRangeAtCursor("\t", 1);
+        r = ic.getWordRangeAtCursor(TAB, 1);
         ic.endBatchEdit();
         assertTrue(TextUtils.equals("word\tword", r.mWord));
 
         // tab or space
         mockInputMethodService.setInputConnection(new MockConnection("one word\two", "rd", et));
         ic.beginBatchEdit();
-        r = ic.getWordRangeAtCursor(" \t", 1);
+        r = ic.getWordRangeAtCursor(SPACE_TAB, 1);
         ic.endBatchEdit();
         assertTrue(TextUtils.equals("word\tword", r.mWord));
 
         // tab or space multiword
         mockInputMethodService.setInputConnection(new MockConnection("one word\two", "rd", et));
         ic.beginBatchEdit();
-        r = ic.getWordRangeAtCursor(" \t", 2);
+        r = ic.getWordRangeAtCursor(SPACE_TAB, 2);
         ic.endBatchEdit();
         assertTrue(TextUtils.equals("one word\tword", r.mWord));
 
         // splitting on supplementary character
-        final String supplementaryChar = "\uD840\uDC8A";
         mockInputMethodService.setInputConnection(
-                new MockConnection("one word" + supplementaryChar + "wo", "rd", et));
+                new MockConnection("one word" + SUPPLEMENTARY_CHAR + "wo", "rd", et));
         ic.beginBatchEdit();
-        r = ic.getWordRangeAtCursor(supplementaryChar, 0);
+        r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR), 0);
         ic.endBatchEdit();
         assertTrue(TextUtils.equals("word", r.mWord));
     }
@@ -265,7 +271,7 @@
         TextRange r;
         SuggestionSpan[] suggestions;
 
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 1);
         MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@@ -277,7 +283,7 @@
         text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
                 10 /* start */, 16 /* end */, 0 /* flags */);
         mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 2);
         MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@@ -290,7 +296,7 @@
         text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
                 5 /* start */, 16 /* end */, 0 /* flags */);
         mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 1);
         MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@@ -302,7 +308,7 @@
         text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
                 10 /* start */, 20 /* end */, 0 /* flags */);
         mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 1);
         MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@@ -314,7 +320,7 @@
         text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
                 5 /* start */, 20 /* end */, 0 /* flags */);
         mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 1);
         MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@@ -326,7 +332,7 @@
         text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
                 5 /* start */, 20 /* end */, 0 /* flags */);
         mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
-        r = ic.getWordRangeAtCursor(" ", 0);
+        r = ic.getWordRangeAtCursor(SPACE, 0);
         suggestions = r.getSuggestionSpansAtWord();
         assertEquals(suggestions.length, 0);
     }
diff --git a/tests/src/com/android/inputmethod/latin/utils/RecapitalizeStatusTests.java b/tests/src/com/android/inputmethod/latin/utils/RecapitalizeStatusTests.java
index a520412..ada80c3 100644
--- a/tests/src/com/android/inputmethod/latin/utils/RecapitalizeStatusTests.java
+++ b/tests/src/com/android/inputmethod/latin/utils/RecapitalizeStatusTests.java
@@ -19,31 +19,35 @@
 import android.test.AndroidTestCase;
 import android.test.suitebuilder.annotation.SmallTest;
 
+import com.android.inputmethod.latin.Constants;
+
 import java.util.Locale;
 
 @SmallTest
 public class RecapitalizeStatusTests extends AndroidTestCase {
+    private static final int[] SPACE = { Constants.CODE_SPACE };
+
     public void testTrim() {
         final RecapitalizeStatus status = new RecapitalizeStatus();
-        status.initialize(30, 40, "abcdefghij", Locale.ENGLISH, " ");
+        status.initialize(30, 40, "abcdefghij", Locale.ENGLISH, SPACE);
         status.trim();
         assertEquals("abcdefghij", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(40, status.getNewCursorEnd());
 
-        status.initialize(30, 44, "    abcdefghij", Locale.ENGLISH, " ");
+        status.initialize(30, 44, "    abcdefghij", Locale.ENGLISH, SPACE);
         status.trim();
         assertEquals("abcdefghij", status.getRecapitalizedString());
         assertEquals(34, status.getNewCursorStart());
         assertEquals(44, status.getNewCursorEnd());
 
-        status.initialize(30, 40, "abcdefgh  ", Locale.ENGLISH, " ");
+        status.initialize(30, 40, "abcdefgh  ", Locale.ENGLISH, SPACE);
         status.trim();
         assertEquals("abcdefgh", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(38, status.getNewCursorEnd());
 
-        status.initialize(30, 45, "   abcdefghij  ", Locale.ENGLISH, " ");
+        status.initialize(30, 45, "   abcdefghij  ", Locale.ENGLISH, SPACE);
         status.trim();
         assertEquals("abcdefghij", status.getRecapitalizedString());
         assertEquals(33, status.getNewCursorStart());
@@ -52,7 +56,7 @@
 
     public void testRotate() {
         final RecapitalizeStatus status = new RecapitalizeStatus();
-        status.initialize(29, 40, "abcd efghij", Locale.ENGLISH, " ");
+        status.initialize(29, 40, "abcd efghij", Locale.ENGLISH, SPACE);
         status.rotate();
         assertEquals("Abcd Efghij", status.getRecapitalizedString());
         assertEquals(29, status.getNewCursorStart());
@@ -64,7 +68,7 @@
         status.rotate();
         assertEquals("Abcd Efghij", status.getRecapitalizedString());
 
-        status.initialize(29, 40, "Abcd Efghij", Locale.ENGLISH, " ");
+        status.initialize(29, 40, "Abcd Efghij", Locale.ENGLISH, SPACE);
         status.rotate();
         assertEquals("ABCD EFGHIJ", status.getRecapitalizedString());
         assertEquals(29, status.getNewCursorStart());
@@ -76,7 +80,7 @@
         status.rotate();
         assertEquals("ABCD EFGHIJ", status.getRecapitalizedString());
 
-        status.initialize(29, 40, "ABCD EFGHIJ", Locale.ENGLISH, " ");
+        status.initialize(29, 40, "ABCD EFGHIJ", Locale.ENGLISH, SPACE);
         status.rotate();
         assertEquals("abcd efghij", status.getRecapitalizedString());
         assertEquals(29, status.getNewCursorStart());
@@ -88,7 +92,7 @@
         status.rotate();
         assertEquals("abcd efghij", status.getRecapitalizedString());
 
-        status.initialize(29, 39, "AbCDefghij", Locale.ENGLISH, " ");
+        status.initialize(29, 39, "AbCDefghij", Locale.ENGLISH, SPACE);
         status.rotate();
         assertEquals("abcdefghij", status.getRecapitalizedString());
         assertEquals(29, status.getNewCursorStart());
@@ -102,7 +106,7 @@
         status.rotate();
         assertEquals("abcdefghij", status.getRecapitalizedString());
 
-        status.initialize(29, 40, "Abcd efghij", Locale.ENGLISH, " ");
+        status.initialize(29, 40, "Abcd efghij", Locale.ENGLISH, SPACE);
         status.rotate();
         assertEquals("abcd efghij", status.getRecapitalizedString());
         assertEquals(29, status.getNewCursorStart());
@@ -116,7 +120,8 @@
         status.rotate();
         assertEquals("abcd efghij", status.getRecapitalizedString());
 
-        status.initialize(30, 34, "grüß", Locale.GERMAN, " "); status.rotate();
+        status.initialize(30, 34, "grüß", Locale.GERMAN, SPACE);
+        status.rotate();
         assertEquals("Grüß", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(34, status.getNewCursorEnd());
@@ -133,7 +138,8 @@
         assertEquals(30, status.getNewCursorStart());
         assertEquals(34, status.getNewCursorEnd());
 
-        status.initialize(30, 33, "œuf", Locale.FRENCH, " "); status.rotate();
+        status.initialize(30, 33, "œuf", Locale.FRENCH, SPACE);
+        status.rotate();
         assertEquals("Œuf", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(33, status.getNewCursorEnd());
@@ -150,7 +156,8 @@
         assertEquals(30, status.getNewCursorStart());
         assertEquals(33, status.getNewCursorEnd());
 
-        status.initialize(30, 33, "œUf", Locale.FRENCH, " "); status.rotate();
+        status.initialize(30, 33, "œUf", Locale.FRENCH, SPACE);
+        status.rotate();
         assertEquals("œuf", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(33, status.getNewCursorEnd());
@@ -171,7 +178,8 @@
         assertEquals(30, status.getNewCursorStart());
         assertEquals(33, status.getNewCursorEnd());
 
-        status.initialize(30, 35, "école", Locale.FRENCH, " "); status.rotate();
+        status.initialize(30, 35, "école", Locale.FRENCH, SPACE);
+        status.rotate();
         assertEquals("École", status.getRecapitalizedString());
         assertEquals(30, status.getNewCursorStart());
         assertEquals(35, status.getNewCursorEnd());
diff --git a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
index 7196612..e55c32b 100644
--- a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
@@ -19,6 +19,8 @@
 import android.test.AndroidTestCase;
 import android.test.suitebuilder.annotation.SmallTest;
 
+import com.android.inputmethod.latin.Constants;
+
 import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
@@ -206,42 +208,47 @@
         assertTrue(StringUtils.isIdenticalAfterDowncase(""));
     }
 
-    private static void checkCapitalize(final String src, final String dst, final String separators,
-            final Locale locale) {
-        assertEquals(dst, StringUtils.capitalizeEachWord(src, separators, locale));
+    private static void checkCapitalize(final String src, final String dst,
+            final int[] sortedSeparators, final Locale locale) {
+        assertEquals(dst, StringUtils.capitalizeEachWord(src, sortedSeparators, locale));
         assert(src.equals(dst)
-                == StringUtils.isIdenticalAfterCapitalizeEachWord(src, separators));
+                == StringUtils.isIdenticalAfterCapitalizeEachWord(src, sortedSeparators));
     }
 
+    private static final int[] SPACE = { Constants.CODE_SPACE };
+    private static final int[] SPACE_PERIOD = StringUtils.toSortedCodePointArray(" .");
+    private static final int[] SENTENCE_SEPARATORS =
+            StringUtils.toSortedCodePointArray(" \n.!?*()&");
+    private static final int[] WORD_SEPARATORS = StringUtils.toSortedCodePointArray(" \n.!?*,();&");
+
     public void testCapitalizeEachWord() {
-        checkCapitalize("", "", " ", Locale.ENGLISH);
-        checkCapitalize("test", "Test", " ", Locale.ENGLISH);
-        checkCapitalize("    test", "    Test", " ", Locale.ENGLISH);
-        checkCapitalize("Test", "Test", " ", Locale.ENGLISH);
-        checkCapitalize("    Test", "    Test", " ", Locale.ENGLISH);
-        checkCapitalize(".Test", ".test", " ", Locale.ENGLISH);
-        checkCapitalize(".Test", ".Test", " .", Locale.ENGLISH);
-        checkCapitalize(".Test", ".Test", ". ", Locale.ENGLISH);
-        checkCapitalize("test and retest", "Test And Retest", " .", Locale.ENGLISH);
-        checkCapitalize("Test and retest", "Test And Retest", " .", Locale.ENGLISH);
-        checkCapitalize("Test And Retest", "Test And Retest", " .", Locale.ENGLISH);
-        checkCapitalize("Test And.Retest  ", "Test And.Retest  ", " .", Locale.ENGLISH);
-        checkCapitalize("Test And.retest  ", "Test And.Retest  ", " .", Locale.ENGLISH);
-        checkCapitalize("Test And.retest  ", "Test And.retest  ", " ", Locale.ENGLISH);
-        checkCapitalize("Test And.Retest  ", "Test And.retest  ", " ", Locale.ENGLISH);
-        checkCapitalize("test and ietest", "Test And İetest", " .", new Locale("tr"));
-        checkCapitalize("test and ietest", "Test And Ietest", " .", Locale.ENGLISH);
-        checkCapitalize("Test&Retest", "Test&Retest", " \n.!?*()&", Locale.ENGLISH);
-        checkCapitalize("Test&retest", "Test&Retest", " \n.!?*()&", Locale.ENGLISH);
-        checkCapitalize("test&Retest", "Test&Retest", " \n.!?*()&", Locale.ENGLISH);
+        checkCapitalize("", "", SPACE, Locale.ENGLISH);
+        checkCapitalize("test", "Test", SPACE, Locale.ENGLISH);
+        checkCapitalize("    test", "    Test", SPACE, Locale.ENGLISH);
+        checkCapitalize("Test", "Test", SPACE, Locale.ENGLISH);
+        checkCapitalize("    Test", "    Test", SPACE, Locale.ENGLISH);
+        checkCapitalize(".Test", ".test", SPACE, Locale.ENGLISH);
+        checkCapitalize(".Test", ".Test", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("test and retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test and retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test And Retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test And.Retest  ", "Test And.Retest  ", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test And.retest  ", "Test And.Retest  ", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test And.retest  ", "Test And.retest  ", SPACE, Locale.ENGLISH);
+        checkCapitalize("Test And.Retest  ", "Test And.retest  ", SPACE, Locale.ENGLISH);
+        checkCapitalize("test and ietest", "Test And İetest", SPACE_PERIOD, new Locale("tr"));
+        checkCapitalize("test and ietest", "Test And Ietest", SPACE_PERIOD, Locale.ENGLISH);
+        checkCapitalize("Test&Retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
+        checkCapitalize("Test&retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
+        checkCapitalize("test&Retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
         checkCapitalize("rest\nrecreation! And in the end...",
-                "Rest\nRecreation! And In The End...", " \n.!?*,();&", Locale.ENGLISH);
+                "Rest\nRecreation! And In The End...", WORD_SEPARATORS, Locale.ENGLISH);
         checkCapitalize("lorem ipsum dolor sit amet", "Lorem Ipsum Dolor Sit Amet",
-                " \n.,!?*()&;", Locale.ENGLISH);
+                WORD_SEPARATORS, Locale.ENGLISH);
         checkCapitalize("Lorem!Ipsum (Dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet",
-                " \n,.;!?*()&", Locale.ENGLISH);
+                WORD_SEPARATORS, Locale.ENGLISH);
         checkCapitalize("Lorem!Ipsum (dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet",
-                " \n,.;!?*()&", Locale.ENGLISH);
+                WORD_SEPARATORS, Locale.ENGLISH);
     }
 
     public void testLooksLikeURL() {