Selection spans should not split surrogate pairs.

When committing a span after a revert, the span's end offset could fall
between the two code units of a Unicode surrogate pair used to encode an emoji.
Checking the last character of the span lets us avoid this problem.

Fix for bug 19255233.

Change-Id: I07d18d9002b5075f7925319dd05962011656c311
diff --git a/common/src/com/android/inputmethod/latin/common/Constants.java b/common/src/com/android/inputmethod/latin/common/Constants.java
index a860d35..a10f866 100644
--- a/common/src/com/android/inputmethod/latin/common/Constants.java
+++ b/common/src/com/android/inputmethod/latin/common/Constants.java
@@ -163,7 +163,6 @@
     // TODO: replace the following constants with state in InputTransaction?
     public static final int NOT_A_COORDINATE = -1;
     public static final int SUGGESTION_STRIP_COORDINATE = -2;
-    public static final int SPELL_CHECKER_COORDINATE = -3;
     public static final int EXTERNAL_KEYBOARD_COORDINATE = -4;
 
     // A hint on how many characters to cache from the TextView. A good value of this is given by
@@ -214,8 +213,6 @@
     public static final int CODE_DASH = '-';
     public static final int CODE_SINGLE_QUOTE = '\'';
     public static final int CODE_DOUBLE_QUOTE = '"';
-    public static final int CODE_QUESTION_MARK = '?';
-    public static final int CODE_EXCLAMATION_MARK = '!';
     public static final int CODE_SLASH = '/';
     public static final int CODE_BACKSLASH = '\\';
     public static final int CODE_VERTICAL_BAR = '|';
diff --git a/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java b/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java
new file mode 100644
index 0000000..1097463
--- /dev/null
+++ b/common/src/com/android/inputmethod/latin/common/UnicodeSurrogate.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.common;
+
+/**
+ * Emojis are supplementary characters encoded in UTF-16 as a surrogate pair. For instance,
+ * U+1F625 is "\uD83D\uDE25": '\uD83D' is in [0xD800, 0xDBFF], '\uDE25' is in [0xDC00, 0xDFFF].
+ * NOTE: "low"/"high" below name the first/second unit, the reverse of {@link Character}'s usage.
+ * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/lang/Character.html#unicode">Unicode</a>
+ */
+public final class UnicodeSurrogate {
+    private static final char LOW_SURROGATE_MIN = '\uD800';  // first unit of a pair, inclusive
+    private static final char LOW_SURROGATE_MAX = '\uDBFF';  // first unit of a pair, inclusive
+    private static final char HIGH_SURROGATE_MIN = '\uDC00';  // second unit of a pair, inclusive
+    private static final char HIGH_SURROGATE_MAX = '\uDFFF';  // second unit of a pair, inclusive
+
+    public static boolean isLowSurrogate(final char c) {  // true iff c is in [0xD800, 0xDBFF]
+        return c >= LOW_SURROGATE_MIN && c <= LOW_SURROGATE_MAX;
+    }
+
+    public static boolean isHighSurrogate(final char c) {  // true iff c is in [0xDC00, 0xDFFF]
+        return c >= HIGH_SURROGATE_MIN && c <= HIGH_SURROGATE_MAX;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 2cf476f..f770e88 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -21,6 +21,7 @@
 import android.os.Bundle;
 import android.text.SpannableStringBuilder;
 import android.text.TextUtils;
+import android.text.style.CharacterStyle;
 import android.util.Log;
 import android.view.KeyEvent;
 import android.view.inputmethod.CompletionInfo;
@@ -32,6 +33,7 @@
 
 import com.android.inputmethod.compat.InputConnectionCompatUtils;
 import com.android.inputmethod.latin.common.Constants;
+import com.android.inputmethod.latin.common.UnicodeSurrogate;
 import com.android.inputmethod.latin.common.StringUtils;
 import com.android.inputmethod.latin.inputlogic.PrivateCommandPerformer;
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
@@ -261,7 +263,28 @@
         mComposingText.setLength(0);
         mLastCommittedTextHasBackgroundColor = false;
         if (null != mIC) {
-            mIC.commitText(text, newCursorPosition);
+            mTempObjectForCommitText.clear();
+            mTempObjectForCommitText.append(text);
+            final CharacterStyle[] spans = mTempObjectForCommitText.getSpans(
+                    0, text.length(), CharacterStyle.class);
+            for (final CharacterStyle span : spans) {
+                final int spanStart = mTempObjectForCommitText.getSpanStart(span);
+                final int spanEnd = mTempObjectForCommitText.getSpanEnd(span);
+                final int spanFlags = mTempObjectForCommitText.getSpanFlags(span);
+                // We have to adjust the end of the span to include an additional character.
+                // This is to avoid splitting a unicode surrogate pair.
+                // See com.android.inputmethod.latin.common.UnicodeSurrogate
+                // See https://b.corp.google.com/issues/19255233
+                if (0 < spanEnd && spanEnd < mTempObjectForCommitText.length()) {
+                    final char spanEndChar = mTempObjectForCommitText.charAt(spanEnd - 1);
+                    final char nextChar = mTempObjectForCommitText.charAt(spanEnd);
+                    if (UnicodeSurrogate.isLowSurrogate(spanEndChar)
+                            && UnicodeSurrogate.isHighSurrogate(nextChar)) {
+                        mTempObjectForCommitText.setSpan(span, spanStart, spanEnd + 1, spanFlags);
+                    }
+                }
+            }
+            mIC.commitText(mTempObjectForCommitText, newCursorPosition);
         }
     }
 
diff --git a/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java b/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java
new file mode 100644
index 0000000..59bb082
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/common/UnicodeSurrogateTests.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.common;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.SmallTest;
+
+@SmallTest
+public class UnicodeSurrogateTests extends AndroidTestCase {
+
+    public void testIsLowSurrogate() {  // probes both edges of [0xD800, 0xDBFF]
+        assertFalse(UnicodeSurrogate.isLowSurrogate('\uD7FF'));  // one below the range start
+        assertTrue(UnicodeSurrogate.isLowSurrogate('\uD83D'));  // first unit of emoji U+1F625
+        assertFalse(UnicodeSurrogate.isLowSurrogate('\uDC00'));  // one past the range end
+    }
+
+    public void testIsHighSurrogate() {  // probes both edges of [0xDC00, 0xDFFF]
+        assertFalse(UnicodeSurrogate.isHighSurrogate('\uDBFF'));  // one below the range start
+        assertTrue(UnicodeSurrogate.isHighSurrogate('\uDE25'));  // second unit of emoji U+1F625
+        assertFalse(UnicodeSurrogate.isHighSurrogate('\uE000'));  // one past the range end
+    }
+}