Selection spans should not split surrogate pairs.

When committing a span after a revert, the offset logic could place the end
of the span between the two halves of the Unicode surrogate pair used to
express an emoji. Checking the last character of the span and extending the
span by one character when needed lets us avoid this problem.

Fix for bug 19255233.

Change-Id: I07d18d9002b5075f7925319dd05962011656c311
This commit is contained in:
Dan Zivkovic 2015-02-04 16:12:15 -08:00
parent c71e4d6534
commit f3c319fb8a
4 changed files with 98 additions and 4 deletions

View File

@ -163,7 +163,6 @@ public final class Constants {
// TODO: replace the following constants with state in InputTransaction?
public static final int NOT_A_COORDINATE = -1;
public static final int SUGGESTION_STRIP_COORDINATE = -2;
public static final int SPELL_CHECKER_COORDINATE = -3;
public static final int EXTERNAL_KEYBOARD_COORDINATE = -4;
// A hint on how many characters to cache from the TextView. A good value of this is given by
@ -214,8 +213,6 @@ public final class Constants {
public static final int CODE_DASH = '-';
public static final int CODE_SINGLE_QUOTE = '\'';
public static final int CODE_DOUBLE_QUOTE = '"';
public static final int CODE_QUESTION_MARK = '?';
public static final int CODE_EXCLAMATION_MARK = '!';
public static final int CODE_SLASH = '/';
public static final int CODE_BACKSLASH = '\\';
public static final int CODE_VERTICAL_BAR = '|';

View File

@ -0,0 +1,38 @@
/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.common;
/**
 * Helpers for recognizing the two halves of a UTF-16 surrogate pair.
 *
 * <p>Emojis are supplementary characters expressed as a pair of {@code char}s. For instance,
 * the emoji U+1F625 is encoded as "\uD83D\uDE25" in UTF-16, where '\uD83D' is in
 * the range of [0xd800, 0xdbff] and '\uDE25' is in the range of [0xdc00, 0xdfff].
 *
 * <p><b>Naming caveat:</b> this class calls the first (leading) unit of the pair the "low"
 * surrogate and the second (trailing) unit the "high" surrogate. That is the reverse of the
 * terminology used by the Unicode standard and by {@link Character}: the range
 * [0xd800, 0xdbff] accepted by {@link #isLowSurrogate(char)} is the one accepted by
 * {@link Character#isHighSurrogate(char)}, and the range [0xdc00, 0xdfff] accepted by
 * {@link #isHighSurrogate(char)} is the one accepted by {@link Character#isLowSurrogate(char)}.
 * The method names are kept as they are for the sake of existing callers.
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/lang/Character.html#unicode">
 *     Character: Unicode character representations</a>
 */
public final class UnicodeSurrogate {
    // First code unit of a pair (called "high"/"lead" surrogate by the Unicode standard).
    private static final char LOW_SURROGATE_MIN = '\uD800';
    private static final char LOW_SURROGATE_MAX = '\uDBFF';
    // Second code unit of a pair (called "low"/"trail" surrogate by the Unicode standard).
    private static final char HIGH_SURROGATE_MIN = '\uDC00';
    private static final char HIGH_SURROGATE_MAX = '\uDFFF';

    private UnicodeSurrogate() {
        // This utility class is not publicly instantiable.
    }

    /**
     * Tells whether the given char is the first unit of a surrogate pair.
     *
     * @param c the UTF-16 code unit to test
     * @return true if {@code c} is in the inclusive range [0xd800, 0xdbff]
     */
    public static boolean isLowSurrogate(final char c) {
        return c >= LOW_SURROGATE_MIN && c <= LOW_SURROGATE_MAX;
    }

    /**
     * Tells whether the given char is the second unit of a surrogate pair.
     *
     * @param c the UTF-16 code unit to test
     * @return true if {@code c} is in the inclusive range [0xdc00, 0xdfff]
     */
    public static boolean isHighSurrogate(final char c) {
        return c >= HIGH_SURROGATE_MIN && c <= HIGH_SURROGATE_MAX;
    }
}

View File

@ -21,6 +21,7 @@ import android.os.Build;
import android.os.Bundle;
import android.text.SpannableStringBuilder;
import android.text.TextUtils;
import android.text.style.CharacterStyle;
import android.util.Log;
import android.view.KeyEvent;
import android.view.inputmethod.CompletionInfo;
@ -32,6 +33,7 @@ import android.view.inputmethod.InputMethodManager;
import com.android.inputmethod.compat.InputConnectionCompatUtils;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.UnicodeSurrogate;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.inputlogic.PrivateCommandPerformer;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
@ -261,7 +263,28 @@ public final class RichInputConnection implements PrivateCommandPerformer {
mComposingText.setLength(0);
mLastCommittedTextHasBackgroundColor = false;
if (null != mIC) {
mIC.commitText(text, newCursorPosition);
mTempObjectForCommitText.clear();
mTempObjectForCommitText.append(text);
final CharacterStyle[] spans = mTempObjectForCommitText.getSpans(
0, text.length(), CharacterStyle.class);
for (final CharacterStyle span : spans) {
final int spanStart = mTempObjectForCommitText.getSpanStart(span);
final int spanEnd = mTempObjectForCommitText.getSpanEnd(span);
final int spanFlags = mTempObjectForCommitText.getSpanFlags(span);
// We have to adjust the end of the span to include an additional character.
// This is to avoid splitting a unicode surrogate pair.
// See com.android.inputmethod.latin.common.UnicodeSurrogate
// See https://b.corp.google.com/issues/19255233
if (0 < spanEnd && spanEnd < mTempObjectForCommitText.length()) {
final char spanEndChar = mTempObjectForCommitText.charAt(spanEnd - 1);
final char nextChar = mTempObjectForCommitText.charAt(spanEnd);
if (UnicodeSurrogate.isLowSurrogate(spanEndChar)
&& UnicodeSurrogate.isHighSurrogate(nextChar)) {
mTempObjectForCommitText.setSpan(span, spanStart, spanEnd + 1, spanFlags);
}
}
}
mIC.commitText(mTempObjectForCommitText, newCursorPosition);
}
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.common;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.SmallTest;
/**
 * Unit tests for {@link UnicodeSurrogate}.
 *
 * <p>Each test checks a value inside the accepted range, both inclusive boundaries of the
 * range, the values immediately outside it, and an ordinary non-surrogate character.
 */
@SmallTest
public class UnicodeSurrogateTests extends AndroidTestCase {
    /** {@code isLowSurrogate} must accept exactly the range [0xd800, 0xdbff]. */
    public void testIsLowSurrogate() {
        // Immediately below the range.
        assertFalse(UnicodeSurrogate.isLowSurrogate('\uD7FF'));
        // Inclusive lower boundary, an interior value, inclusive upper boundary.
        assertTrue(UnicodeSurrogate.isLowSurrogate('\uD800'));
        assertTrue(UnicodeSurrogate.isLowSurrogate('\uD83D'));
        assertTrue(UnicodeSurrogate.isLowSurrogate('\uDBFF'));
        // Immediately above the range (this is the start of the other half's range).
        assertFalse(UnicodeSurrogate.isLowSurrogate('\uDC00'));
        // An ordinary character is never a surrogate.
        assertFalse(UnicodeSurrogate.isLowSurrogate('a'));
    }

    /** {@code isHighSurrogate} must accept exactly the range [0xdc00, 0xdfff]. */
    public void testIsHighSurrogate() {
        // Immediately below the range (this is the end of the other half's range).
        assertFalse(UnicodeSurrogate.isHighSurrogate('\uDBFF'));
        // Inclusive lower boundary, an interior value, inclusive upper boundary.
        assertTrue(UnicodeSurrogate.isHighSurrogate('\uDC00'));
        assertTrue(UnicodeSurrogate.isHighSurrogate('\uDE25'));
        assertTrue(UnicodeSurrogate.isHighSurrogate('\uDFFF'));
        // Immediately above the range.
        assertFalse(UnicodeSurrogate.isHighSurrogate('\uE000'));
        // An ordinary character is never a surrogate.
        assertFalse(UnicodeSurrogate.isHighSurrogate('a'));
    }
}