mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
We want to let the facilitator decide if a word is valid or invalid, and cache the answer in the facilitator's cache. The spell checker session doesn't need its own word cache, except as a crutch to communicate suggestions to the code that populates the suggestion drop-down. We leave that in place. Bug 20018546. Change-Id: I3c3c53e0c1d709fa2f64a2952a232acd7380b57a
391 lines
18 KiB
Java
391 lines
18 KiB
Java
/*
|
|
* Copyright (C) 2012 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package com.android.inputmethod.latin.spellcheck;
|
|
|
|
import android.content.ContentResolver;
|
|
import android.database.ContentObserver;
|
|
import android.os.Binder;
|
|
import android.provider.UserDictionary.Words;
|
|
import android.service.textservice.SpellCheckerService.Session;
|
|
import android.text.TextUtils;
|
|
import android.util.Log;
|
|
import android.util.LruCache;
|
|
import android.view.textservice.SuggestionsInfo;
|
|
import android.view.textservice.TextInfo;
|
|
|
|
import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
|
|
import com.android.inputmethod.keyboard.Keyboard;
|
|
import com.android.inputmethod.latin.NgramContext;
|
|
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
|
import com.android.inputmethod.latin.WordComposer;
|
|
import com.android.inputmethod.latin.common.Constants;
|
|
import com.android.inputmethod.latin.common.LocaleUtils;
|
|
import com.android.inputmethod.latin.common.StringUtils;
|
|
import com.android.inputmethod.latin.define.DebugFlags;
|
|
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
|
|
import com.android.inputmethod.latin.utils.ScriptUtils;
|
|
import com.android.inputmethod.latin.utils.StatsUtils;
|
|
import com.android.inputmethod.latin.utils.SuggestionResults;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Locale;
|
|
|
|
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
|
|
private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
|
|
|
|
public final static String[] EMPTY_STRING_ARRAY = new String[0];
|
|
|
|
// Immutable, but not available in the constructor.
|
|
private Locale mLocale;
|
|
// Cache this for performance
|
|
private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
|
|
private final AndroidSpellCheckerService mService;
|
|
protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
|
|
private final ContentObserver mObserver;
|
|
|
|
private static final String quotesRegexp =
|
|
"(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2018|\\u201C|\\u201D)";
|
|
|
|
private static final class SuggestionsParams {
|
|
public final String[] mSuggestions;
|
|
public final int mFlags;
|
|
public SuggestionsParams(String[] suggestions, int flags) {
|
|
mSuggestions = suggestions;
|
|
mFlags = flags;
|
|
}
|
|
}
|
|
|
|
protected static final class SuggestionsCache {
|
|
private static final int MAX_CACHE_SIZE = 50;
|
|
private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
|
|
new LruCache<>(MAX_CACHE_SIZE);
|
|
|
|
private static String generateKey(final String query) {
|
|
return query + "";
|
|
}
|
|
|
|
public SuggestionsParams getSuggestionsFromCache(final String query) {
|
|
return mUnigramSuggestionsInfoCache.get(query);
|
|
}
|
|
|
|
public void putSuggestionsToCache(
|
|
final String query, final String[] suggestions, final int flags) {
|
|
if (suggestions == null || TextUtils.isEmpty(query)) {
|
|
return;
|
|
}
|
|
mUnigramSuggestionsInfoCache.put(
|
|
generateKey(query),
|
|
new SuggestionsParams(suggestions, flags));
|
|
}
|
|
|
|
public void clearCache() {
|
|
mUnigramSuggestionsInfoCache.evictAll();
|
|
}
|
|
}
|
|
|
|
AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
|
|
mService = service;
|
|
final ContentResolver cres = service.getContentResolver();
|
|
|
|
mObserver = new ContentObserver(null) {
|
|
@Override
|
|
public void onChange(boolean self) {
|
|
mSuggestionsCache.clearCache();
|
|
}
|
|
};
|
|
cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
|
|
}
|
|
|
|
@Override
|
|
public void onCreate() {
|
|
final String localeString = getLocale();
|
|
mLocale = (null == localeString) ? null
|
|
: LocaleUtils.constructLocaleFromString(localeString);
|
|
mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
|
|
}
|
|
|
|
@Override
|
|
public void onClose() {
|
|
final ContentResolver cres = mService.getContentResolver();
|
|
cres.unregisterContentObserver(mObserver);
|
|
}
|
|
|
|
private static final int CHECKABILITY_CHECKABLE = 0;
|
|
private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
|
|
private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
|
|
private static final int CHECKABILITY_EMAIL_OR_URL = 3;
|
|
private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
|
|
private static final int CHECKABILITY_TOO_SHORT = 5;
|
|
/**
|
|
* Finds out whether a particular string should be filtered out of spell checking.
|
|
*
|
|
* This will loosely match URLs, numbers, symbols. To avoid always underlining words that
|
|
* we know we will never recognize, this accepts a script identifier that should be one
|
|
* of the SCRIPT_* constants defined above, to rule out quickly characters from very
|
|
* different languages.
|
|
*
|
|
* @param text the string to evaluate.
|
|
* @param script the identifier for the script this spell checker recognizes
|
|
* @return one of the FILTER_OUT_* constants above.
|
|
*/
|
|
private static int getCheckabilityInScript(final String text, final int script) {
|
|
if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
|
|
|
|
// TODO: check if an equivalent processing can't be done more quickly with a
|
|
// compiled regexp.
|
|
// Filter by first letter
|
|
final int firstCodePoint = text.codePointAt(0);
|
|
// Filter out words that don't start with a letter or an apostrophe
|
|
if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
|
|
&& '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
|
|
|
|
// Filter contents
|
|
final int length = text.length();
|
|
int letterCount = 0;
|
|
for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
|
|
final int codePoint = text.codePointAt(i);
|
|
// Any word containing a COMMERCIAL_AT is probably an e-mail address
|
|
// Any word containing a SLASH is probably either an ad-hoc combination of two
|
|
// words or a URI - in either case we don't want to spell check that
|
|
if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
|
|
return CHECKABILITY_EMAIL_OR_URL;
|
|
}
|
|
// If the string contains a period, native returns strange suggestions (it seems
|
|
// to return suggestions for everything up to the period only and to ignore the
|
|
// rest), so we suppress lookup if there is a period.
|
|
// TODO: investigate why native returns these suggestions and remove this code.
|
|
if (Constants.CODE_PERIOD == codePoint) {
|
|
return CHECKABILITY_CONTAINS_PERIOD;
|
|
}
|
|
if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
|
|
}
|
|
// Guestimate heuristic: perform spell checking if at least 3/4 of the characters
|
|
// in this word are letters
|
|
return (letterCount * 4 < length * 3)
|
|
? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
|
|
}
|
|
|
|
/**
|
|
* Helper method to test valid capitalizations of a word.
|
|
*
|
|
* If the "text" is lower-case, we test only the exact string.
|
|
* If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
|
|
* version of it "text".
|
|
* If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
|
|
* version of it "text" and the capitalized version of it "Text".
|
|
*/
|
|
private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
|
|
// If the word is in there as is, then it's in the dictionary. If not, we'll test lower
|
|
// case versions, but only if the word is not already all-lower case or mixed case.
|
|
if (mService.isValidWord(mLocale, text)) return true;
|
|
if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
|
|
|
|
// If we come here, we have a capitalized word (either First- or All-).
|
|
// Downcase the word and look it up again. If the word is only capitalized, we
|
|
// tested all possibilities, so if it's still negative we can return false.
|
|
final String lowerCaseText = text.toLowerCase(mLocale);
|
|
if (mService.isValidWord(mLocale, lowerCaseText)) return true;
|
|
if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
|
|
|
|
// If the lower case version is not in the dictionary, it's still possible
|
|
// that we have an all-caps version of a word that needs to be capitalized
|
|
// according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
|
|
return mService.isValidWord(mLocale,
|
|
StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
|
|
}
|
|
|
|
// Note : this must be reentrant
|
|
/**
|
|
* Gets a list of suggestions for a specific string. This returns a list of possible
|
|
* corrections for the text passed as an argument. It may split or group words, and
|
|
* even perform grammatical analysis.
|
|
*/
|
|
private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
|
|
final int suggestionsLimit) {
|
|
return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
|
|
}
|
|
|
|
protected SuggestionsInfo onGetSuggestionsInternal(
|
|
final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
|
|
try {
|
|
final String text = textInfo.getText().
|
|
replaceAll(AndroidSpellCheckerService.APOSTROPHE,
|
|
AndroidSpellCheckerService.SINGLE_QUOTE).
|
|
replaceAll("^" + quotesRegexp, "").
|
|
replaceAll(quotesRegexp + "$", "");
|
|
|
|
if (!mService.hasMainDictionaryForLocale(mLocale)) {
|
|
return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
|
|
false /* reportAsTypo */);
|
|
}
|
|
|
|
// Handle special patterns like email, URI, telephone number.
|
|
final int checkability = getCheckabilityInScript(text, mScript);
|
|
if (CHECKABILITY_CHECKABLE != checkability) {
|
|
if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
|
|
final String[] splitText = text.split(Constants.REGEXP_PERIOD);
|
|
boolean allWordsAreValid = true;
|
|
for (final String word : splitText) {
|
|
if (!mService.isValidWord(mLocale, word)) {
|
|
allWordsAreValid = false;
|
|
break;
|
|
}
|
|
}
|
|
if (allWordsAreValid) {
|
|
return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
|
|
| SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
|
|
new String[] {
|
|
TextUtils.join(Constants.STRING_SPACE, splitText) });
|
|
}
|
|
}
|
|
return mService.isValidWord(mLocale, text) ?
|
|
AndroidSpellCheckerService.getInDictEmptySuggestions() :
|
|
AndroidSpellCheckerService.getNotInDictEmptySuggestions(
|
|
CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
|
|
}
|
|
|
|
// Handle normal words.
|
|
final int capitalizeType = StringUtils.getCapitalizationType(text);
|
|
|
|
if (isInDictForAnyCapitalization(text, capitalizeType)) {
|
|
if (DebugFlags.DEBUG_ENABLED) {
|
|
Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
|
|
}
|
|
return AndroidSpellCheckerService.getInDictEmptySuggestions();
|
|
}
|
|
if (DebugFlags.DEBUG_ENABLED) {
|
|
Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is NOT a valid word");
|
|
}
|
|
|
|
final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
|
|
if (null == keyboard) {
|
|
Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
|
|
// If there is no keyboard for this locale, don't do any spell-checking.
|
|
return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
|
|
false /* reportAsTypo */);
|
|
}
|
|
|
|
final WordComposer composer = new WordComposer();
|
|
final int[] codePoints = StringUtils.toCodePointArray(text);
|
|
final int[] coordinates;
|
|
coordinates = keyboard.getCoordinates(codePoints);
|
|
composer.setComposingWord(codePoints, coordinates);
|
|
// TODO: Don't gather suggestions if the limit is <= 0 unless necessary
|
|
final SuggestionResults suggestionResults = mService.getSuggestionResults(
|
|
mLocale, composer.getComposedDataSnapshot(), ngramContext, keyboard);
|
|
final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
|
|
mService.getRecommendedThreshold(), text, suggestionResults);
|
|
if (DebugFlags.DEBUG_ENABLED) {
|
|
if (result.mSuggestions != null && result.mSuggestions.length > 0) {
|
|
final StringBuilder builder = new StringBuilder();
|
|
for (String suggestion : result.mSuggestions) {
|
|
builder.append(" [");
|
|
builder.append(suggestion);
|
|
builder.append("]");
|
|
}
|
|
Log.i(TAG, "onGetSuggestionsInternal() : Suggestions =" + builder);
|
|
}
|
|
}
|
|
// Handle word not in dictionary.
|
|
// This is called only once per unique word, so entering multiple
|
|
// instances of the same word does not result in more than one call
|
|
// to this method.
|
|
// Also, upon changing the orientation of the device, this is called
|
|
// again for every unique invalid word in the text box.
|
|
StatsUtils.onInvalidWordIdentification(text);
|
|
|
|
final int flags =
|
|
SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
|
|
| (result.mHasRecommendedSuggestions
|
|
? SuggestionsInfoCompatUtils
|
|
.getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
|
|
: 0);
|
|
final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
|
|
mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
|
|
return retval;
|
|
} catch (RuntimeException e) {
|
|
// Don't kill the keyboard if there is a bug in the spell checker
|
|
Log.e(TAG, "Exception while spellchecking", e);
|
|
return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
|
|
false /* reportAsTypo */);
|
|
}
|
|
}
|
|
|
|
private static final class Result {
|
|
public final String[] mSuggestions;
|
|
public final boolean mHasRecommendedSuggestions;
|
|
public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) {
|
|
mSuggestions = gatheredSuggestions;
|
|
mHasRecommendedSuggestions = hasRecommendedSuggestions;
|
|
}
|
|
}
|
|
|
|
private static Result getResult(final int capitalizeType, final Locale locale,
|
|
final int suggestionsLimit, final float recommendedThreshold, final String originalText,
|
|
final SuggestionResults suggestionResults) {
|
|
if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
|
|
return new Result(null /* gatheredSuggestions */,
|
|
false /* hasRecommendedSuggestions */);
|
|
}
|
|
final ArrayList<String> suggestions = new ArrayList<>();
|
|
for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
|
|
final String suggestion;
|
|
if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
|
|
suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
|
|
} else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
|
|
suggestion = StringUtils.capitalizeFirstCodePoint(
|
|
suggestedWordInfo.mWord, locale);
|
|
} else {
|
|
suggestion = suggestedWordInfo.mWord;
|
|
}
|
|
suggestions.add(suggestion);
|
|
}
|
|
StringUtils.removeDupes(suggestions);
|
|
// This returns a String[], while toArray() returns an Object[] which cannot be cast
|
|
// into a String[].
|
|
final List<String> gatheredSuggestionsList =
|
|
suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit));
|
|
final String[] gatheredSuggestions =
|
|
gatheredSuggestionsList.toArray(new String[gatheredSuggestionsList.size()]);
|
|
|
|
final int bestScore = suggestionResults.first().mScore;
|
|
final String bestSuggestion = suggestions.get(0);
|
|
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
|
|
originalText, bestSuggestion, bestScore);
|
|
final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
|
|
return new Result(gatheredSuggestions, hasRecommendedSuggestions);
|
|
}
|
|
|
|
/*
|
|
* The spell checker acts on its own behalf. That is needed, in particular, to be able to
|
|
* access the dictionary files, which the provider restricts to the identity of Latin IME.
|
|
* Since it's called externally by the application, the spell checker is using the identity
|
|
* of the application by default unless we clearCallingIdentity.
|
|
* That's what the following method does.
|
|
*/
|
|
@Override
|
|
public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) {
|
|
long ident = Binder.clearCallingIdentity();
|
|
try {
|
|
return onGetSuggestionsInternal(textInfo, suggestionsLimit);
|
|
} finally {
|
|
Binder.restoreCallingIdentity(ident);
|
|
}
|
|
}
|
|
}
|