Add whitelist dictionary

Bug: 3447571

Change-Id: I538e4e706982129b666ca1ab1085f1b68b69f87f
This commit is contained in:
satok 2011-03-04 16:56:10 +09:00
parent d7e2bc335d
commit bcfce3b3b9
7 changed files with 278 additions and 93 deletions

View File

@ -0,0 +1,38 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
/*
**
** Copyright 2011, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
-->
<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
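<!--
Each whitelist entry consists of three consecutive items:
1. (int) frequency
2. (String) before: the word as typed
3. (String) after: the word that replaces it
The total number of items must therefore be a multiple of three.
-->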
<string-array name="wordlist_whitelist">
<item>255</item>
<item>ill</item>
<item>I\'ll</item>
<item>255</item>
<item>thisd</item>
<item>this\'d</item>
</string-array>
</resources>

View File

@ -16,10 +16,11 @@
package com.android.inputmethod.latin;
import android.text.TextUtils;
import android.util.Log;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
public class AutoCorrection {
private static final boolean DBG = LatinImeLogger.sDBG;
@ -46,40 +47,73 @@ public class AutoCorrection {
return mNormalizedScore;
}
public void updateAutoCorrectionStatus(Collection<Dictionary> dictionaries,
public void updateAutoCorrectionStatus(Map<String, Dictionary> dictionaries,
WordComposer wordComposer, ArrayList<CharSequence> suggestions, int[] priorities,
CharSequence typedWord, double autoCorrectionThreshold, int correctionMode,
CharSequence quickFixedWord) {
if (hasAutoCorrectionForTypedWord(
CharSequence quickFixedWord, CharSequence whitelistedWord) {
if (hasAutoCorrectionForWhitelistedWord(whitelistedWord)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = whitelistedWord;
} else if (hasAutoCorrectionForTypedWord(
dictionaries, wordComposer, suggestions, typedWord, correctionMode)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = typedWord;
} else if (hasAutoCorrectForBinaryDictionary(wordComposer, suggestions, correctionMode,
priorities, typedWord, autoCorrectionThreshold)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = suggestions.get(0);
} else if (hasAutoCorrectionForQuickFix(quickFixedWord)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = quickFixedWord;
} else if (hasAutoCorrectionForBinaryDictionary(wordComposer, suggestions, correctionMode,
priorities, typedWord, autoCorrectionThreshold)) {
mHasAutoCorrection = true;
mAutoCorrectionWord = suggestions.get(0);
}
}
private boolean hasAutoCorrectionForTypedWord(Collection<Dictionary> dictionaries,
WordComposer wordComposer, ArrayList<CharSequence> suggestions, CharSequence typedWord,
int correctionMode) {
boolean isValidWord = false;
for (final Dictionary dictionary : dictionaries) {
if (dictionary.isValidWord(typedWord)) {
isValidWord = true;
break;
/**
 * Checks whether {@code word} is known to any of the given dictionaries other than the
 * whitelist dictionary.
 *
 * @param dictionaries dictionaries to consult, keyed by dictionary name; the entry stored
 *        under {@link Suggest#DICT_KEY_WHITELIST} is skipped
 * @param word the word to look up
 * @param ignoreCase if true, the lower-cased form of {@code word} is looked up as well
 * @return true if some non-whitelist dictionary reports the word as valid; false if none
 *         does, or if {@code word} is null or empty
 */
public static boolean isValidWord(
        Map<String, Dictionary> dictionaries, CharSequence word, boolean ignoreCase) {
    if (TextUtils.isEmpty(word)) {
        return false;
    }
    // Only build the lower-cased copy when a case-insensitive match was requested.
    final CharSequence lowerCasedWord = ignoreCase ? word.toString().toLowerCase() : null;
    // Iterate over entries so that each dictionary is fetched without a second map lookup.
    for (final Map.Entry<String, Dictionary> entry : dictionaries.entrySet()) {
        if (Suggest.DICT_KEY_WHITELIST.equals(entry.getKey())) continue;
        final Dictionary dictionary = entry.getValue();
        if (dictionary.isValidWord(word)
                || (lowerCasedWord != null && dictionary.isValidWord(lowerCasedWord))) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether {@code word} counts as a valid word for the purpose of auto correction.
 * A word that the whitelist dictionary reports as valid is never reported as valid here;
 * for any other word the answer is that of
 * {@link #isValidWord(Map, CharSequence, boolean)}.
 *
 * @param dictionaries dictionaries to consult, keyed by dictionary name
 * @param word the word to look up
 * @param ignoreCase passed through to {@link #isValidWord(Map, CharSequence, boolean)}
 * @return false if the word is whitelisted, otherwise the result of the regular lookup
 */
public static boolean isValidWordForAutoCorrection(
        Map<String, Dictionary> dictionaries, CharSequence word, boolean ignoreCase) {
    final Dictionary whitelistDictionary = dictionaries.get(Suggest.DICT_KEY_WHITELIST);
    final boolean isWhitelisted =
            whitelistDictionary != null && whitelistDictionary.isValidWord(word);
    // A whitelisted word is reported as not valid; the other dictionaries are not consulted.
    return !isWhitelisted && isValidWord(dictionaries, word, ignoreCase);
}
/**
 * Reports whether a whitelisted replacement is available.
 *
 * @param whiteListedWord the whitelisted replacement, or null if there is none
 * @return true if {@code whiteListedWord} is non-null
 */
private static boolean hasAutoCorrectionForWhitelistedWord(CharSequence whiteListedWord) {
    if (whiteListedWord == null) {
        return false;
    }
    return true;
}
/**
 * Decides whether the typed word itself should be kept as the auto-correction result.
 * That is the case when the typed word is non-empty, is valid for auto correction, more
 * than one character has been composed, at least one suggestion exists, and the
 * correction mode is {@link Suggest#CORRECTION_FULL} or
 * {@link Suggest#CORRECTION_FULL_BIGRAM}.
 */
private boolean hasAutoCorrectionForTypedWord(Map<String, Dictionary> dictionaries,
        WordComposer wordComposer, ArrayList<CharSequence> suggestions, CharSequence typedWord,
        int correctionMode) {
    if (TextUtils.isEmpty(typedWord)) {
        return false;
    }
    // The dictionary lookup is performed before the cheaper checks below.
    final boolean typedWordIsValid =
            isValidWordForAutoCorrection(dictionaries, typedWord, false);
    if (wordComposer.size() <= 1 || suggestions.size() <= 0 || !typedWordIsValid) {
        return false;
    }
    return correctionMode == Suggest.CORRECTION_FULL
            || correctionMode == Suggest.CORRECTION_FULL_BIGRAM;
}
private boolean hasAutoCorrectForBinaryDictionary(WordComposer wordComposer,
/**
 * Reports whether a quick-fix replacement is available.
 *
 * @param quickFixedWord the quick-fix replacement, or null if there is none
 * @return true if {@code quickFixedWord} is non-null
 */
private static boolean hasAutoCorrectionForQuickFix(CharSequence quickFixedWord) {
    if (quickFixedWord == null) {
        return false;
    }
    return true;
}
private boolean hasAutoCorrectionForBinaryDictionary(WordComposer wordComposer,
ArrayList<CharSequence> suggestions, int correctionMode, int[] priorities,
CharSequence typedWord, double autoCorrectionThreshold) {
if (wordComposer.size() > 1 && (correctionMode == Suggest.CORRECTION_FULL
@ -106,7 +140,4 @@ public class AutoCorrection {
return false;
}
private boolean hasAutoCorrectionForQuickFix(CharSequence quickFixedWord) {
return quickFixedWord != null;
}
}

View File

@ -27,6 +27,7 @@ import android.provider.BaseColumns;
import android.util.Log;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

View File

@ -834,10 +834,10 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
public void onDisplayCompletions(CompletionInfo[] applicationSpecifiedCompletions) {
if (DEBUG) {
Log.i(TAG, "Received completions:");
final int count = (applicationSpecifiedCompletions != null)
? applicationSpecifiedCompletions.length : 0;
for (int i = 0; i < count; i++) {
Log.i(TAG, " #" + i + ": " + applicationSpecifiedCompletions[i]);
if (applicationSpecifiedCompletions != null) {
for (int i = 0; i < applicationSpecifiedCompletions.length; i++) {
Log.i(TAG, " #" + i + ": " + applicationSpecifiedCompletions[i]);
}
}
}
if (mApplicationSpecifiedCompletionOn) {
@ -968,7 +968,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
}
mCommittedLength = mComposing.length();
TextEntryState.acceptedTyped(mComposing);
addToDictionaries(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
addToAutoAndUserBigramDictionaries(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
}
updateSuggestions();
}
@ -1537,10 +1537,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
boolean correctionAvailable = !mInputTypeNoAutoCorrect && mSuggest.hasAutoCorrection();
final CharSequence typedWord = word.getTypedWord();
// If we're in basic correct
final boolean typedWordValid = mSuggest.isValidWord(typedWord) ||
(preferCapitalization()
&& mSuggest.isValidWord(typedWord.toString().toLowerCase()));
// A typed word that is in the whitelist is treated as not valid here, so that its
// whitelisted replacement can be promoted.
final boolean typedWordValid = AutoCorrection.isValidWordForAutoCorrection(
mSuggest.getUnigramDictionaries(), typedWord, preferCapitalization());
if (mCorrectionMode == Suggest.CORRECTION_FULL
|| mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM) {
correctionAvailable |= typedWordValid;
@ -1594,7 +1593,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
mJustAccepted = true;
pickSuggestion(mBestWord);
// Add the word to the auto dictionary if it's not a known word
addToDictionaries(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
addToAutoAndUserBigramDictionaries(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
return true;
}
@ -1647,9 +1646,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
pickSuggestion(suggestion);
// Add the word to the auto dictionary if it's not a known word
if (index == 0) {
addToDictionaries(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
addToAutoAndUserBigramDictionaries(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
} else {
addToBigramDictionary(suggestion, 1);
addToOnlyBigramDictionary(suggestion, 1);
}
LatinImeLogger.logOnManualSuggestion(mComposing.toString(), suggestion.toString(),
index, suggestions.mWords);
@ -1668,13 +1667,12 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
// and correction, so we shouldn't try to show the hint
// We used to look at mCorrectionMode here, but showing the hint should have nothing
// to do with the autocorrection setting.
final boolean showingAddToDictionaryHint = index == 0 &&
// Test for no dictionary:
((!mHasDictionary && null != mSuggest) ||
// Test for dictionary && word is inside:
(mHasDictionary && null != mSuggest
&& !mSuggest.isValidWord(suggestion)
&& !mSuggest.isValidWord(suggestion.toString().toLowerCase())));
final boolean showingAddToDictionaryHint = index == 0 && mSuggest != null
// If there is no dictionary the hint should be shown.
&& (!mHasDictionary
// If "suggestion" is not in the dictionary, the hint should be shown.
|| !AutoCorrection.isValidWord(
mSuggest.getUnigramDictionaries(), suggestion, true));
if (!recorrecting) {
// Fool the state watcher so that a subsequent backspace will not do a revert, unless
@ -1726,6 +1724,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
// If we didn't find a match, search for result in typed word history
WordComposer foundWord = null;
WordAlternatives alternatives = null;
// Search old suggestions to suggest re-corrected suggestions.
for (WordAlternatives entry : mWordHistory) {
if (TextUtils.equals(entry.getChosenWord(), touching.mWord)) {
if (entry instanceof TypedWordAlternatives) {
@ -1735,10 +1734,10 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
break;
}
}
// If we didn't find a match, at least suggest corrections.
// If we didn't find a match, at least suggest corrections as re-corrected suggestions.
if (foundWord == null
&& (mSuggest.isValidWord(touching.mWord)
|| mSuggest.isValidWord(touching.mWord.toString().toLowerCase()))) {
&& (AutoCorrection.isValidWord(
mSuggest.getUnigramDictionaries(), touching.mWord, true))) {
foundWord = new WordComposer();
for (int i = 0; i < touching.mWord.length(); i++) {
foundWord.add(touching.mWord.charAt(i), new int[] {
@ -1801,21 +1800,22 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
setCandidatesViewShown(isCandidateStripVisible());
}
private void addToDictionaries(CharSequence suggestion, int frequencyDelta) {
private void addToAutoAndUserBigramDictionaries(CharSequence suggestion, int frequencyDelta) {
checkAddToDictionary(suggestion, frequencyDelta, false);
}
private void addToBigramDictionary(CharSequence suggestion, int frequencyDelta) {
private void addToOnlyBigramDictionary(CharSequence suggestion, int frequencyDelta) {
checkAddToDictionary(suggestion, frequencyDelta, true);
}
/**
* Adds to the UserBigramDictionary and/or AutoDictionary
* @param addToBigramDictionary true if it should be added to bigram dictionary if possible
* @param selectedANotTypedWord true if the selected word is not the typed word; in that case
* the word is added to the AutoDictionary only when it is not a valid word
*/
private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta,
boolean addToBigramDictionary) {
boolean selectedANotTypedWord) {
if (suggestion == null || suggestion.length() < 1) return;
// Only auto-add to dictionary if auto-correct is ON. Otherwise we'll be
// adding words in situations where the user or application really didn't
// want corrections enabled or learned.
@ -1823,9 +1823,14 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|| mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM)) {
return;
}
if (!addToBigramDictionary && mAutoDictionary.isValidWord(suggestion)
|| (!mSuggest.isValidWord(suggestion.toString())
&& !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) {
final boolean selectedATypedWordAndItsInAutoDic =
!selectedANotTypedWord && mAutoDictionary.isValidWord(suggestion);
final boolean isValidWord = AutoCorrection.isValidWord(
mSuggest.getUnigramDictionaries(), suggestion, true);
final boolean needsToAddToAutoDictionary = selectedATypedWordAndItsInAutoDic
|| !isValidWord;
if (needsToAddToAutoDictionary) {
mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
}

View File

@ -67,6 +67,13 @@ public class Suggest implements Dictionary.WordCallback {
// If you add a type of dictionary, increment DIC_TYPE_LAST_ID
public static final int DIC_TYPE_LAST_ID = 4;
public static final String DICT_KEY_MAIN = "main";
public static final String DICT_KEY_CONTACTS = "contacts";
public static final String DICT_KEY_AUTO = "auto";
public static final String DICT_KEY_USER = "user";
public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
public static final String DICT_KEY_WHITELIST ="whitelist";
static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
private static final boolean DBG = LatinImeLogger.sDBG;
@ -74,11 +81,7 @@ public class Suggest implements Dictionary.WordCallback {
private AutoCorrection mAutoCorrection;
private BinaryDictionary mMainDict;
private static final String DICT_KEY_MAIN = "main";
private static final String DICT_KEY_CONTACTS = "contacts";
private static final String DICT_KEY_AUTO = "auto";
private static final String DICT_KEY_USER = "user";
private static final String DICT_KEY_USER_BIGRAM = "user_bigram";
private WhitelistDictionary mWhiteListDictionary;
private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
@ -104,19 +107,23 @@ public class Suggest implements Dictionary.WordCallback {
private int mCorrectionMode = CORRECTION_BASIC;
public Suggest(Context context, int dictionaryResId) {
init(BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN));
init(context, BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN));
}
/* package for test */ Suggest(File dictionary, long startOffset, long length) {
init(BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN));
init(null, BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN));
}
private void init(BinaryDictionary mainDict) {
private void init(Context context, BinaryDictionary mainDict) {
if (mainDict != null) {
mMainDict = mainDict;
mUnigramDictionaries.put(DICT_KEY_MAIN, mainDict);
mBigramDictionaries.put(DICT_KEY_MAIN, mainDict);
}
mWhiteListDictionary = WhitelistDictionary.init(context);
if (mWhiteListDictionary != null) {
mUnigramDictionaries.put(DICT_KEY_WHITELIST, mWhiteListDictionary);
}
mAutoCorrection = new AutoCorrection();
initPool();
}
@ -144,6 +151,10 @@ public class Suggest implements Dictionary.WordCallback {
return mMainDict != null && mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
}
/**
 * Returns the unigram dictionaries, keyed by dictionary name.
 * The returned map is the internal map itself, not a copy.
 */
public Map<String, Dictionary> getUnigramDictionaries() {
    return this.mUnigramDictionaries;
}
public int getApproxMaxWordLength() {
return APPROX_MAX_WORD_LENGTH;
}
@ -218,6 +229,25 @@ public class Suggest implements Dictionary.WordCallback {
return getSuggestedWordBuilder(view, wordComposer, prevWordForBigram).build();
}
/**
 * Returns {@code word} with its capitalization adjusted.
 *
 * @param all if true, the whole word is upper-cased; takes precedence over {@code first}
 * @param first if true, only the first character is upper-cased
 * @param word the word to capitalize
 * @return {@code word} itself when it is null or empty or when neither flag is set;
 *         otherwise a StringBuilder, taken from the string pool when one is available,
 *         holding the capitalized word
 */
private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) {
    final boolean needsCapitalization = all || first;
    if (!needsCapitalization || TextUtils.isEmpty(word)) {
        return word;
    }
    // Reuse the most recently pooled builder if there is one; otherwise allocate a new one.
    final int lastPooledIndex = mStringPool.size() - 1;
    final StringBuilder capitalized = lastPooledIndex >= 0
            ? (StringBuilder) mStringPool.remove(lastPooledIndex)
            : new StringBuilder(getApproxMaxWordLength());
    capitalized.setLength(0);
    if (all) {
        capitalized.append(word.toString().toUpperCase());
        return capitalized;
    }
    // "all" is false, so "first" is necessarily true at this point.
    capitalized.append(Character.toUpperCase(word.charAt(0)));
    final int length = word.length();
    if (length > 1) {
        capitalized.append(word.subSequence(1, length));
    }
    return capitalized;
}
// TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
public SuggestedWords.Builder getSuggestedWordBuilder(View view, WordComposer wordComposer,
CharSequence prevWordForBigram) {
@ -277,8 +307,8 @@ public class Suggest implements Dictionary.WordCallback {
} else if (wordComposer.size() > 1) {
// At second character typed, search the unigrams (scores being affected by bigrams)
for (final String key : mUnigramDictionaries.keySet()) {
// Skip AutoDictionary to lookup
if (key.equals(DICT_KEY_AUTO))
// Skip AutoDictionary and WhitelistDictionary when looking up words
if (key.equals(DICT_KEY_AUTO) || key.equals(DICT_KEY_WHITELIST))
continue;
final Dictionary dictionary = mUnigramDictionaries.get(key);
dictionary.getWords(wordComposer, this);
@ -290,28 +320,12 @@ public class Suggest implements Dictionary.WordCallback {
// Apply quick fix only for the typed word.
if (mQuickFixesEnabled) {
final String lowerCaseTypedWord = typedWordString.toLowerCase();
CharSequence tempAutoText =
AutoText.get(lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view);
CharSequence tempAutoText = capitalizeWord(
mIsAllUpperCase, mIsFirstCharCapitalized, AutoText.get(
lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view));
// TODO: cleanup canAdd
// Is there an AutoText (also known as Quick Fixes) correction?
// Capitalize as needed
if (!TextUtils.isEmpty(tempAutoText)
&& (mIsAllUpperCase || mIsFirstCharCapitalized)) {
final int tempAutoTextLength = tempAutoText.length();
final int poolSize = mStringPool.size();
final StringBuilder sb =
poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(getApproxMaxWordLength());
sb.setLength(0);
if (mIsAllUpperCase) {
sb.append(tempAutoText.toString().toUpperCase());
} else if (mIsFirstCharCapitalized) {
sb.append(Character.toUpperCase(tempAutoText.charAt(0)));
if (tempAutoTextLength > 1) {
sb.append(tempAutoText.subSequence(1, tempAutoTextLength));
}
}
tempAutoText = sb.toString();
}
boolean canAdd = tempAutoText != null;
// Is that correction already the current prediction (or original word)?
canAdd &= !TextUtils.equals(tempAutoText, typedWord);
@ -328,14 +342,21 @@ public class Suggest implements Dictionary.WordCallback {
}
}
mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries.values(), wordComposer,
CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
mWhiteListDictionary.getWhiteListedWord(typedWordString));
mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer,
mSuggestions, mPriorities, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
autoText);
autoText, whitelistedWord);
if (autoText != null) {
mSuggestions.add(0, autoText);
}
if (whitelistedWord != null) {
mSuggestions.add(0, whitelistedWord);
}
if (typedWord != null) {
mSuggestions.add(0, typedWordString);
}
@ -516,17 +537,6 @@ public class Suggest implements Dictionary.WordCallback {
return -1;
}
public boolean isValidWord(final CharSequence word) {
if (word == null || word.length() == 0 || mMainDict == null) {
return false;
}
for (final Dictionary dictionary : mUnigramDictionaries.values()) {
if (dictionary.isValidWord(word))
return true;
}
return false;
}
private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
int poolSize = mStringPool.size();
int garbageSize = suggestions.size();

View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.content.Context;
import android.text.TextUtils;
import android.util.Log;
import android.util.Pair;
import java.util.HashMap;
/**
 * Dictionary of whitelisted replacements. Each entry maps a typed word ("before") to the
 * word that should replace it ("after"), together with a score. Lookups are done on the
 * lower-cased form of the typed word.
 *
 * A single shared instance is used; {@link #init(Context)} (re)loads its contents.
 */
public class WhitelistDictionary extends Dictionary {

    private static final boolean DBG = LatinImeLogger.sDBG;
    private static final String TAG = WhitelistDictionary.class.getSimpleName();

    /** Number of consecutive array items that form one entry: score, before, after. */
    private static final int ITEMS_PER_ENTRY = 3;

    /** Maps the lower-cased "before" word to its (score, "after" word) pair. */
    private final HashMap<String, Pair<Integer, String>> mWhitelistWords =
            new HashMap<String, Pair<Integer, String>>();

    private static final WhitelistDictionary sInstance = new WhitelistDictionary();

    private WhitelistDictionary() {
    }

    /**
     * Reloads the shared instance and returns it.
     *
     * @param context used to read the {@code wordlist_whitelist} string array; if null, the
     *        whitelist is simply emptied
     * @return the shared instance, never null
     */
    public static WhitelistDictionary init(Context context) {
        synchronized (sInstance) {
            if (context != null) {
                sInstance.initWordlist(
                        context.getResources().getStringArray(R.array.wordlist_whitelist));
            } else {
                sInstance.mWhitelistWords.clear();
            }
        }
        return sInstance;
    }

    /**
     * Replaces the current contents with the entries found in {@code wordlist}.
     * The array is read as consecutive (score, before, after) triples. If its length is not
     * a multiple of three, nothing is loaded. An entry whose score is not a valid integer,
     * or whose before/after word is null, is skipped; the remaining entries are still loaded.
     */
    private void initWordlist(String[] wordlist) {
        mWhitelistWords.clear();
        final int N = wordlist.length;
        if (N % ITEMS_PER_ENTRY != 0) {
            if (DBG) {
                Log.d(TAG, "The number of the whitelist is invalid.");
            }
            return;
        }
        for (int i = 0; i < N; i += ITEMS_PER_ENTRY) {
            final String before = wordlist[i + 1];
            final String after = wordlist[i + 2];
            if (before == null || after == null) {
                continue;
            }
            // The try block covers a single entry so that one malformed score does not
            // discard every entry that follows it.
            try {
                final int score = Integer.parseInt(wordlist[i]);
                // NOTE(review): toLowerCase() uses the default locale here and in
                // getWhiteListedWord() - confirm that this is intended for all locales.
                mWhitelistWords.put(
                        before.toLowerCase(), new Pair<Integer, String>(score, after));
            } catch (NumberFormatException e) {
                if (DBG) {
                    Log.d(TAG, "The score of the word is invalid.");
                }
            }
        }
    }

    /**
     * Looks up the whitelisted replacement for a typed word.
     *
     * @param before the typed word; it is lower-cased before the lookup
     * @return the replacement word, or null if {@code before} is null or has no entry
     */
    public String getWhiteListedWord(String before) {
        if (before == null) return null;
        final String lowerCaseBefore = before.toLowerCase();
        // Stored values are never null, so a null result means "no entry".
        final Pair<Integer, String> entry = mWhitelistWords.get(lowerCaseBefore);
        if (entry == null) {
            return null;
        }
        if (DBG) {
            Log.d(TAG, "--- found whiteListedWord: " + lowerCaseBefore);
        }
        return entry.second;
    }

    // Not used for WhitelistDictionary. We use getWhiteListedWord() in Suggest.java instead
    @Override
    public void getWords(WordComposer composer, WordCallback callback) {
    }

    /**
     * Returns true if {@code word} is non-empty and has a non-empty whitelisted replacement.
     */
    @Override
    public boolean isValidWord(CharSequence word) {
        if (TextUtils.isEmpty(word)) return false;
        return !TextUtils.isEmpty(getWhiteListedWord(word.toString()));
    }
}

View File

@ -89,7 +89,8 @@ public class SuggestHelper {
}
public boolean isValidWord(CharSequence typed) {
return mSuggest.isValidWord(typed);
return AutoCorrection.isValidWordForAutoCorrection(mSuggest.getUnigramDictionaries(),
typed, false);
}
// TODO: This may be slow, but is OK for test so far.