Fix shortcuts

This commit is contained in:
Aleksandras Kostarevas 2024-03-07 15:32:58 +02:00
parent 9fed68c03a
commit b13e04d3f1
6 changed files with 64 additions and 12 deletions

View File

@ -442,15 +442,18 @@ public final class BinaryDictionary extends Dictionary {
}
// Add a unigram entry to binary dictionary with unigram attributes in native code.
public boolean addUnigramEntry(
final String word, final int probability, final boolean isBeginningOfSentence,
final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
public boolean addUnigramEntry(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability,
final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isPossiblyOffensive, final int timestamp) {
if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return false;
}
final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null;
if (!addUnigramEntryNative(mNativeDict, codePoints, probability,
null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */,
shortcutTargetCodePoints, shortcutProbability,
isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) {
return false;
}

View File

@ -101,6 +101,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary
}
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS,
null, 0,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
}
@ -152,7 +153,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary
}
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word,
ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS, false /* isNotAWord */,
ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS,
null, 0,
false /* isNotAWord */,
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (ngramContext.isValid() && mUseFirstLastBigrams) {

View File

@ -295,18 +295,21 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* Adds unigram information of a word to the dictionary. May overwrite an existing entry.
*/
public void addUnigramEntry(final String word, final int frequency,
        final String shortcutTarget, final int shortcutProbability,
        final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
    // All arguments are forwarded unchanged to addUnigramLocked(); the call is wrapped in a
    // Runnable and handed to updateDictionaryWithWriteLock().
    // shortcutTarget may be null (callers in this change pass null with a shortcutProbability
    // of 0 when the word has no shortcut).
    updateDictionaryWithWriteLock(new Runnable() {
        @Override
        public void run() {
            // Exactly one call, using the full argument list that includes the shortcut
            // target and probability; the older 5-argument form no longer matches
            // addUnigramLocked() and would have added the entry a second time.
            addUnigramLocked(word, frequency, shortcutTarget, shortcutProbability,
                    isNotAWord, isPossiblyOffensive, timestamp);
        }
    });
}
protected void addUnigramLocked(final String word, final int frequency,
final String shortcutTarget, final int shortcutProbability,
final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
if (!mBinaryDictionary.addUnigramEntry(word, frequency,
shortcutTarget, shortcutProbability,
false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, timestamp)) {
Log.e(TAG, "Cannot add unigram entry. word: " + word);
}

View File

@ -22,6 +22,7 @@ import android.database.ContentObserver;
import android.database.Cursor;
import android.database.sqlite.SQLiteException;
import android.net.Uri;
import android.os.Build;
import android.provider.UserDictionary.Words;
import android.text.TextUtils;
import android.util.Log;
@ -46,8 +47,19 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
private static final String USER_DICTIONARY_ALL_LANGUAGES = "";
private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250;
private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160;
// Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries
// to auto-correct, so we set this to the highest frequency that won't, i.e. 14.
private static final int USER_DICT_SHORTCUT_FREQUENCY = 14;
// Columns for a word + frequency query. Holds the same columns as
// PROJECTION_QUERY_WITHOUT_SHORTCUT below.
// NOTE(review): looks superseded by the two explicit projections below - confirm no remaining
// users before removing.
private static final String[] PROJECTION_QUERY = new String[] {Words.WORD, Words.FREQUENCY};
// Columns requested when the SHORTCUT column is queried together with the word and its
// frequency.
private static final String[] PROJECTION_QUERY_WITH_SHORTCUT = new String[] {
Words.WORD,
Words.SHORTCUT,
Words.FREQUENCY,
};
// Columns requested when the SHORTCUT column is left out of the query: word and frequency only.
private static final String[] PROJECTION_QUERY_WITHOUT_SHORTCUT = new String[] {
Words.WORD,
Words.FREQUENCY,
};
private static final String NAME = "userunigram";
@ -159,7 +171,20 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
requestArguments = localeElements;
}
final String requestString = request.toString();
addWordsFromProjectionLocked(PROJECTION_QUERY, requestString, requestArguments);
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
try {
addWordsFromProjectionLocked(PROJECTION_QUERY_WITH_SHORTCUT, requestString,
requestArguments);
} catch (IllegalArgumentException e) {
// This may happen on some non-compliant devices where the declared API is JB+ but
// the SHORTCUT column is not present for some reason.
addWordsFromProjectionLocked(PROJECTION_QUERY_WITHOUT_SHORTCUT, requestString,
requestArguments);
}
} else {
addWordsFromProjectionLocked(PROJECTION_QUERY_WITHOUT_SHORTCUT, requestString,
requestArguments);
}
}
private void addWordsFromProjectionLocked(final String[] query, String request,
@ -194,23 +219,34 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
}
private void addWordsLocked(final Cursor cursor) {
final boolean hasShortcutColumn = Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN;
if (cursor == null) return;
if (cursor.moveToFirst()) {
final int indexWord = cursor.getColumnIndex(Words.WORD);
final int indexShortcut = hasShortcutColumn ? cursor.getColumnIndex(Words.SHORTCUT) : 0;
final int indexFrequency = cursor.getColumnIndex(Words.FREQUENCY);
while (!cursor.isAfterLast()) {
final String word = cursor.getString(indexWord);
final String shortcut = hasShortcutColumn ? cursor.getString(indexShortcut) : null;
final int frequency = cursor.getInt(indexFrequency);
final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency);
// Safeguard against adding really long words.
if (word.length() <= MAX_WORD_LENGTH) {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, adjustedFrequency, false /* isNotAWord */,
addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */,
0 /* shortcutFreq */, false /* isNotAWord */,
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(shortcut, adjustedFrequency, word,
USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */,
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
}
}
cursor.moveToNext();
}
}
}
}
}

View File

@ -170,7 +170,9 @@ public class BinaryDictionaryTests {
addUnigramWord(binaryDictionary, validLongWord, probability);
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */,
binaryDictionary.addUnigramEntry("a", probability,
null, 0,
false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
@ -189,6 +191,7 @@ public class BinaryDictionaryTests {
private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
binaryDictionary.addUnigramEntry(word, probability,
null, 0,
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
@ -736,6 +739,7 @@ public class BinaryDictionaryTests {
final boolean isPossiblyOffensive = random.nextBoolean();
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
null, 0,
false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
@ -883,7 +887,9 @@ public class BinaryDictionaryTests {
public void testPossiblyOffensiveAttributeMaintained() {
    // Adds one entry flagged as possibly offensive to an empty VERSION403 dictionary and
    // checks that the flag is still set on the word property read back for it.
    final BinaryDictionary binaryDictionary =
            getEmptyBinaryDictionary(FormatSpec.VERSION403);
    // Single call using the 8-argument signature (with shortcut target and probability).
    binaryDictionary.addUnigramEntry("ddd", 100 /* probability */,
            null /* shortcutTarget */, 0 /* shortcutProbability */,
            false /* isBeginningOfSentence */, true /* isNotAWord */,
            true /* isPossiblyOffensive */, 0 /* timestamp */);
    WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
    assertEquals(true, wordProperty.mIsPossiblyOffensive);
}

View File

@ -75,6 +75,7 @@ public class Ver4DictEncoder implements DictEncoder {
}
for (final WordProperty wordProperty : dict) {
if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null, 0,
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) {
MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);