From b13e04d3f181402cd8fce613e51dde009d9aac64 Mon Sep 17 00:00:00 2001 From: Aleksandras Kostarevas Date: Thu, 7 Mar 2024 15:32:58 +0200 Subject: [PATCH] Fix shortcuts --- .../inputmethod/latin/BinaryDictionary.java | 11 +++-- .../latin/ContactsBinaryDictionary.java | 5 ++- .../latin/ExpandableBinaryDictionary.java | 5 ++- .../latin/UserBinaryDictionary.java | 44 +++++++++++++++++-- .../latin/BinaryDictionaryTests.java | 10 ++++- .../latin/makedict/Ver4DictEncoder.java | 1 + 6 files changed, 64 insertions(+), 12 deletions(-) diff --git a/java/src/org/futo/inputmethod/latin/BinaryDictionary.java b/java/src/org/futo/inputmethod/latin/BinaryDictionary.java index b05380d4a..6af99c2a3 100644 --- a/java/src/org/futo/inputmethod/latin/BinaryDictionary.java +++ b/java/src/org/futo/inputmethod/latin/BinaryDictionary.java @@ -442,15 +442,18 @@ public final class BinaryDictionary extends Dictionary { } // Add a unigram entry to binary dictionary with unigram attributes in native code. - public boolean addUnigramEntry( - final String word, final int probability, final boolean isBeginningOfSentence, - final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) { + public boolean addUnigramEntry(final String word, final int probability, + final String shortcutTarget, final int shortcutProbability, + final boolean isBeginningOfSentence, final boolean isNotAWord, + final boolean isPossiblyOffensive, final int timestamp) { if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { return false; } final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? + StringUtils.toCodePointArray(shortcutTarget) : null; if (!addUnigramEntryNative(mNativeDict, codePoints, probability, - null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */, + shortcutTargetCodePoints, shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) { return false; } diff --git a/java/src/org/futo/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/org/futo/inputmethod/latin/ContactsBinaryDictionary.java index 44dce9c86..1b548ee31 100644 --- a/java/src/org/futo/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/org/futo/inputmethod/latin/ContactsBinaryDictionary.java @@ -101,6 +101,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary } runGCIfRequiredLocked(true /* mindsBlockByGC */); addUnigramLocked(word, ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS, + null, 0, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); } @@ -152,7 +153,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary } runGCIfRequiredLocked(true /* mindsBlockByGC */); addUnigramLocked(word, - ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS, false /* isNotAWord */, + ContactsDictionaryConstants.FREQUENCY_FOR_CONTACTS, + null, 0, + false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (ngramContext.isValid() && mUseFirstLastBigrams) { diff --git a/java/src/org/futo/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/org/futo/inputmethod/latin/ExpandableBinaryDictionary.java index 39c7ce1a0..a5b151fde 100644 --- a/java/src/org/futo/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/org/futo/inputmethod/latin/ExpandableBinaryDictionary.java @@ -295,18 +295,21 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * Adds unigram information of a word to the dictionary. May overwrite an existing entry. */ public void addUnigramEntry(final String word, final int frequency, + final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) { updateDictionaryWithWriteLock(new Runnable() { @Override public void run() { - addUnigramLocked(word, frequency, isNotAWord, isPossiblyOffensive, timestamp); + addUnigramLocked(word, frequency, shortcutTarget, shortcutProbability, isNotAWord, isPossiblyOffensive, timestamp); } }); } protected void addUnigramLocked(final String word, final int frequency, + final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) { if (!mBinaryDictionary.addUnigramEntry(word, frequency, + shortcutTarget, shortcutProbability, false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, timestamp)) { Log.e(TAG, "Cannot add unigram entry. word: " + word); } diff --git a/java/src/org/futo/inputmethod/latin/UserBinaryDictionary.java b/java/src/org/futo/inputmethod/latin/UserBinaryDictionary.java index a51b1dd3f..8da8cd12b 100644 --- a/java/src/org/futo/inputmethod/latin/UserBinaryDictionary.java +++ b/java/src/org/futo/inputmethod/latin/UserBinaryDictionary.java @@ -22,6 +22,7 @@ import android.database.ContentObserver; import android.database.Cursor; import android.database.sqlite.SQLiteException; import android.net.Uri; +import android.os.Build; import android.provider.UserDictionary.Words; import android.text.TextUtils; import android.util.Log; @@ -46,8 +47,19 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { private static final String USER_DICTIONARY_ALL_LANGUAGES = ""; private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250; private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160; + // Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries + // to auto-correct, so we set this to the highest frequency that won't, i.e. 14. + private static final int USER_DICT_SHORTCUT_FREQUENCY = 14; - private static final String[] PROJECTION_QUERY = new String[] {Words.WORD, Words.FREQUENCY}; + private static final String[] PROJECTION_QUERY_WITH_SHORTCUT = new String[] { + Words.WORD, + Words.SHORTCUT, + Words.FREQUENCY, + }; + private static final String[] PROJECTION_QUERY_WITHOUT_SHORTCUT = new String[] { + Words.WORD, + Words.FREQUENCY, + }; private static final String NAME = "userunigram"; @@ -159,7 +171,20 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { requestArguments = localeElements; } final String requestString = request.toString(); - addWordsFromProjectionLocked(PROJECTION_QUERY, requestString, requestArguments); + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) { + try { + addWordsFromProjectionLocked(PROJECTION_QUERY_WITH_SHORTCUT, requestString, + requestArguments); + } catch (IllegalArgumentException e) { + // This may happen on some non-compliant devices where the declared API is JB+ but + // the SHORTCUT column is not present for some reason. + addWordsFromProjectionLocked(PROJECTION_QUERY_WITHOUT_SHORTCUT, requestString, + requestArguments); + } + } else { + addWordsFromProjectionLocked(PROJECTION_QUERY_WITHOUT_SHORTCUT, requestString, + requestArguments); + } } private void addWordsFromProjectionLocked(final String[] query, String request, @@ -194,23 +219,34 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { } private void addWordsLocked(final Cursor cursor) { + final boolean hasShortcutColumn = Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN; if (cursor == null) return; if (cursor.moveToFirst()) { final int indexWord = cursor.getColumnIndex(Words.WORD); + final int indexShortcut = hasShortcutColumn ? cursor.getColumnIndex(Words.SHORTCUT) : 0; final int indexFrequency = cursor.getColumnIndex(Words.FREQUENCY); while (!cursor.isAfterLast()) { final String word = cursor.getString(indexWord); + final String shortcut = hasShortcutColumn ? cursor.getString(indexShortcut) : null; final int frequency = cursor.getInt(indexFrequency); final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency); // Safeguard against adding really long words. if (word.length() <= MAX_WORD_LENGTH) { runGCIfRequiredLocked(true /* mindsBlockByGC */); - addUnigramLocked(word, adjustedFrequency, false /* isNotAWord */, + addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */, + 0 /* shortcutFreq */, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) { + runGCIfRequiredLocked(true /* mindsBlockByGC */); + addUnigramLocked(shortcut, adjustedFrequency, word, + USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */, + false /* isPossiblyOffensive */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + } } cursor.moveToNext(); } } } -} +} \ No newline at end of file diff --git a/tests/src/org/futo/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/org/futo/inputmethod/latin/BinaryDictionaryTests.java index 857d954a9..8616db5ca 100644 --- a/tests/src/org/futo/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/org/futo/inputmethod/latin/BinaryDictionaryTests.java @@ -170,7 +170,9 @@ public class BinaryDictionaryTests { addUnigramWord(binaryDictionary, validLongWord, probability); addUnigramWord(binaryDictionary, invalidLongWord, probability); // Too long short cut. - binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */, + binaryDictionary.addUnigramEntry("a", probability, + null, 0, + false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); @@ -189,6 +191,7 @@ public class BinaryDictionaryTests { private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { binaryDictionary.addUnigramEntry(word, probability, + null, 0, false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); @@ -736,6 +739,7 @@ public class BinaryDictionaryTests { final boolean isPossiblyOffensive = random.nextBoolean(); // TODO: Add tests for historical info. binaryDictionary.addUnigramEntry(word, unigramProbability, + null, 0, false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { @@ -883,7 +887,9 @@ public class BinaryDictionaryTests { public void testPossiblyOffensiveAttributeMaintained() { final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0); + binaryDictionary.addUnigramEntry("ddd", 100, + null, 0, + false, true, true, 0); WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); assertEquals(true, wordProperty.mIsPossiblyOffensive); } diff --git a/tests/src/org/futo/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/org/futo/inputmethod/latin/makedict/Ver4DictEncoder.java index 757707aea..aa7cf8049 100644 --- a/tests/src/org/futo/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/tests/src/org/futo/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -75,6 +75,7 @@ public class Ver4DictEncoder implements DictEncoder { } for (final WordProperty wordProperty : dict) { if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), + null, 0, wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord, wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) { MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);