mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Implement addMultipleDictionaryEntries as a jni method.
Bug: 11757169 Change-Id: Ie1569e0b3c0503588ebfe5d09c7ca06116f54485
This commit is contained in:
parent
0e8dbe0284
commit
ca16be7552
@ -137,6 +137,8 @@ public final class BinaryDictionary extends Dictionary {
|
||||
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
|
||||
int probability);
|
||||
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
|
||||
// Native batch insert: handles the entries of languageModelParams from startIndex onwards and
// returns how many language model params are processed.
private static native int addMultipleDictionaryEntriesNative(long dict,
|
||||
LanguageModelParam[] languageModelParams, int startIndex);
|
||||
private static native int calculateProbabilityNative(long dict, int unigramProbability,
|
||||
int bigramProbability);
|
||||
private static native String getPropertyNative(long dict, String query);
|
||||
@ -303,6 +305,46 @@ public final class BinaryDictionary extends Dictionary {
|
||||
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
|
||||
}
|
||||
|
||||
public static class LanguageModelParam {
|
||||
public final int[] mWord0;
|
||||
public final int[] mWord1;
|
||||
public final int mUnigramProbability;
|
||||
public final int mBigramProbability;
|
||||
|
||||
// Constructor for unigram.
|
||||
public LanguageModelParam(final String word, final int unigramProbability) {
|
||||
mWord0 = null;
|
||||
mWord1 = StringUtils.toCodePointArray(word);
|
||||
mUnigramProbability = unigramProbability;
|
||||
mBigramProbability = NOT_A_PROBABILITY;
|
||||
}
|
||||
|
||||
// Constructor for unigram and bigram.
|
||||
public LanguageModelParam(final String word0, final String word1,
|
||||
final int unigramProbability, final int bigramProbability) {
|
||||
mWord0 = StringUtils.toCodePointArray(word0);
|
||||
mWord1 = StringUtils.toCodePointArray(word1);
|
||||
mUnigramProbability = unigramProbability;
|
||||
mBigramProbability = bigramProbability;
|
||||
}
|
||||
}
|
||||
|
||||
public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
|
||||
if (!isValidDictionary()) return;
|
||||
int processedParamCount = 0;
|
||||
while (processedParamCount < languageModelParams.length) {
|
||||
if (needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
flushWithGC();
|
||||
}
|
||||
processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
|
||||
languageModelParams, processedParamCount);
|
||||
if (processedParamCount <= 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void reopen() {
|
||||
close();
|
||||
final File dictFile = new File(mDictFilePath);
|
||||
|
@ -22,6 +22,7 @@ import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.utils.AsyncResultHolder;
|
||||
@ -326,7 +327,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||
* Dynamically adds a word bigram in the dictionary. May overwrite an existing entry.
|
||||
*/
|
||||
protected void addBigramDynamically(final String word0, final String word1,
|
||||
final int frequency, final boolean isValid) {
|
||||
final int frequency) {
|
||||
if (!mIsUpdatable) {
|
||||
Log.w(TAG, "addBigramDynamically is called for non-updatable dictionary: "
|
||||
+ mFilename);
|
||||
@ -363,22 +364,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||
public void onFinished();
|
||||
}
|
||||
|
||||
public static class LanguageModelParam {
|
||||
public final String mWord0;
|
||||
public final String mWord1;
|
||||
public final boolean mIsValid;
|
||||
public final int mFrequency;
|
||||
public final int mBigramFrequency;
|
||||
public LanguageModelParam(final String word0, final String word1, final boolean isValid,
|
||||
final int frequency, final int bigramFrequency) {
|
||||
mWord0 = word0;
|
||||
mWord1 = word1;
|
||||
mIsValid = isValid;
|
||||
mFrequency = frequency;
|
||||
mBigramFrequency = bigramFrequency;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamically add multiple entries to the dictionary.
|
||||
*/
|
||||
@ -395,21 +380,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||
public void run() {
|
||||
final boolean locked = setProcessingLargeTaskIfNot();
|
||||
try {
|
||||
for (final LanguageModelParam languageModelParam : languageModelParams) {
|
||||
if (languageModelParam.mWord1 == null) {
|
||||
continue;
|
||||
}
|
||||
if (mBinaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
mBinaryDictionary.flushWithGC();
|
||||
}
|
||||
mBinaryDictionary.addUnigramWord(languageModelParam.mWord1,
|
||||
languageModelParam.mFrequency);
|
||||
if (languageModelParam.mWord0 != null
|
||||
&& !languageModelParam.mWord0.equals(languageModelParam.mWord1)) {
|
||||
mBinaryDictionary.addBigramWords(languageModelParam.mWord0,
|
||||
languageModelParam.mWord1, languageModelParam.mBigramFrequency);
|
||||
}
|
||||
}
|
||||
mBinaryDictionary.addMultipleDictionaryEntries(
|
||||
languageModelParams.toArray(
|
||||
new LanguageModelParam[languageModelParams.size()]));
|
||||
} finally {
|
||||
if (callback != null) {
|
||||
callback.onFinished();
|
||||
|
@ -21,6 +21,7 @@ import android.content.SharedPreferences;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||
@ -147,7 +148,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||
return;
|
||||
}
|
||||
if (null != word0) {
|
||||
addBigramDynamically(word0, word1, frequency, isValid);
|
||||
addBigramDynamically(word0, word1, frequency);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import android.content.Context;
|
||||
|
||||
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||
|
||||
import java.lang.ref.WeakReference;
|
||||
@ -28,24 +29,6 @@ import java.util.ArrayList;
|
||||
* dictionary.
|
||||
*/
|
||||
public abstract class PersonalizationDictionaryUpdateSession {
|
||||
/**
|
||||
* This class is a parameter for a new unigram or bigram word which will be added
|
||||
* to the personalization dictionary.
|
||||
*/
|
||||
public static class PersonalizationLanguageModelParam {
|
||||
public final String mWord0;
|
||||
public final String mWord1;
|
||||
public final boolean mIsValid;
|
||||
public final int mFrequency;
|
||||
public PersonalizationLanguageModelParam(String word0, String word1, boolean isValid,
|
||||
int frequency) {
|
||||
mWord0 = word0;
|
||||
mWord1 = word1;
|
||||
mIsValid = isValid;
|
||||
mFrequency = frequency;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Use a dynamic binary dictionary instead
|
||||
public WeakReference<PersonalizationDictionary> mDictionary;
|
||||
public WeakReference<DecayingExpandableBinaryDictionaryBase> mPredictionDictionary;
|
||||
@ -117,7 +100,7 @@ public abstract class PersonalizationDictionaryUpdateSession {
|
||||
|
||||
// TODO: Support multi locale.
|
||||
public void addMultipleDictionaryEntriesToPersonalizationDictionary(
|
||||
final ArrayList<ExpandableBinaryDictionary.LanguageModelParam> languageModelParams,
|
||||
final ArrayList<LanguageModelParam> languageModelParams,
|
||||
final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
|
||||
final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary();
|
||||
if (dictionary == null) {
|
||||
@ -128,17 +111,4 @@ public abstract class PersonalizationDictionaryUpdateSession {
|
||||
}
|
||||
dictionary.addMultipleDictionaryEntriesToDictionary(languageModelParams, callback);
|
||||
}
|
||||
|
||||
// Bulk import
|
||||
// TODO: Support multi locale to add bigram
|
||||
public void addBigramsToPersonalizationDictionary(
|
||||
final ArrayList<PersonalizationLanguageModelParam> lmParams) {
|
||||
final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary();
|
||||
if (dictionary == null) {
|
||||
return;
|
||||
}
|
||||
for (final PersonalizationLanguageModelParam lmParam : lmParams) {
|
||||
dictionary.addToDictionary(lmParam.mWord0, lmParam.mWord1, lmParam.mIsValid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -320,6 +320,60 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
|
||||
word1Length);
|
||||
}
|
||||
|
||||
|
||||
// Returns how many language model params are processed.
|
||||
// Adds the unigram (and, when mWord0 is set, the bigram) described by each LanguageModelParam
// in languageModelParams, starting at startIndex.
//
// Returns how many language model params are processed, counted from the head of the array:
// either the array length, or the index following the entry after which the dictionary
// reported that it needs GC. Returns 0 when nothing could be processed (no dictionary, null or
// empty array, startIndex out of range, or a null first element).
//
// Null array elements and elements whose mWord1 is null are skipped, because passing a null
// reference to GetObjectField() / GetArrayLength() is not allowed.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
        jlong dict, jobjectArray languageModelParams, jint startIndex) {
    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    if (!dictionary || !languageModelParams) {
        return 0;
    }
    jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
    if (languageModelParamCount == 0 || startIndex < 0
            || startIndex >= languageModelParamCount) {
        return 0;
    }
    // The field IDs are looked up once, through the class of the first element.
    jobject firstLanguageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
    if (!firstLanguageModelParam) {
        // GetObjectClass() must not be given a null reference.
        return 0;
    }
    jclass languageModelParamClass = env->GetObjectClass(firstLanguageModelParam);
    env->DeleteLocalRef(firstLanguageModelParam);
    jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
    jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
    jfieldID unigramProbabilityFieldId =
            env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
    jfieldID bigramProbabilityFieldId =
            env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
    env->DeleteLocalRef(languageModelParamClass);

    for (int i = startIndex; i < languageModelParamCount; ++i) {
        jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
        if (!languageModelParam) {
            // Nothing to add for a null entry.
            continue;
        }
        // word0 may be null, which means the entry carries no bigram information.
        jintArray word0 = static_cast<jintArray>(
                env->GetObjectField(languageModelParam, word0FieldId));
        jintArray word1 = static_cast<jintArray>(
                env->GetObjectField(languageModelParam, word1FieldId));
        if (!word1) {
            // Without word1 there is neither a unigram nor a bigram to add.
            env->DeleteLocalRef(word0);
            env->DeleteLocalRef(languageModelParam);
            continue;
        }
        jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
        // Keep the buffers at least one element long; a zero-length array is not valid.
        int word0CodePoints[word0Length > 0 ? word0Length : 1];
        if (word0) {
            env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
        }
        jsize word1Length = env->GetArrayLength(word1);
        int word1CodePoints[word1Length > 0 ? word1Length : 1];
        env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
        jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
        dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability);
        if (word0) {
            jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
            dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
                    bigramProbability);
        }
        // Release the per-entry references before the possible early return below, so that
        // every exit path from the loop body cleans up after itself.
        env->DeleteLocalRef(word0);
        env->DeleteLocalRef(word1);
        env->DeleteLocalRef(languageModelParam);
        if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
            // Report the entries handled so far; processing resumes from this index.
            return i + 1;
        }
    }
    return languageModelParamCount;
}
|
||||
|
||||
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
|
||||
jlong dict, jint unigramProbability, jint bigramProbability) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
@ -418,6 +472,12 @@ static const JNINativeMethod sMethods[] = {
|
||||
const_cast<char *>("(J[I[I)V"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
|
||||
},
|
||||
{
|
||||
const_cast<char *>("addMultipleDictionaryEntriesNative"),
|
||||
const_cast<char *>(
|
||||
"(J[Lcom/android/inputmethod/latin/BinaryDictionary$LanguageModelParam;I)I"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
|
||||
},
|
||||
{
|
||||
const_cast<char *>("calculateProbabilityNative"),
|
||||
const_cast<char *>("(JII)I"),
|
||||
|
@ -21,6 +21,7 @@ import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.text.TextUtils;
|
||||
import android.util.Pair;
|
||||
|
||||
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
|
||||
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
|
||||
@ -33,6 +34,7 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
// TODO Use the seed passed as an argument for makedict test.
|
||||
@LargeTest
|
||||
public class BinaryDictionaryTests extends AndroidTestCase {
|
||||
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||
@ -776,4 +778,66 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
public void testAddMultipleDictionaryEntries() {
|
||||
testAddMultipleDictionaryEntries(3 /* formatVersion */);
|
||||
testAddMultipleDictionaryEntries(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
private void testAddMultipleDictionaryEntries(final int formatVersion) {
|
||||
final int codePointSetSize = 20;
|
||||
final int lmParamCount = 1000;
|
||||
final double bigramContinueRate = 0.9;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
}
|
||||
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
|
||||
final HashMap<Pair<String, String>, Integer> bigramProbabilities =
|
||||
new HashMap<Pair<String, String>, Integer>();
|
||||
|
||||
final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
|
||||
String prevWord = null;
|
||||
for (int i = 0; i < languageModelParams.length; i++) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
final int probability = random.nextInt(0xFF);
|
||||
final int bigramProbability = random.nextInt(0xF);
|
||||
unigramProbabilities.put(word, probability);
|
||||
if (prevWord == null) {
|
||||
languageModelParams[i] = new LanguageModelParam(word, probability);
|
||||
} else {
|
||||
languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
|
||||
bigramProbability);
|
||||
bigramProbabilities.put(new Pair<String, String>(prevWord, word),
|
||||
bigramProbability);
|
||||
}
|
||||
prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
|
||||
}
|
||||
|
||||
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
|
||||
|
||||
for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
|
||||
assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
|
||||
}
|
||||
|
||||
for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
|
||||
final String word0 = entry.getKey().first;
|
||||
final String word1 = entry.getKey().second;
|
||||
final int unigramProbability = unigramProbabilities.get(word1);
|
||||
final int bigramProbability = entry.getValue();
|
||||
final int probability = binaryDictionary.calculateProbability(
|
||||
unigramProbability, bigramProbability);
|
||||
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user