futokb/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
Keisuke Kuroyanagi 16cc3992d7 Use trigrams for personalization dict.
5Bug: 14425059
Change-Id: I73cf6904e569d60996a3b079f16ea6df0cb90f02
2014-10-23 14:32:45 +09:00

746 lines
34 KiB
C++

/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cstring> // for memset()
#include <vector>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/ngram_context.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
#include "utils/int_array_view.h"
#include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h"
namespace latinime {
class ProximityInfo;
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
return 0;
}
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE));
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
}
static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
jobjectArray attributeValueStringArray) {
const jsize localeUtf8Length = env->GetStringUTFLength(locale);
char localeChars[localeUtf8Length + 1];
env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
localeChars[localeUtf8Length] = '\0';
std::vector<int> localeCodePoints;
HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
const int keyCount = env->GetArrayLength(attributeKeyStringArray);
const int valueCount = env->GetArrayLength(attributeValueStringArray);
if (keyCount != valueCount) {
return false;
}
DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
attributeValueStringArray);
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
formatVersion, localeCodePoints, &attributeMap);
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
return reinterpret_cast<jlong>(dictionary);
}
static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flush(filePathChars);
}
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flushWithGC(filePathChars);
}
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
delete dictionary;
}
static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
jobject outAttributeValues) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
headerPolicy->getFormatVersionNumber());
// Output attribute map
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
headerPolicy->getAttributeMap();
for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
it != attributeMap->end(); ++it) {
// Output key
jintArray keyCodePointArray = env->NewIntArray(it->first.size());
JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
it->first.size(), it->first.data(), it->first.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
env->DeleteLocalRef(keyCodePointArray);
// Output value
jintArray valueCodePointArray = env->NewIntArray(it->second.size());
JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
it->second.size(), it->second.data(), it->second.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
env->DeleteLocalRef(valueCodePointArray);
}
env->DeleteLocalRef(arrayListClass);
return;
}
static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
return headerPolicy->getFormatVersionNumber();
}
static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jint prevWordCount, jintArray outSuggestionCount, jintArray outCodePointsArray,
jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
jintArray outAutoCommitFirstWordConfidenceArray,
jfloatArray inOutWeightOfLangModelVsSpatialModel) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
if (!dictionary) {
return;
}
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
DicTraverseSession *traverseSession =
reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
if (!traverseSession) {
return;
}
// Input values
int xCoordinates[inputSize];
int yCoordinates[inputSize];
int times[inputSize];
int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
int options[numberOfOptions];
env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
SuggestOptions givenSuggestOptions(options, numberOfOptions);
// Output values
/* By the way, let's check the output array length here to make sure */
const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
ASSERT(false);
return;
}
const jsize scoresLength = env->GetArrayLength(outScoresArray);
if (scoresLength != MAX_RESULTS) {
AKLOGE("Invalid scoresLength: %d", scoresLength);
ASSERT(false);
return;
}
const jsize outputAutoCommitFirstWordConfidenceLength =
env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
if (outputAutoCommitFirstWordConfidenceLength != 1) {
// We only use the first result, as obviously we will only ever autocommit the first one
AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
outputAutoCommitFirstWordConfidenceLength);
ASSERT(false);
return;
}
float weightOfLangModelVsSpatialModel;
env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
&weightOfLangModelVsSpatialModel);
SuggestionResults suggestionResults(MAX_RESULTS);
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, inputSize, &ngramContext,
&givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
} else {
dictionary->getPredictions(&ngramContext, &suggestionResults);
}
if (DEBUG_DICT) {
suggestionResults.dumpSuggestions();
}
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
outScoresArray, outSpaceIndicesArray, outTypesArray,
outAutoCommitFirstWordConfidenceArray, inOutWeightOfLangModelVsSpatialModel);
}
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(
CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
return dictionary->getNgramProbability(&ngramContext,
CodePointArrayView(wordCodePoints, wordLength));
}
// Method to iterate all words in the dictionary for makedict.
// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
// the dictionary does not have a next word.
static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
if (codePointBufSize != MAX_WORD_LENGTH) {
AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
ASSERT(false);
return 0;
}
int wordCodePoints[codePointBufSize];
int wordCodePointCount = 0;
const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
&wordCodePointCount);
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
false /* needsNullTermination */);
bool isBeginningOfSentence = false;
if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
isBeginningOfSentence = true;
}
JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
isBeginningOfSentence);
return nextToken;
}
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
jobject outNgramProbabilityInfo, jobject outShortcutTargets,
jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
if (wordLength > MAX_WORD_LENGTH) {
AKLOGE("Invalid wordLength: %d", wordLength);
return;
}
int wordCodePoints[MAX_WORD_LENGTH];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
int codePointCount = wordLength;
if (isBeginningOfSentence) {
codePointCount = CharUtils::attachBeginningOfSentenceMarker(
wordCodePoints, wordLength, MAX_WORD_LENGTH);
if (codePointCount < 0) {
AKLOGE("Cannot attach Beginning-of-Sentence marker.");
return;
}
}
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, codePointCount));
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
}
static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
{
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
}
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
1 /* count */), std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
}
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word, jint probability, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
// Use 1 for count to indicate the ngram has inputted.
const NgramProperty ngramProperty(ngramContext,
CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
return dictionary->addNgramEntry(&ngramProperty);
}
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
return dictionary->removeNgramEntry(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount));
}
static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env,
jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays,
jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
historicalInfo);
}
// Returns how many input events are processed.
static int latinime_BinaryDictionary_updateEntriesForInputEvents(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray inputEvents, jint startIndex) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return 0;
}
jsize inputEventCount = env->GetArrayLength(inputEvents);
if (inputEventCount == 0 || startIndex >= inputEventCount) {
return 0;
}
jobject inputEvent = env->GetObjectArrayElement(inputEvents, 0);
jclass wordInputEventClass = env->GetObjectClass(inputEvent);
env->DeleteLocalRef(inputEvent);
jfieldID targetWordFieldId = env->GetFieldID(wordInputEventClass, "mTargetWord", "[I");
jfieldID prevWordCountFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordsCount", "I");
jfieldID prevWordArrayFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordArray", "[[I");
jfieldID isPrevWordBoSArrayFieldId =
env->GetFieldID(wordInputEventClass, "mIsPrevWordBeginningOfSentenceArray", "[Z");
jfieldID isValidFieldId = env->GetFieldID(wordInputEventClass, "mIsValid", "Z");
jfieldID timestampFieldId = env->GetFieldID(wordInputEventClass, "mTimestamp", "I");
env->DeleteLocalRef(wordInputEventClass);
for (int i = startIndex; i < inputEventCount; ++i) {
jobject inputEvent = env->GetObjectArrayElement(inputEvents, i);
jintArray targetWord = static_cast<jintArray>(
env->GetObjectField(inputEvent, targetWordFieldId));
jsize wordLength = env->GetArrayLength(targetWord);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(targetWord, 0, wordLength, wordCodePoints);
env->DeleteLocalRef(targetWord);
jint prevWordCount = env->GetIntField(inputEvent, prevWordCountFieldId);
jobjectArray prevWordArray =
static_cast<jobjectArray>(env->GetObjectField(inputEvent, prevWordArrayFieldId));
jbooleanArray isPrevWordBeginningOfSentenceArray = static_cast<jbooleanArray>(
env->GetObjectField(inputEvent, isPrevWordBoSArrayFieldId));
jboolean isValid = env->GetBooleanField(inputEvent, isValidFieldId);
jint timestamp = env->GetIntField(inputEvent, timestampFieldId);
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordArray, isPrevWordBeginningOfSentenceArray, prevWordCount);
// Use 1 for count to indicate the word has inputted.
dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
CodePointArrayView(wordCodePoints, wordLength), isValid,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;
}
env->DeleteLocalRef(prevWordArray);
env->DeleteLocalRef(isPrevWordBeginningOfSentenceArray);
env->DeleteLocalRef(inputEvent);
}
return inputEventCount;
}
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
jstring query) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return env->NewStringUTF("");
}
const jsize queryUtf8Length = env->GetStringUTFLength(query);
char queryChars[queryUtf8Length + 1];
env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
queryChars[queryUtf8Length] = '\0';
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}
static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
return dictionary->getDictionaryStructurePolicy()->isCorrupted();
}
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
const char *const dictFilePath) {
structurePolicy->flushWithGC(dictFilePath);
structurePolicy.release();
return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
}
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
jstring dictFilePath, jlong newFormatVersion) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
char dictFilePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
dictFilePathChars[filePathUtf8Length] = '\0';
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot migrate header.");
return false;
}
int wordCodePoints[MAX_WORD_LENGTH];
int wordCodePointCount = 0;
int token = 0;
// Add unigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
// Skip beginning-of-sentence unigram.
continue;
}
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
CodePointArrayView(wordCodePoints, wordCodePointCount),
wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
} while (token != 0);
// Add ngrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false;
}
}
} while (token != 0);
// Save to File.
dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
return true;
}
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
const_cast<char *>("(Ljava/lang/String;JJZ)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_open)
},
{
const_cast<char *>("createOnMemoryNative"),
const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
},
{
const_cast<char *>("closeNative"),
const_cast<char *>("(J)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
const_cast<char *>("getFormatVersionNative"),
const_cast<char *>("(J)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
},
{
const_cast<char *>("getHeaderInfoNative"),
const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
},
{
const_cast<char *>("flushNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
},
{
const_cast<char *>("needsToRunGCNative"),
const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{
const_cast<char *>("flushWithGCNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
},
{
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[ZI[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
const_cast<char *>("getProbabilityNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
},
{
const_cast<char *>("getNgramProbabilityNative"),
const_cast<char *>("(J[[I[Z[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
},
{
const_cast<char *>("getWordPropertyNative"),
const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("getNextWordNative"),
const_cast<char *>("(JI[I[Z)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{
const_cast<char *>("addUnigramEntryNative"),
const_cast<char *>("(J[II[IIZZZI)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
},
{
const_cast<char *>("removeUnigramEntryNative"),
const_cast<char *>("(J[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
},
{
const_cast<char *>("addNgramEntryNative"),
const_cast<char *>("(J[[I[Z[III)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
},
{
const_cast<char *>("removeNgramEntryNative"),
const_cast<char *>("(J[[I[Z[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
},
{
const_cast<char *>("updateEntriesForWordWithNgramContextNative"),
const_cast<char *>("(J[[I[Z[IZII)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext)
},
{
const_cast<char *>("updateEntriesForInputEventsNative"),
const_cast<char *>(
"(J[Lcom/android/inputmethod/latin/utils/WordInputEventForPersonalization;I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForInputEvents)
},
{
const_cast<char *>("getPropertyNative"),
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
},
{
const_cast<char *>("isCorruptedNative"),
const_cast<char *>("(J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
},
{
const_cast<char *>("migrateNative"),
const_cast<char *>("(JLjava/lang/String;J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
}
};
int register_BinaryDictionary(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime