From 1adca93381d261a6070be2721dbf8b8abafbfe01 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 23 May 2014 19:58:58 +0900 Subject: [PATCH] Extend jni methods and enable Beginning-of-Sentence. Bug: 14119293 Change-Id: I78fc877367dd0d6240eeacb750b6d2d0b93cba83 --- .../inputmethod/latin/BinaryDictionary.java | 46 ++++++----- .../latin/ExpandableBinaryDictionary.java | 2 +- .../inputmethod/latin/PrevWordsInfo.java | 8 +- .../latin/makedict/WordProperty.java | 4 + ...oid_inputmethod_latin_BinaryDictionary.cpp | 38 ++++----- .../dynamic_pt_gc_event_listeners.cpp | 2 +- .../latin/BinaryDictionaryDecayingTests.java | 61 +++++++++++++- .../latin/BinaryDictionaryTests.java | 79 +++++++++++++++---- .../latin/makedict/Ver4DictEncoder.java | 8 +- 9 files changed, 186 insertions(+), 62 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index e7ab02ac1..b77540622 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -191,7 +191,8 @@ public final class BinaryDictionary extends Dictionary { private static native void closeNative(long dict); private static native int getFormatVersionNative(long dict); private static native int getProbabilityNative(long dict, int[] word); - private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); + private static native int getBigramProbabilityNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1); private static native void getWordPropertyNative(long dict, int[] word, int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, ArrayList outBigramTargets, ArrayList outBigramProbabilityInfo, @@ -200,15 +201,17 @@ public final class BinaryDictionary extends Dictionary { private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, - int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints, - int[] outputScores, int[] outputIndices, int[] outputTypes, - int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight); + int[] prevWordCodePointArray, boolean isBeginningOfSentence, + int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, + int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, + float[] inOutLanguageWeight); private static native void addUnigramWordNative(long dict, int[] word, int probability, - int[] shortcutTarget, int shortcutProbability, boolean isNotAWord, - boolean isBlacklisted, int timestamp); - private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, - int probability, int timestamp); - private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); + int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, + boolean isNotAWord, boolean isBlacklisted, int timestamp); + private static native void addBigramWordsNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1, int probability, int timestamp); + private static native void removeBigramWordsNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1); private static native int addMultipleDictionaryEntriesNative(long dict, LanguageModelParam[] languageModelParams, int startIndex); private static native String getPropertyNative(long dict, String query); @@ -301,7 +304,8 @@ public final class BinaryDictionary extends Dictionary { getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), inputPointers.getYCoordinates(), inputPointers.getTimes(), inputPointers.getPointerIds(), mInputCodePoints, inputSize, - mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount, + mNativeSuggestOptions.getOptions(), prevWordCodePointArray, + prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount, mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes, mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight); if (inOutLanguageWeight != null) { @@ -364,12 +368,13 @@ public final class BinaryDictionary extends Dictionary { } public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { - if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { return NOT_A_PROBABILITY; } final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints1 = StringUtils.toCodePointArray(word); - return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); + return getBigramProbabilityNative(mNativeDict, codePoints0, + prevWordsInfo.mIsBeginningOfSentence, codePoints1); } public WordProperty getWordProperty(final String word) { @@ -420,16 +425,17 @@ public final class BinaryDictionary extends Dictionary { // Add a unigram entry to binary dictionary with unigram attributes in native code. public void addUnigramEntry(final String word, final int probability, - final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, + final String shortcutTarget, final int shortcutProbability, + final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isBlacklisted, final int timestamp) { - if (TextUtils.isEmpty(word)) { + if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { return; } final int[] codePoints = StringUtils.toCodePointArray(word); final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? StringUtils.toCodePointArray(shortcutTarget) : null; addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, - shortcutProbability, isNotAWord, isBlacklisted, timestamp); + shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp); mHasUpdated = true; } @@ -437,23 +443,25 @@ public final class BinaryDictionary extends Dictionary { public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, final int probability, final int timestamp) { - if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { return; } final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints1 = StringUtils.toCodePointArray(word); - addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp); + addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence, + codePoints1, probability, timestamp); mHasUpdated = true; } // Remove an n-gram entry from the binary dictionary in native code. public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { - if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { return; } final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints1 = StringUtils.toCodePointArray(word); - removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); + removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence, + codePoints1); mHasUpdated = true; } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index d67253c3b..334ebb37d 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -292,7 +292,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord, final boolean isBlacklisted, final int timestamp) { mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq, - isNotAWord, isBlacklisted, timestamp); + false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp); } /** diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java index ecc8947db..3494d16f7 100644 --- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java +++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java @@ -20,6 +20,8 @@ import android.util.Log; // TODO: Support multiple previous words for n-gram. public class PrevWordsInfo { + public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo(); + // The previous word. May be null after resetting and before starting a new composing word, or // when there is no context like at the start of text for example. It can also be set to null // externally when the user enters a separator that does not let bigrams across, like a period @@ -32,7 +34,7 @@ public class PrevWordsInfo { // Beginning of sentence. public PrevWordsInfo() { - mPrevWord = null; + mPrevWord = ""; mIsBeginningOfSentence = true; } @@ -40,4 +42,8 @@ public class PrevWordsInfo { mPrevWord = prevWord; mIsBeginningOfSentence = false; } + + public boolean isValid() { + return mPrevWord != null; + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index 853392200..ed832510c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -35,6 +35,8 @@ public final class WordProperty implements Comparable { public final ProbabilityInfo mProbabilityInfo; public final ArrayList mShortcutTargets; public final ArrayList mBigrams; + // TODO: Support mIsBeginningOfSentence. + public final boolean mIsBeginningOfSentence; public final boolean mIsNotAWord; public final boolean mIsBlacklistEntry; public final boolean mHasShortcuts; @@ -51,6 +53,7 @@ public final class WordProperty implements Comparable { mProbabilityInfo = probabilityInfo; mShortcutTargets = shortcutTargets; mBigrams = bigrams; + mIsBeginningOfSentence = false; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklistEntry; mHasBigrams = bigrams != null && !bigrams.isEmpty(); @@ -77,6 +80,7 @@ public final class WordProperty implements Comparable { mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mShortcutTargets = CollectionUtils.newArrayList(); mBigrams = CollectionUtils.newArrayList(); + mIsBeginningOfSentence = false; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklisted; mHasShortcuts = hasShortcuts; diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 5ad211441..e41fe1d43 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -178,10 +178,10 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, - jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount, - jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray, - jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray, - jfloatArray inOutLanguageWeight) { + jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence, + jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray, + jintArray outSpaceIndicesArray, jintArray outTypesArray, + jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) { Dictionary *dictionary = reinterpret_cast(dict); // Assign 0 to outSuggestionCount here in case of returning earlier in this method. JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0); @@ -274,7 +274,7 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, } static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, - jlong dict, jintArray word0, jintArray word1) { + jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) return JNI_FALSE; const jsize word0Length = env->GetArrayLength(word0); @@ -283,7 +283,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c int word1CodePoints[word1Length]; env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length); } @@ -326,7 +326,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, - jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { + jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted, + jint timestamp) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) { return; @@ -341,13 +342,14 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } // Use 1 for count to indicate the word has inputted. - const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, + const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1, jint probability, jint timestamp) { + jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability, + jint timestamp) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) { return; @@ -363,13 +365,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, timestamp, 0 /* level */, 1 /* count */); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, - false /* isBeginningOfSentence */); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1) { + jintArray word0, jboolean isBeginningOfSentence, jintArray word1) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) { return; @@ -380,8 +381,7 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz jsize word1Length = env->GetArrayLength(word1); int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, - false /* isBeginningOfSentence */); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length); } @@ -625,7 +625,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast("getSuggestionsNative"), - const_cast("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"), + const_cast("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"), reinterpret_cast(latinime_BinaryDictionary_getSuggestions) }, { @@ -635,7 +635,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast("getBigramProbabilityNative"), - const_cast("(J[I[I)I"), + const_cast("(J[IZ[I)I"), reinterpret_cast(latinime_BinaryDictionary_getBigramProbability) }, { @@ -651,17 +651,17 @@ static const JNINativeMethod sMethods[] = { }, { const_cast("addUnigramWordNative"), - const_cast("(J[II[IIZZI)V"), + const_cast("(J[II[IIZZZI)V"), reinterpret_cast(latinime_BinaryDictionary_addUnigramWord) }, { const_cast("addBigramWordsNative"), - const_cast("(J[I[III)V"), + const_cast("(J[IZ[III)V"), reinterpret_cast(latinime_BinaryDictionary_addBigramWords) }, { const_cast("removeBigramWordsNative"), - const_cast("(J[I[I)V"), + const_cast("(J[IZ[I)V"), reinterpret_cast(latinime_BinaryDictionary_removeBigramWords) }, { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp index 028e9ecbf..1f00fc6ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp @@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners } } else { mValueStack.back() += 1; - if (ptNodeParams->isTerminal()) { + if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) { mValidUnigramCount += 1; } } diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index 2c2fed3c1..d551cc7e6 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -63,12 +63,16 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { super.tearDown(); } + private static boolean supportsBeginningOfSentence(final int formatVersion) { + return formatVersion >= FormatSpec.VERSION4_DEV; + } + private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, - false /* isNotAWord */, false /* isBlacklisted */, - mCurrentTime /* timestamp */); + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, mCurrentTime /* timestamp */); } private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, @@ -631,4 +635,57 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { binaryDictionary.close(); dictFile.delete(); } + + public void testBeginningOfSentence() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + if (supportsBeginningOfSentence(formatVersion)) { + testBeginningOfSentence(formatVersion); + } + } + } + + private void testBeginningOfSentence(final int formatVersion) { + setCurrentTimeForTestMode(mCurrentTime); + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */, + BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, + mCurrentTime); + final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + mCurrentTime); + assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + mCurrentTime); + addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY, + mCurrentTime); + assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); + assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + + forcePassingLongTime(binaryDictionary); + assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); + assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + + addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + mCurrentTime); + addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY, + mCurrentTime); + assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); + assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + binaryDictionary.close(); + dictFile.delete(); + } } diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 2b82e544a..31ad3446a 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -50,6 +50,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { return formatVersion >= FormatSpec.VERSION4_DEV; } + private static boolean supportsBeginningOfSentence(final int formatVersion) { + return formatVersion >= FormatSpec.VERSION4_DEV; + } + private File createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion) throws IOException { if (formatVersion == FormatSpec.VERSION4 @@ -171,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, invalidLongWord, probability); // Too long short cut. binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, - 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */, + 10 /* shortcutProbability */, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); final int updatedProbability = 200; @@ -192,8 +197,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int probability) { binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, - false /* isNotAWord */, false /* isBlacklisted */, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, @@ -1010,7 +1015,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { // TODO: Add tests for historical info. binaryDictionary.addUnigramEntry(word, unigramProbability, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, - isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); } @@ -1188,24 +1194,24 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int shortcutProbability = 10; binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + shortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); final int updatedShortcutProbability = 2; binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", - updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + updatedShortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); wordProperty = binaryDictionary.getWordProperty("aaa"); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(updatedShortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); final HashMap shortcutTargets = new HashMap(); shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); @@ -1275,8 +1281,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = words.get(random.nextInt(words.size())); final int unigramProbability = unigramProbabilities.get(word); binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); if (shortcutTargets.containsKey(word)) { final HashMap shortcutTargetsOfWord = shortcutTargets.get(word); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); @@ -1331,10 +1337,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); final int shortcutProbability = 10; binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, - false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, - Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */, - true /* isBlacklisted */, 0 /* timestamp */); + Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, + true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); @@ -1434,4 +1441,46 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(bigramProbabilities.size(), Integer.parseInt( binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); } + + public void testBeginningOfSentence() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + if (supportsBeginningOfSentence(formatVersion)) { + testBeginningOfSentence(formatVersion); + } + } + } + + private void testBeginningOfSentence(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int dummyProbability = 0; + binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */, + BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final int bigramProbability = 200; + addUnigramWord(binaryDictionary, "aaa", dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa")); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + addUnigramWord(binaryDictionary, "bbb", dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + binaryDictionary.flushWithGC(); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa")); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb")); + } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index a04b81024..0528e341e 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -77,14 +77,14 @@ public class Ver4DictEncoder implements DictEncoder { if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) { binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), null /* shortcutTarget */, 0 /* shortcutProbability */, - wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry, - 0 /* timestamp */); + wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord, + wordProperty.mIsBlacklistEntry, 0 /* timestamp */); } else { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), shortcutTarget.mWord, shortcutTarget.getProbability(), - wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry, - 0 /* timestamp */); + wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord, + wordProperty.mIsBlacklistEntry, 0 /* timestamp */); } } if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {