Merge "Add a JNI to get the frequency" into jb-dev

This commit is contained in:
Satoshi Kataoka 2012-05-29 01:27:59 -07:00 committed by Android (Google) Code Review
commit 51705efc96
6 changed files with 25 additions and 11 deletions

View File

@ -84,7 +84,7 @@ public class BinaryDictionary extends Dictionary {
private native long openNative(String sourceDir, long dictOffset, long dictSize,
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
private native void closeNative(long dict);
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
private native int getFrequencyNative(long dict, int[] word, int wordLength);
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
@ -203,7 +203,8 @@ public class BinaryDictionary extends Dictionary {
// Reports whether the given word is present in the native dictionary.
// A null word is never valid. Otherwise the word is converted to an array of code points
// and its frequency is requested from native code; a non-negative frequency means valid.
public boolean isValidWord(CharSequence word) {
    if (null == word) {
        return false;
    }
    final int[] codePoints = StringUtils.toCodePointArray(word.toString());
    return getFrequencyNative(mNativeDict, codePoints, codePoints.length) >= 0;
}
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni

View File

@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo
return count;
}
static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
// JNI implementation of BinaryDictionary.getFrequencyNative.
//   dict        native Dictionary handle handed over from Java as a jlong; may be 0.
//   wordArray   Java int[] holding the word to look up.
//   wordLength  number of elements of wordArray that make up the word.
// Returns whatever Dictionary::getFrequency reports for the word, or a negative value when
// the lookup cannot be performed (no dictionary, or the array elements are unavailable).
static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict,
        jintArray wordArray, jint wordLength) {
    Dictionary *dictionary = (Dictionary*)dict;
    // The Java caller accepts any frequency >= 0 as "valid word", so the failure paths must
    // return a negative value; returning 0 here would mark every word as valid.
    // NOTE(review): presumably this should be NOT_A_PROBABILITY - confirm that the constant
    // is visible in this translation unit before swapping it in.
    if (!dictionary) return -1;
    jint *word = env->GetIntArrayElements(wordArray, 0);
    // GetIntArrayElements yields NULL when it fails; do not hand that to the dictionary.
    if (!word) return -1;
    jint result = dictionary->getFrequency(word, wordLength);
    // JNI_ABORT: the elements were only read, so nothing needs copying back.
    env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
    return result;
}
@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = {
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
(void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
{"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency},
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
{"calcNormalizedScoreNative", "([CI[CII)F",

View File

@ -55,8 +55,8 @@ Dictionary::~Dictionary() {
delete mBigramDictionary;
}
bool Dictionary::isValidWord(const int32_t *word, int length) {
return mUnigramDictionary->isValidWord(word, length);
// Returns the frequency of the given word by forwarding the lookup to the unigram
// dictionary; the result is passed through unchanged.
int Dictionary::getFrequency(const int32_t *word, int length) {
    const int frequency = mUnigramDictionary->getFrequency(word, length);
    return frequency;
}
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,

View File

@ -52,7 +52,7 @@ class Dictionary {
maxWordLength, maxBigrams);
}
bool isValidWord(const int32_t *word, int length);
int getFrequency(const int32_t *word, int length);
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
void *getDict() { return (void *)mDict; }
int getDictSize() { return mDictSize; }

View File

@ -747,8 +747,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
return maxFreq;
}
bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
// Looks up inWord (length elements) in the binary dictionary and returns its unigram
// frequency, or NOT_A_PROBABILITY when the word has no terminal position.
int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
    const uint8_t* const root = DICT_ROOT;
    int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
    if (NOT_VALID_WORD == pos) {
        return NOT_A_PROBABILITY;
    }
    // Read the flags at the terminal position; this also advances pos past them.
    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
    const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
    // Move pos past the character data (one or several characters) before the
    // frequency is read from that position.
    if (hasMultipleChars) {
        pos = BinaryFormat::skipOtherCharacters(root, pos);
    } else {
        // Called only to advance pos; the character code itself is not needed.
        // Uses the cached root like every other call in this function.
        BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
    }
    const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
    return unigramFreq;
}
// TODO: remove this function.

View File

@ -72,7 +72,7 @@ class UnigramDictionary {
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
bool isValidWord(const int32_t* const inWord, const int length) const;
int getFrequency(const int32_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
Correction *correction, const int *xcoordinates, const int *ycoordinates,