Merge changes I94a64865,I62725bfe

* changes:
  Refactor step3: const unigram_dictionary
  refactor native step 2
This commit is contained in:
Satoshi Kataoka 2012-06-08 04:17:38 -07:00 committed by Android (Google) Code Review
commit 730b49fffc
8 changed files with 218 additions and 240 deletions

View File

@ -98,7 +98,7 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
static const char QUOTE = '\''; static const char QUOTE = '\'';
inline bool Correction::isQuote(const unsigned short c) { inline bool Correction::isQuote(const unsigned short c) {
const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
return (c == QUOTE && userTypedChar != QUOTE); return (c == QUOTE && userTypedChar != QUOTE);
} }
@ -283,7 +283,7 @@ bool Correction::needsToPrune() const {
void Correction::addCharToCurrentWord(const int32_t c) { void Correction::addCharToCurrentWord(const int32_t c) {
mWord[mOutputIndex] = c; mWord[mOutputIndex] = c;
const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord(); const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength, calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength,
mWord, mOutputIndex + 1); mWord, mOutputIndex + 1);
} }
@ -335,19 +335,19 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool incremented = false; bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) { if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
// TODO: Do not check the proximity if EditDistance exceeds the threshold // TODO: Do not check the proximity if EditDistance exceeds the threshold
const ProximityType matchId = const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex); mInputIndex, c, true, &proximityIndex);
if (isEquivalentChar(matchId)) { if (isEquivalentChar(matchId)) {
mLastCharExceeded = false; mLastCharExceeded = false;
--mExcessiveCount; --mExcessiveCount;
mDistances[mOutputIndex] = mDistances[mOutputIndex] =
mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
} else if (matchId == NEAR_PROXIMITY_CHAR) { } else if (matchId == NEAR_PROXIMITY_CHAR) {
mLastCharExceeded = false; mLastCharExceeded = false;
--mExcessiveCount; --mExcessiveCount;
++mProximityCount; ++mProximityCount;
mDistances[mOutputIndex] = mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(
mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); mInputIndex, proximityIndex);
} }
if (!isQuote(c)) { if (!isQuote(c)) {
incrementInputIndex(); incrementInputIndex();
@ -388,7 +388,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool secondTransposing = false; bool secondTransposing = false;
if (mTransposedCount % 2 == 1) { if (mTransposedCount % 2 == 1) {
if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex - 1, c, false))) {
++mTransposedCount; ++mTransposedCount;
secondTransposing = true; secondTransposing = true;
} else if (mCorrectionStates[mOutputIndex].mExceeding) { } else if (mCorrectionStates[mOutputIndex].mExceeding) {
@ -419,7 +420,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
ProximityType matchedProximityCharId = secondTransposing ProximityType matchedProximityCharId = secondTransposing
? EQUIVALENT_CHAR ? EQUIVALENT_CHAR
: mProximityInfo->getMatchedProximityId( : mProximityInfoState.getMatchedProximityId(
mInputIndex, c, checkProximityChars, &proximityIndex); mInputIndex, c, checkProximityChars, &proximityIndex);
if (UNRELATED_CHAR == matchedProximityCharId if (UNRELATED_CHAR == matchedProximityCharId
@ -427,7 +428,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (canTryCorrection && mOutputIndex > 0 if (canTryCorrection && mOutputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding && mCorrectionStates[mOutputIndex].mExceeding
&& isEquivalentChar(mProximityInfo->getMatchedProximityId( && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) { mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION if (DEBUG_CORRECTION
&& (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
@ -446,7 +447,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// Here, we are doing something equivalent to matchedProximityCharId, // Here, we are doing something equivalent to matchedProximityCharId,
// but we already know that "excessive char correction" just happened // but we already know that "excessive char correction" just happened
// so that we just need to check "mProximityCount == 0". // so that we just need to check "mProximityCount == 0".
matchedProximityCharId = mProximityInfo->getMatchedProximityId( matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
mInputIndex, c, mProximityCount == 0, &proximityIndex); mInputIndex, c, mProximityCount == 0, &proximityIndex);
} }
} }
@ -463,10 +464,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing && !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing
&& isEquivalentChar(mProximityInfo->getMatchedProximityId( && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false)) mInputIndex, mWord[mOutputIndex - 1], false))
&& isEquivalentChar( && isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion t->e // Conversion t->e
// Example: // Example:
// occaisional -> occa sional // occaisional -> occa sional
@ -478,7 +479,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& !mCorrectionStates[mOutputIndex].mTransposing && !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing
&& isEquivalentChar( && isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion t->s // Conversion t->s
// Example: // Example:
// chcolate -> chocolate // chcolate -> chocolate
@ -490,7 +491,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mSkipping
&& isEquivalentChar( && isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion p->s // Conversion p->s
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char. // proximity chars of "s", but it should rather be handled as a skipped char.
@ -502,7 +503,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mSkipping
&& mCorrectionStates[mOutputIndex].mAdditionalProximityMatching && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
&& isProximityCharOrEquivalentChar( && isProximityCharOrEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion s->a // Conversion s->a
incrementInputIndex(); incrementInputIndex();
--mSkippedCount; --mSkippedCount;
@ -511,7 +512,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
} else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
&& isEquivalentChar( && isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// 1.2. Excessive or transpose correction // 1.2. Excessive or transpose correction
if (mTransposing) { if (mTransposing) {
++mTransposedCount; ++mTransposedCount;
@ -573,12 +574,12 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else if (isEquivalentChar(matchedProximityCharId)) { } else if (isEquivalentChar(matchedProximityCharId)) {
mMatching = true; mMatching = true;
++mEquivalentCharCount; ++mEquivalentCharCount;
mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
} else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) { } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
mProximityMatching = true; mProximityMatching = true;
++mProximityCount; ++mProximityCount;
mDistances[mOutputIndex] = mDistances[mOutputIndex] =
mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex);
if (DEBUG_CORRECTION if (DEBUG_CORRECTION
&& (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
@ -662,7 +663,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
const int excessivePos = correction->getExcessivePos(); const int excessivePos = correction->getExcessivePos();
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo; const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState;
const int skippedCount = correction->mSkippedCount; const int skippedCount = correction->mSkippedCount;
const int transposedCount = correction->mTransposedCount / 2; const int transposedCount = correction->mTransposedCount / 2;
const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
@ -685,7 +686,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
const bool skipped = skippedCount > 0; const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputLength));
// TODO: Calculate edit distance for transposed and excessive // TODO: Calculate edit distance for transposed and excessive
int ed = 0; int ed = 0;
@ -737,7 +738,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(matchWeight, &finalFreq); multiplyIntCapped(matchWeight, &finalFreq);
} }
if (proximityInfo->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
} }
@ -763,7 +764,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// Demotion for a word with excessive character // Demotion for a word with excessive character
if (excessiveCount > 0) { if (excessiveCount > 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) { if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) {
AKLOGI("Double excessive demotion"); AKLOGI("Double excessive demotion");
} }
@ -774,8 +775,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
} }
const bool performTouchPositionCorrection = const bool performTouchPositionCorrection =
CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled() CALIBRATE_SCORE_BY_TOUCH_COORDINATES
&& skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; && proximityInfoState->touchPositionCorrectionEnabled()
&& skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
// Score calibration by touch coordinates is being done only for pure-fat finger typing error // Score calibration by touch coordinates is being done only for pure-fat finger typing error
// cases. // cases.
int additionalProximityCount = 0; int additionalProximityCount = 0;
@ -1145,5 +1147,4 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be
const float weight = 1.0 - (float) distance / afterLength; const float weight = 1.0 - (float) distance / afterLength;
return (score / maxScore) * weight; return (score / maxScore) * weight;
} }
} // namespace latinime } // namespace latinime

View File

@ -19,9 +19,10 @@
#include <assert.h> #include <assert.h>
#include <stdint.h> #include <stdint.h>
#include "correction_state.h"
#include "correction_state.h"
#include "defines.h" #include "defines.h"
#include "proximity_info_state.h"
namespace latinime { namespace latinime {
@ -178,6 +179,21 @@ class Correction {
static const int FULL_WORD_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2;
}; };
// proximity info state
void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
const int inputLength, const int *xCoordinates, const int *yCoordinates) {
mProximityInfoState.initInputParams(
proximityInfo, inputCodes, inputLength, xCoordinates, yCoordinates);
}
// Returns the primary input word as reported by the ProximityInfoState member.
const unsigned short* getPrimaryInputWord() const {
    const unsigned short *const primaryWord = mProximityInfoState.getPrimaryInputWord();
    return primaryWord;
}
// Returns the primary character at the given input index, as reported by the
// ProximityInfoState member.
unsigned short getPrimaryCharAt(const int index) const {
    const unsigned short primaryChar = mProximityInfoState.getPrimaryCharAt(index);
    return primaryChar;
}
private: private:
inline void incrementInputIndex(); inline void incrementInputIndex();
inline void incrementOutputIndex(); inline void incrementOutputIndex();
@ -240,7 +256,7 @@ class Correction {
bool mExceeding; bool mExceeding;
bool mTransposing; bool mTransposing;
bool mSkipping; bool mSkipping;
ProximityInfoState mProximityInfoState;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_CORRECTION_H #endif // LATINIME_CORRECTION_H

View File

@ -73,9 +73,6 @@ ProximityInfo::ProximityInfo(const std::string localeStr, const int maxProximity
copyOrFillZero(mSweetSpotRadii, sweetSpotRadii, KEY_COUNT * sizeof(mSweetSpotRadii[0])); copyOrFillZero(mSweetSpotRadii, sweetSpotRadii, KEY_COUNT * sizeof(mSweetSpotRadii[0]));
initializeCodeToKeyIndex(); initializeCodeToKeyIndex();
mProximityInfoState = new ProximityInfoState(this, MAX_PROXIMITY_CHARS_SIZE,
HAS_TOUCH_POSITION_CORRECTION_DATA, MOST_COMMON_KEY_WIDTH_SQUARE, mLocaleStr,
KEY_COUNT, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, GRID_HEIGHT);
} }
// Build the reversed look up table from the char code to the index in mKeyXCoordinates, // Build the reversed look up table from the char code to the index in mKeyXCoordinates,
@ -92,7 +89,6 @@ void ProximityInfo::initializeCodeToKeyIndex() {
ProximityInfo::~ProximityInfo() { ProximityInfo::~ProximityInfo() {
delete[] mProximityCharsArray; delete[] mProximityCharsArray;
delete mProximityInfoState;
} }
inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const { inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const {
@ -203,12 +199,6 @@ void ProximityInfo::calculateNearbyKeyCodes(
} }
} }
// TODO: remove
void ProximityInfo::initInputParams(const int32_t *inputCodes, const int inputLength,
const int *xCoordinates, const int *yCoordinates) {
mProximityInfoState->initInputParams(inputCodes, inputLength, xCoordinates, yCoordinates);
}
int ProximityInfo::getKeyIndex(const int c) const { int ProximityInfo::getKeyIndex(const int c) const {
if (KEY_COUNT == 0) { if (KEY_COUNT == 0) {
// We do not have the coordinate data // We do not have the coordinate data
@ -220,47 +210,4 @@ int ProximityInfo::getKeyIndex(const int c) const {
} }
return mCodeToKeyIndex[baseLowerC]; return mCodeToKeyIndex[baseLowerC];
} }
// TODO: remove
inline const int* ProximityInfo::getProximityCharsAt(const int index) const {
return mProximityInfoState->getProximityCharsAt(index);
}
// TODO: remove
unsigned short ProximityInfo::getPrimaryCharAt(const int index) const {
return mProximityInfoState->getPrimaryCharAt(index);
}
// TODO: remove
bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
return mProximityInfoState->existsCharInProximityAt(index, c);
}
// TODO: remove
bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
return mProximityInfoState->existsAdjacentProximityChars(index);
}
// TODO: remove
ProximityType ProximityInfo::getMatchedProximityId(const int index,
const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
return mProximityInfoState->getMatchedProximityId(
index, c, checkProximityChars, proximityIndex);
}
// TODO: remove
int ProximityInfo::getNormalizedSquaredDistance(
const int inputIndex, const int proximityIndex) const {
return mProximityInfoState->getNormalizedSquaredDistance(inputIndex, proximityIndex);
}
// TODO: remove
const unsigned short* ProximityInfo::getPrimaryInputWord() const {
return mProximityInfoState->getPrimaryInputWord();
}
// TODO: remove
bool ProximityInfo::touchPositionCorrectionEnabled() const {
return mProximityInfoState->touchPositionCorrectionEnabled();
}
} // namespace latinime } // namespace latinime

View File

@ -25,11 +25,9 @@
namespace latinime { namespace latinime {
class Correction; class Correction;
class ProximityInfoState;
class ProximityInfo { class ProximityInfo {
public: public:
ProximityInfo(const std::string localeStr, const int maxProximityCharsSize, ProximityInfo(const std::string localeStr, const int maxProximityCharsSize,
const int keyboardWidth, const int keyboardHeight, const int gridWidth, const int keyboardWidth, const int keyboardHeight, const int gridWidth,
const int gridHeight, const int mostCommonkeyWidth, const int gridHeight, const int mostCommonkeyWidth,
@ -68,21 +66,37 @@ class ProximityInfo {
void calculateNearbyKeyCodes( void calculateNearbyKeyCodes(
const int x, const int y, const int32_t primaryKey, int *inputCodes) const; const int x, const int y, const int32_t primaryKey, int *inputCodes) const;
//////////////////////////////////// bool hasTouchPositionCorrectionData() const {
// Access to proximity info state // return HAS_TOUCH_POSITION_CORRECTION_DATA;
// TODO: remove // }
////////////////////////////////////
void initInputParams(const int32_t *inputCodes, const int inputLength, int getMostCommonKeyWidthSquare() const {
const int *xCoordinates, const int *yCoordinates); return MOST_COMMON_KEY_WIDTH_SQUARE;
const int* getProximityCharsAt(const int index) const; }
unsigned short getPrimaryCharAt(const int index) const;
bool existsCharInProximityAt(const int index, const int c) const; std::string getLocaleStr() const {
bool existsAdjacentProximityChars(const int index) const; return mLocaleStr;
ProximityType getMatchedProximityId(const int index, const unsigned short c, }
const bool checkProximityChars, int *proximityIndex = 0) const;
const unsigned short* getPrimaryInputWord() const; int getKeyCount() const {
bool touchPositionCorrectionEnabled() const; return KEY_COUNT;
//////////////////////////////////// }
int getCellHeight() const {
return CELL_HEIGHT;
}
int getCellWidth() const {
return CELL_WIDTH;
}
int getGridWidth() const {
return GRID_WIDTH;
}
int getGridHeight() const {
return GRID_HEIGHT;
}
private: private:
// The max number of the keys in one keyboard layout // The max number of the keys in one keyboard layout
@ -121,7 +135,6 @@ class ProximityInfo {
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCodeToKeyIndex[MAX_CHAR_CODE + 1]; int mCodeToKeyIndex[MAX_CHAR_CODE + 1];
// TODO: move to correction.h // TODO: move to correction.h
ProximityInfoState *mProximityInfoState;
}; };
} // namespace latinime } // namespace latinime

View File

@ -27,25 +27,41 @@
#include "proximity_info_state.h" #include "proximity_info_state.h"
namespace latinime { namespace latinime {
void ProximityInfoState::initInputParams(const int32_t* inputCodes, const int inputLength, void ProximityInfoState::initInputParams(
const ProximityInfo* proximityInfo, const int32_t* inputCodes, const int inputLength,
const int* xCoordinates, const int* yCoordinates) { const int* xCoordinates, const int* yCoordinates) {
// Keep the ProximityInfo pointer and copy the values its getters expose into this state.
mProximityInfo = proximityInfo;
mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData();
mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare();
mLocaleStr = proximityInfo->getLocaleStr();
mKeyCount = proximityInfo->getKeyCount();
mCellHeight = proximityInfo->getCellHeight();
mCellWidth = proximityInfo->getCellWidth();
// Each grid dimension must come from its same-named getter: height from getGridHeight(),
// width from getGridWidth(). Do not cross these.
mGridHeight = proximityInfo->getGridHeight();
mGridWidth = proximityInfo->getGridWidth();
const int normalizedSquaredDistancesLength =
MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL;
for (int i = 0; i < normalizedSquaredDistancesLength; ++i) {
mNormalizedSquaredDistances[i] = NOT_A_DISTANCE;
}
memset(mInputCodes, 0, memset(mInputCodes, 0,
MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE * sizeof(mInputCodes[0])); MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE_INTERNAL * sizeof(mInputCodes[0]));
for (int i = 0; i < inputLength; ++i) { for (int i = 0; i < inputLength; ++i) {
const int32_t primaryKey = inputCodes[i]; const int32_t primaryKey = inputCodes[i];
const int x = xCoordinates[i]; const int x = xCoordinates[i];
const int y = yCoordinates[i]; const int y = yCoordinates[i];
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE]; int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities); mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities);
} }
if (DEBUG_PROXIMITY_CHARS) { if (DEBUG_PROXIMITY_CHARS) {
for (int i = 0; i < inputLength; ++i) { for (int i = 0; i < inputLength; ++i) {
AKLOGI("---"); AKLOGI("---");
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE; ++j) { for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) {
int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j]; int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j]; int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
icc += 0; icc += 0;
icfjc += 0; icfjc += 0;
AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc); AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc);
@ -54,8 +70,8 @@ void ProximityInfoState::initInputParams(const int32_t* inputCodes, const int in
} }
mInputXCoordinates = xCoordinates; mInputXCoordinates = xCoordinates;
mInputYCoordinates = yCoordinates; mInputYCoordinates = yCoordinates;
mTouchPositionCorrectionEnabled = HAS_TOUCH_POSITION_CORRECTION_DATA && xCoordinates mTouchPositionCorrectionEnabled =
&& yCoordinates; mHasTouchPositionCorrectionData && xCoordinates && yCoordinates;
mInputLength = inputLength; mInputLength = inputLength;
for (int i = 0; i < inputLength; ++i) { for (int i = 0; i < inputLength; ++i) {
mPrimaryInputWord[i] = getPrimaryCharAt(i); mPrimaryInputWord[i] = getPrimaryCharAt(i);
@ -74,17 +90,17 @@ void ProximityInfoState::initInputParams(const int32_t* inputCodes, const int in
a += 0; a += 0;
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
} }
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityChars[j] > 0; ++j) { for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
const int currentChar = proximityChars[j]; const int currentChar = proximityChars[j];
const float squaredDistance = const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance( hasInputCoordinates() ? calculateNormalizedSquaredDistance(
mProximityInfo->getKeyIndex(currentChar), i) : mProximityInfo->getKeyIndex(currentChar), i) :
NOT_A_DISTANCE_FLOAT; NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) { if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
(int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
} else { } else {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
(j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO : (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO :
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
} }
@ -94,4 +110,30 @@ void ProximityInfoState::initInputParams(const int32_t* inputCodes, const int in
} }
} }
} }
// Returns the squared distance between the touch point recorded for inputIndex and the sweet
// spot center of the key at keyIndex, divided by the square of that key's sweet spot radius.
// Returns NOT_A_DISTANCE_FLOAT when keyIndex is NOT_AN_INDEX, when the key has no sweet spot
// data, or when the x coordinate recorded for inputIndex is NOT_A_COORDINATE.
float ProximityInfoState::calculateNormalizedSquaredDistance(
        const int keyIndex, const int inputIndex) const {
    // Evaluated left to right with short-circuiting, so hasSweetSpotData() is never asked
    // about NOT_AN_INDEX.
    const bool distanceUnavailable = keyIndex == NOT_AN_INDEX
            || !mProximityInfo->hasSweetSpotData(keyIndex)
            || NOT_A_COORDINATE == mInputXCoordinates[inputIndex];
    if (distanceUnavailable) {
        return NOT_A_DISTANCE_FLOAT;
    }
    const float distanceSq = calculateSquaredDistanceFromSweetSpotCenter(keyIndex, inputIndex);
    const float radiusSq = square(mProximityInfo->getSweetSpotRadiiAt(keyIndex));
    return distanceSq / radiusSq;
}
// Returns the squared Euclidean distance between the touch point recorded for inputIndex and
// the sweet spot center of the key at keyIndex.
float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(
        const int keyIndex, const int inputIndex) const {
    // Horizontal component: integer touch coordinate widened to float before subtracting.
    const float touchX = static_cast<float>(mInputXCoordinates[inputIndex]);
    const float centerX = mProximityInfo->getSweetSpotCenterXAt(keyIndex);
    // Vertical component, handled the same way.
    const float touchY = static_cast<float>(mInputYCoordinates[inputIndex]);
    const float centerY = mProximityInfo->getSweetSpotCenterYAt(keyIndex);
    return square(touchX - centerX) + square(touchY - centerY);
}
} // namespace latinime } // namespace latinime

View File

@ -22,6 +22,7 @@
#include <string> #include <string>
#include "additional_proximity_chars.h" #include "additional_proximity_chars.h"
#include "char_utils.h"
#include "defines.h" #include "defines.h"
namespace latinime { namespace latinime {
@ -33,8 +34,6 @@ class ProximityInfoState {
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex // The upper limit of the char code in mCodeToKeyIndex
static const int MAX_CHAR_CODE = 127; static const int MAX_CHAR_CODE = 127;
static const float NOT_A_DISTANCE_FLOAT = -1.0f; static const float NOT_A_DISTANCE_FLOAT = -1.0f;
@ -43,39 +42,15 @@ class ProximityInfoState {
///////////////////////////////////////// /////////////////////////////////////////
// Defined in proximity_info_state.cpp // // Defined in proximity_info_state.cpp //
///////////////////////////////////////// /////////////////////////////////////////
void initInputParams(const int32_t* inputCodes, const int inputLength, void initInputParams(
const int* xCoordinates, const int* yCoordinates); const ProximityInfo* proximityInfo, const int32_t* inputCodes, const int inputLength,
const int* xCoordinates, const int* yCoordinates);
///////////////////////////////////////// /////////////////////////////////////////
// Defined here // // Defined here //
///////////////////////////////////////// /////////////////////////////////////////
// TODO: Move the constructor to initInputParams
ProximityInfoState(ProximityInfo* proximityInfo, const int maxProximityCharsSize,
const bool hasTouchPositionCorrectionData, const int mostCommonKeyWidthSquare,
const std::string localeStr, const int keyCount, const int cellHeight,
const int cellWidth, const int gridHeight, const int gridWidth)
: mProximityInfo(proximityInfo),
MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize),
HAS_TOUCH_POSITION_CORRECTION_DATA(hasTouchPositionCorrectionData),
MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidthSquare),
LOCALE_STR(localeStr),
KEY_COUNT(keyCount),
CELL_HEIGHT(cellHeight),
CELL_WIDTH(cellWidth),
GRID_HEIGHT(gridHeight),
GRID_WIDTH(gridWidth),
mInputXCoordinates(0),
mInputYCoordinates(0),
mTouchPositionCorrectionEnabled(false) {
const int normalizedSquaredDistancesLength =
MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL;
for (int i = 0; i < normalizedSquaredDistancesLength; ++i) {
mNormalizedSquaredDistances[i] = NOT_A_DISTANCE;
}
}
inline const int* getProximityCharsAt(const int index) const { inline const int* getProximityCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE); return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
} }
inline unsigned short getPrimaryCharAt(const int index) const { inline unsigned short getPrimaryCharAt(const int index) const {
@ -85,7 +60,7 @@ class ProximityInfoState {
inline bool existsCharInProximityAt(const int index, const int c) const { inline bool existsCharInProximityAt(const int index, const int c) const {
const int *chars = getProximityCharsAt(index); const int *chars = getProximityCharsAt(index);
int i = 0; int i = 0;
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE) { while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
if (chars[i++] == c) { if (chars[i++] == c) {
return true; return true;
} }
@ -120,7 +95,7 @@ class ProximityInfoState {
// in their list. The non-accented version of the character should be considered // in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version. // "close", but not the other keys close to the non-accented version.
inline ProximityType getMatchedProximityId(const int index, inline ProximityType getMatchedProximityId(const int index,
const unsigned short c, const bool checkProximityChars, int *proximityIndex) const { const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
const int *currentChars = getProximityCharsAt(index); const int *currentChars = getProximityCharsAt(index);
const int firstChar = currentChars[0]; const int firstChar = currentChars[0];
const unsigned short baseLowerC = toBaseLowerCase(c); const unsigned short baseLowerC = toBaseLowerCase(c);
@ -141,7 +116,7 @@ class ProximityInfoState {
// Not an exact nor an accent-alike match: search the list of close keys // Not an exact nor an accent-alike match: search the list of close keys
int j = 1; int j = 1;
while (j < MAX_PROXIMITY_CHARS_SIZE while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
if (matched) { if (matched) {
@ -152,10 +127,10 @@ class ProximityInfoState {
} }
++j; ++j;
} }
if (j < MAX_PROXIMITY_CHARS_SIZE if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
++j; ++j;
while (j < MAX_PROXIMITY_CHARS_SIZE while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
if (matched) { if (matched) {
@ -174,7 +149,8 @@ class ProximityInfoState {
inline int getNormalizedSquaredDistance( inline int getNormalizedSquaredDistance(
const int inputIndex, const int proximityIndex) const { const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex]; return mNormalizedSquaredDistances[
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
} }
inline const unsigned short* getPrimaryInputWord() const { inline const unsigned short* getPrimaryInputWord() const {
@ -186,38 +162,23 @@ class ProximityInfoState {
} }
private: private:
inline float square(const float x) const { return x * x; } /////////////////////////////////////////
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
float calculateNormalizedSquaredDistance( float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const { const int keyIndex, const int inputIndex) const;
if (keyIndex == NOT_AN_INDEX) {
return NOT_A_DISTANCE_FLOAT; /////////////////////////////////////////
} // Defined here //
if (!mProximityInfo->hasSweetSpotData(keyIndex)) { /////////////////////////////////////////
return NOT_A_DISTANCE_FLOAT; inline float square(const float x) const { return x * x; }
}
if (NOT_A_COORDINATE == mInputXCoordinates[inputIndex]) {
return NOT_A_DISTANCE_FLOAT;
}
const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(
keyIndex, inputIndex);
const float squaredRadius = square(mProximityInfo->getSweetSpotRadiiAt(keyIndex));
return squaredDistance / squaredRadius;
}
bool hasInputCoordinates() const { bool hasInputCoordinates() const {
return mInputXCoordinates && mInputYCoordinates; return mInputXCoordinates && mInputYCoordinates;
} }
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const {
const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex);
const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex);
const float inputX = (float)mInputXCoordinates[inputIndex];
const float inputY = (float)mInputYCoordinates[inputIndex];
return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY);
}
bool sameAsTyped(const unsigned short *word, int length) const { bool sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) { if (length != mInputLength) {
return false; return false;
@ -227,23 +188,22 @@ class ProximityInfoState {
if ((unsigned int) *inputCodes != (unsigned int) *word) { if ((unsigned int) *inputCodes != (unsigned int) *word) {
return false; return false;
} }
inputCodes += MAX_PROXIMITY_CHARS_SIZE; inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
word++; word++;
} }
return true; return true;
} }
// TODO: const // const
ProximityInfo *mProximityInfo; const ProximityInfo *mProximityInfo;
const int MAX_PROXIMITY_CHARS_SIZE; bool mHasTouchPositionCorrectionData;
const bool HAS_TOUCH_POSITION_CORRECTION_DATA; int mMostCommonKeyWidthSquare;
const int MOST_COMMON_KEY_WIDTH_SQUARE; std::string mLocaleStr;
const std::string LOCALE_STR; int mKeyCount;
const int KEY_COUNT; int mCellHeight;
const int CELL_HEIGHT; int mCellWidth;
const int CELL_WIDTH; int mGridHeight;
const int GRID_HEIGHT; int mGridWidth;
const int GRID_WIDTH;
const int *mInputXCoordinates; const int *mInputXCoordinates;
const int *mInputYCoordinates; const int *mInputYCoordinates;

View File

@ -103,7 +103,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
const bool useFullEditDistance, const int *codesSrc, const bool useFullEditDistance, const int *codesSrc,
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool, WordsPriorityQueuePool *queuePool,
const digraph_t* const digraphs, const unsigned int digraphsSize) { const digraph_t* const digraphs, const unsigned int digraphsSize) const {
const int startIndex = codesDest - codesBuffer; const int startIndex = codesDest - codesBuffer;
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
@ -173,7 +173,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies) { const bool useFullEditDistance, unsigned short *outWords, int *frequencies) const {
queuePool->clearAll(); queuePool->clearAll();
Correction* masterCorrection = correction; Correction* masterCorrection = correction;
@ -205,17 +205,17 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
PROF_START(20); PROF_START(20);
if (DEBUG_DICT) { if (DEBUG_DICT) {
float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( float ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); correction->getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0; ns += 0;
AKLOGI("Max normalized score = %f", ns); AKLOGI("Max normalized score = %f", ns);
} }
const int suggestedWordsCount = const int suggestedWordsCount =
queuePool->getMasterQueue()->outputSuggestions( queuePool->getMasterQueue()->outputSuggestions(
proximityInfo->getPrimaryInputWord(), codesSize, frequencies, outWords); correction->getPrimaryInputWord(), codesSize, frequencies, outWords);
if (DEBUG_DICT) { if (DEBUG_DICT) {
float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( float ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); correction->getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0; ns += 0;
AKLOGI("Returning %d words", suggestedWordsCount); AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words /// Print the returned words
@ -235,7 +235,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) { const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool) const {
PROF_OPEN; PROF_OPEN;
PROF_START(0); PROF_START(0);
@ -259,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
WordsPriorityQueue* masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
if (masterQueue->size() > 0) { if (masterQueue->size() > 0) {
float nsForMaster = masterQueue->getHighestNormalizedScore( float nsForMaster = masterQueue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); correction->getPrimaryInputWord(), inputLength, 0, 0, 0);
hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD); hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD);
} }
PROF_END(4); PROF_END(4);
@ -288,11 +289,11 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const unsigned short* word = sw->mWord; const unsigned short* word = sw->mWord;
const int wordLength = sw->mWordLength; const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore( float ns = Correction::RankingAlgorithm::calcNormalizedScore(
proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); correction->getPrimaryInputWord(), i, word, wordLength, score);
ns += 0; ns += 0;
AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
(ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD)); (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(correction->getPrimaryInputWord(), i);
DUMP_WORD(word, wordLength); DUMP_WORD(word, wordLength);
} }
} }
@ -300,12 +301,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
} }
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) { const int *yCoordinates, const int *codes, const int inputLength,
Correction *correction) const {
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("initSuggest"); AKLOGI("initSuggest");
DUMP_WORD_INT(codes, inputLength); DUMP_WORD_INT(codes, inputLength);
} }
proximityInfo->initInputParams(codes, inputLength, xCoordinates, yCoordinates); correction->initInputParams(proximityInfo, codes, inputLength, xCoordinates, yCoordinates);
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth); correction->initCorrection(proximityInfo, inputLength, maxDepth);
} }
@ -317,7 +319,7 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, const int inputLength, const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool *queuePool) { Correction *correction, WordsPriorityQueuePool *queuePool) const {
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction, getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction,
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
@ -326,7 +328,7 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const {
uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount(); uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("Traverse count %d", totalTraverseCount); AKLOGI("Traverse count %d", totalTraverseCount);
@ -374,7 +376,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
inline void UnigramDictionary::onTerminal(const int probability, inline void UnigramDictionary::onTerminal(const int probability,
const TerminalAttributes& terminalAttributes, Correction *correction, const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) { const int currentWordIndex) const {
const int inputIndex = correction->getInputIndex(); const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
@ -430,7 +432,7 @@ int UnigramDictionary::getSubStringSuggestion(
const bool hasAutoCorrectionCandidate, const int currentWordIndex, const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength, const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) { int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT; return FLAG_MULTIPLE_SUGGEST_ABORT;
} }
@ -479,11 +481,12 @@ int UnigramDictionary::getSubStringSuggestion(
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputLength, correction); inputLength, correction);
unsigned short word[MAX_WORD_LENGTH_INTERNAL];
int freq = getMostFrequentWordLike( int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, proximityInfo, mWord); inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) { if (freq > 0) {
nextWordLength = inputWordLength; nextWordLength = inputWordLength;
tempOutputWord = mWord; tempOutputWord = word;
} else if (!hasAutoCorrectionCandidate) { } else if (!hasAutoCorrectionCandidate) {
if (inputWordStartPos > 0) { if (inputWordStartPos > 0) {
const int offset = inputWordStartPos; const int offset = inputWordStartPos;
@ -510,7 +513,7 @@ int UnigramDictionary::getSubStringSuggestion(
} }
int score = 0; int score = 0;
const float ns = queue->getHighestNormalizedScore( const float ns = queue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputWordLength, correction->getPrimaryInputWord(), inputWordLength,
&tempOutputWord, &score, &nextWordLength); &tempOutputWord, &score, &nextWordLength);
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
@ -577,7 +580,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
Correction *correction, WordsPriorityQueuePool* queuePool, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int* wordLengthArray, const int outputWordLength, int *freqArray, int* wordLengthArray,
unsigned short* outputWord) { unsigned short* outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index // Return if the last word index
return; return;
@ -656,7 +659,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate) { const bool hasAutoCorrectionCandidate) const {
if (inputLength >= MAX_WORD_LENGTH) return; if (inputLength >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("--- Suggest multiple words"); AKLOGI("--- Suggest multiple words");
@ -678,11 +681,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface. // interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) { const int inputLength, Correction *correction, unsigned short *word) const {
uint16_t inWord[inputLength]; uint16_t inWord[inputLength];
for (int i = 0; i < inputLength; ++i) { for (int i = 0; i < inputLength; ++i) {
inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i); inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
} }
return getMostFrequentWordLikeInner(inWord, inputLength, word); return getMostFrequentWordLikeInner(inWord, inputLength, word);
} }
@ -751,21 +754,24 @@ static inline void onTerminalWordLike(const int freq, int32_t* newWord, const in
// Will find the highest frequency of the words like the one passed as an argument, // Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents. // that is, everything that only differs by case/accents.
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWord, int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWord,
const int length, short unsigned int* outWord) { const int length, short unsigned int* outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0; int depth = 0;
int maxFreq = -1; int maxFreq = -1;
const uint8_t* const root = DICT_ROOT; const uint8_t* const root = DICT_ROOT;
int stackChildCount[MAX_WORD_LENGTH_INTERNAL];
int stackInputIndex[MAX_WORD_LENGTH_INTERNAL];
int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
int startPos = 0; int startPos = 0;
mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos); stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
mStackInputIndex[0] = 0; stackInputIndex[0] = 0;
mStackSiblingPos[0] = startPos; stackSiblingPos[0] = startPos;
while (depth >= 0) { while (depth >= 0) {
const int charGroupCount = mStackChildCount[depth]; const int charGroupCount = stackChildCount[depth];
int pos = mStackSiblingPos[depth]; int pos = stackSiblingPos[depth];
for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) { for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) {
int inputIndex = mStackInputIndex[depth]; int inputIndex = stackInputIndex[depth];
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
// Test whether all chars in this group match with the word we are searching for. If so, // Test whether all chars in this group match with the word we are searching for. If so,
// we want to traverse its children (or if the length match, evaluate its frequency). // we want to traverse its children (or if the length match, evaluate its frequency).
@ -785,15 +791,15 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
// anyway, so don't traverse unless inputIndex < length. // anyway, so don't traverse unless inputIndex < length.
if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) { if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) {
// Save position for this depth, to get back to this once children are done // Save position for this depth, to get back to this once children are done
mStackChildCount[depth] = charGroupIndex; stackChildCount[depth] = charGroupIndex;
mStackSiblingPos[depth] = siblingPos; stackSiblingPos[depth] = siblingPos;
// Prepare stack values for next depth // Prepare stack values for next depth
++depth; ++depth;
int childrenPos = childrenNodePos; int childrenPos = childrenNodePos;
mStackChildCount[depth] = stackChildCount[depth] =
BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos); BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos);
mStackSiblingPos[depth] = childrenPos; stackSiblingPos[depth] = childrenPos;
mStackInputIndex[depth] = inputIndex; stackInputIndex[depth] = inputIndex;
pos = childrenPos; pos = childrenPos;
// Go to the next depth level. // Go to the next depth level.
++depth; ++depth;
@ -848,7 +854,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
int *newCount, int *newChildrenPosition, int *nextSiblingPosition, int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
WordsPriorityQueuePool *queuePool, const int currentWordIndex) { WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
if (DEBUG_DICT) { if (DEBUG_DICT) {
correction->checkState(); correction->checkState();
} }

View File

@ -81,7 +81,7 @@ class UnigramDictionary {
Correction *correction, const int *xcoordinates, const int *ycoordinates, Correction *correction, const int *xcoordinates, const int *ycoordinates,
const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
int *frequencies); int *frequencies) const;
virtual ~UnigramDictionary(); virtual ~UnigramDictionary();
private: private:
@ -89,7 +89,7 @@ class UnigramDictionary {
const int *ycoordinates, const int *codes, const int inputLength, const int *ycoordinates, const int *codes, const int inputLength,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction, const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool); WordsPriorityQueuePool *queuePool) const;
int getDigraphReplacement(const int *codes, const int i, const int codesSize, int getDigraphReplacement(const int *codes, const int i, const int codesSize,
const digraph_t* const digraphs, const unsigned int digraphsSize) const; const digraph_t* const digraphs, const unsigned int digraphsSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
@ -99,37 +99,36 @@ class UnigramDictionary {
const bool useFullEditDistance, const int* codesSrc, const int codesRemain, const bool useFullEditDistance, const int* codesSrc, const int codesRemain,
const int currentDepth, int* codesDest, Correction *correction, const int currentDepth, int* codesDest, Correction *correction,
WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs, WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
const unsigned int digraphsSize); const unsigned int digraphsSize) const;
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); const int *ycoordinates, const int *codes, const int codesSize,
Correction *correction) const;
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength, const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool); Correction *correction, WordsPriorityQueuePool* queuePool) const;
void getSuggestionCandidates( void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, const bool useFullEditDistance, const int inputLength,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
const int maxErrors, const int currentWordIndex); const int maxErrors, const int currentWordIndex) const;
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate); const bool hasAutoCorrectionCandidate) const;
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex); const int currentWordIndex) const;
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap, bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, Correction *correction, int *newCount, const uint8_t *bigramFilter, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex); const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word); Correction *correction, unsigned short *word) const;
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
short unsigned int *outWord); short unsigned int *outWord) const;
int getSubStringSuggestion( int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction, const int *codes, const bool useFullEditDistance, Correction *correction,
@ -137,14 +136,14 @@ class UnigramDictionary {
const bool hasAutoCorrectionCandidate, const int currentWordIndex, const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength, const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short* outputWord, int *outputWordLength); int *wordLengthArray, unsigned short* outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool, Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int* wordLengthArray, const int outputWordLength, int *freqArray, int* wordLengthArray,
unsigned short* outputWord); unsigned short* outputWord) const;
const uint8_t* const DICT_ROOT; const uint8_t* const DICT_ROOT;
const int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;
@ -158,12 +157,6 @@ class UnigramDictionary {
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
// Still bundled members
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
}; };
} // namespace latinime } // namespace latinime