Introduce MAX_PREV_WORD_COUNT_FOR_N_GRAM.

Bug: 14425059
Change-Id: I587f90df026a14359d2ee452040bbfaf02c1ae51
This commit is contained in:
Keisuke Kuroyanagi 2014-05-15 18:45:49 +09:00
parent b87fffb8be
commit b94ec1437b
4 changed files with 36 additions and 19 deletions

View File

@ -336,6 +336,9 @@ static inline void prof_out(void) {
#define MAX_POINTER_COUNT 1
#define MAX_POINTER_COUNT_G 2
// Number of previous words tracked as n-gram context; it is used as the length of the
// per-previous-word arrays. With a value of N, (N + 1)-gram is supported, so the current
// value of 1 corresponds to bigrams.
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
// Expands to a deleted default constructor declaration for TypeName. The expansion has no
// trailing semicolon, so the use site must supply one.
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName() = delete

View File

@ -36,17 +36,17 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWordsInfo->getPrevWordCodePoints()) {
mPrevWordPtNodePos = NOT_A_DICT_POS;
mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
false /* forceLowerCaseSearch */);
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
true /* forceLowerCaseSearch */);
}

View File

@ -50,12 +50,14 @@ class DicTraverseSession {
}
// Constructs a session with no proximity info, dictionary or suggest options attached
// (all three pointers start as nullptr), an input size of 0, a max pointer count of 1 and
// a multi-word cost multiplier of 1.0f. usesLargeCache is forwarded to mDicNodesCache;
// env and localeStr are not referenced anywhere in this constructor.
// NOTE(review): two member-initializer lists appear back to back below. The first one
// initializes the scalar mPrevWordPtNodePos, the second one omits it and the body fills
// the mPrevWordsPtNodePos array instead -- presumably these are the before/after lines
// of the same change; confirm against the real header.
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
: mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(nullptr),
mDictionary(nullptr), mSuggestOptions(nullptr), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
: mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
// NOTE: mProximityInfoStates is an array of instances.
// No need to initialize it explicitly here.
// A raw int array cannot be filled from the initializer list in one expression, so every
// previous-word slot is set to NOT_A_DICT_POS here.
for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) {
mPrevWordsPtNodePos[i] = NOT_A_DICT_POS;
}
}
// Non virtual inline destructor -- never inherit this class
@ -77,7 +79,7 @@ class DicTraverseSession {
//--------------------
// Returns the stored ProximityInfo pointer without transferring ownership.
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
// Returns the stored SuggestOptions pointer without transferring ownership.
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
// Returns the PtNode position recorded for the previous word; the surrounding code sets
// it to NOT_A_DICT_POS when no previous word was found.
// NOTE(review): this accessor appears twice -- once reading the scalar
// mPrevWordPtNodePos and once reading slot 0 of mPrevWordsPtNodePos. Presumably these are
// the before/after lines of the same change; only one can exist in the real header.
int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; }
// Returns a mutable pointer to the session-owned DicNodesCache member.
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
// Returns a mutable pointer to the session-owned MultiBigramMap member.
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@ -164,7 +166,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
// PtNode position(s) of the previous word(s), or NOT_A_DICT_POS when none was found.
// NOTE(review): both a scalar and an array declaration are shown; presumably the scalar
// is the pre-change form and the array, with one slot per previous word, replaces it.
// Only slot 0 of the array is read or written by the code visible here.
int mPrevWordPtNodePos;
int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
// Pointers to collaborators; the constructor initializes all three to nullptr.
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;

View File

@ -27,27 +27,39 @@ namespace latinime {
// Describes the word(s) preceding the one currently being composed: for each previous
// word, a pointer to its code points, the code point count, and a beginning-of-sentence
// flag. Storage has MAX_PREV_WORD_COUNT_FOR_N_GRAM slots; the constructors and accessors
// shown here only use slot 0.
// NOTE(review): constructors, accessor bodies and fields each appear in two forms below
// (scalar members set through an initializer list, and array members set after clear()).
// Presumably these are the before/after lines of the same change; confirm against the
// real header before relying on either form.
class PrevWordsInfo {
public:
// No prev word information.
PrevWordsInfo()
: mPrevWordCodePoints(nullptr), mPrevWordCodePointCount(0) {}
PrevWordsInfo() {
clear();
}
// Records one previous word. The prevWordCodePoints pointer is stored as given, not
// copied -- presumably the caller must keep that buffer alive while this object is used.
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence)
: mPrevWordCodePoints(prevWordCodePoints),
mPrevWordCodePointCount(prevWordCodePointCount) {}
const bool isBeginningOfSentence) {
// Reset every slot first so that slots other than 0 hold the empty state.
clear();
mPrevWordCodePoints[0] = prevWordCodePoints;
mPrevWordCodePointCount[0] = prevWordCodePointCount;
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
// Returns the code points of the previous word, or nullptr when there is none.
const int *getPrevWordCodePoints() const {
return mPrevWordCodePoints;
return mPrevWordCodePoints[0];
}
// Returns the number of code points in the previous word (0 when there is none).
int getPrevWordCodePointCount() const {
return mPrevWordCodePointCount;
return mPrevWordCodePointCount[0];
}
private:
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
const int *const mPrevWordCodePoints;
const int mPrevWordCodePointCount;
// Puts every slot into the "no previous word" state: null code points, zero count, and
// not at the beginning of a sentence.
void clear() {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
mPrevWordCodePoints[i] = nullptr;
mPrevWordCodePointCount[i] = 0;
mIsBeginningOfSentence[i] = false;
}
}
// Parallel arrays indexed by previous-word slot.
const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_PREV_WORDS_INFO_H