Merge "Add timestamp as an argument to updating methods."

This commit is contained in:
Keisuke Kuroyanagi 2013-11-28 12:02:22 +00:00 committed by Android (Google) Code Review
commit ebd5541edf
27 changed files with 127 additions and 96 deletions

View File

@ -297,7 +297,7 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
}
dictionary->addUnigramWord(codePoints, wordLength, probability);
dictionary->addUnigramWord(codePoints, wordLength, probability, timeStamp);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -313,7 +313,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
word1Length, probability);
word1Length, probability, timeStamp);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -348,13 +348,15 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
env->DeleteLocalRef(languageModelParam);
// TODO: Support shortcut, timestamp and flags.
// TODO: Support shortcut and flags.
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
jfieldID unigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
jfieldID bigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
jfieldID timeStampFieldId =
env->GetFieldID(languageModelParamClass, "mTimeStamp", "I");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
@ -375,11 +377,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability);
jint timeStamp = env->GetIntField(languageModelParam, timeStampFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, timeStamp);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
bigramProbability);
bigramProbability, timeStamp);
}
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;

View File

@ -93,14 +93,16 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability);
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
const int timestamp) {
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
timestamp);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
const int length1, const int probability, const int timestamp) {
mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1,
probability);
probability, timestamp);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,

View File

@ -72,10 +72,11 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability);
void addUnigramWord(const int *const word, const int length, const int probability,
const int timestamp);
void addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability);
const int length1, const int probability, const int timestamp);
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);

View File

@ -67,11 +67,11 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not.
virtual bool addUnigramWord(const int *const word, const int length,
const int probability) = 0;
const int probability, const int timestamp) = 0;
// Returns whether the update was success or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) = 0;
const int length1, const int probability, const int timestamp) = 0;
// Returns whether the update was success or not.
virtual bool removeBigramWords(const int *const word0, const int length0,

View File

@ -36,7 +36,7 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const int newProbability, bool *const outAddedNewEntry) {
const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}

View File

@ -44,7 +44,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
}
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
bool *const outAddedNewEntry);
const int timestamp, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);

View File

@ -27,6 +27,9 @@ const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
// Historical info is information that is needed to support decaying such as timestamp, level and
// count.
const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;

View File

@ -47,7 +47,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {}
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@ -62,7 +64,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
~HeaderPolicy() {}
@ -123,6 +127,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
AK_FORCE_INLINE bool hasHistricalInfoOfWords() const {
return mHasHistoricalInfoOfWords;
}
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
@ -140,6 +148,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
static const char *const HAS_HISTORICAL_INFO_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
@ -154,6 +163,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mUnigramCount;
const int mBigramCount;
const int mExtendedRegionSize;
const bool mHasHistoricalInfoOfWords;
float readMultipleWordCostMultiplier() const;

View File

@ -49,7 +49,7 @@ class PtNodeWriter {
const int movedPos, const int bigramLinkedNodePos) = 0;
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int probability) = 0;
const int probability, const int timestamp) = 0;
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newChildrenPosition) = 0;
@ -58,10 +58,10 @@ class PtNodeWriter {
int *const ptNodeWritingPos) = 0;
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos) = 0;
const int timestamp, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
bool *const outAddedNewBigram) = 0;
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,

View File

@ -76,14 +76,15 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
bool addUnigramWord(const int *const word, const int length, const int probability) {
bool addUnigramWord(const int *const word, const int length, const int probability,
const int timestamp) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
const int length1, const int probability, const int timestamp) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;

View File

@ -30,12 +30,14 @@ bool DynamicPatriciaTrieGcEventListeners
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !ptNodeParams->isTerminal();
if (ptNodeParams->isTerminal() && mIsDecayingDict) {
if (ptNodeParams->isTerminal() && mNeedsToDecayWhenUpdating) {
// TODO: Avoid decaying probability during GC.
const int newProbability =
ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(),
mHeaderPolicy);
// Update probability.
if (!mPtNodeWriter->updatePtNodeProbability(ptNodeParams, newProbability)) {
if (!mPtNodeWriter->updatePtNodeProbability(ptNodeParams, newProbability,
0 /* timestamp */)) {
return false;
}
if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {

View File

@ -43,10 +43,10 @@ class DynamicPatriciaTrieGcEventListeners {
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
const DictionaryHeaderStructurePolicy *const headerPolicy,
PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer,
const bool isDecayingDict)
const bool needsToDecayWhenUpdating)
: mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@ -80,7 +80,7 @@ class DynamicPatriciaTrieGcEventListeners {
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBuffer;
const bool mIsDecayingDict;
const bool mNeedsToDecayWhenUpdating;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;

View File

@ -102,7 +102,8 @@ bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved(
}
bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
const int timestamp) {
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
@ -127,7 +128,7 @@ bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
}
bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
int probabilityFieldPos = NOT_A_DICT_POS;
if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
ptNodeWritingPos)) {
@ -144,7 +145,7 @@ bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
bool *const outAddedNewBigram) {
const int newNodePos = mBuffer->getTailPosition();
int writingPos = newNodePos;

View File

@ -52,7 +52,7 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
const int movedPos, const int bigramLinkedNodePos);
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newProbability);
const int newProbability, const int timestamp);
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newChildrenPosition);
@ -60,11 +60,11 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const int timestamp, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,

View File

@ -141,7 +141,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) c
}
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability) {
const int probability, const int timestamp) {
if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
@ -154,7 +154,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
&addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
@ -166,7 +166,8 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
}
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
const int *const word1, const int length1, const int probability,
const int timestamp) {
if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@ -187,7 +188,8 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
return false;
}
bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
&addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}

View File

@ -88,10 +88,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
bool addUnigramWord(const int *const word, const int length, const int probability);
bool addUnigramWord(const int *const word, const int length, const int probability,
const int timestamp);
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability);
const int length1, const int probability, const int timestamp);
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);

View File

@ -30,7 +30,7 @@ const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability,
bool *const outAddedNewUnigram) {
const int timestamp, bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@ -52,18 +52,18 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability,
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, timestamp,
wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
}
// All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
return setPtNodeProbability(&ptNodeParams, probability, timestamp, outAddedNewUnigram);
}
if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability,
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, timestamp,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
}
@ -79,17 +79,18 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
*outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos);
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability,
timestamp, &pos);
}
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const int probability, bool *const outAddedNewBigram) {
const int probability, const int timestamp, bool *const outAddedNewBigram) {
const PtNodeParams sourcePtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
const PtNodeParams targetPtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
outAddedNewBigram);
timestamp, outAddedNewBigram);
}
// Remove a bigram relation from word0Pos to word1Pos.
@ -103,23 +104,23 @@ bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, co
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
int *const forwardLinkFieldPos) {
const int timestamp, int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
probability);
probability, timestamp);
}
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
const PtNodeParams *const originalPtNodeParams, const int probability,
const PtNodeParams *const originalPtNodeParams, const int probability, const int timestamp,
bool *const outAddedNewUnigram) {
if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability.
*outAddedNewUnigram = false;
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability);
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
} else {
// Make the node terminal and write the probability.
*outAddedNewUnigram = true;
@ -130,7 +131,7 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
timestamp, &writingPos)) {
return false;
}
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@ -141,19 +142,19 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
}
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const PtNodeParams *const parentPtNodeParams, const int probability,
const PtNodeParams *const parentPtNodeParams, const int probability, const int timestamp,
const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, probability);
codePointCount, probability, timestamp);
}
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
const int probability) {
const int probability, const int timestamp) {
int writingPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) {
@ -161,7 +162,7 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
&writingPos)) {
return false;
}
@ -175,7 +176,7 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary updating was succeeded or not.
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints,
const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy.
@ -201,7 +202,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
timestamp, &writingPos)) {
return false;
}
}
@ -227,7 +228,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
&writingPos)) {
timestamp, &writingPos)) {
return false;
}
}

View File

@ -42,11 +42,11 @@ class DynamicPatriciaTrieUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability,
bool *const outAddedNewUnigram);
const int timestamp, bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
bool *const outAddedNewBigram);
const int timestamp, bool *const outAddedNewBigram);
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
@ -61,21 +61,23 @@ class DynamicPatriciaTrieUpdatingHelper {
PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
const int nodeCodePointCount, const int probability, const int timestamp,
int *const forwardLinkFieldPos);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
bool *const outAddedNewUnigram);
const int timestamp, bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const int probability, const int *const codePoints, const int codePointCount);
const int probability, const int timestamp, const int *const codePoints,
const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability);
const int nodeCodePointCount, const int probability, const int timestamp);
bool reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
const int probabilityOfNewPtNode, const int timestamp,
const int *const newNodeCodePoints, const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isTerminal, const int parentPos, const int codePointCount,

View File

@ -28,7 +28,9 @@ int ProbabilityDictContent::getProbability(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_PROBABILITY;
}
return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
const int probabilityFieldPos =
getEntryPos(terminalId) + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
return getBuffer()->readUint(Ver4DictConstants::PROBABILITY_SIZE, probabilityFieldPos);
}
bool ProbabilityDictContent::setProbability(const int terminalId, const int probability) {

View File

@ -38,11 +38,13 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::NOT_A_TIME_STAMP = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
@ -57,6 +59,7 @@ const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;

View File

@ -38,11 +38,15 @@ class Ver4DictConstants {
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int NOT_A_TERMINAL_ID;
static const int NOT_A_TIME_STAMP;
static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
static const int TIME_STAMP_FIELD_SIZE;
static const int WORD_LEVEL_FIELD_SIZE;
static const int WORD_COUNT_FIELD_SIZE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
@ -54,6 +58,8 @@ class Ver4DictConstants {
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
static const int BIGRAM_PROBABILITY_MASK;
static const int BIGRAM_HAS_NEXT_MASK;
// Used when bigram list has time stamp.
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
static const int SHORTCUT_FLAGS_FIELD_SIZE;

View File

@ -112,7 +112,8 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
const int timestamp) {
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
@ -143,7 +144,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
ptNodeWritingPos)) {
@ -158,10 +159,10 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
bool *const outAddedNewBigram) {
return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram);
targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram);
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(

View File

@ -54,7 +54,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const int movedPos, const int bigramLinkedNodePos);
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newProbability);
const int newProbability, const int timestamp);
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newChildrenPosition);
@ -65,11 +65,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const int timestamp, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,

View File

@ -137,7 +137,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
}
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability) {
const int probability, const int timestamp) {
if (!mBuffers.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
@ -150,7 +150,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
&addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
@ -162,7 +162,8 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
}
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
const int *const word1, const int length1, const int probability,
const int timestamp) {
if (!mBuffers.get()->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@ -183,7 +184,8 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
return false;
}
bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
&addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}

View File

@ -89,10 +89,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutPolicy;
}
bool addUnigramWord(const int *const word, const int length, const int probability);
bool addUnigramWord(const int *const word, const int length, const int probability,
const int timestamp);
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability);
const int length1, const int probability, const int timestamp);
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);

View File

@ -16,8 +16,6 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@ -27,12 +25,4 @@ namespace latinime {
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
}
/* static */ int Ver4PatriciaTrieReadingUtils::getProbability(
const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) {
const int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ Ver4DictConstants::PROBABILITY_SIZE)
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
return probabilityBuffer->readUint(Ver4DictConstants::PROBABILITY_SIZE, pos);
}
} // namespace latinime

View File

@ -30,9 +30,6 @@ class Ver4PatriciaTrieReadingUtils {
static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
int *const pos);
static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer,
const int terminalId);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
};