Merge "Use IntArrayView to add/remove n-gram entry." into lmp-dev

This commit is contained in:
Keisuke Kuroyanagi 2014-08-06 05:01:49 +00:00 committed by Android (Google) Code Review
commit 4efa2c0cdb
11 changed files with 108 additions and 68 deletions

View File

@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
&probabilityEntryToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
}
if (!sourcePtNodeParams->hasBigrams()) {
const int ptNodePos =
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
const PtNodeParams sourcePtNodeParams =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId());
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const int wordId) {
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(

View File

@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
#include "utils/int_array_view.h"
namespace latinime {
namespace backward {
@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
mShortcutPolicy(shortcutPolicy) {}
mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;

View File

@ -309,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
&addedNewBigram)) {
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@ -350,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
if (mUpdatingHelper.removeNgramEntry(
PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
mBigramCount--;
return true;
} else {

View File

@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
unigramProperty, &pos);
}
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
const PtNodeParams sourcePtNodeParams(
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
const PtNodeParams targetPtNodeParams(
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
bigramProperty, outAddedNewBigram);
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
const int wordPos, const BigramProperty *const bigramProperty,
bool *const outAddedNewEntry) {
if (prevWordsPtNodePos.empty()) {
return false;
}
ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
prevWordsPtNodePos[i]).getTerminalId();
}
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
}
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
const PtNodeParams sourcePtNodeParams(
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
const PtNodeParams targetPtNodeParams(
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
const int wordPos) {
if (prevWordsPtNodePos.empty()) {
return false;
}
ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
prevWordsPtNodePos[i]).getTerminalId();
}
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
}
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,

View File

@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "utils/int_array_view.h"
namespace latinime {
@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
const int *const wordCodePoints, const int codePointCount,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
bool addBigramWords(const int word0Pos, const int word1Pos,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
// Add an n-gram entry.
bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
// Remove an n-gram entry.
bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,

View File

@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "utils/int_array_view.h"
namespace latinime {
@ -70,12 +71,10 @@ class PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram) = 0;
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam) = 0;
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;

View File

@ -222,22 +222,19 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
terminalId, &probabilityEntryToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
prevWordIds[0], wordId);
return false;
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId());
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const int wordId) {
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(

View File

@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);

View File

@ -292,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSearch */);
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
@ -319,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
&addedNewBigram)) {
if (addedNewBigram) {
bool addedNewEntry = false;
if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
&addedNewEntry)) {
if (addedNewEntry) {
mBigramCount++;
}
return true;
@ -352,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSerch */);
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
@ -361,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
mBigramCount--;
return true;
} else {

View File

@ -56,6 +56,16 @@ class IntArrayView {
explicit IntArrayView(const std::vector<int> &vector)
: mPtr(vector.data()), mSize(vector.size()) {}
template <int N>
AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
return IntArrayView(array, N);
}
// Returns a view that points one int object. Does not take ownership of the given object.
AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
return IntArrayView(object, 1);
}
AK_FORCE_INLINE int operator[](const size_t index) const {
ASSERT(index < mSize);
return mPtr[index];
@ -89,6 +99,7 @@ class IntArrayView {
};
using WordIdArrayView = IntArrayView;
using PtNodePosArrayView = IntArrayView;
} // namespace latinime
#endif // LATINIME_MEMORY_VIEW_H

View File

@ -43,5 +43,19 @@ TEST(IntArrayViewTest, TestIteration) {
EXPECT_EQ(expectedIndex, intArrayView.size());
}
TEST(IntArrayViewTest, TestConstructFromArray) {
const size_t ARRAY_SIZE = 100;
int intArray[ARRAY_SIZE];
const auto intArrayView = IntArrayView::fromFixedSizeArray(intArray);
EXPECT_EQ(ARRAY_SIZE, intArrayView.size());
}
TEST(IntArrayViewTest, TestConstructFromObject) {
const int object = 10;
const auto intArrayView = IntArrayView::fromObject(&object);
EXPECT_EQ(1, intArrayView.size());
EXPECT_EQ(object, intArrayView[0]);
}
} // namespace
} // namespace latinime