Add/Get n-gram probability entry in languageModelDictContent

Bug: 14425059
Change-Id: I7926c3812f89b9a71fe1873a5bc32f793f91b640
This commit is contained in:
Keisuke Kuroyanagi 2014-08-05 14:51:11 +09:00
parent 203ba408d7
commit 03dc44f543
5 changed files with 44 additions and 10 deletions

View File

@ -32,11 +32,11 @@ bool LanguageModelDictContent::runGC(
ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
const WordIdArrayView prevWordIds, const int wordId) const {
if (!prevWordIds.empty()) {
// TODO: Read n-gram entry.
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return ProbabilityEntry();
}
const TrieMap::Result result = mTrieMap.getRoot(wordId);
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
if (!result.mIsValid) {
// Not found.
return ProbabilityEntry();
@ -46,14 +46,13 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
if (!prevWordIds.empty()) {
// TODO: Add n-gram entry.
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return false;
}
return mTrieMap.putRoot(terminalId, probabilityEntry->encode(mHasHistoricalInfo));
return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
}
bool LanguageModelDictContent::runGCInner(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange,
@ -81,4 +80,16 @@ bool LanguageModelDictContent::runGCInner(
return true;
}
int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
for (const int wordId : prevWordIds) {
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
if (!result.mIsValid) {
return TrieMap::INVALID_INDEX;
}
bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
}
return bitmapEntryIndex;
}
} // namespace latinime

View File

@ -76,6 +76,8 @@ class LanguageModelDictContent {
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
int *const outNgramCount);
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
};
} // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */

View File

@ -169,6 +169,10 @@ class TrieMap {
return mBuffer.isNearSizeLimit();
}
// Returns the bitmap entry index of the root level (ROOT_BITMAP_ENTRY_INDEX), which callers
// can use as the starting index when walking the map level by level.
int getRootBitmapEntryIndex() const {
return ROOT_BITMAP_ENTRY_INDEX;
}
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);

View File

@ -73,6 +73,14 @@ class IntArrayView {
return mPtr;
}
// Returns a pointer to the first element of the view (mPtr). Together with end(), this
// allows the view to be traversed with a range-based for loop.
AK_FORCE_INLINE const int *begin() const {
return mPtr;
}
// Returns the past-the-end pointer of the view, i.e. mPtr advanced by mSize elements.
// It marks the end of iteration and is not meant to be dereferenced.
AK_FORCE_INLINE const int *end() const {
return mPtr + mSize;
}
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);

View File

@ -24,15 +24,24 @@ namespace latinime {
namespace {
TEST(MemoryViewTest, TestAccess) {
static const int DATA_SIZE = 10000;
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
IntArrayView intArrayView(intVector);
EXPECT_EQ(intVector.size(), intArrayView.size());
for (int i = 0; i < DATA_SIZE; ++i) {
for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
EXPECT_EQ(intVector[i], intArrayView[i]);
}
}
// Verifies that an IntArrayView can be traversed with a range-based for loop and that the
// traversal yields every element of the backing vector.
TEST(MemoryViewTest, TestIteration) {
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
IntArrayView intArrayView(intVector);
// The values above are distinct, so a set can track which ones are still unvisited.
std::set<int> intSet(intVector.begin(), intVector.end());
for (const int i : intArrayView) {
// Each visited value must still be pending; erasing it makes a repeated value fail here.
EXPECT_TRUE(intSet.count(i) > 0);
intSet.erase(i);
}
// Nothing left in the set means the iteration did not skip any element.
EXPECT_TRUE(intSet.empty());
}
} // namespace
} // namespace latinime