Use extendable buffer for reading node info.

Bug: 6669677
Change-Id: I78ba80100e3a38f2b49e43db1e6aef4e56ed062c
This commit is contained in:
Keisuke Kuroyanagi 2013-08-27 18:06:42 +09:00
parent 2f0c1253e2
commit 6c4d09e9e1
4 changed files with 45 additions and 27 deletions

View File

@ -19,34 +19,44 @@
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
namespace latinime { namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) { const int maxCodePointCount, int *const outCodePoints) {
int pos = nodePos; const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize;
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); const uint8_t *const dictBuf =
usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot;
int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos;
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos = const int parentPos =
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) { if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
} else { } else {
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
} }
if (isTerminal()) { if (isTerminal()) {
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
} else { } else {
mProbability = NOT_A_PROBABILITY; mProbability = NOT_A_PROBABILITY;
} }
if (hasChildren()) { if (hasChildren()) {
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
mDictRoot, mFlags, &pos); dictBuf, mFlags, &pos);
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
mChildrenPos += mOriginalDictSize;
}
} else { } else {
mChildrenPos = NOT_A_DICT_POS; mChildrenPos = NOT_A_DICT_POS;
} }
if (usesAdditionalBuffer) {
pos += mOriginalDictSize;
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
mShortcutPos = pos; mShortcutPos = pos;
mShortcutsPolicy->skipAllShortcuts(&pos); mShortcutsPolicy->skipAllShortcuts(&pos);

View File

@ -27,6 +27,7 @@ namespace latinime {
class DictionaryBigramsStructurePolicy; class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy; class DictionaryShortcutsStructurePolicy;
class ExtendableBuffer;
/* /*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@ -34,12 +35,14 @@ class DictionaryShortcutsStructurePolicy;
*/ */
class DynamicPatriciaTrieNodeReader { class DynamicPatriciaTrieNodeReader {
public: public:
DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize,
const ExtendableBuffer *const extendableBuffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize),
mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader {
// TODO: Consolidate mDictRoot. // TODO: Consolidate mDictRoot.
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
const int mOriginalDictSize;
const ExtendableBuffer *const mExtendableBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos; int mNodePos;

View File

@ -33,8 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos(); int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0; int totalChildCount = 0;
@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount]; int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0; int codePointCount = 0;
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read terminal node and get its probability. // First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
@ -124,8 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0; int currentLength = 0;
int pos = getRootPosition(); int pos = getRootPosition();
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
while (currentLength < length) { while (currentLength < length) {
// When foundMatchedNode becomes true, currentLength is increased at least once. // When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false; bool foundMatchedNode = false;
@ -198,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -211,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
@ -224,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;

View File

@ -21,9 +21,9 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
: mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer),
mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {}
~DynamicPatriciaTriePolicy() { ~DynamicPatriciaTriePolicy() {
delete mBuffer; delete mBuffer;
@ -93,8 +95,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
// TODO: Consolidate mDictRoot. // TODO: Consolidate mDictRoot.
// CAVEAT!: Be careful about array out of bound access with mDictRoot // CAVEAT!: Be careful about array out of bound access with mDictRoot
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
const BigramListPolicy mBigramListPolicy; const int mOriginalDictSize;
const ShortcutListPolicy mShortcutListPolicy; const DynamicBigramListPolicy mBigramListPolicy;
const DynamicShortcutListPolicy mShortcutListPolicy;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H