Get bigrams iterator via dict structure policy.

Bug: 14425059
Change-Id: Ie4c6afbb6a3b707b5aa8e9e4f941c9da6e6bcdbf
This commit is contained in:
Keisuke Kuroyanagi 2014-07-08 16:23:31 +09:00
parent c495c5bdcb
commit b00973952f
10 changed files with 56 additions and 45 deletions

View File

@ -24,6 +24,11 @@ namespace latinime {
class BinaryDictionaryBigramsIterator {
public:
// Empty iterator: no bigrams structure policy is attached (nullptr), both positions are
// NOT_A_DICT_POS, the probability is NOT_A_PROBABILITY and mHasNext starts out false.
// NOTE(review): presumably hasNext() reports mHasNext, so iterating an empty iterator ends
// immediately without touching the null policy -- confirm against hasNext()/next().
BinaryDictionaryBigramsIterator()
: mBigramsStructurePolicy(nullptr), mPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(false) {}
BinaryDictionaryBigramsIterator(
const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
: mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),

View File

@ -53,9 +53,8 @@ int MultiBigramMap::getBigramProbability(
void MultiBigramMap::BigramMap::init(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
BinaryDictionaryBigramsIterator bigramsIt =
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
@ -89,9 +88,8 @@ int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
BinaryDictionaryBigramsIterator bigramsIt =
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) {

View File

@ -20,6 +20,7 @@
#include <memory>
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {
@ -61,12 +62,10 @@ class DictionaryStructureWithBufferPolicy {
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
// Returns whether the update was successful or not.

View File

@ -92,11 +92,9 @@ class PrevWordsInfo {
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
return getBigramsIteratorForWordWithTryingLowerCaseSearch(
dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
mIsBeginningOfSentence[0]);
return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
bigramListPos);
}
// n is 1-indexed.
@ -156,12 +154,12 @@ class PrevWordsInfo {
codePoints, codePointCount, true /* forceLowerCaseSearch */);
}
static int getBigramListPositionForWordWithTryingLowerCaseSearch(
static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
const bool isBeginningOfSentence) {
if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
return NOT_A_DICT_POS;
return BinaryDictionaryBigramsIterator();
}
int codePoints[MAX_WORD_LENGTH];
int codePointCount = wordCodePointCount;
@ -170,30 +168,30 @@ class PrevWordsInfo {
codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
codePointCount, MAX_WORD_LENGTH);
if (codePointCount <= 0) {
return NOT_A_DICT_POS;
return BinaryDictionaryBigramsIterator();
}
}
int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
codePointCount, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
// dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) {
// If no bigrams for this exact word, search again in lower case.
pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
codePointCount, true /* forceLowerCaseSearch */);
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy,
codePoints, codePointCount, false /* forceLowerCaseSearch */);
// getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary
// or has no bigrams.
if (bigramsIt.hasNext()) {
return bigramsIt;
}
return pos;
// If no bigrams for this exact word, search again in lower case.
return getBigramsIteratorForWord(dictStructurePolicy, codePoints,
codePointCount, true /* forceLowerCaseSearch */);
}
static int getBigramListPositionForWord(
static BinaryDictionaryBigramsIterator getBigramsIteratorForWord(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *wordCodePoints, const int wordCodePointCount,
const bool forceLowerCaseSearch) {
if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS;
if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator();
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS;
return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos);
if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator();
return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos);
}
void clear() {

View File

@ -154,6 +154,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
// Creates the bigrams iterator for the PtNode at ptNodePos. The bigram list position is
// resolved through getBigramsPositionOfPtNode() and handed to the iterator together with
// this policy's mBigramPolicy.
BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
        const int ptNodePos) const {
    return BinaryDictionaryBigramsIterator(
            &mBigramPolicy, getBigramsPositionOfPtNode(ptNodePos));
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;

View File

@ -94,16 +94,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutPolicy;
}
@ -167,6 +163,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
};
} // namespace v402
} // namespace backward

View File

@ -304,6 +304,12 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
// Creates the bigrams iterator for the PtNode at ptNodePos. The bigram list position is
// resolved through getBigramsPositionOfPtNode() and handed to the iterator together with
// this policy's mBigramListPolicy.
BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode(
        const int ptNodePos) const {
    return BinaryDictionaryBigramsIterator(
            &mBigramListPolicy, getBigramsPositionOfPtNode(ptNodePos));
}
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
@ -322,7 +328,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS;
PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
// Skip PtNodes that don't start with a Unicode code point because they represent non-word information.
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
@ -352,7 +358,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
int bigramWord1CodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
while (bigramsIt.hasNext()) {
// Fetch the next bigram information and forward the iterator.
bigramsIt.next();

View File

@ -67,16 +67,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
}
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutListPolicy;
}
@ -158,6 +154,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
};

View File

@ -144,6 +144,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
// Creates the bigrams iterator for the PtNode at ptNodePos. The bigram list position is
// resolved through getBigramsPositionOfPtNode() and handed to the iterator together with
// this policy's mBigramPolicy.
BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
        const int ptNodePos) const {
    return BinaryDictionaryBigramsIterator(
            &mBigramPolicy, getBigramsPositionOfPtNode(ptNodePos));
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;

View File

@ -76,16 +76,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutPolicy;
}
@ -146,6 +142,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H