mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Make "node"s clear by renaming to PtNode or DicNode.
Bug: 10233675 Change-Id: I248b927c724253f593e3806e82b4e1fed2c025fb
This commit is contained in:
parent
53702633e2
commit
c1fce1a720
@ -99,7 +99,7 @@ class DicNode {
|
||||
virtual ~DicNode() {}
|
||||
|
||||
// Init for copy
|
||||
void initByCopy(const DicNode *dicNode) {
|
||||
void initByCopy(const DicNode *const dicNode) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
||||
@ -107,25 +107,25 @@ class DicNode {
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
// Init for root with prevWordNodePos which is used for bigram
|
||||
void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
|
||||
// Init for root with prevWordPtNodePos which is used for bigram
|
||||
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = false;
|
||||
mDicNodeProperties.init(
|
||||
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||
0 /* terminalDepth */);
|
||||
mDicNodeState.init(prevWordNodePos);
|
||||
mDicNodeState.init(prevWordPtNodePos);
|
||||
PROF_NODE_RESET(mProfiler);
|
||||
}
|
||||
|
||||
// Init for root with previous word
|
||||
void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
|
||||
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(
|
||||
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||
0 /* terminalDepth */);
|
||||
@ -138,7 +138,7 @@ class DicNode {
|
||||
mDicNodeState.mDicNodeStatePrevWord.init(
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
||||
dicNode->mDicNodeProperties.getProbability(),
|
||||
dicNode->mDicNodeProperties.getPos(),
|
||||
dicNode->mDicNodeProperties.getPtNodePos(),
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
|
||||
dicNode->getOutputWordBuf(),
|
||||
@ -148,26 +148,27 @@ class DicNode {
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
void initAsPassingChild(DicNode *parentNode) {
|
||||
void initAsPassingChild(DicNode *parentDicNode) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
|
||||
const int c = parentNode->getNodeTypedCodePoint();
|
||||
mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
|
||||
mDicNodeState.init(&parentNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
||||
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
|
||||
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
|
||||
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
|
||||
mDicNodeState.init(&parentDicNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
||||
const int probability, const bool isTerminal, const bool hasChildren,
|
||||
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
||||
const int *const mergedNodeCodePoints) {
|
||||
void initAsChild(const DicNode *const dicNode, const int ptNodePos,
|
||||
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
|
||||
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||
mIsUsed = true;
|
||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||
mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
|
||||
isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
|
||||
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
|
||||
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
|
||||
newLeavingDepth);
|
||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
@ -234,7 +235,7 @@ class DicNode {
|
||||
}
|
||||
|
||||
bool isFirstWord() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
|
||||
}
|
||||
|
||||
bool isCompletion(const int inputSize) const {
|
||||
@ -246,29 +247,30 @@ class DicNode {
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPos() const {
|
||||
return mDicNodeProperties.getPos();
|
||||
int getPtNodePos() const {
|
||||
return mDicNodeProperties.getPtNodePos();
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPrevWordPos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||
int getPrevWordTerminalPtNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
int getChildrenPos() const {
|
||||
return mDicNodeProperties.getChildrenPos();
|
||||
int getChildrenPtNodeArrayPos() const {
|
||||
return mDicNodeProperties.getChildrenPtNodeArrayPos();
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mDicNodeProperties.getProbability();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isTerminalWordNode() const {
|
||||
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
|
||||
const int currentNodeDepth = getNodeCodePointCount();
|
||||
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
|
||||
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
|
||||
AK_FORCE_INLINE bool isTerminalDicNode() const {
|
||||
const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
|
||||
const int currentDicNodeDepth = getNodeCodePointCount();
|
||||
const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
|
||||
return isTerminalPtNode && currentDicNodeDepth > 0
|
||||
&& currentDicNodeDepth == terminalDicNodeDepth;
|
||||
}
|
||||
|
||||
bool shouldBeFilteredBySafetyNetForBigram() const {
|
||||
@ -374,8 +376,8 @@ class DicNode {
|
||||
}
|
||||
|
||||
// Used to commit input partially
|
||||
int getPrevWordNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||
int getPrevWordPtNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||
@ -410,7 +412,7 @@ class DicNode {
|
||||
// TODO: Remove once touch path is merged into ProximityInfoState
|
||||
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
|
||||
int getNodeCodePoint() const {
|
||||
const int codePoint = mDicNodeProperties.getNodeCodePoint();
|
||||
const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
|
||||
const DigraphUtils::DigraphCodePointIndex digraphIndex =
|
||||
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
|
||||
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
|
||||
@ -423,8 +425,8 @@ class DicNode {
|
||||
// Utils for cost calculation //
|
||||
////////////////////////////////
|
||||
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
|
||||
return mDicNodeProperties.getNodeCodePoint()
|
||||
== dicNode->mDicNodeProperties.getNodeCodePoint();
|
||||
return mDicNodeProperties.getDicNodeCodePoint()
|
||||
== dicNode->mDicNodeProperties.getDicNodeCodePoint();
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/multi_bigram_map.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
@ -32,19 +31,20 @@ namespace latinime {
|
||||
|
||||
/* static */ void DicNodeUtils::initAsRoot(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const int prevWordNodePos, DicNode *const newRootNode) {
|
||||
newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
|
||||
const int prevWordPtNodePos, DicNode *const newRootDicNode) {
|
||||
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
|
||||
}
|
||||
|
||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
||||
newRootNode->initAsRootWithPreviousWord(
|
||||
prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
|
||||
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
|
||||
newRootDicNode->initAsRootWithPreviousWord(
|
||||
prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
|
||||
}
|
||||
|
||||
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
||||
destNode->initByCopy(srcNode);
|
||||
/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
|
||||
DicNode *const destDicNode) {
|
||||
destDicNode->initByCopy(srcDicNode);
|
||||
}
|
||||
|
||||
///////////////////////////////////
|
||||
@ -52,14 +52,14 @@ namespace latinime {
|
||||
///////////////////////////////////
|
||||
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
DicNodeVector *childDicNodes) {
|
||||
DicNodeVector *const childDicNodes) {
|
||||
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
||||
return;
|
||||
}
|
||||
if (!dicNode->isLeavingNode()) {
|
||||
childDicNodes->pushPassingChild(dicNode);
|
||||
} else {
|
||||
dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
|
||||
dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,11 +71,11 @@ namespace latinime {
|
||||
*/
|
||||
/* static */ float DicNodeUtils::getBigramNodeImprobability(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const DicNode *const node, MultiBigramMap *multiBigramMap) {
|
||||
if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
|
||||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||
if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
|
||||
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||
}
|
||||
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
|
||||
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
|
||||
multiBigramMap);
|
||||
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
||||
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
|
||||
@ -85,19 +85,19 @@ namespace latinime {
|
||||
|
||||
/* static */ int DicNodeUtils::getBigramNodeProbability(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const DicNode *const node, MultiBigramMap *multiBigramMap) {
|
||||
const int unigramProbability = node->getProbability();
|
||||
const int wordPos = node->getPos();
|
||||
const int prevWordPos = node->getPrevWordPos();
|
||||
if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
|
||||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||
const int unigramProbability = dicNode->getProbability();
|
||||
const int ptNodePos = dicNode->getPtNodePos();
|
||||
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
|
||||
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
|
||||
// Note: Normally ptNodePos comes from the dictionary and should never equal
|
||||
// NOT_A_DICT_POS.
|
||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
||||
NOT_A_PROBABILITY);
|
||||
}
|
||||
if (multiBigramMap) {
|
||||
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
|
||||
wordPos, unigramProbability);
|
||||
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
|
||||
prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
|
||||
}
|
||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
||||
NOT_A_PROBABILITY);
|
||||
@ -109,7 +109,7 @@ namespace latinime {
|
||||
|
||||
// TODO: Move to char_utils?
|
||||
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
|
||||
const int *const src1, const int16_t length1, int *dest) {
|
||||
const int *const src1, const int16_t length1, int *const dest) {
|
||||
int actualLength0 = 0;
|
||||
for (int i = 0; i < length0; ++i) {
|
||||
if (src0[i] == 0) {
|
||||
|
@ -31,20 +31,20 @@ class MultiBigramMap;
|
||||
class DicNodeUtils {
|
||||
public:
|
||||
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
|
||||
const int16_t length1, int *dest);
|
||||
const int16_t length1, int *const dest);
|
||||
static void initAsRoot(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const int prevWordNodePos, DicNode *newRootNode);
|
||||
const int prevWordPtNodePos, DicNode *const newRootDicNode);
|
||||
static void initAsRootWithPreviousWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
DicNode *prevWordLastNode, DicNode *newRootNode);
|
||||
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
||||
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
|
||||
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
|
||||
static void getAllChildDicNodes(DicNode *dicNode,
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
DicNodeVector *childDicNodes);
|
||||
static float getBigramNodeImprobability(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const DicNode *const node, MultiBigramMap *const multiBigramMap);
|
||||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
|
||||
@ -53,7 +53,7 @@ class DicNodeUtils {
|
||||
|
||||
static int getBigramNodeProbability(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const DicNode *const node, MultiBigramMap *multiBigramMap);
|
||||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_UTILS_H
|
||||
|
@ -62,14 +62,14 @@ class DicNodeVector {
|
||||
mDicNodes.back().initAsPassingChild(dicNode);
|
||||
}
|
||||
|
||||
void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
||||
const int probability, const bool isTerminal, const bool hasChildren,
|
||||
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
||||
const int *const mergedNodeCodePoints) {
|
||||
void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
|
||||
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
|
||||
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||
ASSERT(!mLock);
|
||||
mDicNodes.push_back(mEmptyNode);
|
||||
mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
|
||||
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
||||
mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
|
||||
isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
}
|
||||
|
||||
|
@ -24,15 +24,14 @@
|
||||
namespace latinime {
|
||||
|
||||
/**
|
||||
* Node for traversing the lexicon trie.
|
||||
* PtNode information related to the DicNode from the lexicon trie.
|
||||
*/
|
||||
// TODO: Introduce a dictionary node class which has attribute members required to understand the
|
||||
// dictionary structure.
|
||||
class DicNodeProperties {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeProperties()
|
||||
: mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
|
||||
mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
||||
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
|
||||
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
|
||||
mDepth(0), mLeavingDepth(0) {}
|
||||
|
||||
virtual ~DicNodeProperties() {}
|
||||
|
||||
@ -40,57 +39,57 @@ class DicNodeProperties {
|
||||
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
||||
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t depth, const uint16_t leavingDepth) {
|
||||
mPos = pos;
|
||||
mChildrenPos = childrenPos;
|
||||
mNodeCodePoint = nodeCodePoint;
|
||||
mPtNodePos = pos;
|
||||
mChildrenPtNodeArrayPos = childrenPos;
|
||||
mDicNodeCodePoint = nodeCodePoint;
|
||||
mProbability = probability;
|
||||
mIsTerminal = isTerminal;
|
||||
mHasChildren = hasChildren;
|
||||
mHasChildrenPtNodes = hasChildren;
|
||||
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
||||
mDepth = depth;
|
||||
mLeavingDepth = leavingDepth;
|
||||
}
|
||||
|
||||
// Init for copy
|
||||
void init(const DicNodeProperties *const nodeProp) {
|
||||
mPos = nodeProp->mPos;
|
||||
mChildrenPos = nodeProp->mChildrenPos;
|
||||
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
||||
mProbability = nodeProp->mProbability;
|
||||
mIsTerminal = nodeProp->mIsTerminal;
|
||||
mHasChildren = nodeProp->mHasChildren;
|
||||
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = nodeProp->mDepth;
|
||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||
void init(const DicNodeProperties *const dicNodeProp) {
|
||||
mPtNodePos = dicNodeProp->mPtNodePos;
|
||||
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
||||
mProbability = dicNodeProp->mProbability;
|
||||
mIsTerminal = dicNodeProp->mIsTerminal;
|
||||
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
|
||||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth;
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
}
|
||||
|
||||
// Init as passing child
|
||||
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
||||
mPos = nodeProp->mPos;
|
||||
mChildrenPos = nodeProp->mChildrenPos;
|
||||
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||
mProbability = nodeProp->mProbability;
|
||||
mIsTerminal = nodeProp->mIsTerminal;
|
||||
mHasChildren = nodeProp->mHasChildren;
|
||||
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||
void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
|
||||
mPtNodePos = dicNodeProp->mPtNodePos;
|
||||
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||
mProbability = dicNodeProp->mProbability;
|
||||
mIsTerminal = dicNodeProp->mIsTerminal;
|
||||
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
|
||||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
}
|
||||
|
||||
int getPos() const {
|
||||
return mPos;
|
||||
int getPtNodePos() const {
|
||||
return mPtNodePos;
|
||||
}
|
||||
|
||||
int getChildrenPos() const {
|
||||
return mChildrenPos;
|
||||
int getChildrenPtNodeArrayPos() const {
|
||||
return mChildrenPtNodeArrayPos;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
int getNodeCodePoint() const {
|
||||
return mNodeCodePoint;
|
||||
int getDicNodeCodePoint() const {
|
||||
return mDicNodeCodePoint;
|
||||
}
|
||||
|
||||
uint16_t getDepth() const {
|
||||
@ -107,7 +106,7 @@ class DicNodeProperties {
|
||||
}
|
||||
|
||||
bool hasChildren() const {
|
||||
return mHasChildren || mDepth != mLeavingDepth;
|
||||
return mHasChildrenPtNodes || mDepth != mLeavingDepth;
|
||||
}
|
||||
|
||||
bool isBlacklistedOrNotAWord() const {
|
||||
@ -118,12 +117,12 @@ class DicNodeProperties {
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
int mPos;
|
||||
int mChildrenPos;
|
||||
int mPtNodePos;
|
||||
int mChildrenPtNodeArrayPos;
|
||||
int mProbability;
|
||||
int mNodeCodePoint;
|
||||
int mDicNodeCodePoint;
|
||||
bool mIsTerminal;
|
||||
bool mHasChildren;
|
||||
bool mHasChildrenPtNodes;
|
||||
bool mIsBlacklistedOrNotAWord;
|
||||
uint16_t mDepth;
|
||||
uint16_t mLeavingDepth;
|
||||
|
@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
||||
mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
|
||||
mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
|
||||
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordStart = 0;
|
||||
mPrevWordProbability = -1;
|
||||
mPrevWordNodePos = NOT_A_DICT_POS;
|
||||
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordStart = 0;
|
||||
mPrevWordProbability = -1;
|
||||
mPrevWordNodePos = prevWordNodePos;
|
||||
mPrevWordPtNodePos = prevWordNodePos;
|
||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||
}
|
||||
|
||||
@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
|
||||
mPrevWordCount = prevWord->mPrevWordCount;
|
||||
mPrevWordStart = prevWord->mPrevWordStart;
|
||||
mPrevWordProbability = prevWord->mPrevWordProbability;
|
||||
mPrevWordNodePos = prevWord->mPrevWordNodePos;
|
||||
mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
|
||||
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
|
||||
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
||||
}
|
||||
@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
|
||||
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
|
||||
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
|
||||
mPrevWordProbability = prevWordProbability;
|
||||
mPrevWordNodePos = prevWordNodePos;
|
||||
mPrevWordPtNodePos = prevWordNodePos;
|
||||
int twoWordsLen =
|
||||
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
||||
if (twoWordsLen >= MAX_WORD_LENGTH) {
|
||||
@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
|
||||
return mPrevWordStart;
|
||||
}
|
||||
|
||||
int getPrevWordNodePos() const {
|
||||
return mPrevWordNodePos;
|
||||
int getPrevWordPtNodePos() const {
|
||||
return mPrevWordPtNodePos;
|
||||
}
|
||||
|
||||
int getPrevWordCodePointAt(const int id) const {
|
||||
@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
|
||||
int16_t mPrevWordLength;
|
||||
int16_t mPrevWordStart;
|
||||
int16_t mPrevWordProbability;
|
||||
int mPrevWordNodePos;
|
||||
int mPrevWordPtNodePos;
|
||||
int mSecondWordFirstInputIndex;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
||||
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||
const bool forceLowerCaseSearch) const {
|
||||
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
||||
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
||||
int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
|
||||
forceLowerCaseSearch);
|
||||
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
||||
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
||||
@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
|
||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
|
||||
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
|
||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||
|
||||
|
@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
|
||||
}
|
||||
|
||||
int Dictionary::getProbability(const int *word, int length) const {
|
||||
int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
|
||||
int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (NOT_A_DICT_POS == pos) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy {
|
||||
|
||||
virtual int getRootPosition() const = 0;
|
||||
|
||||
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const = 0;
|
||||
|
||||
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const = 0;
|
||||
|
||||
virtual int getTerminalNodePositionOfWord(const int *const inWord,
|
||||
virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const = 0;
|
||||
|
||||
virtual int getProbability(const int unigramProbability,
|
||||
|
@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
||||
->getMultiWordCostMultiplier();
|
||||
mSuggestOptions = suggestOptions;
|
||||
if (!prevWord) {
|
||||
mPrevWordPos = NOT_A_DICT_POS;
|
||||
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
// TODO: merge following similar calls to getTerminalPtNodePositionOfWord into one case-insensitive call.
|
||||
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
|
||||
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
||||
if (mPrevWordPos == NOT_A_DICT_POS) {
|
||||
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
|
||||
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
||||
}
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ class DicTraverseSession {
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
|
||||
: mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
|
||||
: mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
|
||||
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
|
||||
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
|
||||
mMultiWordCostMultiplier(1.0f) {
|
||||
@ -86,11 +86,9 @@ class DicTraverseSession {
|
||||
//--------------------
|
||||
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
|
||||
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
|
||||
int getPrevWordPos() const { return mPrevWordPos; }
|
||||
int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
|
||||
// TODO: REMOVE
|
||||
void setPrevWordPos(int pos) { mPrevWordPos = pos; }
|
||||
// TODO: Use proper parameter when changed
|
||||
int getDicRootPos() const { return 0; }
|
||||
void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
|
||||
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
|
||||
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
|
||||
const ProximityInfoState *getProximityInfoState(int id) const {
|
||||
@ -119,26 +117,13 @@ class DicTraverseSession {
|
||||
return true;
|
||||
}
|
||||
|
||||
void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
|
||||
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
|
||||
if (!mProximityInfoStates[i].isUsed()) {
|
||||
continue;
|
||||
}
|
||||
const int pointerId = node->getInputIndex(i);
|
||||
const std::vector<int> *const searchKeyVector =
|
||||
mProximityInfoStates[i].getSearchKeyVector(pointerId);
|
||||
outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
|
||||
searchKeyVector->end());
|
||||
}
|
||||
}
|
||||
|
||||
ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
|
||||
ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
|
||||
ProximityType proximityType = UNRELATED_CHAR;
|
||||
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
|
||||
if (!mProximityInfoStates[i].isUsed()) {
|
||||
continue;
|
||||
}
|
||||
const int pointerId = node->getInputIndex(i);
|
||||
const int pointerId = dicNode->getInputIndex(i);
|
||||
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
|
||||
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
|
||||
// TODO: Make this more generic
|
||||
@ -192,7 +177,7 @@ class DicTraverseSession {
|
||||
const int *const inputYs, const int *const times, const int *const pointerIds,
|
||||
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
|
||||
|
||||
int mPrevWordPos;
|
||||
int mPrevWordPtNodePos;
|
||||
const ProximityInfo *mProximityInfo;
|
||||
const Dictionary *mDictionary;
|
||||
const SuggestOptions *mSuggestOptions;
|
||||
|
@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
|
||||
// Continue suggestion after partial commit.
|
||||
DicNode *topDicNode =
|
||||
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
|
||||
traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
|
||||
traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
|
||||
traverseSession->getDicTraverseCache()->continueSearch();
|
||||
traverseSession->setPartiallyCommited();
|
||||
}
|
||||
@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
|
||||
// Create a new dic node here
|
||||
DicNode rootNode;
|
||||
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
|
||||
traverseSession->getPrevWordPos(), &rootNode);
|
||||
traverseSession->getPrevWordPtNodePos(), &rootNode);
|
||||
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
|
||||
}
|
||||
}
|
||||
@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||
BinaryDictionaryShortcutIterator shortcutIt(
|
||||
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
||||
traverseSession->getDictionaryStructurePolicy()
|
||||
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
|
||||
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
|
||||
// Shortcut is not supported for multiple words suggestions.
|
||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||
@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
|
||||
}
|
||||
break;
|
||||
case UNRELATED_CHAR:
|
||||
// Just drop this node and do nothing.
|
||||
// Just drop this dicNode and do nothing.
|
||||
break;
|
||||
default:
|
||||
// Just drop this node and do nothing.
|
||||
// Just drop this dicNode and do nothing.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Push the node for look-ahead correction
|
||||
// Push the dicNode for look-ahead correction
|
||||
if (allowsErrorCorrections && canDoLookAheadCorrection) {
|
||||
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
|
||||
}
|
||||
@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
|
||||
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
|
||||
return;
|
||||
}
|
||||
if (!dicNode->isTerminalWordNode()) {
|
||||
if (!dicNode->isTerminalDicNode()) {
|
||||
return;
|
||||
}
|
||||
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
|
||||
@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
|
||||
|
||||
/**
|
||||
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
|
||||
* (by the space omission error correction) search path if input dicNode is on a terminal node.
|
||||
* (by the space omission error correction) search path if input dicNode is on a terminal.
|
||||
*/
|
||||
void Suggest::processExpandedDicNode(
|
||||
DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
||||
@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
|
||||
processExpandedDicNode(traverseSession, childDicNode);
|
||||
}
|
||||
|
||||
// Process the node codepoint as a digraph. This means that composite glyphs like the German
|
||||
// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
|
||||
// u-umlaut are expanded to the transliteration "ue". Note that this happens in parallel with
|
||||
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
|
||||
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
|
||||
@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
|
||||
/**
|
||||
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
|
||||
* matches for all possible next letters. Note that just skipping the current letter without any
|
||||
* other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
|
||||
* other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
|
||||
* the possible *next* letters after the omission to better limit search to plausible omissions.
|
||||
* Note that apostrophes are handled as omissions.
|
||||
*/
|
||||
@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
|
||||
}
|
||||
|
||||
/**
|
||||
* Weight child node by aligning it to the key
|
||||
* Weight child dicNode by aligning it to the key
|
||||
*/
|
||||
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
||||
const int inputSize = traverseSession->getInputSize();
|
||||
|
@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
|
||||
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||
|
||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const {
|
||||
if (!dicNode->hasChildren()) {
|
||||
return;
|
||||
}
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
|
||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
while (!readingHelper.isEnd()) {
|
||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
||||
@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||
return codePointCount;
|
||||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
||||
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const {
|
||||
int searchCodePoints[length];
|
||||
for (int i = 0; i < length; ++i) {
|
||||
@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||
AKLOGE("The dictionary is too large to dynamically update.");
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||
AKLOGE("The dictionary is too large to dynamically update.");
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
|
@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
|
||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const;
|
||||
|
||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const;
|
||||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
@ -22,7 +22,7 @@ namespace latinime {
|
||||
|
||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
||||
|
||||
// Visits all PtNodes in post-order depth first manner.
|
||||
@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
||||
mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||
dictBuf, &mReadingState.mPos);
|
||||
mReadingState.mRemainingPtNodeCountInThisArray =
|
||||
PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
||||
&mReadingState.mPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
mReadingState.mPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Count up nodes and node arrays to avoid infinite loop.
|
||||
mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
|
||||
mReadingState.mNodeArrayCount++;
|
||||
if (mReadingState.mNodeCount < 0
|
||||
|| mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||
|| mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain +=
|
||||
mReadingState.mRemainingPtNodeCountInThisArray;
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain++;
|
||||
if (mReadingState.mRemainingPtNodeCountInThisArray < 0
|
||||
|| mReadingState.mTotalPtNodeIndexInThisArrayChain
|
||||
> MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||
|| mReadingState.mPtNodeArrayIndexInThisArrayChain
|
||||
> MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||
// Invalid dictionary.
|
||||
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
||||
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
||||
mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
|
||||
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
|
||||
MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||
mReadingState.mRemainingPtNodeCountInThisArray,
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain,
|
||||
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain,
|
||||
MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||
ASSERT(false);
|
||||
mIsError = true;
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
if (mReadingState.mNodeCount == 0) {
|
||||
if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
|
||||
// Empty node array. Try following forward link.
|
||||
followForwardLink();
|
||||
}
|
||||
|
@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
} else {
|
||||
mIsError = false;
|
||||
mReadingState.mPos = ptNodeArrayPos;
|
||||
mReadingState.mPrevTotalCodePointCount = 0;
|
||||
mReadingState.mTotalNodeCount = 0;
|
||||
mReadingState.mNodeArrayCount = 0;
|
||||
mReadingState.mTotalCodePointCountSinceInitialization = 0;
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingStateStack.clear();
|
||||
nextPtNodeArray();
|
||||
@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
} else {
|
||||
mIsError = false;
|
||||
mReadingState.mPos = ptNodePos;
|
||||
mReadingState.mNodeCount = 1;
|
||||
mReadingState.mPrevTotalCodePointCount = 0;
|
||||
mReadingState.mTotalNodeCount = 1;
|
||||
mReadingState.mNodeArrayCount = 1;
|
||||
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
||||
mReadingState.mTotalCodePointCountSinceInitialization = 0;
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
mReadingStateStack.clear();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
|
||||
// Returns the code point count excluding the last read node's code points.
|
||||
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
||||
return mReadingState.mPrevTotalCodePointCount;
|
||||
return mReadingState.mTotalCodePointCountSinceInitialization;
|
||||
}
|
||||
|
||||
// Returns the code point count including the last read node's code points.
|
||||
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
||||
return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
||||
return mReadingState.mTotalCodePointCountSinceInitialization
|
||||
+ mNodeReader.getCodePointCount();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
||||
@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void readNextSiblingNode() {
|
||||
mReadingState.mNodeCount -= 1;
|
||||
mReadingState.mRemainingPtNodeCountInThisArray -= 1;
|
||||
mReadingState.mPos = mNodeReader.getSiblingNodePos();
|
||||
if (mReadingState.mNodeCount <= 0) {
|
||||
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
|
||||
// All nodes in the current node array have been read.
|
||||
followForwardLink();
|
||||
if (!isEnd()) {
|
||||
@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
// Read the first child node of the current node.
|
||||
AK_FORCE_INLINE void readChildNode() {
|
||||
if (mNodeReader.hasChildren()) {
|
||||
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||
mReadingState.mTotalNodeCount = 0;
|
||||
mReadingState.mNodeArrayCount = 0;
|
||||
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||
mNodeReader.getCodePointCount();
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
||||
mReadingState.mPos = mNodeReader.getChildrenPos();
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
// Read children node array.
|
||||
@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
// Read the parent node of the current node.
|
||||
AK_FORCE_INLINE void readParentNode() {
|
||||
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||
mReadingState.mTotalNodeCount = 1;
|
||||
mReadingState.mNodeArrayCount = 1;
|
||||
mReadingState.mNodeCount = 1;
|
||||
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||
mNodeReader.getCodePointCount();
|
||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
||||
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
||||
mReadingState.mPos = mNodeReader.getParentPos();
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
fetchPtNodeInfo();
|
||||
} else {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
|
||||
return mReadingState.mPosOfLastPtNodeArrayHead;
|
||||
return mReadingState.mPosOfThisPtNodeArrayHead;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
||||
@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper {
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||
|
||||
class ReadingState {
|
||||
// This class encapsulates the reading state of a position in the dictionary. It points at a
|
||||
// specific PtNode in the dictionary.
|
||||
class PtNodeReadingState {
|
||||
public:
|
||||
// Note that copy constructor and assignment operator are used for this class to use
|
||||
// std::vector.
|
||||
ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
||||
mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
|
||||
PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
|
||||
mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
|
||||
mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
||||
mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
|
||||
|
||||
int mPos;
|
||||
// Node count of a node array.
|
||||
int mNodeCount;
|
||||
int mPrevTotalCodePointCount;
|
||||
int mTotalNodeCount;
|
||||
int mNodeArrayCount;
|
||||
// Remaining node count in the current array.
|
||||
int mRemainingPtNodeCountInThisArray;
|
||||
int mTotalCodePointCountSinceInitialization;
|
||||
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
|
||||
int mTotalPtNodeIndexInThisArrayChain;
|
||||
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
|
||||
// PtNode arrays.
|
||||
int mPtNodeArrayIndexInThisArrayChain;
|
||||
int mPosOfLastForwardLinkField;
|
||||
int mPosOfLastPtNodeArrayHead;
|
||||
int mPosOfThisPtNodeArrayHead;
|
||||
};
|
||||
|
||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
static const size_t MAX_READING_STATE_STACK_SIZE;
|
||||
|
||||
// TODO: Introduce error code to track what caused the error.
|
||||
bool mIsError;
|
||||
ReadingState mReadingState;
|
||||
PtNodeReadingState mReadingState;
|
||||
const BufferWithExtendableBuffer *const mBuffer;
|
||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
std::vector<ReadingState> mReadingStateStack;
|
||||
std::vector<PtNodeReadingState> mReadingStateStack;
|
||||
|
||||
void nextPtNodeArray();
|
||||
|
||||
|
@ -25,12 +25,12 @@
|
||||
|
||||
namespace latinime {
|
||||
|
||||
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const {
|
||||
if (!dicNode->hasChildren()) {
|
||||
return;
|
||||
}
|
||||
int nextPos = dicNode->getChildrenPos();
|
||||
int nextPos = dicNode->getChildrenPtNodeArrayPos();
|
||||
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
||||
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
|
||||
nextPos, mDictBufferSize);
|
||||
@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
|
||||
// This retrieves code points and the probability of the word by its terminal position.
|
||||
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
|
||||
// it is possible to check for this with advantageous complexity. For each node, we search
|
||||
// it is possible to check for this with advantageous complexity. For each PtNode array, we search
|
||||
// for PtNodes with children and compare the children position with the position we look for.
|
||||
// When we overshoot the position we look for, it means the word we look for is in the children
|
||||
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
|
||||
// PtNode array with the last PtNode's children position still less than what we are searching for,
|
||||
// we must descend the last PtNode's children (for example, if the word we are searching for starts
|
||||
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
||||
// than the position we look for, and we have to descend the z node).
|
||||
// than the position we look for, and we have to descend the z PtNode).
|
||||
/* Parameters :
|
||||
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
||||
* what is stored as the "bigram position" in each bigram)
|
||||
@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
int pos = getRootPosition();
|
||||
int wordPos = 0;
|
||||
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
|
||||
// only traverse nodes that are actually a part of the terminal we are searching, so each time
|
||||
// we enter this loop we are one depth level further than last time.
|
||||
// The only reason we count nodes is because we want to reduce the probability of infinite
|
||||
// only traverse PtNodes that are actually a part of the terminal we are searching, so each
|
||||
// time we enter this loop we are one depth level further than last time.
|
||||
// The only reason we count PtNodes is because we want to reduce the probability of infinite
|
||||
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
|
||||
// supposed to traverse, it does not hurt to count iterations.
|
||||
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
|
||||
@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
found = true;
|
||||
} else if (1 >= ptNodeCount) {
|
||||
// However if we are on the LAST PtNode of this array, and we have NOT overshot the
|
||||
// position we should descend THIS node. So we trick the lastCandidatePtNodePos
|
||||
// so that we will descend this PtNode, not the previous one.
|
||||
// position we should descend THIS PtNode. So we trick the
|
||||
// lastCandidatePtNodePos so that we will descend this PtNode, not the previous
|
||||
// one.
|
||||
lastCandidatePtNodePos = startPos;
|
||||
found = true;
|
||||
} else {
|
||||
@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
found = false;
|
||||
}
|
||||
} else {
|
||||
// Even if we don't have children here, we could still be on the last PtNode of /
|
||||
// Even if we don't have children here, we could still be on the last PtNode of
|
||||
// this array. If this is the case, we should descend the last PtNode that had
|
||||
// children, and their position is already in lastCandidatePtNodePos.
|
||||
found = (1 >= ptNodeCount);
|
||||
@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This function gets the position of the terminal node of the exact matching word in the
|
||||
// This function gets the position of the terminal PtNode of the exact matching word in the
|
||||
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
||||
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
||||
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const {
|
||||
int pos = getRootPosition();
|
||||
int wordPos = 0;
|
||||
|
@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
|
||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const;
|
||||
|
||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const;
|
||||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
|
||||
return false;
|
||||
}
|
||||
const int point0Index = dicNode->getInputIndex(0);
|
||||
return dicNode->isTerminalWordNode()
|
||||
return dicNode->isTerminalDicNode()
|
||||
&& traverseSession->getProximityInfoState(0)->
|
||||
hasSpaceProximity(point0Index);
|
||||
}
|
||||
@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
|
||||
if (dicNode->isCompletion(inputSize)) {
|
||||
return false;
|
||||
}
|
||||
if (!dicNode->isTerminalWordNode()) {
|
||||
if (!dicNode->isTerminalDicNode()) {
|
||||
return false;
|
||||
}
|
||||
const int16_t pointIndex = dicNode->getInputIndex(0);
|
||||
|
Loading…
Reference in New Issue
Block a user