mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
This is a manual cherrypick of Ib729ceedbc8ef837e50490439817b36039ae2b4e. Bug: 11772864 Change-Id: I5ecbe729dbdd24e194e48b4d68b17af8549c4726
600 lines
25 KiB
Java
600 lines
25 KiB
Java
/*
|
|
* Copyright (C) 2012 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package com.android.inputmethod.latin.makedict;
|
|
|
|
import com.android.inputmethod.annotations.UsedForTesting;
|
|
import com.android.inputmethod.latin.Constants;
|
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
|
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
|
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.IOException;
|
|
import java.io.OutputStream;
|
|
import java.util.ArrayList;
|
|
import java.util.Iterator;
|
|
import java.util.Map;
|
|
import java.util.Stack;
|
|
|
|
public final class BinaryDictIOUtils {
|
|
/** When true, the read/write helpers in this class emit traces through MakedictLog. */
private static final boolean DBG = false;

/**
 * Private constructor: this class only hosts static helpers.
 */
private BinaryDictIOUtils() {
    // This utility class is not publicly instantiable.
}
|
|
|
|
private static final class Position {
|
|
public static final int NOT_READ_PTNODE_COUNT = -1;
|
|
|
|
public int mAddress;
|
|
public int mNumOfPtNode;
|
|
public int mPosition;
|
|
public int mLength;
|
|
|
|
public Position(int address, int length) {
|
|
mAddress = address;
|
|
mLength = length;
|
|
mNumOfPtNode = NOT_READ_PTNODE_COUNT;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Retrieves all node arrays without recursive call.
|
|
*/
|
|
private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
|
|
final int headerSize, final Map<Integer, String> words,
|
|
final Map<Integer, Integer> frequencies,
|
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
|
final FormatOptions formatOptions) {
|
|
int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1];
|
|
|
|
Stack<Position> stack = new Stack<Position>();
|
|
int index = 0;
|
|
|
|
Position initPos = new Position(headerSize, 0);
|
|
stack.push(initPos);
|
|
|
|
while (!stack.empty()) {
|
|
Position p = stack.peek();
|
|
|
|
if (DBG) {
|
|
MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" +
|
|
p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength);
|
|
}
|
|
|
|
if (dictDecoder.getPosition() != p.mAddress) dictDecoder.setPosition(p.mAddress);
|
|
if (index != p.mLength) index = p.mLength;
|
|
|
|
if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) {
|
|
p.mNumOfPtNode = dictDecoder.readPtNodeCount();
|
|
p.mAddress += getPtNodeCountSize(p.mNumOfPtNode);
|
|
p.mPosition = 0;
|
|
}
|
|
if (p.mNumOfPtNode == 0) {
|
|
stack.pop();
|
|
continue;
|
|
}
|
|
PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions);
|
|
for (int i = 0; i < info.mCharacters.length; ++i) {
|
|
pushedChars[index++] = info.mCharacters[i];
|
|
}
|
|
p.mPosition++;
|
|
|
|
final boolean isMovedPtNode = isMovedPtNode(info.mFlags,
|
|
formatOptions);
|
|
final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags,
|
|
formatOptions);
|
|
if (!isMovedPtNode && !isDeletedPtNode
|
|
&& info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word
|
|
words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
|
|
frequencies.put(info.mOriginalAddress, info.mFrequency);
|
|
if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
|
|
}
|
|
|
|
if (p.mPosition == p.mNumOfPtNode) {
|
|
if (formatOptions.mSupportsDynamicUpdate) {
|
|
final boolean hasValidForwardLinkAddress =
|
|
dictDecoder.readAndFollowForwardLink();
|
|
if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
|
|
// The node array has a forward link.
|
|
p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
|
|
p.mAddress = dictDecoder.getPosition();
|
|
} else {
|
|
stack.pop();
|
|
}
|
|
} else {
|
|
stack.pop();
|
|
}
|
|
} else {
|
|
// The Ptnode array has more PtNodes.
|
|
p.mAddress = dictDecoder.getPosition();
|
|
}
|
|
|
|
if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) {
|
|
final Position childrenPos = new Position(info.mChildrenAddress, index);
|
|
stack.push(childrenPos);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Reads unigrams and bigrams from the binary file.
|
|
* Doesn't store a full memory representation of the dictionary.
|
|
*
|
|
* @param dictDecoder the dict decoder.
|
|
* @param words the map to store the address as a key and the word as a value.
|
|
* @param frequencies the map to store the address as a key and the frequency as a value.
|
|
* @param bigrams the map to store the address as a key and the list of address as a value.
|
|
* @throws IOException if the file can't be read.
|
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
|
*/
|
|
/* package */ static void readUnigramsAndBigramsBinary(final DictDecoder dictDecoder,
|
|
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
|
UnsupportedFormatException {
|
|
// Read header
|
|
final FileHeader header = dictDecoder.readHeader();
|
|
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words,
|
|
frequencies, bigrams, header.mFormatOptions);
|
|
}
|
|
|
|
/**
|
|
* Gets the address of the last PtNode of the exact matching word in the dictionary.
|
|
* If no match is found, returns NOT_VALID_WORD.
|
|
*
|
|
* @param dictDecoder the dict decoder.
|
|
* @param word the word we search for.
|
|
* @return the address of the terminal node.
|
|
* @throws IOException if the file can't be read.
|
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
|
*/
|
|
@UsedForTesting
|
|
/* package */ static int getTerminalPosition(final DictDecoder dictDecoder,
|
|
final String word) throws IOException, UnsupportedFormatException {
|
|
if (word == null) return FormatSpec.NOT_VALID_WORD;
|
|
dictDecoder.setPosition(0);
|
|
|
|
final FileHeader header = dictDecoder.readHeader();
|
|
int wordPos = 0;
|
|
final int wordLen = word.codePointCount(0, word.length());
|
|
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
|
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
|
|
|
|
do {
|
|
final int ptNodeCount = dictDecoder.readPtNodeCount();
|
|
boolean foundNextPtNode = false;
|
|
for (int i = 0; i < ptNodeCount; ++i) {
|
|
final int ptNodePos = dictDecoder.getPosition();
|
|
final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos,
|
|
header.mFormatOptions);
|
|
final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags,
|
|
header.mFormatOptions);
|
|
final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags,
|
|
header.mFormatOptions);
|
|
if (isMovedNode) continue;
|
|
boolean same = true;
|
|
for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
|
|
p < currentInfo.mCharacters.length;
|
|
++p, j = word.offsetByCodePoints(j, 1)) {
|
|
if (wordPos + p >= wordLen
|
|
|| word.codePointAt(j) != currentInfo.mCharacters[p]) {
|
|
same = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (same) {
|
|
// found the PtNode matches the word.
|
|
if (wordPos + currentInfo.mCharacters.length == wordLen) {
|
|
if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL
|
|
|| isDeletedNode) {
|
|
return FormatSpec.NOT_VALID_WORD;
|
|
} else {
|
|
return ptNodePos;
|
|
}
|
|
}
|
|
wordPos += currentInfo.mCharacters.length;
|
|
if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
|
|
return FormatSpec.NOT_VALID_WORD;
|
|
}
|
|
foundNextPtNode = true;
|
|
dictDecoder.setPosition(currentInfo.mChildrenAddress);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// If we found the next PtNode, it is under the file pointer.
|
|
// But if not, we are at the end of this node array so we expect to have
|
|
// a forward link address that we need to consult and possibly resume
|
|
// search on the next node array in the linked list.
|
|
if (foundNextPtNode) break;
|
|
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
|
|
return FormatSpec.NOT_VALID_WORD;
|
|
}
|
|
|
|
final boolean hasValidForwardLinkAddress =
|
|
dictDecoder.readAndFollowForwardLink();
|
|
if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
|
|
return FormatSpec.NOT_VALID_WORD;
|
|
}
|
|
} while(true);
|
|
}
|
|
return FormatSpec.NOT_VALID_WORD;
|
|
}
|
|
|
|
/**
|
|
* @return the size written, in bytes. Always 3 bytes.
|
|
*/
|
|
static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
|
|
final int value) {
|
|
final int absValue = Math.abs(value);
|
|
dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
|
dictBuffer.put((byte)((absValue >> 8) & 0xFF));
|
|
dictBuffer.put((byte)(absValue & 0xFF));
|
|
return 3;
|
|
}
|
|
|
|
/**
|
|
* @return the size written, in bytes. Always 3 bytes.
|
|
*/
|
|
static int writeSInt24ToStream(final OutputStream destination, final int value)
|
|
throws IOException {
|
|
final int absValue = Math.abs(value);
|
|
destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
|
destination.write((byte)((absValue >> 8) & 0xFF));
|
|
destination.write((byte)(absValue & 0xFF));
|
|
return 3;
|
|
}
|
|
|
|
/**
|
|
* @return the size written, in bytes. 1, 2, or 3 bytes.
|
|
*/
|
|
private static int writeVariableAddress(final OutputStream destination, final int value)
|
|
throws IOException {
|
|
switch (BinaryDictEncoderUtils.getByteSize(value)) {
|
|
case 1:
|
|
destination.write((byte)value);
|
|
break;
|
|
case 2:
|
|
destination.write((byte)(0xFF & (value >> 8)));
|
|
destination.write((byte)(0xFF & value));
|
|
break;
|
|
case 3:
|
|
destination.write((byte)(0xFF & (value >> 16)));
|
|
destination.write((byte)(0xFF & (value >> 8)));
|
|
destination.write((byte)(0xFF & value));
|
|
break;
|
|
}
|
|
return BinaryDictEncoderUtils.getByteSize(value);
|
|
}
|
|
|
|
static void skipString(final DictBuffer dictBuffer,
|
|
final boolean hasMultipleChars) {
|
|
if (hasMultipleChars) {
|
|
int character = CharEncoding.readChar(dictBuffer);
|
|
while (character != FormatSpec.INVALID_CHARACTER) {
|
|
character = CharEncoding.readChar(dictBuffer);
|
|
}
|
|
} else {
|
|
CharEncoding.readChar(dictBuffer);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Write a string to a stream.
|
|
*
|
|
* @param destination the stream to write.
|
|
* @param word the string to be written.
|
|
* @return the size written, in bytes.
|
|
* @throws IOException
|
|
*/
|
|
private static int writeString(final OutputStream destination, final String word)
|
|
throws IOException {
|
|
int size = 0;
|
|
final int length = word.length();
|
|
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
|
final int codePoint = word.codePointAt(i);
|
|
if (CharEncoding.getCharSize(codePoint) == 1) {
|
|
destination.write((byte)codePoint);
|
|
size++;
|
|
} else {
|
|
destination.write((byte)(0xFF & (codePoint >> 16)));
|
|
destination.write((byte)(0xFF & (codePoint >> 8)));
|
|
destination.write((byte)(0xFF & codePoint));
|
|
size += 3;
|
|
}
|
|
}
|
|
destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
|
|
size += FormatSpec.PTNODE_TERMINATOR_SIZE;
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* Write a PtNode to an output stream from a PtNodeInfo.
|
|
* A PtNode is an in-memory representation of a node in the patricia trie.
|
|
* A PtNode info is a container for low-level information about how the
|
|
* PtNode is stored in the binary format.
|
|
*
|
|
* @param destination the stream to write.
|
|
* @param info the PtNode info to be written.
|
|
* @return the size written, in bytes.
|
|
*/
|
|
private static int writePtNode(final OutputStream destination, final PtNodeInfo info)
|
|
throws IOException {
|
|
int size = FormatSpec.PTNODE_FLAGS_SIZE;
|
|
destination.write((byte)info.mFlags);
|
|
final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
|
|
FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
|
|
size += writeSInt24ToStream(destination, parentOffset);
|
|
|
|
for (int i = 0; i < info.mCharacters.length; ++i) {
|
|
if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) {
|
|
destination.write((byte)info.mCharacters[i]);
|
|
size++;
|
|
} else {
|
|
size += writeSInt24ToStream(destination, info.mCharacters[i]);
|
|
}
|
|
}
|
|
if (info.mCharacters.length > 1) {
|
|
destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
|
|
size++;
|
|
}
|
|
|
|
if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
|
|
destination.write((byte)info.mFrequency);
|
|
size++;
|
|
}
|
|
|
|
if (DBG) {
|
|
MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size
|
|
+ ", child=" + info.mChildrenAddress + ", characters ="
|
|
+ new String(info.mCharacters, 0, info.mCharacters.length));
|
|
}
|
|
final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ?
|
|
0 : info.mChildrenAddress - (info.mOriginalAddress + size);
|
|
writeSInt24ToStream(destination, childrenOffset);
|
|
size += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
|
|
|
if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) {
|
|
final int shortcutListSize =
|
|
BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
|
|
destination.write((byte)(shortcutListSize >> 8));
|
|
destination.write((byte)(shortcutListSize & 0xFF));
|
|
size += 2;
|
|
final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator();
|
|
while (shortcutIterator.hasNext()) {
|
|
final WeightedString target = shortcutIterator.next();
|
|
destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags(
|
|
shortcutIterator.hasNext(), target.mFrequency));
|
|
size++;
|
|
size += writeString(destination, target.mWord);
|
|
}
|
|
}
|
|
|
|
if (info.mBigrams != null) {
|
|
// TODO: Consolidate this code with the code that computes the size of the bigram list
|
|
// in BinaryDictEncoderUtils#computeActualNodeArraySize
|
|
for (int i = 0; i < info.mBigrams.size(); ++i) {
|
|
|
|
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
|
|
int bigramFlags = (i < info.mBigrams.size() - 1)
|
|
? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0;
|
|
size++;
|
|
final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress
|
|
+ size);
|
|
bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0;
|
|
switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) {
|
|
case 1:
|
|
bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE;
|
|
break;
|
|
case 2:
|
|
bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES;
|
|
break;
|
|
case 3:
|
|
bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES;
|
|
break;
|
|
}
|
|
bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
|
|
destination.write((byte)bigramFlags);
|
|
size += writeVariableAddress(destination, Math.abs(bigramOffset));
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* Compute the size of the PtNode.
|
|
*/
|
|
static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) {
|
|
int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
|
|
+ BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters)
|
|
+ getChildrenAddressSize(info.mFlags, formatOptions);
|
|
if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
|
|
size += FormatSpec.PTNODE_FREQUENCY_SIZE;
|
|
}
|
|
if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) {
|
|
size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
|
|
}
|
|
if (info.mBigrams != null) {
|
|
for (final PendingAttribute attr : info.mBigrams) {
|
|
size += FormatSpec.PTNODE_FLAGS_SIZE;
|
|
size += BinaryDictEncoderUtils.getByteSize(attr.mAddress);
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* Write a node array to the stream.
|
|
*
|
|
* @param destination the stream to write.
|
|
* @param infos an array of PtNodeInfo to be written.
|
|
* @return the size written, in bytes.
|
|
* @throws IOException
|
|
*/
|
|
static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos)
|
|
throws IOException {
|
|
int size = getPtNodeCountSize(infos.length);
|
|
switch (getPtNodeCountSize(infos.length)) {
|
|
case 1:
|
|
destination.write((byte)infos.length);
|
|
break;
|
|
case 2:
|
|
final int encodedPtNodeCount =
|
|
infos.length | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
|
|
destination.write((byte)(encodedPtNodeCount >> 8));
|
|
destination.write((byte)(encodedPtNodeCount & 0xFF));
|
|
break;
|
|
default:
|
|
throw new RuntimeException("Invalid node count size.");
|
|
}
|
|
for (final PtNodeInfo info : infos) size += writePtNode(destination, info);
|
|
writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
|
return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
|
}
|
|
|
|
private static final int HEADER_READING_BUFFER_SIZE = 16384;
|
|
/**
|
|
* Convenience method to read the header of a binary file.
|
|
*
|
|
* This is quite resource intensive - don't call when performance is critical.
|
|
*
|
|
* @param file The file to read.
|
|
* @param offset The offset in the file where to start reading the data.
|
|
* @param length The length of the data file.
|
|
*/
|
|
private static FileHeader getDictionaryFileHeader(
|
|
final File file, final long offset, final long length)
|
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
|
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
|
|
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file,
|
|
new DictDecoder.DictionaryBufferFactory() {
|
|
@Override
|
|
public DictBuffer getDictionaryBuffer(File file)
|
|
throws FileNotFoundException, IOException {
|
|
final FileInputStream inStream = new FileInputStream(file);
|
|
try {
|
|
inStream.skip(offset);
|
|
inStream.read(buffer);
|
|
return new ByteArrayDictBuffer(buffer);
|
|
} finally {
|
|
inStream.close();
|
|
}
|
|
}
|
|
}
|
|
);
|
|
return dictDecoder.readHeader();
|
|
}
|
|
|
|
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
|
|
final long length) {
|
|
try {
|
|
final FileHeader header = getDictionaryFileHeader(file, offset, length);
|
|
return header;
|
|
} catch (UnsupportedFormatException e) {
|
|
return null;
|
|
} catch (IOException e) {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper method to hide the actual value of the no children address.
|
|
*/
|
|
public static boolean hasChildrenAddress(final int address) {
|
|
return FormatSpec.NO_CHILDREN_ADDRESS != address;
|
|
}
|
|
|
|
/**
|
|
* Helper method to check whether the node is moved.
|
|
*/
|
|
public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
|
|
return options.mSupportsDynamicUpdate
|
|
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
|
|
}
|
|
|
|
/**
|
|
* Helper method to check whether the dictionary can be updated dynamically.
|
|
*/
|
|
public static boolean supportsDynamicUpdate(final FormatOptions options) {
|
|
return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
|
|
&& options.mSupportsDynamicUpdate;
|
|
}
|
|
|
|
/**
|
|
* Helper method to check whether the node is deleted.
|
|
*/
|
|
public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
|
|
return formatOptions.mSupportsDynamicUpdate
|
|
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
|
|
}
|
|
|
|
/**
|
|
* Compute the binary size of the node count
|
|
* @param count the node count
|
|
* @return the size of the node count, either 1 or 2 bytes.
|
|
*/
|
|
public static int getPtNodeCountSize(final int count) {
|
|
if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) {
|
|
return 1;
|
|
} else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) {
|
|
return 2;
|
|
} else {
|
|
throw new RuntimeException("Can't have more than "
|
|
+ FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found "
|
|
+ count + ")");
|
|
}
|
|
}
|
|
|
|
static int getChildrenAddressSize(final int optionFlags,
|
|
final FormatOptions formatOptions) {
|
|
if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
|
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
|
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
|
|
return 1;
|
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
|
|
return 2;
|
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
|
|
return 3;
|
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Calculate bigram frequency from compressed value
|
|
*
|
|
* @param unigramFrequency
|
|
* @param bigramFrequency compressed frequency
|
|
* @return approximate bigram frequency
|
|
*/
|
|
public static int reconstructBigramFrequency(final int unigramFrequency,
|
|
final int bigramFrequency) {
|
|
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
|
|
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
|
|
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
|
|
return (int)resultFreqFloat;
|
|
}
|
|
}
|