Merge "Implement writing empty ver4 dictionary to file."

This commit is contained in:
Keisuke Kuroyanagi 2013-11-18 06:17:46 +00:00 committed by Android (Google) Code Review
commit 4e76fd0e59
15 changed files with 372 additions and 155 deletions

View File

@ -91,6 +91,8 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_patricia_trie_writing_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
content/bigram_dict_content.cpp \
content/sparse_table_dict_content.cpp \
ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
@ -100,6 +102,7 @@ LATIN_IME_CORE_SRC_FILES := \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
dict_file_writing_utils.cpp \
file_utils.cpp \
forgetting_curve_utils.cpp \
format_utils.cpp \
mmapped_buffer.cpp \

View File

@ -59,6 +59,12 @@ class BigramDictContent : public SparseTableDictContent {
bool copyBigramList(const int bigramListPos, const int toPos);
// Writes the bigram lookup table, address (content) table and bigram content buffers into their
// files under dictDirPath. Returns the result of SparseTableDictContent::flush().
bool flushToFile(const char *const dictDirPath) const {
    const char *const lookupTableFileName =
            Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
    const char *const addressTableFileName =
            Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
    const char *const contentFileName = Ver4DictConstants::BIGRAM_FILE_EXTENSION;
    return flush(dictDirPath, lookupTableFileName, addressTableFileName, contentFileName);
}
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);

View File

@ -62,6 +62,10 @@ class ProbabilityDictContent : public SingleDictContent {
Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos);
}
// Writes the probability content buffer into the file named FREQ_FILE_EXTENSION under
// dictDirPath. Returns the result of SingleDictContent::flush().
bool flushToFile(const char *const dictDirPath) const {
    const char *const contentFileName = Ver4DictConstants::FREQ_FILE_EXTENSION;
    return flush(dictDirPath, contentFileName);
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);

View File

@ -60,6 +60,12 @@ class ShortcutDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
// Writes the shortcut lookup table, address (content) table and shortcut content buffers into
// their files under dictDirPath. Returns the result of SparseTableDictContent::flush().
bool flushToFile(const char *const dictDirPath) const {
    const char *const lookupTableFileName =
            Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
    const char *const addressTableFileName =
            Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
    const char *const contentFileName = Ver4DictConstants::SHORTCUT_FILE_EXTENSION;
    return flush(dictDirPath, lookupTableFileName, addressTableFileName, contentFileName);
}
private:
DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
};

View File

@ -21,6 +21,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@ -54,6 +55,12 @@ class SingleDictContent : public DictContent {
return &mExpandableContentBuffer;
}
bool flush(const char *const dictDirPath, const char *const contentFileName) const {
const BufferWithExtendableBuffer *bufferPtr = &mExpandableContentBuffer;
return DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
&bufferPtr, 1 /* bufferCount */);
}
private:
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
namespace latinime {
bool SparseTableDictContent::flush(const char *const dictDirPath,
const char *const lookupTableFileName, const char *const addressTableFileName,
const char *const contentFileName) const {
const BufferWithExtendableBuffer *lookupTableBufferPtr = &mExpandableLookupTableBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, lookupTableFileName,
&lookupTableBufferPtr, 1 /* bufferCount */)) {
return false;
}
const BufferWithExtendableBuffer *addressTableBufferPtr = &mExpandableAddressTableBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, addressTableFileName,
&addressTableBufferPtr, 1 /* bufferCount */)) {
return false;
}
const BufferWithExtendableBuffer *contentBufferPtr = &mExpandableContentBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
&contentBufferPtr, 1 /* bufferCount */)) {
return false;
}
return true;
}
} // namespace latinime

View File

@ -21,6 +21,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
@ -85,6 +86,9 @@ class SparseTableDictContent : public DictContent {
return &mExpandableContentBuffer;
}
// Writes the lookup table, address table and content buffers into three files under dictDirPath,
// named lookupTableFileName, addressTableFileName and contentFileName respectively.
// Returns false as soon as one of the files cannot be written, true otherwise.
bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
const char *const addressTableFileName, const char *const contentFileName) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);

View File

@ -67,6 +67,10 @@ class TerminalPositionLookupTable : public SingleDictContent {
return mSize;
}
// Writes the terminal position lookup table into the file named
// TERMINAL_ADDRESS_TABLE_FILE_EXTENSION under dictDirPath. Returns the result of
// SingleDictContent::flush().
bool flushToFile(const char *const dictDirPath) const {
    const char *const tableFileName = Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
    return flush(dictDirPath, tableFileName);
}
private:
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);

View File

@ -0,0 +1,81 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include <cerrno>
#include <sys/stat.h>
#include <sys/types.h>
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
namespace latinime {
// Writes the whole dictionary (header + trie file and every dict content file) into the
// directory dictDirPath.
//
// Everything is first written into a temporary directory whose path is dictDirPath followed by
// DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. Only after all files have been
// written is the existing dictDirPath removed and the temporary directory renamed to it.
//
// Returns true on success. When a file cannot be written, the partially written temporary
// directory is removed before returning false so that it cannot block later flushes.
bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
    // Build the path of the temporary directory.
    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    char tmpDirPath[tmpDirPathBufSize];
    FileUtils::getFilePathWithSuffix(dictDirPath,
            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
            tmpDirPath);
    // Create the temporary directory. A directory left behind by an earlier, interrupted flush
    // makes mkdir() fail with EEXIST; in that case remove the stale directory and retry once.
    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
        if (errno != EEXIST || !FileUtils::removeDirAndFiles(tmpDirPath)
                || mkdir(tmpDirPath, S_IRWXU) == -1) {
            AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
            return false;
        }
    }
    // Write the trie file (header followed by trie) and then each dictionary content. The chain
    // stops at the first failure, exactly one error is logged for it.
    const BufferWithExtendableBuffer *buffers[] =
            {&mExpandableHeaderBuffer, &mExpandableTrieBuffer};
    bool isWritten = false;
    if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath,
            Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) {
        AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
                Ver4DictConstants::TRIE_FILE_EXTENSION);
    } else if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) {
        AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
    } else if (!mProbabilityDictContent.flushToFile(tmpDirPath)) {
        AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
    } else if (!mBigramDictContent.flushToFile(tmpDirPath)) {
        AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
    } else if (!mShortcutDictContent.flushToFile(tmpDirPath)) {
        AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
    } else {
        isWritten = true;
    }
    if (!isWritten) {
        // Do not leave a half-written temporary directory on disk.
        if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
            AKLOGE("Temporary directory %s cannot be removed.", tmpDirPath);
        }
        return false;
    }
    // Remove existing dictionary.
    // From here on the temporary directory is intentionally kept on failure: the existing
    // dictionary may already be partially removed, so it can be the only complete copy.
    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
        ASSERT(false);
        return false;
    }
    // Rename temporary directory.
    if (rename(tmpDirPath, dictDirPath) != 0) {
        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
        ASSERT(false);
        return false;
    }
    return true;
}
} // namespace latinime

View File

@ -89,10 +89,7 @@ class Ver4DictBuffers {
return mIsUpdatable;
}
bool flush(const char *const dictDirPath) {
// TODO: Implement.
return false;
}
// Writes the header/trie buffers and all dictionary contents of this object into files in the
// directory dictDirPath. Returns true on success and false when something cannot be written.
bool flush(const char *const dictDirPath) const;
private:
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);

View File

@ -17,12 +17,12 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include <cstdio>
#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
@ -36,9 +36,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
return createEmptyV3DictFile(filePath, attributeMap);
case 4:
return createEmptyV4DictFile(filePath, attributeMap);
return false;
default:
// Only version 3 dictionary is supported for now.
AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
filePath, dictVersion);
return false;
}
}
@ -54,12 +54,13 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
BufferWithExtendableBuffer bodyBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
AKLOGE("Empty ver3 dictionary structure cannot be created on memory.");
return false;
}
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
}
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath,
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
@ -68,42 +69,59 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
return false;
}
return dictBuffers.get()->flush(filePath);
return dictBuffers.get()->flush(dirPath);
}
// Writes dictHeader followed by dictBody into the dictionary file filePath.
//
// The data is first written to a temporary file (filePath followed by
// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) which is then renamed to filePath, so filePath is only
// replaced once the new content has been written completely.
// Returns true only when both the write and the rename succeeded.
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
        BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
    const int tmpFileNameBufSize = FileUtils::getFilePathWithSuffixBufSize(filePath,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    // Name of a temporary file used for writing that is a connected string of original name and
    // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
    char tmpFileName[tmpFileNameBufSize];
    FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
            tmpFileNameBufSize, tmpFileName);
    const BufferWithExtendableBuffer *buffers[] = {dictHeader, dictBody};
    if (!DictFileWritingUtils::flushBuffersToFile(tmpFileName, buffers, 2 /* bufferCount */)) {
        AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
        return false;
    }
    if (rename(tmpFileName, filePath) != 0) {
        AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);
        // filePath was not replaced, so report the failure and drop the orphaned temporary file.
        remove(tmpFileName);
        return false;
    }
    return true;
}
/* static */ bool DictFileWritingUtils::flushBuffersToFileInDir(const char *const dirPath,
const char *const fileName, const BufferWithExtendableBuffer **const buffers,
const int bufferCount) {
const int filePathBufSize = FileUtils::getFilePathBufSize(dirPath, fileName);
char filePath[filePathBufSize];
FileUtils::getFilePath(dirPath, fileName, filePathBufSize, filePath);
return flushBuffersToFile(filePath, buffers, bufferCount);
}
// Writes the given buffers, in order, into the file at filePath, which is opened with mode "wb".
// buffers must point to bufferCount buffer pointers.
// Returns true when everything has been written and the file has been closed successfully.
// On a write or close failure the incomplete file is removed and false is returned.
/* static */ bool DictFileWritingUtils::flushBuffersToFile(const char *const filePath,
        const BufferWithExtendableBuffer **const buffers, const int bufferCount) {
    FILE *const file = fopen(filePath, "wb");
    if (!file) {
        AKLOGE("File %s cannot be opened.", filePath);
        ASSERT(false);
        return false;
    }
    for (int i = 0; i < bufferCount; ++i) {
        if (!writeBufferToFile(file, buffers[i])) {
            // Close the stream first so that the FILE handle is not leaked on this error path.
            fclose(file);
            remove(filePath);
            AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
                    buffers[i]->getTailPosition());
            ASSERT(false);
            return false;
        }
    }
    // fclose() flushes data still buffered in the stream, so a failure here means that the file
    // on disk may be incomplete.
    if (fclose(file) != 0) {
        remove(filePath);
        AKLOGE("File %s cannot be closed.", filePath);
        ASSERT(false);
        return false;
    }
    return true;
}

View File

@ -28,6 +28,8 @@ class BufferWithExtendableBuffer;
class DictFileWritingUtils {
public:
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
@ -35,17 +37,21 @@ class DictFileWritingUtils {
BufferWithExtendableBuffer *const dictHeader,
BufferWithExtendableBuffer *const dictBody);
static bool flushBuffersToFileInDir(const char *const dirPath, const char *const fileName,
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
static bool createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static bool createEmptyV4DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static bool flushBuffersToFile(const char *const filePath,
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);
};

View File

@ -0,0 +1,91 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include <cstdio>
#include <cstring>
#include <dirent.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
namespace latinime {
// Returns the size of the file at filePath as reported by fstat().
// Returns -1 on error, i.e. when the file cannot be opened for reading or fstat() fails.
/* static */ int FileUtils::getFileSize(const char *const filePath) {
    const int fd = open(filePath, O_RDONLY);
    if (fd == -1) {
        return -1;
    }
    struct stat statBuf;
    const bool isStatAvailable = (fstat(fd, &statBuf) == 0);
    // The descriptor is only needed for fstat(); close it on both the success and error path.
    close(fd);
    return isStatAvailable ? static_cast<int>(statBuf.st_size) : -1;
}
// Remove a directory and all files in the directory.
//
// Only regular files (d_type == DT_REG) directly inside dirPath are removed; other entries are
// skipped, so the final removal of the directory fails when such entries are left.
// Returns false when a file or the directory itself cannot be removed.
// When dirPath cannot be opened (e.g. it does not exist) an error is logged and true is
// returned; this existing behavior is kept for backward compatibility with callers.
/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) {
    DIR *const dir = opendir(dirPath);
    if (dir == NULL) {
        AKLOGE("Cannot open dir %s.", dirPath);
        return true;
    }
    struct dirent *dirent;
    while ((dirent = readdir(dir)) != NULL) {
        if (dirent->d_type != DT_REG) {
            continue;
        }
        const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name);
        char filePath[filePathBufSize];
        getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath);
        if (remove(filePath) != 0) {
            AKLOGE("Cannot remove file %s.", filePath);
            // Release the directory stream before bailing out.
            closedir(dir);
            return false;
        }
    }
    // The directory stream must be closed on every path, otherwise it is leaked.
    closedir(dir);
    if (remove(dirPath) != 0) {
        AKLOGE("Cannot remove directory %s.", dirPath);
        return false;
    }
    return true;
}
// Returns the buffer size, including the terminating NUL character, required to hold filePath
// immediately followed by suffix.
/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath,
        const char *const suffix) {
    const int terminatorSize = 1;
    return static_cast<int>(strlen(filePath) + strlen(suffix) + terminatorSize);
}
// Writes filePath immediately followed by suffix into outFilePath, using at most
// filePathBufSize bytes including the terminating NUL character.
/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath,
        const char *const suffix, const int filePathBufSize, char *const outFilePath) {
    static const char *const PATH_WITH_SUFFIX_FORMAT = "%s%s";
    snprintf(outFilePath, filePathBufSize, PATH_WITH_SUFFIX_FORMAT, filePath, suffix);
}
// Returns the buffer size, including the '/' separator and the terminating NUL character,
// required to hold the path "<dirPath>/<fileName>".
/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath,
        const char *const fileName) {
    const int separatorSize = 1; // '/'
    const int terminatorSize = 1;
    return static_cast<int>(strlen(dirPath) + separatorSize + strlen(fileName) + terminatorSize);
}
// Writes the path "<dirPath>/<fileName>" into outFilePath, using at most filePathBufSize bytes
// including the terminating NUL character.
/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName,
        const int filePathBufSize, char *const outFilePath) {
    static const char *const FILE_IN_DIR_FORMAT = "%s/%s";
    snprintf(outFilePath, filePathBufSize, FILE_IN_DIR_FORMAT, dirPath, fileName);
}
} // namespace latinime

View File

@ -17,11 +17,6 @@
#ifndef LATINIME_FILE_UTILS_H
#define LATINIME_FILE_UTILS_H
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "defines.h"
namespace latinime {
@ -29,19 +24,20 @@ namespace latinime {
class FileUtils {
public:
// Returns -1 on error.
static int getFileSize(const char *const filePath) {
const int fd = open(filePath, O_RDONLY);
if (fd == -1) {
return -1;
}
struct stat statBuf;
if (fstat(fd, &statBuf) != 0) {
close(fd);
return -1;
}
close(fd);
return static_cast<int>(statBuf.st_size);
}
static int getFileSize(const char *const filePath);
// Remove a directory and all files in the directory.
static bool removeDirAndFiles(const char *const dirPath);
static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix);
static void getFilePathWithSuffix(const char *const filePath, const char *const suffix,
const int filePathBufSize, char *const outFilePath);
static int getFilePathBufSize(const char *const dirPath, const char *const fileName);
static void getFilePath(const char *const dirPath, const char *const fileName,
const int filePathBufSize, char *const outFilePath);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);

View File

@ -18,29 +18,21 @@ package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
// TODO: Add a test to evaluate the speed of operations of Ver4 dictionary.
@LargeTest
public class Ver4BinaryDictionaryTests extends AndroidTestCase {
private static final String TAG = Ver4BinaryDictionaryTests.class.getSimpleName();
private static final String TEST_LOCALE = "test";
private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
@Override
protected void setUp() throws Exception {
@ -52,42 +44,35 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
super.tearDown();
}
// TODO: remove after native code support dictionary creation.
private DictionaryOptions getDictionaryOptions(final String id, final String version) {
final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */);
options.mAttributes.put("version", version);
options.mAttributes.put("dictionary", id);
return options;
}
// TODO: remove after native code support dictionary creation.
private File getTrieFile(final String id, final String version) {
return new File(getContext().getCacheDir() + "/" + id + "." + version,
TEST_LOCALE + "." + version + FormatSpec.TRIE_FILE_EXTENSION);
// Note that dictVersion is different from dictionary format version and it never affects the
// dictionary format.
// TODO: Rename dictVersion to understandable name such as dictRevision.
private File createEmptyDictionaryAndGetTrieFile(final String dictVersion) throws IOException {
    // createTempFile() is only used to obtain a unique name in the cache directory; the created
    // file is replaced by a directory of the same name, which is then passed to native code.
    final File dictDir = File.createTempFile(dictVersion, TEST_DICT_FILE_EXTENSION,
            getContext().getCacheDir());
    dictDir.delete();
    dictDir.mkdir();
    final Map<String, String> attributeMap = new HashMap<String, String>();
    attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
            FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
    final boolean isCreated = BinaryDictionary.createEmptyDictFile(dictDir.getAbsolutePath(),
            4 /* dictVersion */, attributeMap);
    if (!isCreated) {
        throw new IOException("Empty dictionary " + dictDir.getAbsolutePath() + " "
                + FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
    }
    // The trie file is the file named TRIE_FILE_EXTENSION inside the dictionary directory.
    return new File(dictDir, FormatSpec.TRIE_FILE_EXTENSION);
}
public void testIsValidDictionary() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertFalse(binaryDictionary.isValidDictionary());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
@ -96,27 +81,21 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
// TODO: Add large tests.
public void testReadProbability() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final int frequency = 100;
dict.add("a", frequency, null, false /* isNotAWord */);
dict.add("aaa", frequency, null, false /* isNotAWord */);
dict.add("ab", frequency, null, false /* isNotAWord */);
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
final int frequency = 100;
binaryDictionary.addUnigramWord("a", frequency);
binaryDictionary.addUnigramWord("aaa", frequency);
binaryDictionary.addUnigramWord("ab", frequency);
assertEquals(frequency, binaryDictionary.getFrequency("a"));
assertEquals(frequency, binaryDictionary.getFrequency("aaa"));
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
@ -132,40 +111,32 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
// TODO: Add large tests.
public void testReadBigrams() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final int unigramFrequency = 1;
final int bigramFrequency0 = 150;
final int bigramFrequency1 = 1;
final int bigramFrequency2 = 255;
dict.add("a", unigramFrequency, null, false /* isNotAWord */);
dict.add("aaa", unigramFrequency, null, false /* isNotAWord */);
dict.add("ab", unigramFrequency, null, false /* isNotAWord */);
dict.setBigram("a", "aaa", bigramFrequency0);
dict.setBigram("a", "ab", bigramFrequency1);
dict.setBigram("aaa", "ab", bigramFrequency2);
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
final int unigramFrequency = 1;
final int bigramFrequency0 = 10;
final int bigramFrequency1 = 1;
final int bigramFrequency2 = 15;
binaryDictionary.addUnigramWord("a", unigramFrequency);
binaryDictionary.addUnigramWord("aaa", unigramFrequency);
binaryDictionary.addUnigramWord("ab", unigramFrequency);
binaryDictionary.addBigramWords("a", "aaa", bigramFrequency0);
binaryDictionary.addBigramWords("a", "ab", bigramFrequency1);
binaryDictionary.addBigramWords("aaa", "ab", bigramFrequency2);
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
assertEquals(binaryDictionary.calculateProbability(unigramFrequency,
bigramFrequency0), binaryDictionary.getBigramProbability("a", "aaa"));
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
assertEquals(binaryDictionary.calculateProbability(unigramFrequency,
bigramFrequency1), binaryDictionary.getBigramProbability("a", "ab"));
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
assertEquals(binaryDictionary.calculateProbability(unigramFrequency,
bigramFrequency2), binaryDictionary.getBigramProbability("aaa", "ab"));
assertFalse(binaryDictionary.isValidBigram("aaa", "a"));
@ -176,21 +147,15 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
// TODO: Add large tests.
public void testWriteUnigrams() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
final int probability = 100;
binaryDictionary.addUnigramWord("aaa", probability);
@ -208,25 +173,18 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
public void testWriteBigrams() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
final int unigramProbability = 100;
final int bigramProbability = 10;
final int updatedBigramProbability = 15;
binaryDictionary.addUnigramWord("aaa", unigramProbability);
binaryDictionary.addUnigramWord("abb", unigramProbability);
binaryDictionary.addUnigramWord("bcc", unigramProbability);
@ -249,21 +207,15 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
public void testRemoveBigramWords() {
final String dictVersion = Long.toString(System.currentTimeMillis());
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion));
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
File trieFile = null;
try {
encoder.writeDictionary(dict, FORMAT_OPTIONS);
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
Log.e(TAG, "IOException while writing dictionary", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e);
fail("IOException while writing an initial dictionary : " + e);
}
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary());
final int unigramProbability = 100;
final int bigramProbability = 10;