From 5b33d197ba7c9e44847561b99f948d5a11e1ecc6 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 20 Oct 2014 21:53:31 +0900 Subject: [PATCH] Add a header command to dicttool. This will allow to greatly improve the performance of the metadata-generating files, as they won't have to wait for the info command to read the entire dictionary when the header is all we need. Also add tests, and while we're at it, use the seed as intended to enable reproducible tests. Change-Id: I0ba79ef62f0292b23e63aed57ff565bb102281a2 --- .../BinaryDictDecoderEncoderTests.java | 77 +++++++++++++++++-- .../dicttool/BinaryDictOffdeviceUtils.java | 32 +++++--- .../latin/dicttool/CommandList.java | 1 + .../inputmethod/latin/dicttool/Header.java | 69 +++++++++++++++++ 4 files changed, 162 insertions(+), 17 deletions(-) create mode 100644 tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index d239f8dac..25eaa64cf 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -24,8 +24,13 @@ import android.util.SparseArray; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.common.CodePointUtils; +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils; +import com.android.inputmethod.latin.dicttool.Compress; +import com.android.inputmethod.latin.dicttool.Crypt; +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -67,6 +72,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private static final SparseArray> sChainBigrams = new SparseArray<>(); private static final HashMap> sShortcuts = new HashMap<>(); + final Random mRandom; + public BinaryDictDecoderEncoderTests() { this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); } @@ -75,10 +82,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { super(); BinaryDictionaryUtils.setCurrentTimeForTest(0); Log.e(TAG, "Testing dictionary: seed is " + seed); - final Random random = new Random(seed); + mRandom = new Random(seed); sWords.clear(); sWordsWithVariousCodePoints.clear(); - generateWords(maxUnigrams, random); + generateWords(maxUnigrams, mRandom); for (int i = 0; i < sWords.size(); ++i) { sChainBigrams.put(i, new ArrayList()); @@ -96,10 +103,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { sShortcuts.clear(); for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { - final int from = Math.abs(random.nextInt()) % sWords.size(); + final int from = Math.abs(mRandom.nextInt()) % sWords.size(); sShortcuts.put(sWords.get(from), new ArrayList()); for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { - final int to = Math.abs(random.nextInt()) % sWords.size(); + final int to = Math.abs(mRandom.nextInt()) % sWords.size(); sShortcuts.get(sWords.get(from)).add(sWords.get(to)); } } @@ -604,11 +611,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { + " : " + outputOptions(bufferType, formatOptions)); // Test a word that isn't contained within the dictionary. - final Random random = new Random((int)System.currentTimeMillis()); final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, - random); + mRandom); for (int i = 0; i < 1000; ++i) { - final String word = CodePointUtils.generateWord(random, codePointSet); + final String word = CodePointUtils.generateWord(mRandom, codePointSet); if (sWords.indexOf(word) != -1) continue; checkGetTerminalPosition(dictDecoder, word, false); } @@ -731,4 +737,61 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { assertTrue(wordSet.isEmpty()); assertTrue(bigramSet.isEmpty()); } + + public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt) + throws IOException { + final String dictName = "testHeaderReaderProcessor"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; + final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5; + final HashMap options = new HashMap<>(); + // Required attributes + options.put("dictionary", "main:en_US"); + options.put("locale", "en_US"); + options.put("version", Integer.toString(mRandom.nextInt())); + // Add some random options for test + final int numberOfOptionsToAdd = mRandom.nextInt() % (MAX_NUMBER_OF_OPTIONS_TO_ADD + 1); + for (int i = 0; i < numberOfOptionsToAdd; ++i) { + options.put(sWordsWithVariousCodePoints.get(2 * i), + sWordsWithVariousCodePoints.get(2 * 1 + 1)); + } + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + new DictionaryOptions(options)); + addUnigrams(sWords.size(), dict, sWords, null); + File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); + timeWritingDictToFile(file, dict, formatOptions); + + if (compress) { + final File rawFile = file; + file = BinaryDictUtils.getDictFile(dictName + "compress", dictVersion, formatOptions, + getContext().getCacheDir()); + final Compress.Compressor compressCommand = new Compress.Compressor(); + compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() }); + compressCommand.run(); + } + if (crypt) { + final File rawFile = file; + file = BinaryDictUtils.getDictFile(dictName + "crypt", dictVersion, formatOptions, + getContext().getCacheDir()); + final Crypt.Encrypter cryptCommand = new Crypt.Encrypter(); + cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() }); + cryptCommand.run(); + } + + final DecoderChainSpec spec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file, + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); + assertNotNull("Can't decode a dictionary we just wrote : " + file, spec); + final DictionaryHeader header = spec.mResult; + assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""), + spec.describeChain()); + assertEquals(header.mDictionaryOptions.mAttributes, options); + } + + public void testHeaderReaderProcessor() throws IOException { + runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */); + runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */); + runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */); + } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 7894e17c4..3ec28f313 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -65,7 +65,7 @@ public final class BinaryDictOffdeviceUtils { }; private final int mDecoderSpecIndex; - T mResult; + public T mResult; public DecoderChainSpec() { mDecoderSpecIndex = 0; @@ -174,12 +174,13 @@ public final class BinaryDictOffdeviceUtils { } final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8) + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF); - if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201) { - throw new UnsupportedFormatException("Only versions 2 and 201 are supported"); + if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201 + && version != FormatSpec.VERSION202) { + throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported"); } - final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) >> 24) - + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) >> 16) - + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) >> 8) + final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24) + + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16) + + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8) + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF); if (totalHeaderSize > MAX_HEADER_LENGTH) { throw new UnsupportedFormatException("Header too large"); @@ -215,11 +216,22 @@ public final class BinaryDictOffdeviceUtils { @Nonnull DecoderChainSpec spec = new DecoderChainSpec(); while (null != spec) { try { - try (final InputStream input = spec.getStream(src)) { - spec.mResult = processor.process(input); - return spec; + final InputStream input = spec.getStream(src); + spec.mResult = processor.process(input); + try { + input.close(); + } catch (IOException e) { + // CipherInputStream doesn't like being closed without having read the + // entire stream, for some reason. But we don't want to because it's a waste + // of resources. We really, really don't care about this. + // However on close() CipherInputStream does throw this exception, wrapped + // in an IOException so we need to catch it. + if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) { + throw e; + } } - } catch (IOException | UnsupportedFormatException e) { + return spec; + } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) { // If the format is not the right one for this file, the processor will throw one // of these exceptions. In our case, that means we should try the next spec, // since it may still be at another format we haven't tried yet. diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java index 07450ca51..8fdf7633f 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java @@ -20,6 +20,7 @@ public class CommandList { public static void populate() { // TODO: Move some commands to native code. Dicttool.addCommand("info", Info.class); + Dicttool.addCommand("header", Header.class); Dicttool.addCommand("diff", Diff.class); Dicttool.addCommand("compress", Compress.Compressor.class); Dicttool.addCommand("uncompress", Compress.Uncompressor.class); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java new file mode 100644 index 000000000..51efdec33 --- /dev/null +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java @@ -0,0 +1,69 @@ +/** + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.dicttool; + +import com.android.inputmethod.latin.BinaryDictionary; +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; + +import java.io.File; +import java.util.Arrays; +import java.util.Locale; + +public class Header extends Dicttool.Command { + public static final String COMMAND = "header"; + + public Header() { + } + + @Override + public String getHelp() { + return COMMAND + " : prints the header contents of a dictionary file"; + } + + @Override + public void run() throws UnsupportedFormatException { + final boolean plumbing; + if (mArgs.length > 0 && "-p".equals(mArgs[0])) { + plumbing = true; + mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length); + } else { + plumbing = false; + } + if (mArgs.length < 1) { + throw new RuntimeException("Not enough arguments for command " + COMMAND); + } + final String filename = mArgs[0]; + final File dictFile = new File(filename); + final DecoderChainSpec spec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile, + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); + if (null == spec) { + throw new UnsupportedFormatException(filename + + " doesn't seem to be a valid version 2 dictionary file"); + } + + final DictionaryHeader header = spec.mResult; + System.out.println("Dictionary : " + dictFile.getAbsolutePath()); + System.out.println("Size : " + dictFile.length() + " bytes"); + System.out.println("Format : Binary dictionary format"); + System.out.println("Packaging : " + spec.describeChain()); + System.out.println("Header attributes :"); + System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing)); + } +}