mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Merge "Fix two nasty bugs with surrogate pairs."
This commit is contained in:
commit
065f126f1d
@ -620,34 +620,34 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||||||
* Helper method to find a word in a given branch.
|
* Helper method to find a word in a given branch.
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public static CharGroup findWordInTree(Node node, final String s) {
|
public static CharGroup findWordInTree(Node node, final String string) {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
final StringBuilder checker = DBG ? new StringBuilder() : null;
|
final StringBuilder checker = DBG ? new StringBuilder() : null;
|
||||||
|
final int[] codePoints = getCodePoints(string);
|
||||||
|
|
||||||
CharGroup currentGroup;
|
CharGroup currentGroup;
|
||||||
final int codePointCountInS = s.codePointCount(0, s.length());
|
|
||||||
do {
|
do {
|
||||||
int indexOfGroup = findIndexOfChar(node, s.codePointAt(index));
|
int indexOfGroup = findIndexOfChar(node, codePoints[index]);
|
||||||
if (CHARACTER_NOT_FOUND == indexOfGroup) return null;
|
if (CHARACTER_NOT_FOUND == indexOfGroup) return null;
|
||||||
currentGroup = node.mData.get(indexOfGroup);
|
currentGroup = node.mData.get(indexOfGroup);
|
||||||
|
|
||||||
if (s.length() - index < currentGroup.mChars.length) return null;
|
if (codePoints.length - index < currentGroup.mChars.length) return null;
|
||||||
int newIndex = index;
|
int newIndex = index;
|
||||||
while (newIndex < s.length() && newIndex - index < currentGroup.mChars.length) {
|
while (newIndex < codePoints.length && newIndex - index < currentGroup.mChars.length) {
|
||||||
if (currentGroup.mChars[newIndex - index] != s.codePointAt(newIndex)) return null;
|
if (currentGroup.mChars[newIndex - index] != codePoints[newIndex]) return null;
|
||||||
newIndex++;
|
newIndex++;
|
||||||
}
|
}
|
||||||
index = newIndex;
|
index = newIndex;
|
||||||
|
|
||||||
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
|
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
|
||||||
if (index < codePointCountInS) {
|
if (index < codePoints.length) {
|
||||||
node = currentGroup.mChildren;
|
node = currentGroup.mChildren;
|
||||||
}
|
}
|
||||||
} while (null != node && index < codePointCountInS);
|
} while (null != node && index < codePoints.length);
|
||||||
|
|
||||||
if (index < codePointCountInS) return null;
|
if (index < codePoints.length) return null;
|
||||||
if (!currentGroup.isTerminal()) return null;
|
if (!currentGroup.isTerminal()) return null;
|
||||||
if (DBG && !s.equals(checker.toString())) return null;
|
if (DBG && !codePoints.equals(checker.toString())) return null;
|
||||||
return currentGroup;
|
return currentGroup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -847,12 +847,12 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||||||
@Override
|
@Override
|
||||||
public Word next() {
|
public Word next() {
|
||||||
Position currentPos = mPositions.getLast();
|
Position currentPos = mPositions.getLast();
|
||||||
mCurrentString.setLength(mCurrentString.length() - currentPos.length);
|
mCurrentString.setLength(currentPos.length);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (currentPos.pos.hasNext()) {
|
if (currentPos.pos.hasNext()) {
|
||||||
final CharGroup currentGroup = currentPos.pos.next();
|
final CharGroup currentGroup = currentPos.pos.next();
|
||||||
currentPos.length = currentGroup.mChars.length;
|
currentPos.length = mCurrentString.length();
|
||||||
for (int i : currentGroup.mChars)
|
for (int i : currentGroup.mChars)
|
||||||
mCurrentString.append(Character.toChars(i));
|
mCurrentString.append(Character.toChars(i));
|
||||||
if (null != currentGroup.mChildren) {
|
if (null != currentGroup.mChildren) {
|
||||||
@ -866,7 +866,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||||||
} else {
|
} else {
|
||||||
mPositions.removeLast();
|
mPositions.removeLast();
|
||||||
currentPos = mPositions.getLast();
|
currentPos = mPositions.getLast();
|
||||||
mCurrentString.setLength(mCurrentString.length() - mPositions.getLast().length);
|
mCurrentString.setLength(mPositions.getLast().length);
|
||||||
}
|
}
|
||||||
} while (true);
|
} while (true);
|
||||||
}
|
}
|
||||||
|
@ -72,15 +72,12 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||||||
private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
|
private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
|
||||||
new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
|
||||||
|
|
||||||
private static final String[] CHARACTERS = {
|
|
||||||
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
|
|
||||||
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
|
|
||||||
};
|
|
||||||
|
|
||||||
public BinaryDictIOTests() {
|
public BinaryDictIOTests() {
|
||||||
super();
|
super();
|
||||||
|
|
||||||
final Random random = new Random(123456);
|
final long time = System.currentTimeMillis();
|
||||||
|
Log.e(TAG, "Testing dictionary: seed is " + time);
|
||||||
|
final Random random = new Random(time);
|
||||||
sWords.clear();
|
sWords.clear();
|
||||||
generateWords(MAX_UNIGRAMS, random);
|
generateWords(MAX_UNIGRAMS, random);
|
||||||
|
|
||||||
@ -132,13 +129,16 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||||||
/**
|
/**
|
||||||
* Generates a random word.
|
* Generates a random word.
|
||||||
*/
|
*/
|
||||||
private String generateWord(final int value) {
|
private String generateWord(final Random random) {
|
||||||
final int lengthOfChars = CHARACTERS.length;
|
|
||||||
StringBuilder builder = new StringBuilder("a");
|
StringBuilder builder = new StringBuilder("a");
|
||||||
long lvalue = Math.abs((long)value);
|
int count = random.nextInt() % 30; // Arbitrarily 30 chars max
|
||||||
while (lvalue > 0) {
|
while (count > 0) {
|
||||||
builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
|
final long r = Math.abs(random.nextInt());
|
||||||
lvalue /= lengthOfChars;
|
if (r < 0) continue;
|
||||||
|
// Don't insert 0~20, but insert any other code point.
|
||||||
|
// Code points are in the range 0~0x10FFFF.
|
||||||
|
builder.appendCodePoint((int)(20 + r % (0x10FFFF - 20)));
|
||||||
|
--count;
|
||||||
}
|
}
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
@ -146,7 +146,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||||||
private void generateWords(final int number, final Random random) {
|
private void generateWords(final int number, final Random random) {
|
||||||
final Set<String> wordSet = CollectionUtils.newHashSet();
|
final Set<String> wordSet = CollectionUtils.newHashSet();
|
||||||
while (wordSet.size() < number) {
|
while (wordSet.size() < number) {
|
||||||
wordSet.add(generateWord(random.nextInt()));
|
wordSet.add(generateWord(random));
|
||||||
}
|
}
|
||||||
sWords.addAll(wordSet);
|
sWords.addAll(wordSet);
|
||||||
}
|
}
|
||||||
@ -555,7 +555,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||||||
// Test a word that isn't contained within the dictionary.
|
// Test a word that isn't contained within the dictionary.
|
||||||
final Random random = new Random((int)System.currentTimeMillis());
|
final Random random = new Random((int)System.currentTimeMillis());
|
||||||
for (int i = 0; i < 1000; ++i) {
|
for (int i = 0; i < 1000; ++i) {
|
||||||
final String word = generateWord(random.nextInt());
|
final String word = generateWord(random);
|
||||||
if (sWords.indexOf(word) != -1) continue;
|
if (sWords.indexOf(word) != -1) continue;
|
||||||
runGetTerminalPosition(buffer, word, i, false);
|
runGetTerminalPosition(buffer, word, i, false);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,114 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||||
|
import com.android.inputmethod.latin.makedict.Word;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for BinaryDictInputOutput.
|
||||||
|
*/
|
||||||
|
public class FusionDictionaryTest extends TestCase {
|
||||||
|
private static final ArrayList<String> sWords = new ArrayList<String>();
|
||||||
|
private static final int MAX_UNIGRAMS = 1000;
|
||||||
|
|
||||||
|
private void prepare(final long seed) {
|
||||||
|
System.out.println("Seed is " + seed);
|
||||||
|
final Random random = new Random(seed);
|
||||||
|
sWords.clear();
|
||||||
|
generateWords(MAX_UNIGRAMS, random);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a random word.
|
||||||
|
*/
|
||||||
|
private String generateWord(final Random random) {
|
||||||
|
StringBuilder builder = new StringBuilder("a");
|
||||||
|
int count = random.nextInt() % 30;
|
||||||
|
while (count > 0) {
|
||||||
|
final long r = Math.abs(random.nextInt());
|
||||||
|
if (r < 0) continue;
|
||||||
|
// Don't insert 0~20, but insert any other code point.
|
||||||
|
// Code points are in the range 0~0x10FFFF.
|
||||||
|
if (builder.length() < 7)
|
||||||
|
builder.appendCodePoint((int)(20 +r % (0x10FFFF - 20)));
|
||||||
|
--count;
|
||||||
|
}
|
||||||
|
if (builder.length() == 1) return generateWord(random);
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void generateWords(final int number, final Random random) {
|
||||||
|
while (sWords.size() < number) {
|
||||||
|
sWords.add(generateWord(random));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkDictionary(final FusionDictionary dict, final ArrayList<String> words,
|
||||||
|
int limit) {
|
||||||
|
assertNotNull(dict);
|
||||||
|
for (final String word : words) {
|
||||||
|
if (--limit < 0) return;
|
||||||
|
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
|
||||||
|
if (null == cg) {
|
||||||
|
System.out.println("word " + dumpWord(word));
|
||||||
|
dumpDict(dict);
|
||||||
|
}
|
||||||
|
assertNotNull(cg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String dumpWord(final String word) {
|
||||||
|
final StringBuilder sb = new StringBuilder("");
|
||||||
|
for (int i = 0; i < word.length(); i = word.offsetByCodePoints(i, 1)) {
|
||||||
|
sb.append(word.codePointAt(i));
|
||||||
|
sb.append(" ");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpDict(final FusionDictionary dict) {
|
||||||
|
for (Word w : dict) {
|
||||||
|
System.out.println("Word " + dumpWord(w.mWord));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test the flattened array contains the expected number of nodes, and
|
||||||
|
// that it does not contain any duplicates.
|
||||||
|
public void testFusion() {
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||||
|
new DictionaryOptions(new HashMap<String, String>(),
|
||||||
|
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||||
|
final long time = System.currentTimeMillis();
|
||||||
|
prepare(time);
|
||||||
|
for (int i = 0; i < sWords.size(); ++i) {
|
||||||
|
System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i)));
|
||||||
|
dict.add(sWords.get(i), 180, null, false);
|
||||||
|
dumpDict(dict);
|
||||||
|
checkDictionary(dict, sWords, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user