mirror of
synced 2024-09-28 14:54:30 +01:00
Allow sharing dictionaries between similar locales.
Bug: 5058488 Change-Id: Ib12013f58afad957a8205b439f87480cc12ea06f
This commit is contained in:
@ -67,25 +67,34 @@ public class BinaryDictionaryFileDumper {
* Queries a content provider for the list of word lists for a specific locale
* available to copy into Latin IME.
private static List<String> getWordListIds(final Locale locale, final Context context) {
private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
final Context context) {
final ContentResolver resolver = context.getContentResolver();
final Uri dictionaryPackUri = getProviderUri(locale.toString());
final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null);
if (null == c) return Collections.<String>emptyList();
if (null == c) return Collections.<WordListInfo>emptyList();
if (c.getCount() <= 0 || !c.moveToFirst()) {
return Collections.<String>emptyList();
return Collections.<WordListInfo>emptyList();
final List<String> list = new ArrayList<String>();
try {
final List<WordListInfo> list = new ArrayList<WordListInfo>();
do {
final String id = c.getString(0);
if (TextUtils.isEmpty(id)) continue;
final String wordListId = c.getString(0);
final String wordListLocale = c.getString(1);
if (TextUtils.isEmpty(wordListId)) continue;
list.add(new WordListInfo(wordListId, wordListLocale));
} while (c.moveToNext());
return list;
} catch (Exception e) {
// Just in case we hit a problem in communication with the dictionary pack.
// We don't want to die.
Log.e(TAG, "Exception communicating with the dictionary pack : " + e);
return Collections.<WordListInfo>emptyList();
@ -108,7 +117,7 @@ public class BinaryDictionaryFileDumper {
* to the cache file name designated by its id and locale, overwriting it if already present
* and creating it (and its containing directory) if necessary.
private static AssetFileAddress cacheWordList(final String id, final Locale locale,
private static AssetFileAddress cacheWordList(final String id, final String locale,
final ContentResolver resolver, final Context context) {
@ -213,10 +222,10 @@ public class BinaryDictionaryFileDumper {
public static List<AssetFileAddress> cacheWordListsFromContentProvider(final Locale locale,
final Context context) {
final ContentResolver resolver = context.getContentResolver();
final List<String> idList = getWordListIds(locale, context);
final List<WordListInfo> idList = getWordListWordListInfos(locale, context);
final List<AssetFileAddress> fileAddressList = new ArrayList<AssetFileAddress>();
for (String id : idList) {
final AssetFileAddress afd = cacheWordList(id, locale, resolver, context);
for (WordListInfo id : idList) {
final AssetFileAddress afd = cacheWordList(id.mId, id.mLocale, resolver, context);
if (null != afd) {
@ -108,12 +108,19 @@ class BinaryDictionaryGetter {
return sb.toString();
* Helper method to get the top level cache directory.
private static String getWordListCacheDirectory(final Context context) {
return context.getFilesDir() + File.separator + "dicts";
* Find out the cache directory associated with a specific locale.
private static String getCacheDirectoryForLocale(Locale locale, Context context) {
final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString());
final String absoluteDirectoryName = context.getFilesDir() + File.separator
private static String getCacheDirectoryForLocale(final String locale, final Context context) {
final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
+ relativeDirectoryName;
final File directory = new File(absoluteDirectoryName);
if (!directory.exists()) {
@ -135,11 +142,11 @@ class BinaryDictionaryGetter {
* named like the locale, except it will also escape characters that look dangerous
* to some file systems.
* @param id the id of the dictionary for which to get a file name
* @param locale the locale for which to get the file name
* @param locale the locale for which to get the file name as a string
* @param context the context to use for getting the directory
* @return the name of the file to be created
public static String getCacheFileName(String id, Locale locale, Context context) {
public static String getCacheFileName(String id, String locale, Context context) {
final String fileName = replaceFileNameDangerousCharacters(id);
return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
@ -199,25 +206,53 @@ class BinaryDictionaryGetter {
* Returns the list of cached files for a specific locale.
* @param locale the locale to find the dictionary files for.
* @param context the context on which to open the files upon.
* @return an array of binary dictionary files, which may be empty but may not be null.
* Helper method to get the list of cache directories, one for each distinct locale.
private static File[] getCachedWordLists(final Locale locale,
final Context context) {
final String directoryName = getCacheDirectoryForLocale(locale, context);
final File[] cacheFiles = new File(directoryName).listFiles();
if (null == cacheFiles) return EMPTY_FILE_ARRAY;
return cacheFiles;
private static File[] getCachedDirectoryList(final Context context) {
return new File(getWordListCacheDirectory(context)).listFiles();
* Returns the id of the main dict for a specified locale.
* Returns the list of cached files for a specific locale.
* @param locale the locale to find the dictionary files for, as a string.
* @param context the context on which to open the files upon.
* @return an array of binary dictionary files, which may be empty but may not be null.
private static File[] getCachedWordLists(final String locale,
final Context context) {
final File[] directoryList = getCachedDirectoryList(context);
if (null == directoryList) return EMPTY_FILE_ARRAY;
final ArrayList<File> cacheFiles = new ArrayList<File>();
for (File directory : directoryList) {
if (!directory.isDirectory()) continue;
final String dirLocale = getWordListIdFromFileName(directory.getName());
if (LocaleUtils.isMatch(LocaleUtils.getMatchLevel(dirLocale, locale))) {
final File[] wordLists = directory.listFiles();
if (null != wordLists) {
for (File wordList : wordLists) {
if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
return cacheFiles.toArray(EMPTY_FILE_ARRAY);
* Returns the id associated with the main word list for a specified locale.
* Word lists stored in Android Keyboard's resources are referred to as the "main"
* word lists. Since they can be updated like any other list, we need to assign a
* unique ID to them. This ID is just the name of the language (locale-wise) they
* are for, and this method returns this ID.
private static String getMainDictId(final Locale locale) {
return locale.toString();
// This works because we don't include by default different dictionaries for
// different countries. This actually needs to return the id that we would
// like to use for word lists included in resources, and the following is okay.
return locale.getLanguage().toString();
@ -239,7 +274,7 @@ class BinaryDictionaryGetter {
// storage, but we don't really care about what was copied NOW: what we want is the
// list of everything we ever cached, so we ignore the return value.
BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context);
final File[] cachedWordLists = getCachedWordLists(locale, context);
final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
final String mainDictId = getMainDictId(locale);
Normal file
Normal file
@ -0,0 +1,157 @@
* Copyright (C) 2011 The Android Open Source Project
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
package com.android.inputmethod.latin;
import android.text.TextUtils;
* A class to help with handling Locales in string form.
* This file has the same meaning and features (and shares all of its code) with
* the one in the dictionary pack. They need to be kept synchronized; for any
* update/bugfix to this file, consider also updating/fixing the version in the
* dictionary pack.
public class LocaleUtils {
private final static String TAG = LocaleUtils.class.getSimpleName();
// Locale match level constants.
// A higher level of match is guaranteed to have a higher numerical value.
// Some room is left between constants to add match cases that may become necessary
// in the future, for example differentiating between the case where the countries
// are both present and different, and the case where one of the locales does not
// specify the countries. This difference is not needed now.
// Nothing matches.
public static final int LOCALE_NO_MATCH = 0;
// The languages match, but the countries are different. Or, the reference locale requires a
// country and the tested locale does not have one.
public static final int LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER = 3;
// The languages and country match, but the variants are different. Or, the reference locale
// requires a variant and the tested locale does not have one.
// The required locale is null or empty so it will accept anything, and the tested locale
// is non-null and non-empty.
public static final int LOCALE_ANY_MATCH = 10;
// The language matches, and the tested locale specifies a country but the reference locale
// does not require one.
public static final int LOCALE_LANGUAGE_MATCH = 15;
// The language and the country match, and the tested locale specifies a variant but the
// reference locale does not require one.
public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH = 20;
// The compared locales are fully identical. This is the best match level.
public static final int LOCALE_FULL_MATCH = 30;
// The level at which a match is "normally" considered a locale match with standard algorithms.
// Don't use this directly, use #isMatch to test.
private static final int LOCALE_MATCH = LOCALE_ANY_MATCH;
// Make this match the maximum match level. If this evolves to have more than 2 digits
// when written in base 10, also adjust the getMatchLevelSortedString method.
private static final int MATCH_LEVEL_MAX = 30;
* Return how well a tested locale matches a reference locale.
* This will check the tested locale against the reference locale and return a measure of how
* well it matches the reference. The general idea is that the tested locale has to match
* every specified part of the required locale. A full match occurs when they are equal, a
* partial match when the tested locale agrees with the reference locale but is more specific,
* and a difference when the tested locale does not comply with all requirements from the
* reference locale.
* In more detail, if the reference locale specifies at least a language and the testedLocale
* does not specify one, or specifies a different one, LOCALE_NO_MATCH is returned. If the
* reference locale is empty or null, it will match anything - in the form of LOCALE_FULL_MATCH
* if the tested locale is empty or null, and LOCALE_ANY_MATCH otherwise. If the reference and
* tested locale agree on the language, but not on the country,
* LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER is returned if the reference locale specifies a country,
* and LOCALE_LANGUAGE_MATCH otherwise.
* If they agree on both the language and the country, but not on the variant,
* LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER is returned if the reference locale
* specifies a variant, and LOCALE_LANGUAGE_AND_COUNTRY_MATCH otherwise. If everything matches,
* LOCALE_FULL_MATCH is returned.
* Examples:
* sp_US <=> en_US => LOCALE_NO_MATCH
* de <=> de => LOCALE_FULL_MATCH
* en_US <=> en_US => LOCALE_FULL_MATCH
* "" <=> en_US => LOCALE_ANY_MATCH
* @param referenceLocale the reference locale to test against.
* @param testedLocale the locale to test.
* @return a constant that measures how well the tested locale matches the reference locale.
public static int getMatchLevel(String referenceLocale, String testedLocale) {
// NOTE(review): in this extract some conditions below are not followed by a visible
// consequent and the closing braces are absent; statements appear to have been dropped
// when the page was captured. Confirm against the upstream file before relying on it.
// An empty or null reference accepts anything: it is a full match when the tested locale
// is empty or null as well, and an "any" match otherwise.
if (TextUtils.isEmpty(referenceLocale)) {
return TextUtils.isEmpty(testedLocale) ? LOCALE_FULL_MATCH : LOCALE_ANY_MATCH;
// From here on the reference is non-empty, so a null tested locale cannot match it.
if (null == testedLocale) return LOCALE_NO_MATCH;
// Split each locale on "_" into at most three parts; any further "_" stays in the last part.
String[] referenceParams = referenceLocale.split("_", 3);
String[] testedParams = testedLocale.split("_", 3);
// By spec of String#split, [0] cannot be null and length cannot be 0.
// A differing first part means no match at all.
if (!referenceParams[0].equals(testedParams[0])) return LOCALE_NO_MATCH;
// Dispatch on how many parts the reference locale specifies.
switch (referenceParams.length) {
case 1:
// The reference has a single part: identical if the tested locale also has only one
// part, otherwise the tested locale is merely more specific.
return 1 == testedParams.length ? LOCALE_FULL_MATCH : LOCALE_LANGUAGE_MATCH;
case 2:
// The reference has two parts; a tested locale with only one part cannot satisfy it.
if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
if (!referenceParams[1].equals(testedParams[1]))
if (3 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH;
case 3:
// The reference has three parts; the tested locale is compared part by part.
if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
if (!referenceParams[1].equals(testedParams[1]))
if (2 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER;
if (!referenceParams[2].equals(testedParams[2]))
// It should be impossible to come here
* Return a string that represents this match level, with better matches first.
* The strings are sorted in lexicographic order: a better match will always be less than
* a worse match when compared together.
public static String getMatchLevelSortedString(int matchLevel) {
// The level is subtracted from MATCH_LEVEL_MAX so that a higher (better) level yields a
// smaller number, and "%02d" zero-pads it to two digits so that comparing the resulting
// strings agrees with comparing the numbers.
// This works because the match levels are 0~99 (actually 0~30)
// Ideally this should use a number of digits equal to the log10 of the greatest matchLevel
// NOTE(review): String.format without an explicit Locale formats with the default locale;
// confirm the digits produced are the same on every device configuration.
return String.format("%02d", MATCH_LEVEL_MAX - matchLevel);
* Find out whether a match level should be considered a match.
* This method takes a match level as returned by the #getMatchLevel method, and returns whether
* it should be considered a match in the usual sense with standard Locale functions.
* @param level the match level, as returned by getMatchLevel.
* @return whether this is a match or not.
public static boolean isMatch(int level) {
// A level counts as a match when it is at or above the LOCALE_MATCH threshold.
return LOCALE_MATCH <= level;
Normal file
Normal file
@ -0,0 +1,29 @@
* Copyright (C) 2011 The Android Open Source Project
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
package com.android.inputmethod.latin;
* Information container for a word list.
public class WordListInfo {
// Identifier of the word list, exactly as passed to the constructor.
public final String mId;
// Locale of the word list in string form, exactly as passed to the constructor.
public final String mLocale;
// Stores both arguments as given; no validation or normalization is performed.
public WordListInfo(final String id, final String locale) {
mId = id;
mLocale = locale;
Reference in New Issue
Block a user