Merge "Enable Quadgram for personalized dicts."

This commit is contained in:
Keisuke Kuroyanagi 2014-12-03 03:43:49 +00:00 committed by Android (Google) Code Review
commit b0063751fc
7 changed files with 13 additions and 10 deletions

View File

@ -179,7 +179,7 @@ public final class Constants {
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify
// MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions. // MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions.
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2; public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
// Key events coming any faster than this are long-presses. // Key events coming any faster than this are long-presses.
public static final int LONG_PRESS_MILLISECONDS = 200; public static final int LONG_PRESS_MILLISECONDS = 200;

View File

@ -275,7 +275,7 @@ static inline void showStackTrace() {
#define MAX_POINTER_COUNT_G 2 #define MAX_POINTER_COUNT_G 2
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported. // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2 #define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName() = delete TypeName() = delete

View File

@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::DATE_KEY = "date"; const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] = const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
{"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"}; {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"};
const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] = const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
{"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"}; {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT",
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000}; "MAX_QUADGRAM_ENTRY_COUNT"};
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000};
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
// Historical info is information that is needed to support decaying such as timestamp, level and // Historical info is information that is needed to support decaying such as timestamp, level and
// count. // count.

View File

@ -19,12 +19,13 @@
namespace latinime { namespace latinime {
// Used to provide stable probabilities even if the user's input count is small. // Used to provide stable probabilities even if the user's input count is small.
const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2}; const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1};
// Encoded backoff weights. // Encoded backoff weights.
// Note that we give positive value for trigrams that means the weight is more than 1. // Note that we give positive values for trigrams and quadgrams that means the weight is more than
// 1.
// TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight. // TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8}; const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8};
// This value is used to remove too old entries from the dictionary. // This value is used to remove too old entries from the dictionary.
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS = const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =

View File

@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram."); static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram.");
static const int ASSUMED_MIN_COUNTS[]; static const int ASSUMED_MIN_COUNTS[];
static const int ENCODED_BACKOFF_WEIGHTS[]; static const int ENCODED_BACKOFF_WEIGHTS[];

View File

@ -27,7 +27,7 @@ namespace latinime {
// Copyable but immutable // Copyable but immutable
class EntryCounts final { class EntryCounts final {
public: public:
EntryCounts() : mEntryCounts({{0, 0, 0}}) {} EntryCounts() : mEntryCounts({{0, 0, 0, 0}}) {}
explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters) explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
: mEntryCounts(counters) {} : mEntryCounts(counters) {}

View File

@ -25,6 +25,7 @@ enum class NgramType : int {
Unigram = 0, Unigram = 0,
Bigram = 1, Bigram = 1,
Trigram = 2, Trigram = 2,
Quadgram = 3,
NotANgramType = -1, NotANgramType = -1,
}; };