Merge "Enable Quadgram for personalized dicts."

2024-09-28 14:54:30 +01:00 · 2014-12-03 03:43:49 +00:00 · 2014-12-03 03:43:49 +00:00 · b0063751fc
commit b0063751fc
parent e79d504cf5 60021bbdc2
7 changed files with 13 additions and 10 deletions
--- a/common/src/com/android/inputmethod/latin/common/Constants.java
+++ b/common/src/com/android/inputmethod/latin/common/Constants.java
@@ -179,7 +179,7 @@ public final class Constants {
    // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify
    // MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions.
-    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2;
+    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
    // Key events coming any faster than this are long-presses.
    public static final int LONG_PRESS_MILLISECONDS = 200;
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -275,7 +275,7 @@ static inline void showStackTrace() {
 #define MAX_POINTER_COUNT_G 2
 // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
-#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2
+#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3
 #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
  TypeName() = delete
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
 const char *const HeaderPolicy::DATE_KEY = "date";
 const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
 const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
-        {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"};
+        {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"};
 const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
-        {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"};
+        {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT",
+                "MAX_QUADGRAM_ENTRY_COUNT"};
-const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000};
+const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000};
 const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
 // Historical info is information that is needed to support decaying such as timestamp, level and
 // count.
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp
@@ -19,12 +19,13 @@
 namespace latinime {
 // Used to provide stable probabilities even if the user's input count is small.
-const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2};
+const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1};
 // Encoded backoff weights.
-// Note that we give positive value for trigrams that means the weight is more than 1.
+// Note that we give positive values for trigrams and quadgrams that means the weight is more than
+// 1.
 // TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
-const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8};
+const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8};
 // This value is used to remove too old entries from the dictionary.
 const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h
@@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils {
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
-    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram.");
+    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram.");
    static const int ASSUMED_MIN_COUNTS[];
    static const int ENCODED_BACKOFF_WEIGHTS[];
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h
@@ -27,7 +27,7 @@ namespace latinime {
 // Copyable but immutable
 class EntryCounts final {
 public:
-    EntryCounts() : mEntryCounts({{0, 0, 0}}) {}
+    EntryCounts() : mEntryCounts({{0, 0, 0, 0}}) {}
    explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
            : mEntryCounts(counters) {}
--- a/native/jni/src/utils/ngram_utils.h
+++ b/native/jni/src/utils/ngram_utils.h
@@ -25,6 +25,7 @@ enum class NgramType : int {
    Unigram = 0,
    Bigram = 1,
    Trigram = 2,
+    Quadgram = 3,
    NotANgramType = -1,
 };