mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Merge "Enable Quadgram for personalized dicts."
This commit is contained in:
commit
b0063751fc
@ -179,7 +179,7 @@ public final class Constants {
|
|||||||
|
|
||||||
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify
|
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify
|
||||||
// MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions.
|
// MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions.
|
||||||
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2;
|
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
|
||||||
|
|
||||||
// Key events coming any faster than this are long-presses.
|
// Key events coming any faster than this are long-presses.
|
||||||
public static final int LONG_PRESS_MILLISECONDS = 200;
|
public static final int LONG_PRESS_MILLISECONDS = 200;
|
||||||
|
@ -275,7 +275,7 @@ static inline void showStackTrace() {
|
|||||||
#define MAX_POINTER_COUNT_G 2
|
#define MAX_POINTER_COUNT_G 2
|
||||||
|
|
||||||
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
|
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
|
||||||
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2
|
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3
|
||||||
|
|
||||||
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
|
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
|
||||||
TypeName() = delete
|
TypeName() = delete
|
||||||
|
@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
|
|||||||
const char *const HeaderPolicy::DATE_KEY = "date";
|
const char *const HeaderPolicy::DATE_KEY = "date";
|
||||||
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
|
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
|
||||||
const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
|
const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
|
||||||
{"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"};
|
{"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"};
|
||||||
const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
|
const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
|
||||||
{"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"};
|
{"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT",
|
||||||
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000};
|
"MAX_QUADGRAM_ENTRY_COUNT"};
|
||||||
|
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000};
|
||||||
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
|
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
|
||||||
// Historical info is information that is needed to support decaying such as timestamp, level and
|
// Historical info is information that is needed to support decaying such as timestamp, level and
|
||||||
// count.
|
// count.
|
||||||
|
@ -19,12 +19,13 @@
|
|||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// Used to provide stable probabilities even if the user's input count is small.
|
// Used to provide stable probabilities even if the user's input count is small.
|
||||||
const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2};
|
const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1};
|
||||||
|
|
||||||
// Encoded backoff weights.
|
// Encoded backoff weights.
|
||||||
// Note that we give positive value for trigrams that means the weight is more than 1.
|
// Note that we give positive values for trigrams and quadgrams that means the weight is more than
|
||||||
|
// 1.
|
||||||
// TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
|
// TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
|
||||||
const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8};
|
const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8};
|
||||||
|
|
||||||
// This value is used to remove too old entries from the dictionary.
|
// This value is used to remove too old entries from the dictionary.
|
||||||
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =
|
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =
|
||||||
|
@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils {
|
|||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);
|
||||||
|
|
||||||
static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram.");
|
static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram.");
|
||||||
|
|
||||||
static const int ASSUMED_MIN_COUNTS[];
|
static const int ASSUMED_MIN_COUNTS[];
|
||||||
static const int ENCODED_BACKOFF_WEIGHTS[];
|
static const int ENCODED_BACKOFF_WEIGHTS[];
|
||||||
|
@ -27,7 +27,7 @@ namespace latinime {
|
|||||||
// Copyable but immutable
|
// Copyable but immutable
|
||||||
class EntryCounts final {
|
class EntryCounts final {
|
||||||
public:
|
public:
|
||||||
EntryCounts() : mEntryCounts({{0, 0, 0}}) {}
|
EntryCounts() : mEntryCounts({{0, 0, 0, 0}}) {}
|
||||||
|
|
||||||
explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
|
explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
|
||||||
: mEntryCounts(counters) {}
|
: mEntryCounts(counters) {}
|
||||||
|
@ -25,6 +25,7 @@ enum class NgramType : int {
|
|||||||
Unigram = 0,
|
Unigram = 0,
|
||||||
Bigram = 1,
|
Bigram = 1,
|
||||||
Trigram = 2,
|
Trigram = 2,
|
||||||
|
Quadgram = 3,
|
||||||
NotANgramType = -1,
|
NotANgramType = -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user