Adjust space probability and mustNotAutocorrect

This commit is contained in:
Aleksandras Kostarevas 2023-10-13 18:44:38 +03:00
parent c34a411989
commit 92480fd460
2 changed files with 6 additions and 3 deletions

View File

@ -190,12 +190,14 @@ public class LanguageModel extends Dictionary {
if(!partialWord.isEmpty() && partialWord.trim().equalsIgnoreCase(outStrings[i].trim())) { if(!partialWord.isEmpty() && partialWord.trim().equalsIgnoreCase(outStrings[i].trim())) {
// If this prediction matches the partial word ignoring case, and this is the top // If this prediction matches the partial word ignoring case, and this is the top
// prediction, then we can break. // prediction, then we can break.
// Otherwise, we cannot autocorrect to the top prediction, as it does not match the
// partial word but one of the top ones does.
if(i == 0) { if(i == 0) {
break; break;
} else { } else {
mustNotAutocorrect = true; // Otherwise, we cannot autocorrect to the top prediction unless the model is
// super confident about this
if(outProbabilities[i] * 8.0f >= outProbabilities[0]) {
mustNotAutocorrect = true;
}
} }
} }
} }

View File

@ -126,6 +126,7 @@ struct LanguageModelState {
logits[specialTokens.XBU] = -999.0f; logits[specialTokens.XBU] = -999.0f;
for(int x : specialTokens.SAMPLING_BAD_TOKENS) { for(int x : specialTokens.SAMPLING_BAD_TOKENS) {
logits[specialTokens.SPACE] += std::max(0.0f, logits[x]);
logits[x] = -999.0f; logits[x] = -999.0f;
} }