LM rescoring WIP

This commit is contained in:
Aleksandras Kostarevas 2024-04-28 21:55:32 -04:00
parent 7d5b12feaf
commit 0b1ad01f1a
4 changed files with 199 additions and 7 deletions

View File

@@ -45,7 +45,7 @@ public class SuggestedWords {
public static final int INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION = 7;
// The maximum number of suggestions available.
public static final int MAX_SUGGESTIONS = 18;
public static final int MAX_SUGGESTIONS = 40;
private static final ArrayList<SuggestedWordInfo> EMPTY_WORD_INFO_LIST = new ArrayList<>(0);
@Nonnull

View File

@@ -8,10 +8,10 @@ import kotlinx.coroutines.newSingleThreadContext
import kotlinx.coroutines.withContext
import org.futo.inputmethod.keyboard.KeyDetector
import org.futo.inputmethod.latin.NgramContext
import org.futo.inputmethod.latin.SuggestedWords
import org.futo.inputmethod.latin.SuggestedWords.SuggestedWordInfo
import org.futo.inputmethod.latin.common.ComposedData
import org.futo.inputmethod.latin.settings.SettingsValuesForSuggestion
import org.futo.inputmethod.latin.xlm.BatchInputConverter.convertToString
import java.util.Arrays
import java.util.Locale
@@ -53,7 +53,7 @@ class LanguageModel(
val yCoords: IntArray
var inputMode = 0
if (isGesture) {
Log.w("LanguageModel", "Using experimental gesture support")
/*Log.w("LanguageModel", "Using experimental gesture support")
inputMode = 1
val xCoordsList = mutableListOf<Int>()
val yCoordsList = mutableListOf<Int>()
@@ -69,7 +69,16 @@ class LanguageModel(
xCoords = IntArray(xCoordsList.size)
yCoords = IntArray(yCoordsList.size)
for (i in xCoordsList.indices) xCoords[i] = xCoordsList[i]
for (i in yCoordsList.indices) yCoords[i] = yCoordsList[i]
for (i in yCoordsList.indices) yCoords[i] = yCoordsList[i]*/
partialWord = ""
xCoords = IntArray(composedData.mInputPointers.pointerSize)
yCoords = IntArray(composedData.mInputPointers.pointerSize)
val xCoordsI = composedData.mInputPointers.xCoordinates
val yCoordsI = composedData.mInputPointers.yCoordinates
for (i in 0 until composedData.mInputPointers.pointerSize) xCoords[i] = xCoordsI[i]
for (i in 0 until composedData.mInputPointers.pointerSize) yCoords[i] = yCoordsI[i]
} else {
xCoords = IntArray(composedData.mInputPointers.pointerSize)
yCoords = IntArray(composedData.mInputPointers.pointerSize)
@@ -176,6 +185,57 @@ class LanguageModel(
return context
}
/**
 * Re-ranks an existing suggestion list using the transformer language model.
 *
 * The native layer receives the assembled context string plus every candidate
 * word and its current score, and writes one rescored value per candidate into
 * the output array. Fresh [SuggestedWordInfo] objects carrying the new scores
 * are returned, sorted best-first.
 *
 * @param suggestedWords the decoder's current suggestions to be re-ranked
 * @param composedData current composition state (used to build compose info)
 * @param ngramContext preceding-word context for the context string
 * @param keyDetector keyboard geometry used by [getComposeInfo]
 * @param personalDictionary user words appended to the LM context
 * @return rescored suggestions ordered by descending score, or null when the
 *         native model state is not loaded yet.
 */
suspend fun rescoreSuggestions(
    suggestedWords: SuggestedWords,
    composedData: ComposedData,
    ngramContext: NgramContext,
    keyDetector: KeyDetector,
    personalDictionary: List<String>,
): List<SuggestedWordInfo>? = withContext(LanguageModelScope) {
    if (mNativeState == 0L) {
        // Kick off model loading, but skip rescoring for this request.
        loadModel()
        Log.d("LanguageModel", "Exiting because mNativeState == 0")
        return@withContext null
    }

    val rawComposeInfo = getComposeInfo(composedData, keyDetector)
    val rawContext = getContext(rawComposeInfo, ngramContext)

    // NOTE(review): the safeguarded compose info is never read after this
    // point; only the context string is handed to the native rescorer.
    safeguardComposeInfo(rawComposeInfo)
    val context = addPersonalDictionary(safeguardContext(rawContext), personalDictionary)

    val candidates = suggestedWords.mSuggestedWordInfoList
    val wordStrings = candidates.map { it.mWord }.toTypedArray()
    val scoresIn = IntArray(candidates.size) { candidates[it].mScore }
    val scoresOut = IntArray(candidates.size)

    rescoreSuggestionsNative(
        mNativeState,
        context,
        wordStrings,
        scoresIn,
        scoresOut
    )

    candidates.mapIndexed { i, info ->
        Log.i("LanguageModel", "Suggestion [${info.word}] reweighted, from ${info.mScore} to ${scoresOut[i]}")
        SuggestedWordInfo(
            info.word,
            info.mPrevWordsContext,
            scoresOut[i],
            info.mKindAndFlags,
            info.mSourceDict,
            info.mIndexOfTouchPointOfSecondWord,
            info.mAutoCommitFirstWordConfidence
        )
    }.sortedByDescending { it.mScore }
}
suspend fun getSuggestions(
composedData: ComposedData,
ngramContext: NgramContext,
@@ -320,4 +380,14 @@ class LanguageModel(
outStrings: Array<String?>,
outProbs: FloatArray
)
/**
 * JNI bridge to `xlm_LanguageModel_rescoreSuggestions` (signature
 * `(JLjava/lang/String;[Ljava/lang/String;[I[I)V`).
 *
 * @param state native LanguageModelState pointer (0 means not loaded)
 * @param context context string fed to the LM before scoring
 * @param inSuggestedWords candidate words, parallel to [inSuggestedScores]
 * @param inSuggestedScores original decoder scores, one per candidate
 * @param outSuggestedScores output buffer; receives one rescored value per
 *        candidate, written in the same order as the inputs
 */
private external fun rescoreSuggestionsNative(
    state: Long,
    context: String,
    inSuggestedWords: Array<String>,
    inSuggestedScores: IntArray,
    outSuggestedScores: IntArray
)
}

View File

@@ -202,7 +202,7 @@ public class LanguageModelFacilitator(
val autocorrectThreshold = context.getSetting(AutocorrectThresholdSetting)
return languageModel!!.getSuggestions(
return languageModel?.getSuggestions(
values.composedData,
values.ngramContext,
keyboardSwitcher.mainKeyboardView.mKeyDetector,
@@ -250,10 +250,42 @@ public class LanguageModelFacilitator(
if(lmSuggestions == null) {
holder.get(null, Constants.GET_SUGGESTED_WORDS_TIMEOUT.toLong())?.let { results ->
job.cancel()
inputLogic.mSuggestionStripViewAccessor.showSuggestionStrip(results)
val useRescoring = false
val finalResults = if(useRescoring && values.composedData.mIsBatchMode) {
val rescored = languageModel?.rescoreSuggestions(
results,
values.composedData,
values.ngramContext,
keyboardSwitcher.mainKeyboardView.mKeyDetector,
userDictionary.getWords().map { it.word }
)
if(rescored != null) {
SuggestedWords(
ArrayList(rescored),
// TODO: These should ideally not be null/false
null,
null,
false,
false,
false,
results.mInputStyle,
results.mSequenceNumber
)
// TODO: We need the swapping rejection thing, the rescored array is resorted without the swapping
} else {
results
}
} else {
results
}
inputLogic.mSuggestionStripViewAccessor.showSuggestionStrip(finalResults)
if(values.composedData.mIsBatchMode) {
inputLogic.showBatchSuggestions(results, values.inputStyle == SuggestedWords.INPUT_STYLE_TAIL_BATCH);
inputLogic.showBatchSuggestions(finalResults, values.inputStyle == SuggestedWords.INPUT_STYLE_TAIL_BATCH);
}
sequenceIdFinishedFlow.emit(values.sequenceId)

View File

@@ -841,6 +841,16 @@ struct LanguageModelState {
}
};
// A single suggestion candidate being rescored by the language model.
// Field order matters: instances are built with aggregate initialization.
struct SuggestionItemToRescore {
    int index;               // Position in the incoming Java arrays; used to write the output score back to the matching slot.
    int originalScore;       // Raw score received from the traditional decoder.
    float transformedScore;  // Score normalized against the batch min/max, then reweighted by the LM.
    std::string word;        // The candidate word itself.
    token_sequence tokens;   // LM tokenization of the trimmed word plus a trailing space.
};
namespace latinime {
static jlong xlm_LanguageModel_open(JNIEnv *env, jclass clazz, jstring modelDir) {
AKLOGI("open LM");
@@ -871,6 +881,81 @@ namespace latinime {
delete state;
}
// (JLjava/lang/String;[Ljava/lang/String;[I[I)V
// TODO: This will also need caching to not make things extremely slow by recomputing every time
//
// Re-scores decoder suggestions with the LM. The incoming scores are
// normalized against their own min/max, multiplied by the LM's first-token
// probability (length-penalized), and mapped back into the original score
// range before being written to outScores (same order as the inputs).
static void xlm_LanguageModel_rescoreSuggestions(JNIEnv *env, jclass clazz,
        jlong dict,
        jstring context,
        jobjectArray inWords,
        jintArray inScores,
        jintArray outScores
) {
    LanguageModelState *state = reinterpret_cast<LanguageModelState *>(dict);
    std::string contextString = jstring2string(env, context);

    jsize inputSize = env->GetArrayLength(inScores);
    jsize numWords = env->GetArrayLength(inWords);
    // Defensive: both arrays describe the same candidates; never read scores[]
    // past its end if the caller passed mismatched lengths.
    if (numWords > inputSize) numWords = inputSize;

    // std::vector instead of a VLA (`int scores[inputSize]` is a non-standard
    // GCC extension, and inputSize is runtime-determined).
    std::vector<jint> scores(inputSize);
    env->GetIntArrayRegion(inScores, 0, inputSize, scores.data());

    float maxScore = -INFINITY;
    float minScore = INFINITY;
    for (jint score : scores) {
        if (score > maxScore) maxScore = (float) score;
        if (score < minScore) minScore = (float) score;
    }

    // If there are no scores, or all scores are equal, the normalization below
    // would divide by zero (previously producing inf/NaN garbage). Pass the
    // input scores through unchanged instead.
    if (!(maxScore > minScore)) {
        env->SetIntArrayRegion(outScores, 0, inputSize, scores.data());
        return;
    }

    // Stretch the range downward so the worst candidate still normalizes to a
    // positive value rather than exactly zero.
    minScore -= (maxScore - minScore) * 0.33f;

    std::vector<SuggestionItemToRescore> words;
    words.reserve(numWords);
    for (jsize i = 0; i < numWords; i++) {
        jstring jstr = static_cast<jstring>(env->GetObjectArrayElement(inWords, i));
        SuggestionItemToRescore item = {
            (int) i,
            scores[i],
            ((float) scores[i] - minScore) / (maxScore - minScore),
            jstring2string(env, jstr),
            {}
        };
        item.tokens = state->model->tokenize(trim(item.word) + " ");
        words.push_back(item);
        // Release the local ref eagerly so large batches cannot exhaust the
        // JNI local reference table.
        env->DeleteLocalRef(jstr);
    }

    // TODO: Transform here
    llama_context *ctx = ((LlamaAdapter *) state->model->adapter)->context;
    size_t n_vocab = llama_n_vocab(llama_get_model(ctx));

    token_sequence next_context = state->model->tokenize(trim(contextString) + " ");
    next_context.insert(next_context.begin(), 1); // BOS

    auto decoding_result = state->DecodePromptAndMixes(next_context, { });
    float *logits = llama_get_logits_ith(ctx, decoding_result.logits_head);
    softmax(logits, n_vocab);

    AKLOGI("Iter");
    for (auto &entry : words) {
        // Guard: tokenize() could conceivably return nothing; indexing
        // tokens[0] would then be out of bounds.
        if (entry.tokens.empty()) continue;
        // First-token probability divided by token count as a crude length penalty.
        float pseudoScore = logits[entry.tokens[0]] / (float) entry.tokens.size();
        // Cast size() to int to match the %d format specifier (size_t with %d
        // is undefined behavior on LP64).
        AKLOGI("Word [%s], %d tokens, prob[0] = %.8f", entry.word.c_str(), (int) entry.tokens.size(), pseudoScore);
        entry.transformedScore *= pseudoScore * 1000.0f;
    }

    // TODO: Transform here

    // Map the transformed scores back into the original score range and write
    // each one to the slot its candidate came from.
    jint *outArray = env->GetIntArrayElements(outScores, nullptr);
    for (const auto &entry : words) {
        outArray[entry.index] = (jint) (entry.transformedScore * (maxScore - minScore) + minScore);
    }
    env->ReleaseIntArrayElements(outScores, outArray, 0);
}
static void xlm_LanguageModel_getSuggestions(JNIEnv *env, jclass clazz,
// inputs
jlong dict,
@@ -1103,6 +1188,11 @@ namespace latinime {
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJLjava/lang/String;Ljava/lang/String;I[I[IF[Ljava/lang/String;[Ljava/lang/String;[F)V"),
reinterpret_cast<void *>(xlm_LanguageModel_getSuggestions)
},
{
const_cast<char *>("rescoreSuggestionsNative"),
const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[I[I)V"),
reinterpret_cast<void *>(xlm_LanguageModel_rescoreSuggestions)
}
};