From 7f656bb622c8acfbae26417e9417d30703e28b37 Mon Sep 17 00:00:00 2001 From: Aleksandras Kostarevas Date: Fri, 1 Sep 2023 08:51:42 +0300 Subject: [PATCH] Load voice input settings instead of hardcoding --- .../futo/inputmethod/latin/uix/Settings.kt | 17 ++ .../latin/uix/VoiceInputSettingKeys.kt | 45 ++++ .../latin/uix/actions/VoiceInputAction.kt | 237 ++++++++++++------ .../futo/voiceinput/shared/AudioRecognizer.kt | 12 +- .../futo/voiceinput/shared/RecognizerView.kt | 27 +- .../futo/voiceinput/shared/types/Tokens.kt | 4 - .../futo/voiceinput/shared/util/ArrayUtils.kt | 4 - .../shared/util/AudioFeatureExtraction.kt | 10 +- .../futo/voiceinput/shared/util/Settings.kt | 58 ----- .../voiceinput/shared/whisper/Tokenizer.kt | 22 +- 10 files changed, 246 insertions(+), 190 deletions(-) create mode 100644 java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt delete mode 100644 voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/Settings.kt diff --git a/java/src/org/futo/inputmethod/latin/uix/Settings.kt b/java/src/org/futo/inputmethod/latin/uix/Settings.kt index 90efce28e..a91cba25f 100644 --- a/java/src/org/futo/inputmethod/latin/uix/Settings.kt +++ b/java/src/org/futo/inputmethod/latin/uix/Settings.kt @@ -69,5 +69,22 @@ fun LifecycleOwner.deferSetSetting(key: Preferences.Key, value: T): Job { } } +data class SettingsKey( + val key: Preferences.Key, + val default: T +) + +suspend fun Context.getSetting(key: SettingsKey): T { + val valueFlow: Flow = + this.dataStore.data.map { preferences -> preferences[key.key] ?: key.default }.take(1) + + return valueFlow.first() +} + +suspend fun Context.setSetting(key: SettingsKey, value: T) { + this.dataStore.edit { preferences -> + preferences[key.key] = value + } +} val THEME_KEY = stringPreferencesKey("activeThemeOption") \ No newline at end of file diff --git a/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt new file mode 100644 index 000000000..facbc03e9 --- /dev/null +++ b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt @@ -0,0 +1,45 @@ +package org.futo.inputmethod.latin.uix + +import androidx.datastore.preferences.core.booleanPreferencesKey +import androidx.datastore.preferences.core.intPreferencesKey +import androidx.datastore.preferences.core.stringSetPreferencesKey + +val ENABLE_SOUND = SettingsKey( + key = booleanPreferencesKey("enable_sounds"), + default = true +) + +val VERBOSE_PROGRESS = SettingsKey( + key = booleanPreferencesKey("verbose_progress"), + default = false +) + +val ENABLE_ENGLISH = SettingsKey( + key = booleanPreferencesKey("enable_english"), + default = true +) + +val ENABLE_MULTILINGUAL = SettingsKey( + key = booleanPreferencesKey("enable_multilingual"), + default = false +) + +val DISALLOW_SYMBOLS = SettingsKey( + key = booleanPreferencesKey("disallow_symbols"), + default = true +) + +val ENGLISH_MODEL_INDEX = SettingsKey( + key = intPreferencesKey("english_model_index"), + default = 0 +) + +val MULTILINGUAL_MODEL_INDEX = SettingsKey( + key = intPreferencesKey("multilingual_model_index"), + default = 1 +) + +val LANGUAGE_TOGGLES = SettingsKey( + key = stringSetPreferencesKey("enabled_languages"), + default = setOf() +) \ No newline at end of file diff --git a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt index ce1380822..45ca031eb 100644 --- a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt +++ b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt @@ -3,8 +3,6 @@ package org.futo.inputmethod.latin.uix.actions import androidx.compose.foundation.clickable import androidx.compose.foundation.interaction.MutableInteractionSource import androidx.compose.foundation.layout.Box -import androidx.compose.foundation.layout.Column -import androidx.compose.foundation.layout.ColumnScope import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.runtime.Composable import androidx.compose.runtime.MutableState @@ -13,17 +11,40 @@ import androidx.compose.runtime.remember import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.res.stringResource +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import kotlinx.coroutines.yield import org.futo.inputmethod.latin.R import org.futo.inputmethod.latin.uix.Action import org.futo.inputmethod.latin.uix.ActionInputTransaction import org.futo.inputmethod.latin.uix.ActionWindow +import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS +import org.futo.inputmethod.latin.uix.ENABLE_ENGLISH +import org.futo.inputmethod.latin.uix.ENABLE_MULTILINGUAL +import org.futo.inputmethod.latin.uix.ENABLE_SOUND +import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX import org.futo.inputmethod.latin.uix.KeyboardManagerForAction +import org.futo.inputmethod.latin.uix.LANGUAGE_TOGGLES +import org.futo.inputmethod.latin.uix.MULTILINGUAL_MODEL_INDEX import org.futo.inputmethod.latin.uix.PersistentActionState +import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS +import org.futo.inputmethod.latin.uix.getSetting +import org.futo.voiceinput.shared.ENGLISH_MODELS +import org.futo.voiceinput.shared.MULTILINGUAL_MODELS +import org.futo.voiceinput.shared.ModelDoesNotExistException import org.futo.voiceinput.shared.RecognizerView import org.futo.voiceinput.shared.RecognizerViewListener import org.futo.voiceinput.shared.RecognizerViewSettings import org.futo.voiceinput.shared.SoundPlayer +import org.futo.voiceinput.shared.types.Language +import org.futo.voiceinput.shared.types.ModelLoader +import org.futo.voiceinput.shared.types.getLanguageFromWhisperString +import org.futo.voiceinput.shared.whisper.DecodingConfiguration import org.futo.voiceinput.shared.whisper.ModelManager +import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration val SystemVoiceInputAction = Action( icon = R.drawable.mic_fill, @@ -44,93 +65,157 @@ class VoiceInputPersistentState(val manager: KeyboardManagerForAction) : Persist modelManager.cleanUp() } } -val VoiceInputAction = Action( - icon = R.drawable.mic_fill, - name = R.string.voice_input_action_title, - simplePressImpl = null, - persistentState = { VoiceInputPersistentState(it) }, - windowImpl = { manager, persistentState -> - val state = persistentState as VoiceInputPersistentState - object : ActionWindow, RecognizerViewListener { - private val recognizerView = RecognizerView( +private class VoiceInputActionWindow( + val manager: KeyboardManagerForAction, val state: VoiceInputPersistentState +) : ActionWindow, RecognizerViewListener { + val context = manager.getContext() + + private var shouldPlaySounds: Boolean = false + private suspend fun loadSettings(): RecognizerViewSettings = coroutineScope { + val enableSound = async { context.getSetting(ENABLE_SOUND) } + val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) } + val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) } + val enableEnglish = async { context.getSetting(ENABLE_ENGLISH) } + val englishModelIdx = async { context.getSetting(ENGLISH_MODEL_INDEX) } + val enableMultilingual = async { context.getSetting(ENABLE_MULTILINGUAL) } + val multilingualModelIdx = async { context.getSetting(MULTILINGUAL_MODEL_INDEX) } + val allowedLanguages = async { + context.getSetting(LANGUAGE_TOGGLES).mapNotNull { getLanguageFromWhisperString(it) } + .toSet() + } + + val primaryModel = if (enableMultilingual.await()) { + MULTILINGUAL_MODELS[multilingualModelIdx.await()] + } else { + ENGLISH_MODELS[englishModelIdx.await()] + } + + val languageSpecificModels = mutableMapOf() + if (enableEnglish.await()) { + languageSpecificModels[Language.English] = ENGLISH_MODELS[englishModelIdx.await()] + } + + shouldPlaySounds = enableSound.await() + + return@coroutineScope RecognizerViewSettings( + shouldShowInlinePartialResult = false, + shouldShowVerboseFeedback = verboseFeedback.await(), + modelRunConfiguration = MultiModelRunConfiguration( + primaryModel = primaryModel, languageSpecificModels = languageSpecificModels + ), + decodingConfiguration = DecodingConfiguration( + languages = allowedLanguages.await(), suppressSymbols = disallowSymbols.await() + ) + ) + } + + private var recognizerView: MutableState = mutableStateOf(null) + + private val initJob = manager.getLifecycleScope().launch { + yield() + val settings = withContext(Dispatchers.IO) { + loadSettings() + } + + yield() + val recognizerView = try { + RecognizerView( context = manager.getContext(), - listener = this, - settings = RecognizerViewSettings( - shouldShowInlinePartialResult = false, - shouldShowVerboseFeedback = true - ), + listener = this@VoiceInputActionWindow, + settings = settings, lifecycleScope = manager.getLifecycleScope(), modelManager = state.modelManager ) + } catch(e: ModelDoesNotExistException) { + // TODO: Show an error to the user, with an option to download + close() + return@launch + } - init { - recognizerView.reset() - recognizerView.start() - } + this@VoiceInputActionWindow.recognizerView.value = recognizerView - private var inputTransaction: ActionInputTransaction? = null - private fun getOrStartInputTransaction(): ActionInputTransaction { - if(inputTransaction == null) { - inputTransaction = manager.createInputTransaction(true) - } + yield() + recognizerView.reset() - return inputTransaction!! - } + yield() + recognizerView.start() + } - @Composable - override fun windowName(): String { - return stringResource(R.string.voice_input_action_title) - } + private var inputTransaction: ActionInputTransaction? = null + private fun getOrStartInputTransaction(): ActionInputTransaction { + if (inputTransaction == null) { + inputTransaction = manager.createInputTransaction(true) + } - @Composable - override fun WindowContents() { - Box(modifier = Modifier - .fillMaxSize() - .clickable( - enabled = true, - onClickLabel = null, - onClick = { recognizerView.finish() }, - role = null, - indication = null, - interactionSource = remember { MutableInteractionSource() } - )) { - Box(modifier = Modifier.align(Alignment.Center)) { - recognizerView.Content() - } - } - } + return inputTransaction!! + } - override fun close() { - recognizerView.cancel() - } + @Composable + override fun windowName(): String { + return stringResource(R.string.voice_input_action_title) + } - private var wasFinished = false - override fun cancelled() { - if(!wasFinished) { - state.soundPlayer.playCancelSound() - getOrStartInputTransaction().cancel() - } - } - - override fun recordingStarted() { - state.soundPlayer.playStartSound() - } - - override fun finished(result: String) { - wasFinished = true - - getOrStartInputTransaction().commit(result) - manager.closeActionWindow() - } - - override fun partialResult(result: String) { - getOrStartInputTransaction().updatePartial(result) - } - - override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean { - return false + @Composable + override fun WindowContents() { + Box(modifier = Modifier + .fillMaxSize() + .clickable(enabled = true, + onClickLabel = null, + onClick = { recognizerView.value?.finish() }, + role = null, + indication = null, + interactionSource = remember { MutableInteractionSource() })) { + Box(modifier = Modifier.align(Alignment.Center)) { + recognizerView.value?.Content() } } } + + override fun close() { + initJob.cancel() + recognizerView.value?.cancel() + } + + private var wasFinished = false + override fun cancelled() { + if (!wasFinished) { + if (shouldPlaySounds) { + state.soundPlayer.playCancelSound() + } + getOrStartInputTransaction().cancel() + } + } + + override fun recordingStarted() { + if (shouldPlaySounds) { + state.soundPlayer.playStartSound() + } + } + + override fun finished(result: String) { + wasFinished = true + + getOrStartInputTransaction().commit(result) + manager.closeActionWindow() + } + + override fun partialResult(result: String) { + getOrStartInputTransaction().updatePartial(result) + } + + override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean { + return false + } +} + +val VoiceInputAction = Action(icon = R.drawable.mic_fill, + name = R.string.voice_input_action_title, + simplePressImpl = null, + persistentState = { VoiceInputPersistentState(it) }, + windowImpl = { manager, persistentState -> + VoiceInputActionWindow( + manager = manager, state = persistentState as VoiceInputPersistentState + ) + } ) \ No newline at end of file diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt index e27866ed9..fc0730515 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt @@ -21,9 +21,7 @@ import com.konovalov.vad.config.SampleRate import com.konovalov.vad.models.VadModel import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job -import kotlinx.coroutines.cancelAndJoin import kotlinx.coroutines.launch -import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext import kotlinx.coroutines.yield import org.futo.voiceinput.shared.types.AudioRecognizerListener @@ -51,11 +49,11 @@ data class AudioRecognizerSettings( class ModelDoesNotExistException(val models: List) : Throwable() class AudioRecognizer( - val context: Context, - val lifecycleScope: LifecycleCoroutineScope, - val modelManager: ModelManager, - val listener: AudioRecognizerListener, - val settings: AudioRecognizerSettings + private val context: Context, + private val lifecycleScope: LifecycleCoroutineScope, + modelManager: ModelManager, + private val listener: AudioRecognizerListener, + private val settings: AudioRecognizerSettings ) { private var isRecording = false private var recorder: AudioRecord? = null diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt index d37b9be18..fdec4a270 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt @@ -1,15 +1,10 @@ package org.futo.voiceinput.shared import android.content.Context -import android.media.AudioAttributes -import android.media.AudioAttributes.CONTENT_TYPE_SONIFICATION -import android.media.AudioAttributes.USAGE_ASSISTANCE_SONIFICATION -import android.media.SoundPool import androidx.compose.foundation.layout.Column import androidx.compose.runtime.Composable import androidx.compose.runtime.mutableStateOf import androidx.lifecycle.LifecycleCoroutineScope -import kotlinx.coroutines.launch import org.futo.voiceinput.shared.types.AudioRecognizerListener import org.futo.voiceinput.shared.types.InferenceState import org.futo.voiceinput.shared.types.Language @@ -18,16 +13,16 @@ import org.futo.voiceinput.shared.ui.InnerRecognize import org.futo.voiceinput.shared.ui.PartialDecodingResult import org.futo.voiceinput.shared.ui.RecognizeLoadingCircle import org.futo.voiceinput.shared.ui.RecognizeMicError -import org.futo.voiceinput.shared.util.ENABLE_SOUND -import org.futo.voiceinput.shared.util.VERBOSE_PROGRESS -import org.futo.voiceinput.shared.util.ValueFromSettings import org.futo.voiceinput.shared.whisper.DecodingConfiguration import org.futo.voiceinput.shared.whisper.ModelManager import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration data class RecognizerViewSettings( val shouldShowVerboseFeedback: Boolean, - val shouldShowInlinePartialResult: Boolean + val shouldShowInlinePartialResult: Boolean, + + val modelRunConfiguration: MultiModelRunConfiguration, + val decodingConfiguration: DecodingConfiguration ) private val VerboseAnnotations = hashMapOf( @@ -192,14 +187,14 @@ class RecognizerView( } } - // TODO: Dummy settings, should get them from constructor private val recognizer: AudioRecognizer = AudioRecognizer( - context, lifecycleScope, modelManager, audioRecognizerListener, AudioRecognizerSettings( - modelRunConfiguration = MultiModelRunConfiguration( - primaryModel = ENGLISH_MODELS[0], languageSpecificModels = mapOf() - ), decodingConfiguration = DecodingConfiguration( - languages = setOf(), suppressSymbols = true - ) + context = context, + lifecycleScope = lifecycleScope, + modelManager = modelManager, + listener = audioRecognizerListener, + settings = AudioRecognizerSettings( + modelRunConfiguration = settings.modelRunConfiguration, + decodingConfiguration = settings.decodingConfiguration ) ) diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/Tokens.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/Tokens.kt index f318977ff..67fb7bcc1 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/Tokens.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/Tokens.kt @@ -2,10 +2,6 @@ package org.futo.voiceinput.shared.types import org.futo.voiceinput.shared.whisper.stringifyUnicode -enum class SpecialTokenKind { - StartOfTranscript, EndOfText, Translate, Transcribe, NoCaptions, NoTimestamps, -} - // Based on https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L236 private val SYMBOLS = "#()*+/:;<=>@[\\]^_`{|}~「」『』".chunked(1) + listOf( "<<", diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/ArrayUtils.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/ArrayUtils.kt index 75bca53dd..d17f2d618 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/ArrayUtils.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/ArrayUtils.kt @@ -12,10 +12,6 @@ fun Array.shape(): IntArray { return arrayOf(size, this[0].size).toIntArray() } -fun DoubleArray.toFloatArray(): FloatArray { - return this.map { it.toFloat() }.toFloatArray() -} - fun FloatArray.toDoubleArray(): DoubleArray { return this.map { it.toDouble() }.toDoubleArray() } diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/AudioFeatureExtraction.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/AudioFeatureExtraction.kt index 7212967c5..7b007341c 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/AudioFeatureExtraction.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/AudioFeatureExtraction.kt @@ -69,10 +69,6 @@ fun melToFreq(mels: DoubleArray, melScale: MelScale): DoubleArray { return mels.map { melToFreq(it, melScale) }.toDoubleArray() } -fun freqToMel(freqs: DoubleArray, melScale: MelScale): DoubleArray { - return freqs.map { freqToMel(it, melScale) }.toDoubleArray() -} - fun linspace(min: Double, max: Double, num: Int): DoubleArray { val array = DoubleArray(num) val spacing = (max - min) / ((num - 1).toDouble()) @@ -170,11 +166,11 @@ fun melFilterBank( fun padY(yValues: DoubleArray, nFFT: Int): DoubleArray { val ypad = DoubleArray(nFFT + yValues.size) for (i in 0 until nFFT / 2) { - ypad[nFFT / 2 - i - 1] = yValues[i + 1].toDouble() - ypad[nFFT / 2 + yValues.size + i] = yValues[yValues.size - 2 - i].toDouble() + ypad[nFFT / 2 - i - 1] = yValues[i + 1] + ypad[nFFT / 2 + yValues.size + i] = yValues[yValues.size - 2 - i] } for (j in yValues.indices) { - ypad[nFFT / 2 + j] = yValues[j].toDouble() + ypad[nFFT / 2 + j] = yValues[j] } return ypad } diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/Settings.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/Settings.kt deleted file mode 100644 index 9edccaf36..000000000 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/util/Settings.kt +++ /dev/null @@ -1,58 +0,0 @@ -package org.futo.voiceinput.shared.util - -import android.content.Context -import androidx.datastore.core.DataStore -import androidx.datastore.preferences.core.Preferences -import androidx.datastore.preferences.core.booleanPreferencesKey -import androidx.datastore.preferences.core.intPreferencesKey -import androidx.datastore.preferences.core.stringSetPreferencesKey -import androidx.datastore.preferences.preferencesDataStore -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.first -import kotlinx.coroutines.flow.map -import kotlinx.coroutines.flow.take - -class ValueFromSettings(val key: Preferences.Key, val default: T) { - private var _value = default - - val value: T - get() { - return _value - } - - suspend fun load(context: Context, onResult: ((T) -> Unit)? = null) { - val valueFlow: Flow = - context.dataStore.data.map { preferences -> preferences[key] ?: default }.take(1) - - valueFlow.collect { - _value = it - - if (onResult != null) { - onResult(it) - } - } - } - - suspend fun get(context: Context): T { - val valueFlow: Flow = - context.dataStore.data.map { preferences -> preferences[key] ?: default }.take(1) - - return valueFlow.first() - } -} - - -val Context.dataStore: DataStore by preferencesDataStore(name = "settingsVoice") -val ENABLE_SOUND = booleanPreferencesKey("enable_sounds") -val VERBOSE_PROGRESS = booleanPreferencesKey("verbose_progress") -val ENABLE_ENGLISH = booleanPreferencesKey("enable_english") -val ENABLE_MULTILINGUAL = booleanPreferencesKey("enable_multilingual") -val DISALLOW_SYMBOLS = booleanPreferencesKey("disallow_symbols") - -val ENGLISH_MODEL_INDEX = intPreferencesKey("english_model_index") -val ENGLISH_MODEL_INDEX_DEFAULT = 0 - -val MULTILINGUAL_MODEL_INDEX = intPreferencesKey("multilingual_model_index") -val MULTILINGUAL_MODEL_INDEX_DEFAULT = 1 - -val LANGUAGE_TOGGLES = stringSetPreferencesKey("enabled_languages") \ No newline at end of file diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/whisper/Tokenizer.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/whisper/Tokenizer.kt index 127874c9c..b30b529bd 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/whisper/Tokenizer.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/whisper/Tokenizer.kt @@ -6,7 +6,6 @@ import kotlinx.serialization.json.int import kotlinx.serialization.json.jsonObject import kotlinx.serialization.json.jsonPrimitive import org.futo.voiceinput.shared.types.Language -import org.futo.voiceinput.shared.types.SpecialTokenKind import org.futo.voiceinput.shared.types.getLanguageFromWhisperString import org.futo.voiceinput.shared.types.getSymbolTokens import org.futo.voiceinput.shared.util.loadTextFromFile @@ -14,8 +13,8 @@ import org.futo.voiceinput.shared.util.loadTextFromResource import java.io.File class Tokenizer(tokenJson: String) { - val idToToken: Array - val tokenToId: HashMap = hashMapOf() + private val idToToken: Array + private val tokenToId: HashMap = hashMapOf() val symbolTokens: IntArray @@ -26,8 +25,8 @@ class Tokenizer(tokenJson: String) { val noTimestampsToken: Int val transcribeToken: Int - val startOfLanguages: Int - val endOfLanguages: Int + private val startOfLanguages: Int + private val endOfLanguages: Int init { val data = Json.parseToJsonElement(tokenJson) @@ -65,19 +64,6 @@ class Tokenizer(tokenJson: String) { return tokenToId[token] } - - fun toSpecialToken(token: Int): SpecialTokenKind? { - return when (token) { - decodeStartToken -> SpecialTokenKind.StartOfTranscript - decodeEndToken -> SpecialTokenKind.EndOfText - translateToken -> SpecialTokenKind.Translate - noCaptionsToken -> SpecialTokenKind.NoCaptions - noTimestampsToken -> SpecialTokenKind.NoTimestamps - transcribeToken -> SpecialTokenKind.Transcribe - else -> null - } - } - fun toLanguage(token: Int): Language? { if ((token < startOfLanguages) || (token > endOfLanguages)) return null