mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Load voice input settings instead of hardcoding
This commit is contained in:
parent
af42223a0c
commit
7f656bb622
@ -69,5 +69,22 @@ fun <T> LifecycleOwner.deferSetSetting(key: Preferences.Key<T>, value: T): Job {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Bundles a DataStore preference key with the value to use when nothing has
 * been stored under that key.
 *
 * @param T type of the stored value.
 * @property key the underlying [Preferences.Key] used to address the value.
 * @property default the value returned by readers when the key is absent.
 */
data class SettingsKey<T>(
    val key: Preferences.Key<T>,
    val default: T,
)
|
||||||
|
|
||||||
|
/**
 * Reads the value currently stored for [key] in this context's preferences
 * DataStore.
 *
 * Suspends until the DataStore emits its first snapshot and then stops
 * collecting.
 *
 * @param key the setting to look up.
 * @return the stored value, or [SettingsKey.default] when the snapshot holds
 *   no entry for the key.
 */
suspend fun <T> Context.getSetting(key: SettingsKey<T>): T {
    // first() cancels collection after a single emission on its own, so the
    // previous take(1) and the intermediate mapped flow were redundant.
    val preferences = this.dataStore.data.first()
    return preferences[key.key] ?: key.default
}
|
||||||
|
|
||||||
|
/**
 * Stores [value] under [key] in this context's preferences DataStore.
 *
 * Suspends until the DataStore edit transaction has completed.
 *
 * @param key the setting to write.
 * @param value the new value to persist for that setting.
 */
suspend fun <T> Context.setSetting(key: SettingsKey<T>, value: T) {
    val preferenceKey = key.key
    dataStore.edit { prefs -> prefs[preferenceKey] = value }
}
|
||||||
|
|
||||||
val THEME_KEY = stringPreferencesKey("activeThemeOption")
|
val THEME_KEY = stringPreferencesKey("activeThemeOption")
|
@ -0,0 +1,45 @@
|
|||||||
|
package org.futo.inputmethod.latin.uix
|
||||||
|
|
||||||
|
import androidx.datastore.preferences.core.booleanPreferencesKey
|
||||||
|
import androidx.datastore.preferences.core.intPreferencesKey
|
||||||
|
import androidx.datastore.preferences.core.stringSetPreferencesKey
|
||||||
|
|
||||||
|
// Voice input settings. Each entry pairs the DataStore preference key with the
// value used when the user has not stored anything for it yet.

// Boolean toggles.
val ENABLE_SOUND: SettingsKey<Boolean> =
    SettingsKey(key = booleanPreferencesKey("enable_sounds"), default = true)

val VERBOSE_PROGRESS: SettingsKey<Boolean> =
    SettingsKey(key = booleanPreferencesKey("verbose_progress"), default = false)

val ENABLE_ENGLISH: SettingsKey<Boolean> =
    SettingsKey(key = booleanPreferencesKey("enable_english"), default = true)

val ENABLE_MULTILINGUAL: SettingsKey<Boolean> =
    SettingsKey(key = booleanPreferencesKey("enable_multilingual"), default = false)

val DISALLOW_SYMBOLS: SettingsKey<Boolean> =
    SettingsKey(key = booleanPreferencesKey("disallow_symbols"), default = true)

// Model selection, stored as integer indices.
// NOTE(review): presumably indices into the ENGLISH_MODELS / MULTILINGUAL_MODELS
// lists of the voice input module - confirm against the readers of these keys.
val ENGLISH_MODEL_INDEX: SettingsKey<Int> =
    SettingsKey(key = intPreferencesKey("english_model_index"), default = 0)

val MULTILINGUAL_MODEL_INDEX: SettingsKey<Int> =
    SettingsKey(key = intPreferencesKey("multilingual_model_index"), default = 1)

// Set of enabled language identifiers; empty by default.
val LANGUAGE_TOGGLES: SettingsKey<Set<String>> =
    SettingsKey(key = stringSetPreferencesKey("enabled_languages"), default = emptySet())
|
@ -3,8 +3,6 @@ package org.futo.inputmethod.latin.uix.actions
|
|||||||
import androidx.compose.foundation.clickable
|
import androidx.compose.foundation.clickable
|
||||||
import androidx.compose.foundation.interaction.MutableInteractionSource
|
import androidx.compose.foundation.interaction.MutableInteractionSource
|
||||||
import androidx.compose.foundation.layout.Box
|
import androidx.compose.foundation.layout.Box
|
||||||
import androidx.compose.foundation.layout.Column
|
|
||||||
import androidx.compose.foundation.layout.ColumnScope
|
|
||||||
import androidx.compose.foundation.layout.fillMaxSize
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.runtime.MutableState
|
import androidx.compose.runtime.MutableState
|
||||||
@ -13,17 +11,40 @@ import androidx.compose.runtime.remember
|
|||||||
import androidx.compose.ui.Alignment
|
import androidx.compose.ui.Alignment
|
||||||
import androidx.compose.ui.Modifier
|
import androidx.compose.ui.Modifier
|
||||||
import androidx.compose.ui.res.stringResource
|
import androidx.compose.ui.res.stringResource
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.async
|
||||||
|
import kotlinx.coroutines.coroutineScope
|
||||||
|
import kotlinx.coroutines.launch
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import kotlinx.coroutines.yield
|
||||||
import org.futo.inputmethod.latin.R
|
import org.futo.inputmethod.latin.R
|
||||||
import org.futo.inputmethod.latin.uix.Action
|
import org.futo.inputmethod.latin.uix.Action
|
||||||
import org.futo.inputmethod.latin.uix.ActionInputTransaction
|
import org.futo.inputmethod.latin.uix.ActionInputTransaction
|
||||||
import org.futo.inputmethod.latin.uix.ActionWindow
|
import org.futo.inputmethod.latin.uix.ActionWindow
|
||||||
|
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
|
||||||
|
import org.futo.inputmethod.latin.uix.ENABLE_ENGLISH
|
||||||
|
import org.futo.inputmethod.latin.uix.ENABLE_MULTILINGUAL
|
||||||
|
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
|
||||||
|
import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX
|
||||||
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
|
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
|
||||||
|
import org.futo.inputmethod.latin.uix.LANGUAGE_TOGGLES
|
||||||
|
import org.futo.inputmethod.latin.uix.MULTILINGUAL_MODEL_INDEX
|
||||||
import org.futo.inputmethod.latin.uix.PersistentActionState
|
import org.futo.inputmethod.latin.uix.PersistentActionState
|
||||||
|
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
|
||||||
|
import org.futo.inputmethod.latin.uix.getSetting
|
||||||
|
import org.futo.voiceinput.shared.ENGLISH_MODELS
|
||||||
|
import org.futo.voiceinput.shared.MULTILINGUAL_MODELS
|
||||||
|
import org.futo.voiceinput.shared.ModelDoesNotExistException
|
||||||
import org.futo.voiceinput.shared.RecognizerView
|
import org.futo.voiceinput.shared.RecognizerView
|
||||||
import org.futo.voiceinput.shared.RecognizerViewListener
|
import org.futo.voiceinput.shared.RecognizerViewListener
|
||||||
import org.futo.voiceinput.shared.RecognizerViewSettings
|
import org.futo.voiceinput.shared.RecognizerViewSettings
|
||||||
import org.futo.voiceinput.shared.SoundPlayer
|
import org.futo.voiceinput.shared.SoundPlayer
|
||||||
|
import org.futo.voiceinput.shared.types.Language
|
||||||
|
import org.futo.voiceinput.shared.types.ModelLoader
|
||||||
|
import org.futo.voiceinput.shared.types.getLanguageFromWhisperString
|
||||||
|
import org.futo.voiceinput.shared.whisper.DecodingConfiguration
|
||||||
import org.futo.voiceinput.shared.whisper.ModelManager
|
import org.futo.voiceinput.shared.whisper.ModelManager
|
||||||
|
import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration
|
||||||
|
|
||||||
val SystemVoiceInputAction = Action(
|
val SystemVoiceInputAction = Action(
|
||||||
icon = R.drawable.mic_fill,
|
icon = R.drawable.mic_fill,
|
||||||
@ -44,93 +65,157 @@ class VoiceInputPersistentState(val manager: KeyboardManagerForAction) : Persist
|
|||||||
modelManager.cleanUp()
|
modelManager.cleanUp()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val VoiceInputAction = Action(
|
|
||||||
icon = R.drawable.mic_fill,
|
|
||||||
name = R.string.voice_input_action_title,
|
|
||||||
simplePressImpl = null,
|
|
||||||
persistentState = { VoiceInputPersistentState(it) },
|
|
||||||
|
|
||||||
windowImpl = { manager, persistentState ->
|
private class VoiceInputActionWindow(
|
||||||
val state = persistentState as VoiceInputPersistentState
|
val manager: KeyboardManagerForAction, val state: VoiceInputPersistentState
|
||||||
object : ActionWindow, RecognizerViewListener {
|
) : ActionWindow, RecognizerViewListener {
|
||||||
private val recognizerView = RecognizerView(
|
val context = manager.getContext()
|
||||||
|
|
||||||
|
private var shouldPlaySounds: Boolean = false
|
||||||
|
/**
 * Reads the voice input preferences and assembles them into a
 * [RecognizerViewSettings].
 *
 * Every preference read is started as its own `async` child of a
 * [coroutineScope], so the function returns only after all reads have
 * completed (or one of them has failed).
 *
 * Side effect: updates [shouldPlaySounds] from the `ENABLE_SOUND` preference.
 *
 * A persisted model index that is not a valid position in the corresponding
 * model list falls back to that key's declared default index instead of
 * throwing.
 *
 * @return the settings to construct the recognizer view with.
 */
private suspend fun loadSettings(): RecognizerViewSettings = coroutineScope {
    // Start all reads up front; they are awaited further down as needed.
    val enableSound = async { context.getSetting(ENABLE_SOUND) }
    val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) }
    val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) }
    val enableEnglish = async { context.getSetting(ENABLE_ENGLISH) }
    val englishModelIdx = async { context.getSetting(ENGLISH_MODEL_INDEX) }
    val enableMultilingual = async { context.getSetting(ENABLE_MULTILINGUAL) }
    val multilingualModelIdx = async { context.getSetting(MULTILINGUAL_MODEL_INDEX) }
    val allowedLanguages = async {
        // Strings that do not map to a known language are dropped.
        context.getSetting(LANGUAGE_TOGGLES)
            .mapNotNull { getLanguageFromWhisperString(it) }
            .toSet()
    }

    // The stored indices come from persisted preferences and may not match the
    // current model lists, so guard the lookups rather than crash on a bad value.
    val primaryModel = if (enableMultilingual.await()) {
        MULTILINGUAL_MODELS.getOrElse(multilingualModelIdx.await()) {
            MULTILINGUAL_MODELS[MULTILINGUAL_MODEL_INDEX.default]
        }
    } else {
        ENGLISH_MODELS.getOrElse(englishModelIdx.await()) {
            ENGLISH_MODELS[ENGLISH_MODEL_INDEX.default]
        }
    }

    val languageSpecificModels = mutableMapOf<Language, ModelLoader>()
    if (enableEnglish.await()) {
        languageSpecificModels[Language.English] =
            ENGLISH_MODELS.getOrElse(englishModelIdx.await()) {
                ENGLISH_MODELS[ENGLISH_MODEL_INDEX.default]
            }
    }

    shouldPlaySounds = enableSound.await()

    RecognizerViewSettings(
        shouldShowInlinePartialResult = false,
        shouldShowVerboseFeedback = verboseFeedback.await(),
        modelRunConfiguration = MultiModelRunConfiguration(
            primaryModel = primaryModel,
            languageSpecificModels = languageSpecificModels
        ),
        decodingConfiguration = DecodingConfiguration(
            languages = allowedLanguages.await(),
            suppressSymbols = disallowSymbols.await()
        )
    )
}
|
||||||
|
|
||||||
|
private var recognizerView: MutableState<RecognizerView?> = mutableStateOf(null)
|
||||||
|
|
||||||
|
private val initJob = manager.getLifecycleScope().launch {
|
||||||
|
yield()
|
||||||
|
val settings = withContext(Dispatchers.IO) {
|
||||||
|
loadSettings()
|
||||||
|
}
|
||||||
|
|
||||||
|
yield()
|
||||||
|
val recognizerView = try {
|
||||||
|
RecognizerView(
|
||||||
context = manager.getContext(),
|
context = manager.getContext(),
|
||||||
listener = this,
|
listener = this@VoiceInputActionWindow,
|
||||||
settings = RecognizerViewSettings(
|
settings = settings,
|
||||||
shouldShowInlinePartialResult = false,
|
|
||||||
shouldShowVerboseFeedback = true
|
|
||||||
),
|
|
||||||
lifecycleScope = manager.getLifecycleScope(),
|
lifecycleScope = manager.getLifecycleScope(),
|
||||||
modelManager = state.modelManager
|
modelManager = state.modelManager
|
||||||
)
|
)
|
||||||
|
} catch(e: ModelDoesNotExistException) {
|
||||||
|
// TODO: Show an error to the user, with an option to download
|
||||||
|
close()
|
||||||
|
return@launch
|
||||||
|
}
|
||||||
|
|
||||||
init {
|
this@VoiceInputActionWindow.recognizerView.value = recognizerView
|
||||||
recognizerView.reset()
|
|
||||||
recognizerView.start()
|
|
||||||
}
|
|
||||||
|
|
||||||
private var inputTransaction: ActionInputTransaction? = null
|
yield()
|
||||||
private fun getOrStartInputTransaction(): ActionInputTransaction {
|
recognizerView.reset()
|
||||||
if(inputTransaction == null) {
|
|
||||||
inputTransaction = manager.createInputTransaction(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
return inputTransaction!!
|
yield()
|
||||||
}
|
recognizerView.start()
|
||||||
|
}
|
||||||
|
|
||||||
@Composable
|
private var inputTransaction: ActionInputTransaction? = null
|
||||||
override fun windowName(): String {
|
private fun getOrStartInputTransaction(): ActionInputTransaction {
|
||||||
return stringResource(R.string.voice_input_action_title)
|
if (inputTransaction == null) {
|
||||||
}
|
inputTransaction = manager.createInputTransaction(true)
|
||||||
|
}
|
||||||
|
|
||||||
@Composable
|
return inputTransaction!!
|
||||||
override fun WindowContents() {
|
}
|
||||||
Box(modifier = Modifier
|
|
||||||
.fillMaxSize()
|
|
||||||
.clickable(
|
|
||||||
enabled = true,
|
|
||||||
onClickLabel = null,
|
|
||||||
onClick = { recognizerView.finish() },
|
|
||||||
role = null,
|
|
||||||
indication = null,
|
|
||||||
interactionSource = remember { MutableInteractionSource() }
|
|
||||||
)) {
|
|
||||||
Box(modifier = Modifier.align(Alignment.Center)) {
|
|
||||||
recognizerView.Content()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun close() {
|
@Composable
|
||||||
recognizerView.cancel()
|
override fun windowName(): String {
|
||||||
}
|
return stringResource(R.string.voice_input_action_title)
|
||||||
|
}
|
||||||
|
|
||||||
private var wasFinished = false
|
@Composable
|
||||||
override fun cancelled() {
|
override fun WindowContents() {
|
||||||
if(!wasFinished) {
|
Box(modifier = Modifier
|
||||||
state.soundPlayer.playCancelSound()
|
.fillMaxSize()
|
||||||
getOrStartInputTransaction().cancel()
|
.clickable(enabled = true,
|
||||||
}
|
onClickLabel = null,
|
||||||
}
|
onClick = { recognizerView.value?.finish() },
|
||||||
|
role = null,
|
||||||
override fun recordingStarted() {
|
indication = null,
|
||||||
state.soundPlayer.playStartSound()
|
interactionSource = remember { MutableInteractionSource() })) {
|
||||||
}
|
Box(modifier = Modifier.align(Alignment.Center)) {
|
||||||
|
recognizerView.value?.Content()
|
||||||
override fun finished(result: String) {
|
|
||||||
wasFinished = true
|
|
||||||
|
|
||||||
getOrStartInputTransaction().commit(result)
|
|
||||||
manager.closeActionWindow()
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun partialResult(result: String) {
|
|
||||||
getOrStartInputTransaction().updatePartial(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean {
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Closes this window: cancels the initialisation job first, then cancels the
 * recognizer view if one has already been created.
 */
override fun close() {
    initJob.cancel()

    // The view is only published once initialisation got far enough to build it.
    val activeView = recognizerView.value ?: return
    activeView.cancel()
}
|
||||||
|
|
||||||
|
private var wasFinished = false
|
||||||
|
/**
 * Listener callback for a cancelled recognition.
 *
 * Does nothing once [finished] has already run. Otherwise plays the cancel
 * sound (when sounds are enabled) and cancels the input transaction.
 */
override fun cancelled() {
    if (wasFinished) return

    if (shouldPlaySounds) state.soundPlayer.playCancelSound()
    getOrStartInputTransaction().cancel()
}
|
||||||
|
|
||||||
|
/**
 * Listener callback for the start of recording; plays the start sound unless
 * sounds are disabled.
 */
override fun recordingStarted() {
    if (!shouldPlaySounds) return
    state.soundPlayer.playStartSound()
}
|
||||||
|
|
||||||
|
/**
 * Listener callback for a completed recognition.
 *
 * Commits [result] through the input transaction and asks the manager to
 * close the action window.
 *
 * @param result the final recognized text.
 */
override fun finished(result: String) {
    // Set before anything else so that cancelled() becomes a no-op from here on.
    wasFinished = true

    val transaction = getOrStartInputTransaction()
    transaction.commit(result)
    manager.closeActionWindow()
}
|
||||||
|
|
||||||
|
/**
 * Listener callback for an intermediate recognition result; forwards it to the
 * input transaction as a partial update.
 *
 * @param result the text recognized so far.
 */
override fun partialResult(result: String) {
    val transaction = getOrStartInputTransaction()
    transaction.updatePartial(result)
}
|
||||||
|
|
||||||
|
/**
 * Listener callback for a permission request.
 *
 * Always returns `false` and never invokes [onGranted] or [onRejected].
 * NOTE(review): presumably `false` tells the caller that this window cannot
 * request the permission itself - confirm against RecognizerViewListener.
 */
override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean = false
|
||||||
|
}
|
||||||
|
|
||||||
|
val VoiceInputAction = Action(icon = R.drawable.mic_fill,
|
||||||
|
name = R.string.voice_input_action_title,
|
||||||
|
simplePressImpl = null,
|
||||||
|
persistentState = { VoiceInputPersistentState(it) },
|
||||||
|
windowImpl = { manager, persistentState ->
|
||||||
|
VoiceInputActionWindow(
|
||||||
|
manager = manager, state = persistentState as VoiceInputPersistentState
|
||||||
|
)
|
||||||
|
}
|
||||||
)
|
)
|
@ -21,9 +21,7 @@ import com.konovalov.vad.config.SampleRate
|
|||||||
import com.konovalov.vad.models.VadModel
|
import com.konovalov.vad.models.VadModel
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.Job
|
import kotlinx.coroutines.Job
|
||||||
import kotlinx.coroutines.cancelAndJoin
|
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
import kotlinx.coroutines.runBlocking
|
|
||||||
import kotlinx.coroutines.withContext
|
import kotlinx.coroutines.withContext
|
||||||
import kotlinx.coroutines.yield
|
import kotlinx.coroutines.yield
|
||||||
import org.futo.voiceinput.shared.types.AudioRecognizerListener
|
import org.futo.voiceinput.shared.types.AudioRecognizerListener
|
||||||
@ -51,11 +49,11 @@ data class AudioRecognizerSettings(
|
|||||||
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
|
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
|
||||||
|
|
||||||
class AudioRecognizer(
|
class AudioRecognizer(
|
||||||
val context: Context,
|
private val context: Context,
|
||||||
val lifecycleScope: LifecycleCoroutineScope,
|
private val lifecycleScope: LifecycleCoroutineScope,
|
||||||
val modelManager: ModelManager,
|
modelManager: ModelManager,
|
||||||
val listener: AudioRecognizerListener,
|
private val listener: AudioRecognizerListener,
|
||||||
val settings: AudioRecognizerSettings
|
private val settings: AudioRecognizerSettings
|
||||||
) {
|
) {
|
||||||
private var isRecording = false
|
private var isRecording = false
|
||||||
private var recorder: AudioRecord? = null
|
private var recorder: AudioRecord? = null
|
||||||
|
@ -1,15 +1,10 @@
|
|||||||
package org.futo.voiceinput.shared
|
package org.futo.voiceinput.shared
|
||||||
|
|
||||||
import android.content.Context
|
import android.content.Context
|
||||||
import android.media.AudioAttributes
|
|
||||||
import android.media.AudioAttributes.CONTENT_TYPE_SONIFICATION
|
|
||||||
import android.media.AudioAttributes.USAGE_ASSISTANCE_SONIFICATION
|
|
||||||
import android.media.SoundPool
|
|
||||||
import androidx.compose.foundation.layout.Column
|
import androidx.compose.foundation.layout.Column
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.runtime.mutableStateOf
|
import androidx.compose.runtime.mutableStateOf
|
||||||
import androidx.lifecycle.LifecycleCoroutineScope
|
import androidx.lifecycle.LifecycleCoroutineScope
|
||||||
import kotlinx.coroutines.launch
|
|
||||||
import org.futo.voiceinput.shared.types.AudioRecognizerListener
|
import org.futo.voiceinput.shared.types.AudioRecognizerListener
|
||||||
import org.futo.voiceinput.shared.types.InferenceState
|
import org.futo.voiceinput.shared.types.InferenceState
|
||||||
import org.futo.voiceinput.shared.types.Language
|
import org.futo.voiceinput.shared.types.Language
|
||||||
@ -18,16 +13,16 @@ import org.futo.voiceinput.shared.ui.InnerRecognize
|
|||||||
import org.futo.voiceinput.shared.ui.PartialDecodingResult
|
import org.futo.voiceinput.shared.ui.PartialDecodingResult
|
||||||
import org.futo.voiceinput.shared.ui.RecognizeLoadingCircle
|
import org.futo.voiceinput.shared.ui.RecognizeLoadingCircle
|
||||||
import org.futo.voiceinput.shared.ui.RecognizeMicError
|
import org.futo.voiceinput.shared.ui.RecognizeMicError
|
||||||
import org.futo.voiceinput.shared.util.ENABLE_SOUND
|
|
||||||
import org.futo.voiceinput.shared.util.VERBOSE_PROGRESS
|
|
||||||
import org.futo.voiceinput.shared.util.ValueFromSettings
|
|
||||||
import org.futo.voiceinput.shared.whisper.DecodingConfiguration
|
import org.futo.voiceinput.shared.whisper.DecodingConfiguration
|
||||||
import org.futo.voiceinput.shared.whisper.ModelManager
|
import org.futo.voiceinput.shared.whisper.ModelManager
|
||||||
import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration
|
import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration
|
||||||
|
|
||||||
data class RecognizerViewSettings(
|
data class RecognizerViewSettings(
|
||||||
val shouldShowVerboseFeedback: Boolean,
|
val shouldShowVerboseFeedback: Boolean,
|
||||||
val shouldShowInlinePartialResult: Boolean
|
val shouldShowInlinePartialResult: Boolean,
|
||||||
|
|
||||||
|
val modelRunConfiguration: MultiModelRunConfiguration,
|
||||||
|
val decodingConfiguration: DecodingConfiguration
|
||||||
)
|
)
|
||||||
|
|
||||||
private val VerboseAnnotations = hashMapOf(
|
private val VerboseAnnotations = hashMapOf(
|
||||||
@ -192,14 +187,14 @@ class RecognizerView(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Dummy settings, should get them from constructor
|
|
||||||
private val recognizer: AudioRecognizer = AudioRecognizer(
|
private val recognizer: AudioRecognizer = AudioRecognizer(
|
||||||
context, lifecycleScope, modelManager, audioRecognizerListener, AudioRecognizerSettings(
|
context = context,
|
||||||
modelRunConfiguration = MultiModelRunConfiguration(
|
lifecycleScope = lifecycleScope,
|
||||||
primaryModel = ENGLISH_MODELS[0], languageSpecificModels = mapOf()
|
modelManager = modelManager,
|
||||||
), decodingConfiguration = DecodingConfiguration(
|
listener = audioRecognizerListener,
|
||||||
languages = setOf(), suppressSymbols = true
|
settings = AudioRecognizerSettings(
|
||||||
)
|
modelRunConfiguration = settings.modelRunConfiguration,
|
||||||
|
decodingConfiguration = settings.decodingConfiguration
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -2,10 +2,6 @@ package org.futo.voiceinput.shared.types
|
|||||||
|
|
||||||
import org.futo.voiceinput.shared.whisper.stringifyUnicode
|
import org.futo.voiceinput.shared.whisper.stringifyUnicode
|
||||||
|
|
||||||
enum class SpecialTokenKind {
|
|
||||||
StartOfTranscript, EndOfText, Translate, Transcribe, NoCaptions, NoTimestamps,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Based on https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L236
|
// Based on https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L236
|
||||||
private val SYMBOLS = "#()*+/:;<=>@[\\]^_`{|}~「」『』".chunked(1) + listOf(
|
private val SYMBOLS = "#()*+/:;<=>@[\\]^_`{|}~「」『』".chunked(1) + listOf(
|
||||||
"<<",
|
"<<",
|
||||||
|
@ -12,10 +12,6 @@ fun Array<DoubleArray>.shape(): IntArray {
|
|||||||
return arrayOf(size, this[0].size).toIntArray()
|
return arrayOf(size, this[0].size).toIntArray()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun DoubleArray.toFloatArray(): FloatArray {
|
|
||||||
return this.map { it.toFloat() }.toFloatArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun FloatArray.toDoubleArray(): DoubleArray {
|
fun FloatArray.toDoubleArray(): DoubleArray {
|
||||||
return this.map { it.toDouble() }.toDoubleArray()
|
return this.map { it.toDouble() }.toDoubleArray()
|
||||||
}
|
}
|
||||||
|
@ -69,10 +69,6 @@ fun melToFreq(mels: DoubleArray, melScale: MelScale): DoubleArray {
|
|||||||
return mels.map { melToFreq(it, melScale) }.toDoubleArray()
|
return mels.map { melToFreq(it, melScale) }.toDoubleArray()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun freqToMel(freqs: DoubleArray, melScale: MelScale): DoubleArray {
|
|
||||||
return freqs.map { freqToMel(it, melScale) }.toDoubleArray()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun linspace(min: Double, max: Double, num: Int): DoubleArray {
|
fun linspace(min: Double, max: Double, num: Int): DoubleArray {
|
||||||
val array = DoubleArray(num)
|
val array = DoubleArray(num)
|
||||||
val spacing = (max - min) / ((num - 1).toDouble())
|
val spacing = (max - min) / ((num - 1).toDouble())
|
||||||
@ -170,11 +166,11 @@ fun melFilterBank(
|
|||||||
fun padY(yValues: DoubleArray, nFFT: Int): DoubleArray {
|
fun padY(yValues: DoubleArray, nFFT: Int): DoubleArray {
|
||||||
val ypad = DoubleArray(nFFT + yValues.size)
|
val ypad = DoubleArray(nFFT + yValues.size)
|
||||||
for (i in 0 until nFFT / 2) {
|
for (i in 0 until nFFT / 2) {
|
||||||
ypad[nFFT / 2 - i - 1] = yValues[i + 1].toDouble()
|
ypad[nFFT / 2 - i - 1] = yValues[i + 1]
|
||||||
ypad[nFFT / 2 + yValues.size + i] = yValues[yValues.size - 2 - i].toDouble()
|
ypad[nFFT / 2 + yValues.size + i] = yValues[yValues.size - 2 - i]
|
||||||
}
|
}
|
||||||
for (j in yValues.indices) {
|
for (j in yValues.indices) {
|
||||||
ypad[nFFT / 2 + j] = yValues[j].toDouble()
|
ypad[nFFT / 2 + j] = yValues[j]
|
||||||
}
|
}
|
||||||
return ypad
|
return ypad
|
||||||
}
|
}
|
||||||
|
@ -1,58 +0,0 @@
|
|||||||
package org.futo.voiceinput.shared.util
|
|
||||||
|
|
||||||
import android.content.Context
|
|
||||||
import androidx.datastore.core.DataStore
|
|
||||||
import androidx.datastore.preferences.core.Preferences
|
|
||||||
import androidx.datastore.preferences.core.booleanPreferencesKey
|
|
||||||
import androidx.datastore.preferences.core.intPreferencesKey
|
|
||||||
import androidx.datastore.preferences.core.stringSetPreferencesKey
|
|
||||||
import androidx.datastore.preferences.preferencesDataStore
|
|
||||||
import kotlinx.coroutines.flow.Flow
|
|
||||||
import kotlinx.coroutines.flow.first
|
|
||||||
import kotlinx.coroutines.flow.map
|
|
||||||
import kotlinx.coroutines.flow.take
|
|
||||||
|
|
||||||
class ValueFromSettings<T>(val key: Preferences.Key<T>, val default: T) {
|
|
||||||
private var _value = default
|
|
||||||
|
|
||||||
val value: T
|
|
||||||
get() {
|
|
||||||
return _value
|
|
||||||
}
|
|
||||||
|
|
||||||
suspend fun load(context: Context, onResult: ((T) -> Unit)? = null) {
|
|
||||||
val valueFlow: Flow<T> =
|
|
||||||
context.dataStore.data.map { preferences -> preferences[key] ?: default }.take(1)
|
|
||||||
|
|
||||||
valueFlow.collect {
|
|
||||||
_value = it
|
|
||||||
|
|
||||||
if (onResult != null) {
|
|
||||||
onResult(it)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
suspend fun get(context: Context): T {
|
|
||||||
val valueFlow: Flow<T> =
|
|
||||||
context.dataStore.data.map { preferences -> preferences[key] ?: default }.take(1)
|
|
||||||
|
|
||||||
return valueFlow.first()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
val Context.dataStore: DataStore<Preferences> by preferencesDataStore(name = "settingsVoice")
|
|
||||||
val ENABLE_SOUND = booleanPreferencesKey("enable_sounds")
|
|
||||||
val VERBOSE_PROGRESS = booleanPreferencesKey("verbose_progress")
|
|
||||||
val ENABLE_ENGLISH = booleanPreferencesKey("enable_english")
|
|
||||||
val ENABLE_MULTILINGUAL = booleanPreferencesKey("enable_multilingual")
|
|
||||||
val DISALLOW_SYMBOLS = booleanPreferencesKey("disallow_symbols")
|
|
||||||
|
|
||||||
val ENGLISH_MODEL_INDEX = intPreferencesKey("english_model_index")
|
|
||||||
val ENGLISH_MODEL_INDEX_DEFAULT = 0
|
|
||||||
|
|
||||||
val MULTILINGUAL_MODEL_INDEX = intPreferencesKey("multilingual_model_index")
|
|
||||||
val MULTILINGUAL_MODEL_INDEX_DEFAULT = 1
|
|
||||||
|
|
||||||
val LANGUAGE_TOGGLES = stringSetPreferencesKey("enabled_languages")
|
|
@ -6,7 +6,6 @@ import kotlinx.serialization.json.int
|
|||||||
import kotlinx.serialization.json.jsonObject
|
import kotlinx.serialization.json.jsonObject
|
||||||
import kotlinx.serialization.json.jsonPrimitive
|
import kotlinx.serialization.json.jsonPrimitive
|
||||||
import org.futo.voiceinput.shared.types.Language
|
import org.futo.voiceinput.shared.types.Language
|
||||||
import org.futo.voiceinput.shared.types.SpecialTokenKind
|
|
||||||
import org.futo.voiceinput.shared.types.getLanguageFromWhisperString
|
import org.futo.voiceinput.shared.types.getLanguageFromWhisperString
|
||||||
import org.futo.voiceinput.shared.types.getSymbolTokens
|
import org.futo.voiceinput.shared.types.getSymbolTokens
|
||||||
import org.futo.voiceinput.shared.util.loadTextFromFile
|
import org.futo.voiceinput.shared.util.loadTextFromFile
|
||||||
@ -14,8 +13,8 @@ import org.futo.voiceinput.shared.util.loadTextFromResource
|
|||||||
import java.io.File
|
import java.io.File
|
||||||
|
|
||||||
class Tokenizer(tokenJson: String) {
|
class Tokenizer(tokenJson: String) {
|
||||||
val idToToken: Array<String?>
|
private val idToToken: Array<String?>
|
||||||
val tokenToId: HashMap<String, Int> = hashMapOf()
|
private val tokenToId: HashMap<String, Int> = hashMapOf()
|
||||||
|
|
||||||
val symbolTokens: IntArray
|
val symbolTokens: IntArray
|
||||||
|
|
||||||
@ -26,8 +25,8 @@ class Tokenizer(tokenJson: String) {
|
|||||||
val noTimestampsToken: Int
|
val noTimestampsToken: Int
|
||||||
val transcribeToken: Int
|
val transcribeToken: Int
|
||||||
|
|
||||||
val startOfLanguages: Int
|
private val startOfLanguages: Int
|
||||||
val endOfLanguages: Int
|
private val endOfLanguages: Int
|
||||||
|
|
||||||
init {
|
init {
|
||||||
val data = Json.parseToJsonElement(tokenJson)
|
val data = Json.parseToJsonElement(tokenJson)
|
||||||
@ -65,19 +64,6 @@ class Tokenizer(tokenJson: String) {
|
|||||||
return tokenToId[token]
|
return tokenToId[token]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fun toSpecialToken(token: Int): SpecialTokenKind? {
|
|
||||||
return when (token) {
|
|
||||||
decodeStartToken -> SpecialTokenKind.StartOfTranscript
|
|
||||||
decodeEndToken -> SpecialTokenKind.EndOfText
|
|
||||||
translateToken -> SpecialTokenKind.Translate
|
|
||||||
noCaptionsToken -> SpecialTokenKind.NoCaptions
|
|
||||||
noTimestampsToken -> SpecialTokenKind.NoTimestamps
|
|
||||||
transcribeToken -> SpecialTokenKind.Transcribe
|
|
||||||
else -> null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun toLanguage(token: Int): Language? {
|
fun toLanguage(token: Int): Language? {
|
||||||
if ((token < startOfLanguages) || (token > endOfLanguages)) return null
|
if ((token < startOfLanguages) || (token > endOfLanguages)) return null
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user