Create SoundPlayer for persistent state

2024-09-28 14:54:30 +01:00 · 2023-08-31 19:15:50 +03:00 · 2023-08-31 19:15:50 +03:00 · 3acb8b5e44
commit 3acb8b5e44
parent 731fbf1254
7 changed files with 226 additions and 165 deletions
--- a/java/src/org/futo/inputmethod/latin/uix/Action.kt
+++ b/java/src/org/futo/inputmethod/latin/uix/Action.kt
@ -36,6 +36,12 @@ interface ActionWindow {
 }
 /**
  * State object owned by an action. Per the contract on [cleanUp], the instance itself is kept
  * around even after a clean-up has been requested.
  */
 interface PersistentActionState {
    /**
     * Called when the device may be low on memory and is asking the action to trim its state.
     * Implementations can close any resources that are no longer necessary. This is never
     * called while the action window is currently open, and the PersistentActionState instance
     * itself sticks around after the call.
     */
    suspend fun cleanUp()
 }
--- a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt
+++ b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt
@ -15,6 +15,9 @@ import org.futo.inputmethod.latin.uix.ActionWindow
 import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
 import org.futo.inputmethod.latin.uix.PersistentActionState
 import org.futo.voiceinput.shared.RecognizerView
 import org.futo.voiceinput.shared.RecognizerViewListener
 import org.futo.voiceinput.shared.RecognizerViewSettings
 import org.futo.voiceinput.shared.SoundPlayer
 import org.futo.voiceinput.shared.whisper.ModelManager
 val SystemVoiceInputAction = Action(
@ -29,7 +32,8 @@ val SystemVoiceInputAction = Action(
 class VoiceInputPersistentState(val manager: KeyboardManagerForAction) : PersistentActionState {
-    var modelManager: ModelManager = ModelManager(manager.getContext())
+    val modelManager = ModelManager(manager.getContext())
    val soundPlayer = SoundPlayer(manager.getContext())
    override suspend fun cleanUp() {
        modelManager.cleanUp()
@ -43,29 +47,21 @@ val VoiceInputAction = Action(
    windowImpl = { manager, persistentState ->
        val state = persistentState as VoiceInputPersistentState
-        object : ActionWindow, RecognizerView(manager.getContext(), manager.getLifecycleScope(), state.modelManager) {
+        object : ActionWindow, RecognizerViewListener {
            private val recognizerView = RecognizerView(
                context = manager.getContext(),
                listener = this,
                settings = RecognizerViewSettings(
                    shouldShowInlinePartialResult = false,
                    shouldShowVerboseFeedback = true
                ),
                lifecycleScope = manager.getLifecycleScope(),
                modelManager = state.modelManager
            )
            init {
-                this.reset()
+                recognizerView.reset()
-                this.init()
+                recognizerView.start()
            }
            override fun onCancel() {
                this.reset()
                manager.closeActionWindow()
            }
            override fun sendResult(result: String) {
                manager.typeText(result)
                onCancel()
            }
            override fun sendPartialResult(result: String): Boolean {
                manager.typePartialText(result)
                return true
            }
            override fun requestPermission() {
                permissionResultRejected()
            }
            @Composable
@ -77,14 +73,39 @@ val VoiceInputAction = Action(
            override fun WindowContents() {
                Box(modifier = Modifier.fillMaxSize()) {
                    Box(modifier = Modifier.align(Alignment.Center)) {
-                        Content()
+                        recognizerView.Content()
                    }
                }
            }
            override fun close() {
-                this.reset()
+                recognizerView.cancel()
-                //soundPool.release()
+            }
            private var wasFinished = false
            override fun cancelled() {
                if(!wasFinished) {
                    state.soundPlayer.playCancelSound()
                }
            }
            override fun recordingStarted() {
                state.soundPlayer.playStartSound()
            }
            override fun finished(result: String) {
                wasFinished = true
                manager.typeText(result)
                manager.closeActionWindow()
            }
            override fun partialResult(result: String) {
                manager.typePartialText(result)
            }
            override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean {
                return false
            }
        }
    }
--- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt
+++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt
@ -26,8 +26,10 @@ import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
 import kotlinx.coroutines.withContext
 import kotlinx.coroutines.yield
 import org.futo.voiceinput.shared.types.AudioRecognizerListener
 import org.futo.voiceinput.shared.types.InferenceState
 import org.futo.voiceinput.shared.types.Language
 import org.futo.voiceinput.shared.types.MagnitudeState
 import org.futo.voiceinput.shared.types.ModelInferenceCallback
 import org.futo.voiceinput.shared.types.ModelLoader
 import org.futo.voiceinput.shared.whisper.DecodingConfiguration
@ -41,27 +43,6 @@ import kotlin.math.min
 import kotlin.math.pow
 import kotlin.math.sqrt
 enum class MagnitudeState {
    NOT_TALKED_YET, MIC_MAY_BE_BLOCKED, TALKING
 }
 interface AudioRecognizerListener {
    fun cancelled()
    fun finished(result: String)
    fun languageDetected(language: Language)
    fun partialResult(result: String)
    fun decodingStatus(status: InferenceState)
    fun loading()
    fun needPermission()
    fun permissionRejected()
    fun recordingStarted()
    fun updateMagnitude(magnitude: Float, state: MagnitudeState)
    fun processing()
 }
 data class AudioRecognizerSettings(
    val modelRunConfiguration: MultiModelRunConfiguration,
    val decodingConfiguration: DecodingConfiguration
@ -69,8 +50,6 @@ data class AudioRecognizerSettings(
 class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
 // Ideally this class shouldn't load the models at all; something else should load them
 // and hand the loaded model to AudioRecognizer.
 class AudioRecognizer(
    val context: Context,
    val lifecycleScope: LifecycleCoroutineScope,
@ -122,11 +101,11 @@ class AudioRecognizer(
        isRecording = false
    }
-    fun finishRecognizer() {
+    fun finish() {
        onFinishRecording()
    }
-    fun cancelRecognizer() {
+    fun cancel() {
        reset()
        listener.cancelled()
    }
@ -142,25 +121,25 @@ class AudioRecognizer(
        myAppSettings.flags = Intent.FLAG_ACTIVITY_NEW_TASK
        context.startActivity(myAppSettings)
-        cancelRecognizer()
+        cancel()
    }
-    fun create() {
+    fun start() {
        listener.loading()
        if (context.checkSelfPermission(Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
-            listener.needPermission()
+            requestPermission()
        } else {
            startRecording()
        }
    }
-    fun permissionResultGranted() {
+    private fun requestPermission() {
        listener.needPermission { wasGranted ->
            if(wasGranted) {
                startRecording()
            }
-
+        }
    fun permissionResultRejected() {
        listener.permissionRejected()
    }
    @Throws(SecurityException::class)
@ -219,7 +198,7 @@ class AudioRecognizer(
            if (isRunningOutOfSpace || hasNotTalkedRecently) {
                yield()
                withContext(Dispatchers.Main) {
-                    finishRecognizer()
+                    finish()
                }
                return
            }
@ -305,7 +284,7 @@ class AudioRecognizer(
                    if (floatSamples.remaining() < nRead2) {
                        yield()
                        withContext(Dispatchers.Main) {
-                            finishRecognizer()
+                            finish()
                        }
                        break
                    }
@ -333,7 +312,7 @@ class AudioRecognizer(
            createAudioRecorder()
        } catch (e: SecurityException) {
            // It's possible we may have lost permission, so let's just ask for permission again
-            listener.needPermission()
+            requestPermission()
            return
        }
--- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt
+++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt
@ -10,8 +10,10 @@ import androidx.compose.runtime.Composable
 import androidx.compose.runtime.mutableStateOf
 import androidx.lifecycle.LifecycleCoroutineScope
 import kotlinx.coroutines.launch
 import org.futo.voiceinput.shared.types.AudioRecognizerListener
 import org.futo.voiceinput.shared.types.InferenceState
 import org.futo.voiceinput.shared.types.Language
 import org.futo.voiceinput.shared.types.MagnitudeState
 import org.futo.voiceinput.shared.ui.InnerRecognize
 import org.futo.voiceinput.shared.ui.PartialDecodingResult
 import org.futo.voiceinput.shared.ui.RecognizeLoadingCircle
@ -23,32 +25,12 @@ import org.futo.voiceinput.shared.whisper.DecodingConfiguration
 import org.futo.voiceinput.shared.whisper.ModelManager
 import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration
-abstract class RecognizerView(
+data class RecognizerViewSettings(
-    private val context: Context,
+    val shouldShowVerboseFeedback: Boolean,
-    private val lifecycleScope: LifecycleCoroutineScope,
+    val shouldShowInlinePartialResult: Boolean
-    private val modelManager: ModelManager
+)
 ) {
    // TODO: Should not get settings here, pass settings to constructor
    private val shouldPlaySounds: ValueFromSettings<Boolean> = ValueFromSettings(ENABLE_SOUND, true)
    private val shouldBeVerbose: ValueFromSettings<Boolean> =
        ValueFromSettings(VERBOSE_PROGRESS, false)
-    // TODO: SoundPool should be managed by parent, not by view, as the view is short-lived
+private val VerboseAnnotations = hashMapOf(
    /* val soundPool: SoundPool = SoundPool.Builder().setMaxStreams(2).setAudioAttributes(
        AudioAttributes.Builder().setUsage(USAGE_ASSISTANCE_SONIFICATION)
            .setContentType(CONTENT_TYPE_SONIFICATION).build()
    ).build()*/
    private var startSoundId: Int = -1
    private var cancelSoundId: Int = -1
    abstract fun onCancel()
    abstract fun sendResult(result: String)
    abstract fun sendPartialResult(result: String): Boolean
    abstract fun requestPermission()
    companion object {
        private val verboseAnnotations = hashMapOf(
    InferenceState.ExtractingMel to R.string.extracting_features,
    InferenceState.LoadingModel to R.string.loading_model,
    InferenceState.Encoding to R.string.encoding,
@ -57,7 +39,7 @@ abstract class RecognizerView(
    InferenceState.DecodingStarted to R.string.decoding
 )
-        private val defaultAnnotations = hashMapOf(
+private val DefaultAnnotations = hashMapOf(
    InferenceState.ExtractingMel to R.string.processing,
    InferenceState.LoadingModel to R.string.processing,
    InferenceState.Encoding to R.string.processing,
@ -65,8 +47,27 @@ abstract class RecognizerView(
    InferenceState.SwitchingModel to R.string.switching_model,
    InferenceState.DecodingStarted to R.string.processing
 )
 /**
  * Callbacks through which a RecognizerView reports recognition events to whoever hosts it.
  */
 interface RecognizerViewListener {
    /** Forwarded when the underlying recognizer reports that it was cancelled. */
    fun cancelled()
    /** Forwarded when the underlying recognizer reports that recording has started. */
    fun recordingStarted()
    /** Forwarded with the result string once the recognizer reports it has finished. */
    fun finished(result: String)
    /** Forwarded with each partial result string the recognizer reports. */
    fun partialResult(result: String)
    /**
     * Asks the host to obtain the permission the recognizer needs. The host should invoke
     * [onGranted] or [onRejected] with the outcome.
     *
     * Return true if a permission modal was shown, otherwise return false. When false is
     * returned, the view switches to its permission-error screen.
     */
    fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean
 }
 class RecognizerView(
    private val context: Context,
    private val listener: RecognizerViewListener,
    private val settings: RecognizerViewSettings,
    lifecycleScope: LifecycleCoroutineScope,
    modelManager: ModelManager
 ) {
    private val magnitudeState = mutableStateOf(0.0f)
    private val statusState = mutableStateOf(MagnitudeState.NOT_TALKED_YET)
@ -96,7 +97,7 @@ abstract class RecognizerView(
            CurrentView.InnerRecognize -> {
                Column {
                    InnerRecognize(
-                        onFinish = { recognizer.finishRecognizer() },
+                        onFinish = { recognizer.finish() },
                        magnitude = magnitudeState,
                        state = statusState
                    )
@ -111,37 +112,17 @@ abstract class RecognizerView(
        }
    }
-    fun onClose() {
+    fun cancel() {
-        recognizer.cancelRecognizer()
+        recognizer.cancel()
    }
    private val listener = object : AudioRecognizerListener {
        // Tries to play a sound. If it's not yet ready, plays it when it's ready
        private fun playSound(id: Int) {
            /*
            lifecycleScope.launch {
                shouldPlaySounds.load(context) {
                    if (it) {
                        if (soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f) == 0) {
                            soundPool.setOnLoadCompleteListener { soundPool, sampleId, status ->
                                if ((sampleId == id) && (status == 0)) {
                                    soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f)
                                }
                            }
                        }
                    }
                }
            }
            */
    }
    private val audioRecognizerListener = object : AudioRecognizerListener {
        override fun cancelled() {
-            playSound(cancelSoundId)
+            listener.cancelled()
            onCancel()
        }
        override fun finished(result: String) {
-            sendResult(result)
+            listener.finished(result)
        }
        override fun languageDetected(language: Language) {
@ -149,20 +130,19 @@ abstract class RecognizerView(
        }
        override fun partialResult(result: String) {
-            if (!sendPartialResult(result)) {
+            listener.partialResult(result)
-                if (result.isNotBlank()) {
+            if (settings.shouldShowInlinePartialResult && result.isNotBlank()) {
                partialDecodingText.value = result
                currentViewState.value = CurrentView.PartialDecodingResult
            }
        }
        }
        override fun decodingStatus(status: InferenceState) {
            val text = context.getString(
-                when (shouldBeVerbose.value) {
+                when (settings.shouldShowVerboseFeedback) {
-                    true -> verboseAnnotations[status]!!
+                    true -> VerboseAnnotations[status]!!
-                    false -> defaultAnnotations[status]!!
+                    false -> DefaultAnnotations[status]!!
                }
            )
@ -175,18 +155,25 @@ abstract class RecognizerView(
            currentViewState.value = CurrentView.LoadingCircle
        }
-        override fun needPermission() {
+        override fun needPermission(onResult: (Boolean) -> Unit) {
-            requestPermission()
+            val shown = listener.requestPermission(
-        }
+                onGranted = {
-
+                    onResult(true)
-        override fun permissionRejected() {
+                },
                onRejected = {
                    onResult(false)
                    currentViewState.value = CurrentView.PermissionError
                }
            )
            if(!shown) {
                currentViewState.value = CurrentView.PermissionError
            }
        }
        override fun recordingStarted() {
            updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET)
-
+            listener.recordingStarted()
            playSound(startSoundId)
        }
        override fun updateMagnitude(magnitude: Float, state: MagnitudeState) {
@ -203,7 +190,7 @@ abstract class RecognizerView(
    // TODO: Dummy settings, should get them from constructor
    private val recognizer: AudioRecognizer = AudioRecognizer(
-        context, lifecycleScope, modelManager, listener, AudioRecognizerSettings(
+        context, lifecycleScope, modelManager, audioRecognizerListener, AudioRecognizerSettings(
            modelRunConfiguration = MultiModelRunConfiguration(
                primaryModel = ENGLISH_MODELS[0], languageSpecificModels = mapOf()
            ), decodingConfiguration = DecodingConfiguration(
@ -216,22 +203,7 @@ abstract class RecognizerView(
        recognizer.reset()
    }
-    fun init() {
+    fun start() {
-        lifecycleScope.launch {
+        recognizer.start()
            shouldBeVerbose.load(context)
        }
        //startSoundId = soundPool.load(this.context, R.raw.start, 0)
        //cancelSoundId = soundPool.load(this.context, R.raw.cancel, 0)
        recognizer.create()
    }
    fun permissionResultGranted() {
        recognizer.permissionResultGranted()
    }
    fun permissionResultRejected() {
        recognizer.permissionResultRejected()
    }
 }
--- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/SoundPlayer.kt
+++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/SoundPlayer.kt
@ -0,0 +1,62 @@
 package org.futo.voiceinput.shared
 import android.content.Context
 import android.media.AudioAttributes
 import android.media.AudioAttributes.CONTENT_TYPE_SONIFICATION
 import android.media.AudioAttributes.USAGE_ASSISTANCE_SONIFICATION
 import android.media.SoundPool
 import java.io.Closeable
 // Value SoundPool.play returns when it fails to start playback (a successful call returns a
 // non-zero stream ID instead).
 private const val SoundPoolPlayFailure = 0
 // Value of the `status` argument passed to SoundPool's OnLoadCompleteListener when a sample
 // was loaded successfully.
 private const val LoadStatusSuccess = 0
 /**
  * Plays the short "start" and "cancel" feedback sounds through a [SoundPool] owned by this
  * object.
  *
  * Both samples start loading as soon as the player is constructed. A sound requested before
  * its sample has finished loading is remembered and played once loading succeeds. Call
  * [close] to release the underlying [SoundPool].
  *
  * @param context used to resolve the raw sound resources.
  */
 class SoundPlayer(
    private val context: Context
 ): Closeable {
    private val soundPool: SoundPool = SoundPool.Builder().setMaxStreams(2).setAudioAttributes(
        AudioAttributes.Builder().setUsage(USAGE_ASSISTANCE_SONIFICATION)
            .setContentType(CONTENT_TYPE_SONIFICATION).build()
    ).build()
    // Sample ids whose playback was requested but could not start yet. Kept as a set so that
    // several sounds can wait for loading at once; previously each failed request replaced the
    // load listener and silently dropped whichever request came before it.
    // NOTE(review): assumes play requests and load callbacks are delivered on the same thread;
    // confirm, and guard this set if they are not.
    private val pendingSounds = mutableSetOf<Int>()
    private val startSound: Int
    private val cancelSound: Int
    init {
        // Register the single load listener before any load is requested so that no completion
        // event can be missed.
        soundPool.setOnLoadCompleteListener { _, sampleId, status ->
            // Always forget the request; only play it if the sample actually loaded.
            if (pendingSounds.remove(sampleId) && status == LoadStatusSuccess) {
                playSound(sampleId)
            }
        }
        startSound = soundPool.load(this.context, R.raw.start, 0)
        cancelSound = soundPool.load(this.context, R.raw.cancel, 0)
    }
    /** Drops any sounds still waiting for their sample and releases the [SoundPool]. */
    override fun close() {
        pendingSounds.clear()
        soundPool.release()
    }
    // Returns true if playback started, false if it failed
    private fun playSound(id: Int): Boolean {
        return soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f) != SoundPoolPlayFailure
    }
    // Tries to play a sound. If it's not yet ready, plays it when it's ready
    private fun playSoundOrLoad(id: Int) {
        if (!playSound(id)) {
            pendingSounds.add(id)
        }
    }
    /** Plays the start sound now, or as soon as its sample has finished loading. */
    fun playStartSound() {
        playSoundOrLoad(startSound)
    }
    /** Plays the cancel sound now, or as soon as its sample has finished loading. */
    fun playCancelSound() {
        playSoundOrLoad(cancelSound)
    }
 }
--- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt
+++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt
@ -0,0 +1,21 @@
 package org.futo.voiceinput.shared.types
 /**
  * Microphone input state reported together with a magnitude value through
  * AudioRecognizerListener.updateMagnitude.
  */
 enum class MagnitudeState {
    // NOTE(review): meanings below are read off the constant names; confirm against the
    // code that computes them.
    // Presumably: no speech has been detected so far.
    NOT_TALKED_YET,
    // Presumably: the input level suggests the microphone might be obstructed.
    MIC_MAY_BE_BLOCKED,
    // Presumably: speech is currently being detected.
    TALKING
 }
 /**
  * Callbacks through which an AudioRecognizer reports its progress and results.
  */
 interface AudioRecognizerListener {
    /** Called after the recognizer has been cancelled and reset. */
    fun cancelled()
    // Presumably delivers the final recognition text; the call site is not in view, confirm.
    fun finished(result: String)
    // Presumably reports the language detected in the audio; confirm against the call site.
    fun languageDetected(language: Language)
    // Presumably delivers intermediate recognition text while decoding; confirm.
    fun partialResult(result: String)
    // Reports the current inference stage so the UI can show a matching status message.
    fun decodingStatus(status: InferenceState)
    /** Called first when the recognizer is started, before the audio permission is checked. */
    fun loading()
    /**
     * Called when the recognizer lacks permission to record audio. The listener must invoke
     * [onResult] with true if the permission was granted (the recognizer then starts
     * recording) or false otherwise.
     */
    fun needPermission(onResult: (Boolean) -> Unit)
    // Presumably called once audio capture has begun; confirm against the call site.
    fun recordingStarted()
    // Reports an input magnitude together with the MagnitudeState it was classified as.
    // NOTE(review): range and units of `magnitude` are not visible here; confirm.
    fun updateMagnitude(magnitude: Float, state: MagnitudeState)
    // Presumably called when recording has ended and recognition begins; confirm.
    fun processing()
 }
--- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt
+++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt
@ -27,8 +27,8 @@ import androidx.compose.ui.res.painterResource
 import androidx.compose.ui.res.stringResource
 import androidx.compose.ui.text.style.TextAlign
 import androidx.compose.ui.unit.dp
 import org.futo.voiceinput.shared.MagnitudeState
 import org.futo.voiceinput.shared.R
 import org.futo.voiceinput.shared.types.MagnitudeState
 import org.futo.voiceinput.shared.ui.theme.Typography