Create SoundPlayer for persistent state

Aleksandras Kostarevas 2023-08-31 19:15:50 +03:00
parent 731fbf1254
commit 3acb8b5e44
7 changed files with 226 additions and 165 deletions

View File

@@ -36,6 +36,12 @@ interface ActionWindow {
}
interface PersistentActionState {
/**
* When called, the device may be low on memory and is requesting the action to clean up its
* state. You can close any resources that are no longer necessary. This will never be called
* while the action window is open. The PersistentActionState will remain alive after this
* call.
*/
suspend fun cleanUp()
}
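
The comment above pins down when cleanUp() may run, so a minimal sketch of an implementer may help; the class and its cached buffer are hypothetical, only the PersistentActionState interface and KeyboardManagerForAction come from this codebase.

import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
import org.futo.inputmethod.latin.uix.PersistentActionState

// Hypothetical action state that keeps an expensive-to-build cache between window openings.
class ExamplePersistentState(val manager: KeyboardManagerForAction) : PersistentActionState {
    private var cache: ByteArray? = null

    fun ensureCache(): ByteArray =
        cache ?: ByteArray(4 * 1024 * 1024).also { cache = it }

    override suspend fun cleanUp() {
        // Never called while the action window is open, so dropping the cache here is safe.
        cache = null
    }
}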

View File

@@ -15,6 +15,9 @@ import org.futo.inputmethod.latin.uix.ActionWindow
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
import org.futo.inputmethod.latin.uix.PersistentActionState
import org.futo.voiceinput.shared.RecognizerView
import org.futo.voiceinput.shared.RecognizerViewListener
import org.futo.voiceinput.shared.RecognizerViewSettings
import org.futo.voiceinput.shared.SoundPlayer
import org.futo.voiceinput.shared.whisper.ModelManager
val SystemVoiceInputAction = Action(
@@ -29,7 +32,8 @@ val SystemVoiceInputAction = Action(
class VoiceInputPersistentState(val manager: KeyboardManagerForAction) : PersistentActionState {
var modelManager: ModelManager = ModelManager(manager.getContext())
val modelManager = ModelManager(manager.getContext())
val soundPlayer = SoundPlayer(manager.getContext())
override suspend fun cleanUp() {
modelManager.cleanUp()
@@ -43,29 +47,21 @@ val VoiceInputAction = Action(
windowImpl = { manager, persistentState ->
val state = persistentState as VoiceInputPersistentState
object : ActionWindow, RecognizerView(manager.getContext(), manager.getLifecycleScope(), state.modelManager) {
object : ActionWindow, RecognizerViewListener {
private val recognizerView = RecognizerView(
context = manager.getContext(),
listener = this,
settings = RecognizerViewSettings(
shouldShowInlinePartialResult = false,
shouldShowVerboseFeedback = true
),
lifecycleScope = manager.getLifecycleScope(),
modelManager = state.modelManager
)
init {
this.reset()
this.init()
}
override fun onCancel() {
this.reset()
manager.closeActionWindow()
}
override fun sendResult(result: String) {
manager.typeText(result)
onCancel()
}
override fun sendPartialResult(result: String): Boolean {
manager.typePartialText(result)
return true
}
override fun requestPermission() {
permissionResultRejected()
recognizerView.reset()
recognizerView.start()
}
@Composable
@@ -77,14 +73,39 @@
override fun WindowContents() {
Box(modifier = Modifier.fillMaxSize()) {
Box(modifier = Modifier.align(Alignment.Center)) {
Content()
recognizerView.Content()
}
}
}
override fun close() {
this.reset()
//soundPool.release()
recognizerView.cancel()
}
private var wasFinished = false
override fun cancelled() {
if(!wasFinished) {
state.soundPlayer.playCancelSound()
}
}
override fun recordingStarted() {
state.soundPlayer.playStartSound()
}
override fun finished(result: String) {
wasFinished = true
manager.typeText(result)
manager.closeActionWindow()
}
override fun partialResult(result: String) {
manager.typePartialText(result)
}
override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean {
return false
}
}
}
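
The window above always returns false from requestPermission, so RecognizerView falls back to its built-in permission-error view. For contrast, a hedged sketch of the true path, in which the host shows its own prompt; showMicrophonePermissionPrompt is a hypothetical helper, not an API from this commit:

override fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean {
    // Hypothetical host-side modal; it must eventually invoke exactly one of the callbacks.
    showMicrophonePermissionPrompt(manager.getContext()) { granted ->
        if (granted) onGranted() else onRejected()
    }
    return true  // a prompt is pending, so RecognizerView does not show its error view immediately
}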

View File

@@ -26,8 +26,10 @@ import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withContext
import kotlinx.coroutines.yield
import org.futo.voiceinput.shared.types.AudioRecognizerListener
import org.futo.voiceinput.shared.types.InferenceState
import org.futo.voiceinput.shared.types.Language
import org.futo.voiceinput.shared.types.MagnitudeState
import org.futo.voiceinput.shared.types.ModelInferenceCallback
import org.futo.voiceinput.shared.types.ModelLoader
import org.futo.voiceinput.shared.whisper.DecodingConfiguration
@@ -41,27 +43,6 @@ import kotlin.math.min
import kotlin.math.pow
import kotlin.math.sqrt
enum class MagnitudeState {
NOT_TALKED_YET, MIC_MAY_BE_BLOCKED, TALKING
}
interface AudioRecognizerListener {
fun cancelled()
fun finished(result: String)
fun languageDetected(language: Language)
fun partialResult(result: String)
fun decodingStatus(status: InferenceState)
fun loading()
fun needPermission()
fun permissionRejected()
fun recordingStarted()
fun updateMagnitude(magnitude: Float, state: MagnitudeState)
fun processing()
}
data class AudioRecognizerSettings(
val modelRunConfiguration: MultiModelRunConfiguration,
val decodingConfiguration: DecodingConfiguration
@@ -69,8 +50,6 @@ data class AudioRecognizerSettings(
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
// Ideally this shouldn't load the models at all, we should have something else that loads it
// and gives the model to AudioRecognizer
class AudioRecognizer(
val context: Context,
val lifecycleScope: LifecycleCoroutineScope,
@@ -122,11 +101,11 @@ class AudioRecognizer(
isRecording = false
}
fun finishRecognizer() {
fun finish() {
onFinishRecording()
}
fun cancelRecognizer() {
fun cancel() {
reset()
listener.cancelled()
}
@@ -142,25 +121,25 @@
myAppSettings.flags = Intent.FLAG_ACTIVITY_NEW_TASK
context.startActivity(myAppSettings)
cancelRecognizer()
cancel()
}
fun create() {
fun start() {
listener.loading()
if (context.checkSelfPermission(Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
listener.needPermission()
requestPermission()
} else {
startRecording()
}
}
fun permissionResultGranted() {
startRecording()
}
fun permissionResultRejected() {
listener.permissionRejected()
private fun requestPermission() {
listener.needPermission { wasGranted ->
if(wasGranted) {
startRecording()
}
}
}
@Throws(SecurityException::class)
@@ -219,7 +198,7 @@ class AudioRecognizer(
if (isRunningOutOfSpace || hasNotTalkedRecently) {
yield()
withContext(Dispatchers.Main) {
finishRecognizer()
finish()
}
return
}
@@ -305,7 +284,7 @@
if (floatSamples.remaining() < nRead2) {
yield()
withContext(Dispatchers.Main) {
finishRecognizer()
finish()
}
break
}
@@ -333,7 +312,7 @@
createAudioRecorder()
} catch (e: SecurityException) {
// It's possible we may have lost permission, so let's just ask for permission again
listener.needPermission()
requestPermission()
return
}
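
On the AudioRecognizer side, the permissionResultGranted/permissionResultRejected pair is gone; the outcome now travels back through the callback handed to needPermission. A rough sketch of a listener answering it, assuming some askForRecordAudioPermission helper that reports the user's decision:

override fun needPermission(onResult: (Boolean) -> Unit) {
    // Hypothetical helper that requests RECORD_AUDIO and reports the result exactly once.
    askForRecordAudioPermission { granted ->
        onResult(granted)  // true -> AudioRecognizer proceeds to startRecording(), false -> it stays idle
    }
}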

View File

@@ -10,8 +10,10 @@ import androidx.compose.runtime.Composable
import androidx.compose.runtime.mutableStateOf
import androidx.lifecycle.LifecycleCoroutineScope
import kotlinx.coroutines.launch
import org.futo.voiceinput.shared.types.AudioRecognizerListener
import org.futo.voiceinput.shared.types.InferenceState
import org.futo.voiceinput.shared.types.Language
import org.futo.voiceinput.shared.types.MagnitudeState
import org.futo.voiceinput.shared.ui.InnerRecognize
import org.futo.voiceinput.shared.ui.PartialDecodingResult
import org.futo.voiceinput.shared.ui.RecognizeLoadingCircle
@@ -23,50 +25,49 @@ import org.futo.voiceinput.shared.whisper.DecodingConfiguration
import org.futo.voiceinput.shared.whisper.ModelManager
import org.futo.voiceinput.shared.whisper.MultiModelRunConfiguration
abstract class RecognizerView(
data class RecognizerViewSettings(
val shouldShowVerboseFeedback: Boolean,
val shouldShowInlinePartialResult: Boolean
)
private val VerboseAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.extracting_features,
InferenceState.LoadingModel to R.string.loading_model,
InferenceState.Encoding to R.string.encoding,
InferenceState.DecodingLanguage to R.string.decoding,
InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.decoding
)
private val DefaultAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.processing,
InferenceState.LoadingModel to R.string.processing,
InferenceState.Encoding to R.string.processing,
InferenceState.DecodingLanguage to R.string.processing,
InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.processing
)
interface RecognizerViewListener {
fun cancelled()
fun recordingStarted()
fun finished(result: String)
fun partialResult(result: String)
// Return true if a permission modal was shown, otherwise return false
fun requestPermission(onGranted: () -> Unit, onRejected: () -> Unit): Boolean
}
class RecognizerView(
private val context: Context,
private val lifecycleScope: LifecycleCoroutineScope,
private val modelManager: ModelManager
private val listener: RecognizerViewListener,
private val settings: RecognizerViewSettings,
lifecycleScope: LifecycleCoroutineScope,
modelManager: ModelManager
) {
// TODO: Should not get settings here, pass settings to constructor
private val shouldPlaySounds: ValueFromSettings<Boolean> = ValueFromSettings(ENABLE_SOUND, true)
private val shouldBeVerbose: ValueFromSettings<Boolean> =
ValueFromSettings(VERBOSE_PROGRESS, false)
// TODO: SoundPool should be managed by parent, not by view, as the view is short-lived
/* val soundPool: SoundPool = SoundPool.Builder().setMaxStreams(2).setAudioAttributes(
AudioAttributes.Builder().setUsage(USAGE_ASSISTANCE_SONIFICATION)
.setContentType(CONTENT_TYPE_SONIFICATION).build()
).build()*/
private var startSoundId: Int = -1
private var cancelSoundId: Int = -1
abstract fun onCancel()
abstract fun sendResult(result: String)
abstract fun sendPartialResult(result: String): Boolean
abstract fun requestPermission()
companion object {
private val verboseAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.extracting_features,
InferenceState.LoadingModel to R.string.loading_model,
InferenceState.Encoding to R.string.encoding,
InferenceState.DecodingLanguage to R.string.decoding,
InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.decoding
)
private val defaultAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.processing,
InferenceState.LoadingModel to R.string.processing,
InferenceState.Encoding to R.string.processing,
InferenceState.DecodingLanguage to R.string.processing,
InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.processing
)
}
private val magnitudeState = mutableStateOf(0.0f)
private val statusState = mutableStateOf(MagnitudeState.NOT_TALKED_YET)
@@ -96,7 +97,7 @@ abstract class RecognizerView(
CurrentView.InnerRecognize -> {
Column {
InnerRecognize(
onFinish = { recognizer.finishRecognizer() },
onFinish = { recognizer.finish() },
magnitude = magnitudeState,
state = statusState
)
@@ -111,37 +112,17 @@
}
}
fun onClose() {
recognizer.cancelRecognizer()
fun cancel() {
recognizer.cancel()
}
private val listener = object : AudioRecognizerListener {
// Tries to play a sound. If it's not yet ready, plays it when it's ready
private fun playSound(id: Int) {
/*
lifecycleScope.launch {
shouldPlaySounds.load(context) {
if (it) {
if (soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f) == 0) {
soundPool.setOnLoadCompleteListener { soundPool, sampleId, status ->
if ((sampleId == id) && (status == 0)) {
soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f)
}
}
}
}
}
}
*/
}
private val audioRecognizerListener = object : AudioRecognizerListener {
override fun cancelled() {
playSound(cancelSoundId)
onCancel()
listener.cancelled()
}
override fun finished(result: String) {
sendResult(result)
listener.finished(result)
}
override fun languageDetected(language: Language) {
@@ -149,20 +130,19 @@
}
override fun partialResult(result: String) {
if (!sendPartialResult(result)) {
if (result.isNotBlank()) {
partialDecodingText.value = result
currentViewState.value = CurrentView.PartialDecodingResult
}
listener.partialResult(result)
if (settings.shouldShowInlinePartialResult && result.isNotBlank()) {
partialDecodingText.value = result
currentViewState.value = CurrentView.PartialDecodingResult
}
}
override fun decodingStatus(status: InferenceState) {
val text = context.getString(
when (shouldBeVerbose.value) {
true -> verboseAnnotations[status]!!
false -> defaultAnnotations[status]!!
when (settings.shouldShowVerboseFeedback) {
true -> VerboseAnnotations[status]!!
false -> DefaultAnnotations[status]!!
}
)
@@ -175,18 +155,25 @@
currentViewState.value = CurrentView.LoadingCircle
}
override fun needPermission() {
requestPermission()
}
override fun needPermission(onResult: (Boolean) -> Unit) {
val shown = listener.requestPermission(
onGranted = {
onResult(true)
},
onRejected = {
onResult(false)
currentViewState.value = CurrentView.PermissionError
}
)
override fun permissionRejected() {
currentViewState.value = CurrentView.PermissionError
if(!shown) {
currentViewState.value = CurrentView.PermissionError
}
}
override fun recordingStarted() {
updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET)
playSound(startSoundId)
listener.recordingStarted()
}
override fun updateMagnitude(magnitude: Float, state: MagnitudeState) {
@@ -203,7 +190,7 @@
// TODO: Dummy settings, should get them from constructor
private val recognizer: AudioRecognizer = AudioRecognizer(
context, lifecycleScope, modelManager, listener, AudioRecognizerSettings(
context, lifecycleScope, modelManager, audioRecognizerListener, AudioRecognizerSettings(
modelRunConfiguration = MultiModelRunConfiguration(
primaryModel = ENGLISH_MODELS[0], languageSpecificModels = mapOf()
), decodingConfiguration = DecodingConfiguration(
@@ -216,22 +203,7 @@
recognizer.reset()
}
fun init() {
lifecycleScope.launch {
shouldBeVerbose.load(context)
}
//startSoundId = soundPool.load(this.context, R.raw.start, 0)
//cancelSoundId = soundPool.load(this.context, R.raw.cancel, 0)
recognizer.create()
}
fun permissionResultGranted() {
recognizer.permissionResultGranted()
}
fun permissionResultRejected() {
recognizer.permissionResultRejected()
fun start() {
recognizer.start()
}
}
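
Taken together, the new RecognizerView surface is: construct it with a listener and RecognizerViewSettings, call reset() and start(), render Content() from Compose, and call cancel() when the host goes away. A condensed sketch of that wiring, mirroring what VoiceInputAction does above (the listener value is assumed to implement RecognizerViewListener):

val recognizerView = RecognizerView(
    context = context,
    listener = listener,  // some RecognizerViewListener implementation
    settings = RecognizerViewSettings(
        shouldShowInlinePartialResult = false,
        shouldShowVerboseFeedback = true
    ),
    lifecycleScope = lifecycleScope,
    modelManager = modelManager
)

recognizerView.reset()   // clear any previous recognizer state
recognizerView.start()   // begin loading and recording; may route through listener.requestPermission

// Inside a @Composable host:
//     recognizerView.Content()

// When the hosting window closes without a result:
recognizerView.cancel()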

View File

@@ -0,0 +1,62 @@
package org.futo.voiceinput.shared
import android.content.Context
import android.media.AudioAttributes
import android.media.AudioAttributes.CONTENT_TYPE_SONIFICATION
import android.media.AudioAttributes.USAGE_ASSISTANCE_SONIFICATION
import android.media.SoundPool
import java.io.Closeable
// soundPool.play returns 0 on failure
private const val SoundPoolPlayFailure = 0
// status in OnLoadCompleteListener is 0 when successful
private const val LoadStatusSuccess = 0
class SoundPlayer(
private val context: Context
): Closeable {
private val soundPool: SoundPool = SoundPool.Builder().setMaxStreams(2).setAudioAttributes(
AudioAttributes.Builder().setUsage(USAGE_ASSISTANCE_SONIFICATION)
.setContentType(CONTENT_TYPE_SONIFICATION).build()
).build()
private var startSound: Int = -1
private var cancelSound: Int = -1
init {
startSound = soundPool.load(this.context, R.raw.start, 0)
cancelSound = soundPool.load(this.context, R.raw.cancel, 0)
}
override fun close() {
soundPool.release()
}
// Returns true if playback started, false if it failed (e.g. the sound is not loaded yet)
private fun playSound(id: Int): Boolean {
return when(soundPool.play(id, 1.0f, 1.0f, 0, 0, 1.0f)) {
SoundPoolPlayFailure -> false
else -> true
}
}
// Tries to play a sound. If it's not yet ready, plays it when it's ready
private fun playSoundOrLoad(id: Int) {
if (!playSound(id)) {
soundPool.setOnLoadCompleteListener { _, sampleId, status ->
if ((sampleId == id) && (status == LoadStatusSuccess)) {
playSound(id)
}
}
}
}
fun playStartSound() {
playSoundOrLoad(startSound)
}
fun playCancelSound() {
playSoundOrLoad(cancelSound)
}
}
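
SoundPlayer is meant to outlive any single action window, which is why VoiceInputPersistentState constructs it once above. A short usage sketch; wiring close() into a cleanup path is an assumption here, not something shown in this commit:

val soundPlayer = SoundPlayer(context)

soundPlayer.playStartSound()    // when recording begins
soundPlayer.playCancelSound()   // when the user backs out

// When the persistent state is being torn down for good:
soundPlayer.close()             // releases the underlying SoundPool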

View File

@@ -0,0 +1,21 @@
package org.futo.voiceinput.shared.types
enum class MagnitudeState {
NOT_TALKED_YET, MIC_MAY_BE_BLOCKED, TALKING
}
interface AudioRecognizerListener {
fun cancelled()
fun finished(result: String)
fun languageDetected(language: Language)
fun partialResult(result: String)
fun decodingStatus(status: InferenceState)
fun loading()
fun needPermission(onResult: (Boolean) -> Unit)
fun recordingStarted()
fun updateMagnitude(magnitude: Float, state: MagnitudeState)
fun processing()
}

View File

@@ -27,8 +27,8 @@ import androidx.compose.ui.res.painterResource
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import org.futo.voiceinput.shared.MagnitudeState
import org.futo.voiceinput.shared.R
import org.futo.voiceinput.shared.types.MagnitudeState
import org.futo.voiceinput.shared.ui.theme.Typography