Add voice input audio focus and device display/config

This commit is contained in:
Aleksandras Kostarevas 2024-06-01 00:50:08 +03:00
parent 60bc8a2d1d
commit f2e42384bd
7 changed files with 193 additions and 38 deletions

View File

@ -29,6 +29,16 @@ val DISALLOW_SYMBOLS = SettingsKey(
default = true default = true
) )
// Boolean preference stored under "prefer_bluetooth_recording"; off by default.
// The voice input action window reads it and forwards the value as
// RecordingSettings.preferBluetoothMic.
val PREFER_BLUETOOTH = SettingsKey(
key = booleanPreferencesKey("prefer_bluetooth_recording"),
default = false
)
// Boolean preference stored under "request_audio_focus"; on by default.
// The voice input action window reads it and forwards the value as
// RecordingSettings.requestAudioFocus.
val AUDIO_FOCUS = SettingsKey(
key = booleanPreferencesKey("request_audio_focus"),
default = true
)
val ENGLISH_MODEL_INDEX = SettingsKey( val ENGLISH_MODEL_INDEX = SettingsKey(
key = intPreferencesKey("english_model_index"), key = intPreferencesKey("english_model_index"),
default = 0 default = 0

View File

@ -27,11 +27,13 @@ import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import kotlinx.coroutines.yield import kotlinx.coroutines.yield
import org.futo.inputmethod.latin.R import org.futo.inputmethod.latin.R
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
import org.futo.inputmethod.latin.uix.Action import org.futo.inputmethod.latin.uix.Action
import org.futo.inputmethod.latin.uix.ActionWindow import org.futo.inputmethod.latin.uix.ActionWindow
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
import org.futo.inputmethod.latin.uix.ENABLE_SOUND import org.futo.inputmethod.latin.uix.ENABLE_SOUND
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
import org.futo.inputmethod.latin.uix.PersistentActionState import org.futo.inputmethod.latin.uix.PersistentActionState
import org.futo.inputmethod.latin.uix.ResourceHelper import org.futo.inputmethod.latin.uix.ResourceHelper
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
@ -43,6 +45,7 @@ import org.futo.voiceinput.shared.ModelDoesNotExistException
import org.futo.voiceinput.shared.RecognizerView import org.futo.voiceinput.shared.RecognizerView
import org.futo.voiceinput.shared.RecognizerViewListener import org.futo.voiceinput.shared.RecognizerViewListener
import org.futo.voiceinput.shared.RecognizerViewSettings import org.futo.voiceinput.shared.RecognizerViewSettings
import org.futo.voiceinput.shared.RecordingSettings
import org.futo.voiceinput.shared.SoundPlayer import org.futo.voiceinput.shared.SoundPlayer
import org.futo.voiceinput.shared.types.Language import org.futo.voiceinput.shared.types.Language
import org.futo.voiceinput.shared.types.ModelLoader import org.futo.voiceinput.shared.types.ModelLoader
@ -84,6 +87,8 @@ private class VoiceInputActionWindow(
val enableSound = async { context.getSetting(ENABLE_SOUND) } val enableSound = async { context.getSetting(ENABLE_SOUND) }
val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) } val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) }
val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) } val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) }
val useBluetoothAudio = async { context.getSetting(PREFER_BLUETOOTH) }
val requestAudioFocus = async { context.getSetting(AUDIO_FOCUS) }
val primaryModel = model val primaryModel = model
val languageSpecificModels = mutableMapOf<Language, ModelLoader>() val languageSpecificModels = mutableMapOf<Language, ModelLoader>()
@ -104,6 +109,10 @@ private class VoiceInputActionWindow(
glossary = state.userDictionaryObserver.getWords().map { it.word }, glossary = state.userDictionaryObserver.getWords().map { it.word },
languages = allowedLanguages, languages = allowedLanguages,
suppressSymbols = disallowSymbols.await() suppressSymbols = disallowSymbols.await()
),
recordingConfiguration = RecordingSettings(
preferBluetoothMic = useBluetoothAudio.await(),
requestAudioFocus = requestAudioFocus.await()
) )
) )
} }
@ -203,7 +212,7 @@ private class VoiceInputActionWindow(
} }
} }
override fun recordingStarted() { override fun recordingStarted(device: String) {
if (shouldPlaySounds) { if (shouldPlaySounds) {
state.soundPlayer.playStartSound() state.soundPlayer.playStartSound()
} }

View File

@ -1,35 +1,18 @@
package org.futo.inputmethod.latin.uix.settings.pages package org.futo.inputmethod.latin.uix.settings.pages
import android.content.Intent import android.content.Intent
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.DropdownMenuItem
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.ExposedDropdownMenuBox
import androidx.compose.material3.ExposedDropdownMenuDefaults
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Text
import androidx.compose.material3.TextField
import androidx.compose.runtime.Composable import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.painterResource import androidx.compose.ui.res.painterResource
import androidx.compose.ui.res.stringResource import androidx.compose.ui.res.stringResource
import androidx.compose.ui.tooling.preview.Preview import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.dp
import androidx.navigation.NavHostController import androidx.navigation.NavHostController
import androidx.navigation.compose.rememberNavController import androidx.navigation.compose.rememberNavController
import org.futo.inputmethod.latin.R import org.futo.inputmethod.latin.R
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
import org.futo.inputmethod.latin.uix.ENABLE_SOUND import org.futo.inputmethod.latin.uix.ENABLE_SOUND
import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
import org.futo.inputmethod.latin.uix.SettingsKey
import org.futo.inputmethod.latin.uix.USE_SYSTEM_VOICE_INPUT import org.futo.inputmethod.latin.uix.USE_SYSTEM_VOICE_INPUT
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
import org.futo.inputmethod.latin.uix.settings.NavigationItem import org.futo.inputmethod.latin.uix.settings.NavigationItem
@ -38,8 +21,6 @@ import org.futo.inputmethod.latin.uix.settings.ScreenTitle
import org.futo.inputmethod.latin.uix.settings.ScrollableList import org.futo.inputmethod.latin.uix.settings.ScrollableList
import org.futo.inputmethod.latin.uix.settings.SettingToggleDataStore import org.futo.inputmethod.latin.uix.settings.SettingToggleDataStore
import org.futo.inputmethod.latin.uix.settings.useDataStore import org.futo.inputmethod.latin.uix.settings.useDataStore
import org.futo.voiceinput.shared.ENGLISH_MODELS
import org.futo.voiceinput.shared.types.ModelLoader
@Preview @Preview
@Composable @Composable
@ -49,7 +30,6 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
ScrollableList { ScrollableList {
ScreenTitle("Voice Input", showBack = true, navController) ScreenTitle("Voice Input", showBack = true, navController)
SettingToggleDataStore( SettingToggleDataStore(
title = "Disable built-in voice input", title = "Disable built-in voice input",
subtitle = "Use voice input provided by external app", subtitle = "Use voice input provided by external app",
@ -76,10 +56,23 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
SettingToggleDataStore( SettingToggleDataStore(
title = "Verbose progress", title = "Verbose progress",
subtitle = "Display verbose information about model inference", subtitle = "Display verbose information such as mic being used",
setting = VERBOSE_PROGRESS setting = VERBOSE_PROGRESS
) )
SettingToggleDataStore(
title = "Prefer Bluetooth Mic",
subtitle = "There may be extra delay to recording starting as Bluetooth SCO connection must be negotiated",
setting = PREFER_BLUETOOTH
)
SettingToggleDataStore(
title = "Audio Focus",
subtitle = "Pause videos/music when voice input is activated",
setting = AUDIO_FOCUS
)
SettingToggleDataStore( SettingToggleDataStore(
title = "Suppress symbols", title = "Suppress symbols",
setting = DISALLOW_SYMBOLS setting = DISALLOW_SYMBOLS

View File

@ -5,7 +5,10 @@ import android.content.Context
import android.content.Intent import android.content.Intent
import android.content.pm.PackageManager import android.content.pm.PackageManager
import android.hardware.SensorPrivacyManager import android.hardware.SensorPrivacyManager
import android.media.AudioDeviceInfo
import android.media.AudioFocusRequest
import android.media.AudioFormat import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioRecord import android.media.AudioRecord
import android.media.MediaRecorder import android.media.MediaRecorder
import android.media.MicrophoneDirection import android.media.MicrophoneDirection
@ -42,9 +45,43 @@ import kotlin.math.min
import kotlin.math.pow import kotlin.math.pow
import kotlin.math.sqrt import kotlin.math.sqrt
/**
 * Maps an [AudioDeviceInfo] type constant to a short label used when
 * describing the active recording device.
 *
 * @param type one of the `AudioDeviceInfo.TYPE_*` constants
 * @return the label for [type]; both built-in types share the label "BUILTIN",
 *   and any type not listed here yields `"unknown@<type>"`
 */
private fun getRecordingDeviceKind(type: Int): String = when (type) {
    AudioDeviceInfo.TYPE_BUILTIN_EARPIECE,
    AudioDeviceInfo.TYPE_BUILTIN_MIC -> "BUILTIN"

    // Bluetooth
    AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "BLUETOOTH_SCO"
    AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "BLUETOOTH_A2DP"
    AudioDeviceInfo.TYPE_BLE_HEADSET -> "BLE_HEADSET"

    // Wired, dock and USB
    AudioDeviceInfo.TYPE_WIRED_HEADSET -> "WIRED_HEADSET"
    AudioDeviceInfo.TYPE_DOCK -> "DOCK"
    AudioDeviceInfo.TYPE_DOCK_ANALOG -> "DOCK_ANALOG"
    AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB_ACCESSORY"
    AudioDeviceInfo.TYPE_USB_DEVICE -> "USB_DEVICE"
    AudioDeviceInfo.TYPE_USB_HEADSET -> "USB_HEADSET"
    AudioDeviceInfo.TYPE_LINE_ANALOG -> "LINE_ANALOG"
    AudioDeviceInfo.TYPE_LINE_DIGITAL -> "LINE_DIGITAL"

    // HDMI
    AudioDeviceInfo.TYPE_HDMI -> "HDMI"
    AudioDeviceInfo.TYPE_HDMI_ARC -> "HDMI_ARC"
    AudioDeviceInfo.TYPE_HDMI_EARC -> "HDMI_EARC"

    // Everything else
    AudioDeviceInfo.TYPE_TELEPHONY -> "TELEPHONY"
    AudioDeviceInfo.TYPE_FM_TUNER -> "FM_TUNER"
    AudioDeviceInfo.TYPE_TV_TUNER -> "TV_TUNER"
    AudioDeviceInfo.TYPE_IP -> "IP"
    AudioDeviceInfo.TYPE_BUS -> "BUS"
    AudioDeviceInfo.TYPE_REMOTE_SUBMIX -> "REMOTE_SUBMIX"

    else -> "unknown@$type"
}
/**
 * Options controlling how audio is captured for voice input.
 *
 * @property preferBluetoothMic when true, the recognizer tries to select a
 *   Bluetooth SCO communication device before recording
 * @property requestAudioFocus when true, the recognizer requests audio focus
 *   before recording starts
 */
data class RecordingSettings(
    val preferBluetoothMic: Boolean,
    val requestAudioFocus: Boolean,
)
data class AudioRecognizerSettings( data class AudioRecognizerSettings(
val modelRunConfiguration: MultiModelRunConfiguration, val modelRunConfiguration: MultiModelRunConfiguration,
val decodingConfiguration: DecodingConfiguration val decodingConfiguration: DecodingConfiguration,
val recordingConfiguration: RecordingSettings
) )
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable() class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
@ -66,6 +103,69 @@ class AudioRecognizer(
private var modelJob: Job? = null private var modelJob: Job? = null
private var loadModelJob: Job? = null private var loadModelJob: Job? = null
// Audio focus request created by focusAudio(); unfocusAudio() abandons it and
// resets this to null.
private var focusRequest: AudioFocusRequest? = null
// Description of the input device passed to listener.recordingStarted().
// setCommunicationDevice() resets it to "Unset" and, when a communication
// device is selected successfully, replaces it with that device's name and kind.
private var communicationDevice = "unknown"
/**
 * Requests transient, exclusive audio focus
 * ([AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE]) when the
 * `requestAudioFocus` recording setting is enabled.
 *
 * Any request left over from an earlier call is abandoned first. Does nothing
 * below API 26 ([Build.VERSION_CODES.O]). Exceptions are logged and never
 * propagated to the caller.
 */
private fun focusAudio() {
    // Release whatever we may still be holding before asking again.
    unfocusAudio()

    if (!settings.recordingConfiguration.requestAudioFocus) return
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) return

    try {
        val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager

        // Build into a local so the non-null request can be used directly
        // instead of re-reading the nullable property with `!!`.
        val request = AudioFocusRequest
            .Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE)
            .build()

        // Remember the request before issuing it so unfocusAudio() can abandon it.
        focusRequest = request

        // The result is not inspected: recording proceeds whether or not focus was granted.
        audioManager.requestAudioFocus(request)
    } catch (e: Exception) {
        e.printStackTrace()
    }
}
/**
 * Abandons the audio focus request stored in `focusRequest`, if any, and
 * clears the stored reference.
 *
 * Does nothing below API 26 ([Build.VERSION_CODES.O]) or when no request is
 * stored. Exceptions are logged and never propagated; if abandoning throws,
 * the stored request is left in place.
 */
private fun unfocusAudio() {
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) return

    // Nothing to release: skip the system service lookup entirely.
    val request = focusRequest ?: return

    try {
        val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
        audioManager.abandonAudioFocusRequest(request)
        focusRequest = null
    } catch (e: Exception) {
        e.printStackTrace()
    }
}
/**
 * Tries to route recording through a Bluetooth SCO device when the
 * `preferBluetoothMic` recording setting is enabled.
 *
 * `communicationDevice` is always reset to "Unset" first. It is replaced with
 * "<product name> (<kind>)" only when a communication device was selected
 * successfully. Selection order: first Bluetooth SCO device, then the built-in
 * microphone, then whatever device is listed first.
 *
 * Does nothing further below API 31 ([Build.VERSION_CODES.S]) or when no
 * communication device is available. Exceptions are logged and never
 * propagated to the caller.
 */
private fun setCommunicationDevice() {
    communicationDevice = "Unset"

    if (!settings.recordingConfiguration.preferBluetoothMic) return
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.S) return

    try {
        val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
        val candidates = audioManager.availableCommunicationDevices

        // An empty list leaves the device "Unset" instead of throwing.
        val target = candidates.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO }
            ?: candidates.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_MIC }
            ?: candidates.firstOrNull()
            ?: return

        if (audioManager.setCommunicationDevice(target)) {
            communicationDevice = "${target.productName} (${getRecordingDeviceKind(target.type)})"
        } else {
            // Selection was rejected; drop any communication device request.
            audioManager.clearCommunicationDevice()
        }
    } catch (e: Exception) {
        // Log like the audio focus helpers do, rather than hiding the failure.
        e.printStackTrace()
    }
}
/**
 * Clears the communication device selection through [AudioManager].
 *
 * Has no effect below API 31 ([Build.VERSION_CODES.S]).
 */
private fun clearCommunicationDevice() {
    val manager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager

    // clearCommunicationDevice() is only called on API 31 and above.
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.S) return

    manager.clearCommunicationDevice()
}
@Throws(ModelDoesNotExistException::class) @Throws(ModelDoesNotExistException::class)
private fun verifyModelsExist() { private fun verifyModelsExist() {
val modelsThatDoNotExist = mutableListOf<ModelLoader>() val modelsThatDoNotExist = mutableListOf<ModelLoader>()
@ -100,6 +200,10 @@ class AudioRecognizer(
isRecording = false isRecording = false
modelRunner.cancelAll() modelRunner.cancelAll()
unfocusAudio()
clearCommunicationDevice()
} }
fun finish() { fun finish() {
@ -144,10 +248,17 @@ class AudioRecognizer(
} }
} }
@Throws(SecurityException::class) @Throws(SecurityException::class)
private fun createAudioRecorder(): AudioRecord { private fun createAudioRecorder(): AudioRecord {
val purpose = if(settings.recordingConfiguration.preferBluetoothMic) {
MediaRecorder.AudioSource.VOICE_COMMUNICATION
} else {
MediaRecorder.AudioSource.VOICE_RECOGNITION
}
val recorder = AudioRecord( val recorder = AudioRecord(
MediaRecorder.AudioSource.VOICE_RECOGNITION, purpose,
16000, 16000,
AudioFormat.CHANNEL_IN_MONO, AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT, AudioFormat.ENCODING_PCM_16BIT,
@ -160,8 +271,6 @@ class AudioRecognizer(
recorder.setPreferredMicrophoneDirection(MicrophoneDirection.MIC_DIRECTION_TOWARDS_USER) recorder.setPreferredMicrophoneDirection(MicrophoneDirection.MIC_DIRECTION_TOWARDS_USER)
} }
recorder.startRecording()
return recorder return recorder
} }
@ -310,14 +419,32 @@ class AudioRecognizer(
throw IllegalStateException("Start recording when already recording") throw IllegalStateException("Start recording when already recording")
} }
setCommunicationDevice()
val recorder = try { val recorder = try {
createAudioRecorder() createAudioRecorder()
} catch (e: SecurityException) { } catch (e: SecurityException) {
// It's possible we may have lost permission, so let's just ask for permission again // It's possible we may have lost permission, so let's just ask for permission again
clearCommunicationDevice()
requestPermission() requestPermission()
return return
} }
focusAudio()
if(communicationDevice == "Unset") {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) {
communicationDevice = recorder.activeMicrophones.joinToString {
getRecordingDeviceKind(it.type)
} + " (may be stale)"
}
}
listener.recordingStarted(communicationDevice)
recorder.startRecording()
this.recorder = recorder this.recorder = recorder
isRecording = true isRecording = true
@ -336,7 +463,6 @@ class AudioRecognizer(
} }
} }
listener.recordingStarted()
} }
private val runnerCallback: ModelInferenceCallback = object : ModelInferenceCallback { private val runnerCallback: ModelInferenceCallback = object : ModelInferenceCallback {

View File

@ -23,13 +23,14 @@ data class RecognizerViewSettings(
val shouldShowInlinePartialResult: Boolean, val shouldShowInlinePartialResult: Boolean,
val modelRunConfiguration: MultiModelRunConfiguration, val modelRunConfiguration: MultiModelRunConfiguration,
val decodingConfiguration: DecodingConfiguration val decodingConfiguration: DecodingConfiguration,
val recordingConfiguration: RecordingSettings
) )
private val VerboseAnnotations = hashMapOf( private val VerboseAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.extracting_features, InferenceState.ExtractingMel to R.string.extracting_features,
InferenceState.LoadingModel to R.string.loading_model, InferenceState.LoadingModel to R.string.loading_model,
InferenceState.Encoding to R.string.encoding, InferenceState.Encoding to R.string.processing,
InferenceState.DecodingLanguage to R.string.decoding, InferenceState.DecodingLanguage to R.string.decoding,
InferenceState.SwitchingModel to R.string.switching_model, InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.decoding InferenceState.DecodingStarted to R.string.decoding
@ -47,7 +48,7 @@ private val DefaultAnnotations = hashMapOf(
interface RecognizerViewListener { interface RecognizerViewListener {
fun cancelled() fun cancelled()
fun recordingStarted() fun recordingStarted(device: String)
fun finished(result: String) fun finished(result: String)
@ -75,6 +76,8 @@ class RecognizerView(
private val partialDecodingText = mutableStateOf("") private val partialDecodingText = mutableStateOf("")
private val currentViewState = mutableStateOf(CurrentView.LoadingCircle) private val currentViewState = mutableStateOf(CurrentView.LoadingCircle)
private val currentDeviceState = mutableStateOf("Recording not started")
@Composable @Composable
fun Content() { fun Content() {
when (currentViewState.value) { when (currentViewState.value) {
@ -93,7 +96,8 @@ class RecognizerView(
CurrentView.InnerRecognize -> { CurrentView.InnerRecognize -> {
InnerRecognize( InnerRecognize(
magnitude = magnitudeState, magnitude = magnitudeState,
state = statusState state = statusState,
device = if(settings.shouldShowVerboseFeedback) { currentDeviceState } else { null }
) )
} }
@ -168,9 +172,10 @@ class RecognizerView(
} }
} }
override fun recordingStarted() { override fun recordingStarted(device: String) {
updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET) updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET)
listener.recordingStarted() currentDeviceState.value = device
listener.recordingStarted(device)
} }
override fun updateMagnitude(magnitude: Float, state: MagnitudeState) { override fun updateMagnitude(magnitude: Float, state: MagnitudeState) {
@ -192,7 +197,8 @@ class RecognizerView(
listener = audioRecognizerListener, listener = audioRecognizerListener,
settings = AudioRecognizerSettings( settings = AudioRecognizerSettings(
modelRunConfiguration = settings.modelRunConfiguration, modelRunConfiguration = settings.modelRunConfiguration,
decodingConfiguration = settings.decodingConfiguration decodingConfiguration = settings.decodingConfiguration,
recordingConfiguration = settings.recordingConfiguration
) )
) )

View File

@ -14,7 +14,7 @@ interface AudioRecognizerListener {
fun loading() fun loading()
fun needPermission(onResult: (Boolean) -> Unit) fun needPermission(onResult: (Boolean) -> Unit)
fun recordingStarted() fun recordingStarted(device: String)
fun updateMagnitude(magnitude: Float, state: MagnitudeState) fun updateMagnitude(magnitude: Float, state: MagnitudeState)
fun processing() fun processing()

View File

@ -54,7 +54,8 @@ fun AnimatedRecognizeCircle(magnitude: MutableFloatState = mutableFloatStateOf(0
@Composable @Composable
fun InnerRecognize( fun InnerRecognize(
magnitude: MutableFloatState = mutableFloatStateOf(0.5f), magnitude: MutableFloatState = mutableFloatStateOf(0.5f),
state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED) state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED),
device: MutableState<String>? = mutableStateOf("")
) { ) {
Box(modifier = Modifier.fillMaxSize(), contentAlignment = Alignment.Center) { Box(modifier = Modifier.fillMaxSize(), contentAlignment = Alignment.Center) {
AnimatedRecognizeCircle(magnitude = magnitude) AnimatedRecognizeCircle(magnitude = magnitude)
@ -78,6 +79,16 @@ fun InnerRecognize(
textAlign = TextAlign.Center, textAlign = TextAlign.Center,
color = MaterialTheme.colorScheme.onSurface color = MaterialTheme.colorScheme.onSurface
) )
if(device != null) {
Text(
"Device: ${device.value}",
style = Typography.labelSmall,
modifier = Modifier.fillMaxWidth().offset(x = 0.dp, y = 64.dp),
textAlign = TextAlign.Center,
color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.66f)
)
}
} }
} }