Add voice input audio focus and device display/config

Aleksandras Kostarevas 2024-06-01 00:50:08 +03:00
parent 60bc8a2d1d
commit f2e42384bd
7 changed files with 193 additions and 38 deletions

View File

@@ -29,6 +29,16 @@ val DISALLOW_SYMBOLS = SettingsKey(
default = true
)
val PREFER_BLUETOOTH = SettingsKey(
key = booleanPreferencesKey("prefer_bluetooth_recording"),
default = false
)
val AUDIO_FOCUS = SettingsKey(
key = booleanPreferencesKey("request_audio_focus"),
default = true
)
val ENGLISH_MODEL_INDEX = SettingsKey(
key = intPreferencesKey("english_model_index"),
default = 0

View File

@@ -27,11 +27,13 @@ import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import kotlinx.coroutines.yield
import org.futo.inputmethod.latin.R
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
import org.futo.inputmethod.latin.uix.Action
import org.futo.inputmethod.latin.uix.ActionWindow
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
import org.futo.inputmethod.latin.uix.PersistentActionState
import org.futo.inputmethod.latin.uix.ResourceHelper
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
@@ -43,6 +45,7 @@ import org.futo.voiceinput.shared.ModelDoesNotExistException
import org.futo.voiceinput.shared.RecognizerView
import org.futo.voiceinput.shared.RecognizerViewListener
import org.futo.voiceinput.shared.RecognizerViewSettings
import org.futo.voiceinput.shared.RecordingSettings
import org.futo.voiceinput.shared.SoundPlayer
import org.futo.voiceinput.shared.types.Language
import org.futo.voiceinput.shared.types.ModelLoader
@@ -84,6 +87,8 @@ private class VoiceInputActionWindow(
val enableSound = async { context.getSetting(ENABLE_SOUND) }
val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) }
val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) }
val useBluetoothAudio = async { context.getSetting(PREFER_BLUETOOTH) }
val requestAudioFocus = async { context.getSetting(AUDIO_FOCUS) }
val primaryModel = model
val languageSpecificModels = mutableMapOf<Language, ModelLoader>()
@@ -104,6 +109,10 @@ private class VoiceInputActionWindow(
glossary = state.userDictionaryObserver.getWords().map { it.word },
languages = allowedLanguages,
suppressSymbols = disallowSymbols.await()
),
recordingConfiguration = RecordingSettings(
preferBluetoothMic = useBluetoothAudio.await(),
requestAudioFocus = requestAudioFocus.await()
)
)
}
@@ -203,7 +212,7 @@ private class VoiceInputActionWindow(
}
}
override fun recordingStarted() {
override fun recordingStarted(device: String) {
if (shouldPlaySounds) {
state.soundPlayer.playStartSound()
}

View File

@@ -1,35 +1,18 @@
package org.futo.inputmethod.latin.uix.settings.pages
import android.content.Intent
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.DropdownMenuItem
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.ExposedDropdownMenuBox
import androidx.compose.material3.ExposedDropdownMenuDefaults
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Text
import androidx.compose.material3.TextField
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.painterResource
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.dp
import androidx.navigation.NavHostController
import androidx.navigation.compose.rememberNavController
import org.futo.inputmethod.latin.R
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX
import org.futo.inputmethod.latin.uix.SettingsKey
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
import org.futo.inputmethod.latin.uix.USE_SYSTEM_VOICE_INPUT
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
import org.futo.inputmethod.latin.uix.settings.NavigationItem
@@ -38,8 +21,6 @@ import org.futo.inputmethod.latin.uix.settings.ScreenTitle
import org.futo.inputmethod.latin.uix.settings.ScrollableList
import org.futo.inputmethod.latin.uix.settings.SettingToggleDataStore
import org.futo.inputmethod.latin.uix.settings.useDataStore
import org.futo.voiceinput.shared.ENGLISH_MODELS
import org.futo.voiceinput.shared.types.ModelLoader
@Preview
@Composable
@@ -49,7 +30,6 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
ScrollableList {
ScreenTitle("Voice Input", showBack = true, navController)
SettingToggleDataStore(
title = "Disable built-in voice input",
subtitle = "Use voice input provided by external app",
@@ -76,10 +56,23 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
SettingToggleDataStore(
title = "Verbose progress",
subtitle = "Display verbose information about model inference",
subtitle = "Display verbose information such as mic being used",
setting = VERBOSE_PROGRESS
)
SettingToggleDataStore(
title = "Prefer Bluetooth Mic",
subtitle = "There may be extra delay to recording starting as Bluetooth SCO connection must be negotiated",
setting = PREFER_BLUETOOTH
)
SettingToggleDataStore(
title = "Audio Focus",
subtitle = "Pause videos/music when voice input is activated",
setting = AUDIO_FOCUS
)
SettingToggleDataStore(
title = "Suppress symbols",
setting = DISALLOW_SYMBOLS

View File

@@ -5,7 +5,10 @@ import android.content.Context
import android.content.Intent
import android.content.pm.PackageManager
import android.hardware.SensorPrivacyManager
import android.media.AudioDeviceInfo
import android.media.AudioFocusRequest
import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioRecord
import android.media.MediaRecorder
import android.media.MicrophoneDirection
@@ -42,9 +45,43 @@ import kotlin.math.min
import kotlin.math.pow
import kotlin.math.sqrt
private fun getRecordingDeviceKind(type: Int): String {
return when (type) {
AudioDeviceInfo.TYPE_BUILTIN_EARPIECE -> "BUILTIN"
AudioDeviceInfo.TYPE_BUILTIN_MIC -> "BUILTIN"
AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "BLUETOOTH_SCO"
AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "BLUETOOTH_A2DP"
AudioDeviceInfo.TYPE_WIRED_HEADSET -> "WIRED_HEADSET"
AudioDeviceInfo.TYPE_HDMI -> "HDMI"
AudioDeviceInfo.TYPE_TELEPHONY -> "TELEPHONY"
AudioDeviceInfo.TYPE_DOCK -> "DOCK"
AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB_ACCESSORY"
AudioDeviceInfo.TYPE_USB_DEVICE -> "USB_DEVICE"
AudioDeviceInfo.TYPE_USB_HEADSET -> "USB_HEADSET"
AudioDeviceInfo.TYPE_FM_TUNER -> "FM_TUNER"
AudioDeviceInfo.TYPE_TV_TUNER -> "TV_TUNER"
AudioDeviceInfo.TYPE_LINE_ANALOG -> "LINE_ANALOG"
AudioDeviceInfo.TYPE_LINE_DIGITAL -> "LINE_DIGITAL"
AudioDeviceInfo.TYPE_IP -> "IP"
AudioDeviceInfo.TYPE_BUS -> "BUS"
AudioDeviceInfo.TYPE_REMOTE_SUBMIX -> "REMOTE_SUBMIX"
AudioDeviceInfo.TYPE_BLE_HEADSET -> "BLE_HEADSET"
AudioDeviceInfo.TYPE_HDMI_ARC -> "HDMI_ARC"
AudioDeviceInfo.TYPE_HDMI_EARC -> "HDMI_EARC"
AudioDeviceInfo.TYPE_DOCK_ANALOG -> "DOCK_ANALOG"
else -> "unknown@${type}"
}
}
data class RecordingSettings(
val preferBluetoothMic: Boolean,
val requestAudioFocus: Boolean
)
data class AudioRecognizerSettings(
val modelRunConfiguration: MultiModelRunConfiguration,
val decodingConfiguration: DecodingConfiguration
val decodingConfiguration: DecodingConfiguration,
val recordingConfiguration: RecordingSettings
)
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
@@ -66,6 +103,69 @@ class AudioRecognizer(
private var modelJob: Job? = null
private var loadModelJob: Job? = null
private var focusRequest: AudioFocusRequest? = null
private var communicationDevice = "unknown"
private fun focusAudio() {
unfocusAudio()
if(!settings.recordingConfiguration.requestAudioFocus) return
try {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
focusRequest =
AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE)
.build()
audioManager.requestAudioFocus(focusRequest!!)
}
} catch (e: Exception) {
e.printStackTrace()
}
}
private fun unfocusAudio() {
try {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
if (focusRequest != null) {
audioManager.abandonAudioFocusRequest(focusRequest!!)
}
focusRequest = null
}
} catch (e: Exception) {
e.printStackTrace()
}
}
private fun setCommunicationDevice() {
communicationDevice = "Unset"
if(!settings.recordingConfiguration.preferBluetoothMic) return
try {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
val devices = audioManager.availableCommunicationDevices
val tgtDevice = devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO }
    ?: devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_MIC }
    ?: devices.first()
if (!audioManager.setCommunicationDevice(tgtDevice)) {
audioManager.clearCommunicationDevice()
} else {
communicationDevice =
tgtDevice.productName.toString() + " (${getRecordingDeviceKind(tgtDevice.type)})"
}
}
} catch(_: Exception) {}
}
private fun clearCommunicationDevice() {
val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
audioManager.clearCommunicationDevice()
}
}
@Throws(ModelDoesNotExistException::class)
private fun verifyModelsExist() {
val modelsThatDoNotExist = mutableListOf<ModelLoader>()
@@ -100,6 +200,10 @@ class AudioRecognizer(
isRecording = false
modelRunner.cancelAll()
unfocusAudio()
clearCommunicationDevice()
}
fun finish() {
@@ -144,10 +248,17 @@ class AudioRecognizer(
}
}
@Throws(SecurityException::class)
private fun createAudioRecorder(): AudioRecord {
val purpose = if(settings.recordingConfiguration.preferBluetoothMic) {
MediaRecorder.AudioSource.VOICE_COMMUNICATION
} else {
MediaRecorder.AudioSource.VOICE_RECOGNITION
}
val recorder = AudioRecord(
MediaRecorder.AudioSource.VOICE_RECOGNITION,
purpose,
16000,
AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT,
@@ -160,8 +271,6 @@ class AudioRecognizer(
recorder.setPreferredMicrophoneDirection(MicrophoneDirection.MIC_DIRECTION_TOWARDS_USER)
}
recorder.startRecording()
return recorder
}
@@ -310,14 +419,32 @@ class AudioRecognizer(
throw IllegalStateException("Start recording when already recording")
}
setCommunicationDevice()
val recorder = try {
createAudioRecorder()
} catch (e: SecurityException) {
// We may have lost the recording permission, so ask for it again
clearCommunicationDevice()
requestPermission()
return
}
focusAudio()
if(communicationDevice == "Unset") {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) {
communicationDevice = recorder.activeMicrophones.joinToString {
getRecordingDeviceKind(it.type)
} + " (may be stale)"
}
}
listener.recordingStarted(communicationDevice)
recorder.startRecording()
this.recorder = recorder
isRecording = true
@@ -336,7 +463,6 @@ class AudioRecognizer(
}
}
listener.recordingStarted()
}
private val runnerCallback: ModelInferenceCallback = object : ModelInferenceCallback {

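As an aside, the getRecordingDeviceKind() helper above pairs naturally with AudioManager.getDevices (available since API 23) to produce the same "Name (KIND)" strings the recognizer reports to the UI; a small sketch, not part of the commit:

// Sketch only: enumerate input devices with the same labels produced by
// getRecordingDeviceKind() above.
fun describeInputDevices(context: Context): List<String> {
    val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
    return audioManager.getDevices(AudioManager.GET_DEVICES_INPUTS).map { info ->
        "${info.productName} (${getRecordingDeviceKind(info.type)})"
    }
}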
View File

@@ -23,13 +23,14 @@ data class RecognizerViewSettings(
val shouldShowInlinePartialResult: Boolean,
val modelRunConfiguration: MultiModelRunConfiguration,
val decodingConfiguration: DecodingConfiguration
val decodingConfiguration: DecodingConfiguration,
val recordingConfiguration: RecordingSettings
)
private val VerboseAnnotations = hashMapOf(
InferenceState.ExtractingMel to R.string.extracting_features,
InferenceState.LoadingModel to R.string.loading_model,
InferenceState.Encoding to R.string.encoding,
InferenceState.Encoding to R.string.processing,
InferenceState.DecodingLanguage to R.string.decoding,
InferenceState.SwitchingModel to R.string.switching_model,
InferenceState.DecodingStarted to R.string.decoding
@@ -47,7 +48,7 @@ private val DefaultAnnotations = hashMapOf(
interface RecognizerViewListener {
fun cancelled()
fun recordingStarted()
fun recordingStarted(device: String)
fun finished(result: String)
@@ -75,6 +76,8 @@ class RecognizerView(
private val partialDecodingText = mutableStateOf("")
private val currentViewState = mutableStateOf(CurrentView.LoadingCircle)
private val currentDeviceState = mutableStateOf("Recording not started")
@Composable
fun Content() {
when (currentViewState.value) {
@@ -93,7 +96,8 @@ class RecognizerView(
CurrentView.InnerRecognize -> {
InnerRecognize(
magnitude = magnitudeState,
state = statusState
state = statusState,
device = if(settings.shouldShowVerboseFeedback) { currentDeviceState } else { null }
)
}
@@ -168,9 +172,10 @@ class RecognizerView(
}
}
override fun recordingStarted() {
override fun recordingStarted(device: String) {
updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET)
listener.recordingStarted()
currentDeviceState.value = device
listener.recordingStarted(device)
}
override fun updateMagnitude(magnitude: Float, state: MagnitudeState) {
@@ -192,7 +197,8 @@ class RecognizerView(
listener = audioRecognizerListener,
settings = AudioRecognizerSettings(
modelRunConfiguration = settings.modelRunConfiguration,
decodingConfiguration = settings.decodingConfiguration
decodingConfiguration = settings.decodingConfiguration,
recordingConfiguration = settings.recordingConfiguration
)
)

View File

@@ -14,7 +14,7 @@ interface AudioRecognizerListener {
fun loading()
fun needPermission(onResult: (Boolean) -> Unit)
fun recordingStarted()
fun recordingStarted(device: String)
fun updateMagnitude(magnitude: Float, state: MagnitudeState)
fun processing()

View File

@@ -54,7 +54,8 @@ fun AnimatedRecognizeCircle(magnitude: MutableFloatState = mutableFloatStateOf(0
@Composable
fun InnerRecognize(
magnitude: MutableFloatState = mutableFloatStateOf(0.5f),
state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED)
state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED),
device: MutableState<String>? = mutableStateOf("")
) {
Box(modifier = Modifier.fillMaxSize(), contentAlignment = Alignment.Center) {
AnimatedRecognizeCircle(magnitude = magnitude)
@@ -78,6 +79,16 @@ textAlign = TextAlign.Center,
textAlign = TextAlign.Center,
color = MaterialTheme.colorScheme.onSurface
)
if(device != null) {
Text(
"Device: ${device.value}",
style = Typography.labelSmall,
modifier = Modifier.fillMaxWidth().offset(x = 0.dp, y = 64.dp),
textAlign = TextAlign.Center,
color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.66f)
)
}
}
}
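
A preview exercising the new optional device label might look as follows; the device string is an illustrative value, and the composable is assumed to sit in the same file and reuse its existing Compose imports (Preview, remember, etc.):

// Sketch only: illustrative preview of the device label added above.
@Preview
@Composable
fun InnerRecognizePreviewWithDevice() {
    InnerRecognize(
        magnitude = remember { mutableFloatStateOf(0.5f) },
        state = remember { mutableStateOf(MagnitudeState.NOT_TALKED_YET) },
        device = remember { mutableStateOf("Example Headset (BLUETOOTH_SCO)") }
    )
}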