mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Add voice input audio focus and device display/config
This commit is contained in:
parent
60bc8a2d1d
commit
f2e42384bd
@ -29,6 +29,16 @@ val DISALLOW_SYMBOLS = SettingsKey(
|
||||
default = true
|
||||
)
|
||||
|
||||
/**
 * Boolean preference stored under "prefer_bluetooth_recording"; off by default.
 * Surfaced on the voice input settings page as "Prefer Bluetooth Mic".
 */
val PREFER_BLUETOOTH = SettingsKey(
    key = booleanPreferencesKey("prefer_bluetooth_recording"),
    default = false,
)
|
||||
|
||||
/**
 * Boolean preference stored under "request_audio_focus"; on by default.
 * Surfaced on the voice input settings page as "Audio Focus"
 * ("Pause videos/music when voice input is activated").
 */
val AUDIO_FOCUS = SettingsKey(
    key = booleanPreferencesKey("request_audio_focus"),
    default = true,
)
|
||||
|
||||
val ENGLISH_MODEL_INDEX = SettingsKey(
|
||||
key = intPreferencesKey("english_model_index"),
|
||||
default = 0
|
||||
|
@ -27,11 +27,13 @@ import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import kotlinx.coroutines.yield
|
||||
import org.futo.inputmethod.latin.R
|
||||
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
|
||||
import org.futo.inputmethod.latin.uix.Action
|
||||
import org.futo.inputmethod.latin.uix.ActionWindow
|
||||
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
|
||||
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
|
||||
import org.futo.inputmethod.latin.uix.KeyboardManagerForAction
|
||||
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
|
||||
import org.futo.inputmethod.latin.uix.PersistentActionState
|
||||
import org.futo.inputmethod.latin.uix.ResourceHelper
|
||||
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
|
||||
@ -43,6 +45,7 @@ import org.futo.voiceinput.shared.ModelDoesNotExistException
|
||||
import org.futo.voiceinput.shared.RecognizerView
|
||||
import org.futo.voiceinput.shared.RecognizerViewListener
|
||||
import org.futo.voiceinput.shared.RecognizerViewSettings
|
||||
import org.futo.voiceinput.shared.RecordingSettings
|
||||
import org.futo.voiceinput.shared.SoundPlayer
|
||||
import org.futo.voiceinput.shared.types.Language
|
||||
import org.futo.voiceinput.shared.types.ModelLoader
|
||||
@ -84,6 +87,8 @@ private class VoiceInputActionWindow(
|
||||
val enableSound = async { context.getSetting(ENABLE_SOUND) }
|
||||
val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) }
|
||||
val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) }
|
||||
val useBluetoothAudio = async { context.getSetting(PREFER_BLUETOOTH) }
|
||||
val requestAudioFocus = async { context.getSetting(AUDIO_FOCUS) }
|
||||
|
||||
val primaryModel = model
|
||||
val languageSpecificModels = mutableMapOf<Language, ModelLoader>()
|
||||
@ -104,6 +109,10 @@ private class VoiceInputActionWindow(
|
||||
glossary = state.userDictionaryObserver.getWords().map { it.word },
|
||||
languages = allowedLanguages,
|
||||
suppressSymbols = disallowSymbols.await()
|
||||
),
|
||||
recordingConfiguration = RecordingSettings(
|
||||
preferBluetoothMic = useBluetoothAudio.await(),
|
||||
requestAudioFocus = requestAudioFocus.await()
|
||||
)
|
||||
)
|
||||
}
|
||||
@ -203,7 +212,7 @@ private class VoiceInputActionWindow(
|
||||
}
|
||||
}
|
||||
|
||||
override fun recordingStarted() {
|
||||
override fun recordingStarted(device: String) {
|
||||
if (shouldPlaySounds) {
|
||||
state.soundPlayer.playStartSound()
|
||||
}
|
||||
|
@ -1,35 +1,18 @@
|
||||
package org.futo.inputmethod.latin.uix.settings.pages
|
||||
|
||||
import android.content.Intent
|
||||
import androidx.compose.foundation.layout.Box
|
||||
import androidx.compose.foundation.layout.fillMaxWidth
|
||||
import androidx.compose.foundation.layout.padding
|
||||
import androidx.compose.material3.DropdownMenuItem
|
||||
import androidx.compose.material3.ExperimentalMaterial3Api
|
||||
import androidx.compose.material3.ExposedDropdownMenuBox
|
||||
import androidx.compose.material3.ExposedDropdownMenuDefaults
|
||||
import androidx.compose.material3.MaterialTheme
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.material3.TextField
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.getValue
|
||||
import androidx.compose.runtime.mutableStateOf
|
||||
import androidx.compose.runtime.remember
|
||||
import androidx.compose.runtime.setValue
|
||||
import androidx.compose.ui.Alignment
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.platform.LocalContext
|
||||
import androidx.compose.ui.res.painterResource
|
||||
import androidx.compose.ui.res.stringResource
|
||||
import androidx.compose.ui.tooling.preview.Preview
|
||||
import androidx.compose.ui.unit.dp
|
||||
import androidx.navigation.NavHostController
|
||||
import androidx.navigation.compose.rememberNavController
|
||||
import org.futo.inputmethod.latin.R
|
||||
import org.futo.inputmethod.latin.uix.AUDIO_FOCUS
|
||||
import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS
|
||||
import org.futo.inputmethod.latin.uix.ENABLE_SOUND
|
||||
import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX
|
||||
import org.futo.inputmethod.latin.uix.SettingsKey
|
||||
import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH
|
||||
import org.futo.inputmethod.latin.uix.USE_SYSTEM_VOICE_INPUT
|
||||
import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS
|
||||
import org.futo.inputmethod.latin.uix.settings.NavigationItem
|
||||
@ -38,8 +21,6 @@ import org.futo.inputmethod.latin.uix.settings.ScreenTitle
|
||||
import org.futo.inputmethod.latin.uix.settings.ScrollableList
|
||||
import org.futo.inputmethod.latin.uix.settings.SettingToggleDataStore
|
||||
import org.futo.inputmethod.latin.uix.settings.useDataStore
|
||||
import org.futo.voiceinput.shared.ENGLISH_MODELS
|
||||
import org.futo.voiceinput.shared.types.ModelLoader
|
||||
|
||||
@Preview
|
||||
@Composable
|
||||
@ -49,7 +30,6 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
|
||||
ScrollableList {
|
||||
ScreenTitle("Voice Input", showBack = true, navController)
|
||||
|
||||
|
||||
SettingToggleDataStore(
|
||||
title = "Disable built-in voice input",
|
||||
subtitle = "Use voice input provided by external app",
|
||||
@ -76,10 +56,23 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController())
|
||||
|
||||
SettingToggleDataStore(
|
||||
title = "Verbose progress",
|
||||
subtitle = "Display verbose information about model inference",
|
||||
subtitle = "Display verbose information such as mic being used",
|
||||
setting = VERBOSE_PROGRESS
|
||||
)
|
||||
|
||||
SettingToggleDataStore(
|
||||
title = "Prefer Bluetooth Mic",
|
||||
subtitle = "There may be extra delay to recording starting as Bluetooth SCO connection must be negotiated",
|
||||
setting = PREFER_BLUETOOTH
|
||||
)
|
||||
|
||||
|
||||
SettingToggleDataStore(
|
||||
title = "Audio Focus",
|
||||
subtitle = "Pause videos/music when voice input is activated",
|
||||
setting = AUDIO_FOCUS
|
||||
)
|
||||
|
||||
SettingToggleDataStore(
|
||||
title = "Suppress symbols",
|
||||
setting = DISALLOW_SYMBOLS
|
||||
|
@ -5,7 +5,10 @@ import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.content.pm.PackageManager
|
||||
import android.hardware.SensorPrivacyManager
|
||||
import android.media.AudioDeviceInfo
|
||||
import android.media.AudioFocusRequest
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioManager
|
||||
import android.media.AudioRecord
|
||||
import android.media.MediaRecorder
|
||||
import android.media.MicrophoneDirection
|
||||
@ -42,9 +45,43 @@ import kotlin.math.min
|
||||
import kotlin.math.pow
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
 * Maps an [AudioDeviceInfo] `TYPE_*` constant to a short label for display.
 *
 * The earpiece and the built-in microphone share the label "BUILTIN". Any type
 * not listed here is rendered as "unknown@<type>" so the raw value stays visible.
 *
 * @param type device type constant, as returned by `AudioDeviceInfo.type`
 * @return the label for [type]
 */
private fun getRecordingDeviceKind(type: Int): String = when (type) {
    AudioDeviceInfo.TYPE_BUILTIN_EARPIECE,
    AudioDeviceInfo.TYPE_BUILTIN_MIC -> "BUILTIN"

    AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "BLUETOOTH_SCO"
    AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "BLUETOOTH_A2DP"
    AudioDeviceInfo.TYPE_WIRED_HEADSET -> "WIRED_HEADSET"
    AudioDeviceInfo.TYPE_HDMI -> "HDMI"
    AudioDeviceInfo.TYPE_TELEPHONY -> "TELEPHONY"
    AudioDeviceInfo.TYPE_DOCK -> "DOCK"
    AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB_ACCESSORY"
    AudioDeviceInfo.TYPE_USB_DEVICE -> "USB_DEVICE"
    AudioDeviceInfo.TYPE_USB_HEADSET -> "USB_HEADSET"
    AudioDeviceInfo.TYPE_FM_TUNER -> "FM_TUNER"
    AudioDeviceInfo.TYPE_TV_TUNER -> "TV_TUNER"
    AudioDeviceInfo.TYPE_LINE_ANALOG -> "LINE_ANALOG"
    AudioDeviceInfo.TYPE_LINE_DIGITAL -> "LINE_DIGITAL"
    AudioDeviceInfo.TYPE_IP -> "IP"
    AudioDeviceInfo.TYPE_BUS -> "BUS"
    AudioDeviceInfo.TYPE_REMOTE_SUBMIX -> "REMOTE_SUBMIX"
    AudioDeviceInfo.TYPE_BLE_HEADSET -> "BLE_HEADSET"
    AudioDeviceInfo.TYPE_HDMI_ARC -> "HDMI_ARC"
    AudioDeviceInfo.TYPE_HDMI_EARC -> "HDMI_EARC"
    AudioDeviceInfo.TYPE_DOCK_ANALOG -> "DOCK_ANALOG"

    else -> "unknown@$type"
}
|
||||
|
||||
/**
 * Options that control how audio capture is set up for voice input.
 *
 * @property preferBluetoothMic when true, the recognizer tries to select a
 *   communication device (Bluetooth SCO first) before recording starts
 * @property requestAudioFocus when true, the recognizer requests transient
 *   exclusive audio focus while recording
 */
data class RecordingSettings(
    val preferBluetoothMic: Boolean,
    val requestAudioFocus: Boolean,
)
|
||||
|
||||
/**
 * Settings bundle handed to [AudioRecognizer].
 *
 * @property modelRunConfiguration model selection settings (type declared elsewhere)
 * @property decodingConfiguration decoding settings (type declared elsewhere)
 * @property recordingConfiguration audio capture options: Bluetooth mic preference
 *   and audio focus
 */
data class AudioRecognizerSettings(
    val modelRunConfiguration: MultiModelRunConfiguration,
    // Declared exactly once: a second, comma-less copy of this parameter is a
    // duplicate declaration and breaks the parameter list.
    val decodingConfiguration: DecodingConfiguration,
    val recordingConfiguration: RecordingSettings
)
|
||||
|
||||
class ModelDoesNotExistException(val models: List<ModelLoader>) : Throwable()
|
||||
@ -66,6 +103,69 @@ class AudioRecognizer(
|
||||
private var modelJob: Job? = null
|
||||
private var loadModelJob: Job? = null
|
||||
|
||||
private var focusRequest: AudioFocusRequest? = null
|
||||
|
||||
private var communicationDevice = "unknown"
|
||||
|
||||
/**
 * Requests transient, exclusive audio focus for the recording session.
 *
 * Any focus request still held from an earlier call is abandoned first. Nothing
 * further happens when the user has turned audio focus off in the recording
 * settings, or on platforms older than Android O. Failures are printed and
 * otherwise ignored so that recording can still proceed.
 */
private fun focusAudio() {
    unfocusAudio()

    val wantsFocus = settings.recordingConfiguration.requestAudioFocus
    if (!wantsFocus) return

    try {
        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) return

        val manager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
        val request = AudioFocusRequest
            .Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE)
            .build()

        // Remember the request before issuing it so unfocusAudio() can abandon it later.
        focusRequest = request
        manager.requestAudioFocus(request)
    } catch (e: Exception) {
        e.printStackTrace()
    }
}
|
||||
|
||||
/**
 * Abandons the audio focus request held in [focusRequest], if any, and clears it.
 *
 * Does nothing on platforms older than Android O. Failures are printed and
 * otherwise ignored; in that case [focusRequest] is left as it was.
 */
private fun unfocusAudio() {
    try {
        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) return

        val manager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
        focusRequest?.let { held -> manager.abandonAudioFocusRequest(held) }
        focusRequest = null
    } catch (e: Exception) {
        e.printStackTrace()
    }
}
|
||||
|
||||
/**
 * Selects the communication device to record from when the user prefers a
 * Bluetooth microphone.
 *
 * [communicationDevice] is first reset to "Unset". It is overwritten with a
 * "<product name> (<kind>)" label only when the platform accepts the chosen
 * device. The preference order is Bluetooth SCO, then the built-in microphone,
 * then the first device the platform lists. If the platform rejects the device,
 * the communication device is cleared again.
 *
 * Does nothing when the setting is off or on platforms older than Android S.
 * Failures are printed and otherwise ignored so recording can fall back to the
 * default input.
 */
private fun setCommunicationDevice() {
    communicationDevice = "Unset"
    if (!settings.recordingConfiguration.preferBluetoothMic) return

    try {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
            val devices = audioManager.availableCommunicationDevices

            // firstOrNull() rather than first(): an empty device list is an
            // ordinary outcome here, not an error to be thrown and swallowed.
            val tgtDevice = devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO }
                ?: devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_MIC }
                ?: devices.firstOrNull()
                ?: return

            if (audioManager.setCommunicationDevice(tgtDevice)) {
                communicationDevice =
                    "${tgtDevice.productName} (${getRecordingDeviceKind(tgtDevice.type)})"
            } else {
                audioManager.clearCommunicationDevice()
            }
        }
    } catch (e: Exception) {
        // Report the failure the same way focusAudio()/unfocusAudio() do
        // instead of discarding it silently.
        e.printStackTrace()
    }
}
|
||||
|
||||
/**
 * Clears the communication device selection on the system [AudioManager].
 *
 * Has no effect on platforms older than Android S.
 */
private fun clearCommunicationDevice() {
    val manager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.S) return

    manager.clearCommunicationDevice()
}
|
||||
|
||||
@Throws(ModelDoesNotExistException::class)
|
||||
private fun verifyModelsExist() {
|
||||
val modelsThatDoNotExist = mutableListOf<ModelLoader>()
|
||||
@ -100,6 +200,10 @@ class AudioRecognizer(
|
||||
isRecording = false
|
||||
|
||||
modelRunner.cancelAll()
|
||||
|
||||
unfocusAudio()
|
||||
|
||||
clearCommunicationDevice()
|
||||
}
|
||||
|
||||
fun finish() {
|
||||
@ -144,10 +248,17 @@ class AudioRecognizer(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Throws(SecurityException::class)
|
||||
private fun createAudioRecorder(): AudioRecord {
|
||||
val purpose = if(settings.recordingConfiguration.preferBluetoothMic) {
|
||||
MediaRecorder.AudioSource.VOICE_COMMUNICATION
|
||||
} else {
|
||||
MediaRecorder.AudioSource.VOICE_RECOGNITION
|
||||
}
|
||||
|
||||
val recorder = AudioRecord(
|
||||
MediaRecorder.AudioSource.VOICE_RECOGNITION,
|
||||
purpose,
|
||||
16000,
|
||||
AudioFormat.CHANNEL_IN_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT,
|
||||
@ -160,8 +271,6 @@ class AudioRecognizer(
|
||||
recorder.setPreferredMicrophoneDirection(MicrophoneDirection.MIC_DIRECTION_TOWARDS_USER)
|
||||
}
|
||||
|
||||
recorder.startRecording()
|
||||
|
||||
return recorder
|
||||
}
|
||||
|
||||
@ -310,14 +419,32 @@ class AudioRecognizer(
|
||||
throw IllegalStateException("Start recording when already recording")
|
||||
}
|
||||
|
||||
setCommunicationDevice()
|
||||
|
||||
val recorder = try {
|
||||
createAudioRecorder()
|
||||
} catch (e: SecurityException) {
|
||||
// It's possible we may have lost permission, so let's just ask for permission again
|
||||
clearCommunicationDevice()
|
||||
requestPermission()
|
||||
return
|
||||
}
|
||||
|
||||
focusAudio()
|
||||
|
||||
if(communicationDevice == "Unset") {
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) {
|
||||
communicationDevice = recorder.activeMicrophones.joinToString {
|
||||
getRecordingDeviceKind(it.type)
|
||||
} + " (may be stale)"
|
||||
}
|
||||
}
|
||||
|
||||
listener.recordingStarted(communicationDevice)
|
||||
|
||||
|
||||
recorder.startRecording()
|
||||
|
||||
this.recorder = recorder
|
||||
|
||||
isRecording = true
|
||||
@ -336,7 +463,6 @@ class AudioRecognizer(
|
||||
}
|
||||
}
|
||||
|
||||
listener.recordingStarted()
|
||||
}
|
||||
|
||||
private val runnerCallback: ModelInferenceCallback = object : ModelInferenceCallback {
|
||||
|
@ -23,13 +23,14 @@ data class RecognizerViewSettings(
|
||||
val shouldShowInlinePartialResult: Boolean,
|
||||
|
||||
val modelRunConfiguration: MultiModelRunConfiguration,
|
||||
val decodingConfiguration: DecodingConfiguration
|
||||
val decodingConfiguration: DecodingConfiguration,
|
||||
val recordingConfiguration: RecordingSettings
|
||||
)
|
||||
|
||||
private val VerboseAnnotations = hashMapOf(
|
||||
InferenceState.ExtractingMel to R.string.extracting_features,
|
||||
InferenceState.LoadingModel to R.string.loading_model,
|
||||
InferenceState.Encoding to R.string.encoding,
|
||||
InferenceState.Encoding to R.string.processing,
|
||||
InferenceState.DecodingLanguage to R.string.decoding,
|
||||
InferenceState.SwitchingModel to R.string.switching_model,
|
||||
InferenceState.DecodingStarted to R.string.decoding
|
||||
@ -47,7 +48,7 @@ private val DefaultAnnotations = hashMapOf(
|
||||
interface RecognizerViewListener {
|
||||
fun cancelled()
|
||||
|
||||
fun recordingStarted()
|
||||
fun recordingStarted(device: String)
|
||||
|
||||
fun finished(result: String)
|
||||
|
||||
@ -75,6 +76,8 @@ class RecognizerView(
|
||||
private val partialDecodingText = mutableStateOf("")
|
||||
private val currentViewState = mutableStateOf(CurrentView.LoadingCircle)
|
||||
|
||||
private val currentDeviceState = mutableStateOf("Recording not started")
|
||||
|
||||
@Composable
|
||||
fun Content() {
|
||||
when (currentViewState.value) {
|
||||
@ -93,7 +96,8 @@ class RecognizerView(
|
||||
CurrentView.InnerRecognize -> {
|
||||
InnerRecognize(
|
||||
magnitude = magnitudeState,
|
||||
state = statusState
|
||||
state = statusState,
|
||||
device = if(settings.shouldShowVerboseFeedback) { currentDeviceState } else { null }
|
||||
)
|
||||
}
|
||||
|
||||
@ -168,9 +172,10 @@ class RecognizerView(
|
||||
}
|
||||
}
|
||||
|
||||
override fun recordingStarted() {
|
||||
override fun recordingStarted(device: String) {
|
||||
updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET)
|
||||
listener.recordingStarted()
|
||||
currentDeviceState.value = device
|
||||
listener.recordingStarted(device)
|
||||
}
|
||||
|
||||
override fun updateMagnitude(magnitude: Float, state: MagnitudeState) {
|
||||
@ -192,7 +197,8 @@ class RecognizerView(
|
||||
listener = audioRecognizerListener,
|
||||
settings = AudioRecognizerSettings(
|
||||
modelRunConfiguration = settings.modelRunConfiguration,
|
||||
decodingConfiguration = settings.decodingConfiguration
|
||||
decodingConfiguration = settings.decodingConfiguration,
|
||||
recordingConfiguration = settings.recordingConfiguration
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -14,7 +14,7 @@ interface AudioRecognizerListener {
|
||||
fun loading()
|
||||
fun needPermission(onResult: (Boolean) -> Unit)
|
||||
|
||||
fun recordingStarted()
|
||||
fun recordingStarted(device: String)
|
||||
fun updateMagnitude(magnitude: Float, state: MagnitudeState)
|
||||
|
||||
fun processing()
|
||||
|
@ -54,7 +54,8 @@ fun AnimatedRecognizeCircle(magnitude: MutableFloatState = mutableFloatStateOf(0
|
||||
@Composable
|
||||
fun InnerRecognize(
|
||||
magnitude: MutableFloatState = mutableFloatStateOf(0.5f),
|
||||
state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED)
|
||||
state: MutableState<MagnitudeState> = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED),
|
||||
device: MutableState<String>? = mutableStateOf("")
|
||||
) {
|
||||
Box(modifier = Modifier.fillMaxSize(), contentAlignment = Alignment.Center) {
|
||||
AnimatedRecognizeCircle(magnitude = magnitude)
|
||||
@ -78,6 +79,16 @@ fun InnerRecognize(
|
||||
textAlign = TextAlign.Center,
|
||||
color = MaterialTheme.colorScheme.onSurface
|
||||
)
|
||||
|
||||
if(device != null) {
|
||||
Text(
|
||||
"Device: ${device.value}",
|
||||
style = Typography.labelSmall,
|
||||
modifier = Modifier.fillMaxWidth().offset(x = 0.dp, y = 64.dp),
|
||||
textAlign = TextAlign.Center,
|
||||
color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.66f)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user