diff --git a/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt index facbc03e9..1e12757e8 100644 --- a/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt +++ b/java/src/org/futo/inputmethod/latin/uix/VoiceInputSettingKeys.kt @@ -29,6 +29,16 @@ val DISALLOW_SYMBOLS = SettingsKey( default = true ) +val PREFER_BLUETOOTH = SettingsKey( + key = booleanPreferencesKey("prefer_bluetooth_recording"), + default = false +) + +val AUDIO_FOCUS = SettingsKey( + key = booleanPreferencesKey("request_audio_focus"), + default = true +) + val ENGLISH_MODEL_INDEX = SettingsKey( key = intPreferencesKey("english_model_index"), default = 0 diff --git a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt index ced48c1b7..126b37155 100644 --- a/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt +++ b/java/src/org/futo/inputmethod/latin/uix/actions/VoiceInputAction.kt @@ -27,11 +27,13 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.withContext import kotlinx.coroutines.yield import org.futo.inputmethod.latin.R +import org.futo.inputmethod.latin.uix.AUDIO_FOCUS import org.futo.inputmethod.latin.uix.Action import org.futo.inputmethod.latin.uix.ActionWindow import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.ENABLE_SOUND import org.futo.inputmethod.latin.uix.KeyboardManagerForAction +import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH import org.futo.inputmethod.latin.uix.PersistentActionState import org.futo.inputmethod.latin.uix.ResourceHelper import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS @@ -43,6 +45,7 @@ import org.futo.voiceinput.shared.ModelDoesNotExistException import org.futo.voiceinput.shared.RecognizerView import org.futo.voiceinput.shared.RecognizerViewListener import org.futo.voiceinput.shared.RecognizerViewSettings +import org.futo.voiceinput.shared.RecordingSettings import org.futo.voiceinput.shared.SoundPlayer import org.futo.voiceinput.shared.types.Language import org.futo.voiceinput.shared.types.ModelLoader @@ -84,6 +87,8 @@ private class VoiceInputActionWindow( val enableSound = async { context.getSetting(ENABLE_SOUND) } val verboseFeedback = async { context.getSetting(VERBOSE_PROGRESS) } val disallowSymbols = async { context.getSetting(DISALLOW_SYMBOLS) } + val useBluetoothAudio = async { context.getSetting(PREFER_BLUETOOTH) } + val requestAudioFocus = async { context.getSetting(AUDIO_FOCUS) } val primaryModel = model val languageSpecificModels = mutableMapOf() @@ -104,6 +109,10 @@ private class VoiceInputActionWindow( glossary = state.userDictionaryObserver.getWords().map { it.word }, languages = allowedLanguages, suppressSymbols = disallowSymbols.await() + ), + recordingConfiguration = RecordingSettings( + preferBluetoothMic = useBluetoothAudio.await(), + requestAudioFocus = requestAudioFocus.await() ) ) } @@ -203,7 +212,7 @@ private class VoiceInputActionWindow( } } - override fun recordingStarted() { + override fun recordingStarted(device: String) { if (shouldPlaySounds) { state.soundPlayer.playStartSound() } diff --git a/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt b/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt index d0dab704c..96cb8c8d3 100644 --- a/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt +++ b/java/src/org/futo/inputmethod/latin/uix/settings/pages/VoiceInput.kt @@ -1,35 +1,18 @@ package org.futo.inputmethod.latin.uix.settings.pages import android.content.Intent -import androidx.compose.foundation.layout.Box -import androidx.compose.foundation.layout.fillMaxWidth -import androidx.compose.foundation.layout.padding -import androidx.compose.material3.DropdownMenuItem -import androidx.compose.material3.ExperimentalMaterial3Api -import androidx.compose.material3.ExposedDropdownMenuBox -import androidx.compose.material3.ExposedDropdownMenuDefaults -import androidx.compose.material3.MaterialTheme -import androidx.compose.material3.Text -import androidx.compose.material3.TextField import androidx.compose.runtime.Composable -import androidx.compose.runtime.getValue -import androidx.compose.runtime.mutableStateOf -import androidx.compose.runtime.remember -import androidx.compose.runtime.setValue -import androidx.compose.ui.Alignment -import androidx.compose.ui.Modifier import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.res.painterResource import androidx.compose.ui.res.stringResource import androidx.compose.ui.tooling.preview.Preview -import androidx.compose.ui.unit.dp import androidx.navigation.NavHostController import androidx.navigation.compose.rememberNavController import org.futo.inputmethod.latin.R +import org.futo.inputmethod.latin.uix.AUDIO_FOCUS import org.futo.inputmethod.latin.uix.DISALLOW_SYMBOLS import org.futo.inputmethod.latin.uix.ENABLE_SOUND -import org.futo.inputmethod.latin.uix.ENGLISH_MODEL_INDEX -import org.futo.inputmethod.latin.uix.SettingsKey +import org.futo.inputmethod.latin.uix.PREFER_BLUETOOTH import org.futo.inputmethod.latin.uix.USE_SYSTEM_VOICE_INPUT import org.futo.inputmethod.latin.uix.VERBOSE_PROGRESS import org.futo.inputmethod.latin.uix.settings.NavigationItem @@ -38,8 +21,6 @@ import org.futo.inputmethod.latin.uix.settings.ScreenTitle import org.futo.inputmethod.latin.uix.settings.ScrollableList import org.futo.inputmethod.latin.uix.settings.SettingToggleDataStore import org.futo.inputmethod.latin.uix.settings.useDataStore -import org.futo.voiceinput.shared.ENGLISH_MODELS -import org.futo.voiceinput.shared.types.ModelLoader @Preview @Composable @@ -49,7 +30,6 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController()) ScrollableList { ScreenTitle("Voice Input", showBack = true, navController) - SettingToggleDataStore( title = "Disable built-in voice input", subtitle = "Use voice input provided by external app", @@ -76,10 +56,23 @@ fun VoiceInputScreen(navController: NavHostController = rememberNavController()) SettingToggleDataStore( title = "Verbose progress", - subtitle = "Display verbose information about model inference", + subtitle = "Display verbose information such as mic being used", setting = VERBOSE_PROGRESS ) + SettingToggleDataStore( + title = "Prefer Bluetooth Mic", + subtitle = "There may be extra delay to recording starting as Bluetooth SCO connection must be negotiated", + setting = PREFER_BLUETOOTH + ) + + + SettingToggleDataStore( + title = "Audio Focus", + subtitle = "Pause videos/music when voice input is activated", + setting = AUDIO_FOCUS + ) + SettingToggleDataStore( title = "Suppress symbols", setting = DISALLOW_SYMBOLS diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt index a9e44efc4..188158efd 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/AudioRecognizer.kt @@ -5,7 +5,10 @@ import android.content.Context import android.content.Intent import android.content.pm.PackageManager import android.hardware.SensorPrivacyManager +import android.media.AudioDeviceInfo +import android.media.AudioFocusRequest import android.media.AudioFormat +import android.media.AudioManager import android.media.AudioRecord import android.media.MediaRecorder import android.media.MicrophoneDirection @@ -42,9 +45,43 @@ import kotlin.math.min import kotlin.math.pow import kotlin.math.sqrt +private fun getRecordingDeviceKind(type: Int): String { + return when (type) { + AudioDeviceInfo.TYPE_BUILTIN_EARPIECE -> "BUILTIN" + AudioDeviceInfo.TYPE_BUILTIN_MIC -> "BUILTIN" + AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "BLUETOOTH_SCO" + AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "BLUETOOTH_A2DP" + AudioDeviceInfo.TYPE_WIRED_HEADSET -> "WIRED_HEADSET" + AudioDeviceInfo.TYPE_HDMI -> "HDMI" + AudioDeviceInfo.TYPE_TELEPHONY -> "TELEPHONY" + AudioDeviceInfo.TYPE_DOCK -> "DOCK" + AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB_ACCESSORY" + AudioDeviceInfo.TYPE_USB_DEVICE -> "USB_DEVICE" + AudioDeviceInfo.TYPE_USB_HEADSET -> "USB_HEADSET" + AudioDeviceInfo.TYPE_FM_TUNER -> "FM_TUNER" + AudioDeviceInfo.TYPE_TV_TUNER -> "TV_TUNER" + AudioDeviceInfo.TYPE_LINE_ANALOG -> "LINE_ANALOG" + AudioDeviceInfo.TYPE_LINE_DIGITAL -> "LINE_DIGITAL" + AudioDeviceInfo.TYPE_IP -> "IP" + AudioDeviceInfo.TYPE_BUS -> "BUS" + AudioDeviceInfo.TYPE_REMOTE_SUBMIX -> "REMOTE_SUBMIX" + AudioDeviceInfo.TYPE_BLE_HEADSET -> "BLE_HEADSET" + AudioDeviceInfo.TYPE_HDMI_ARC -> "HDMI_ARC" + AudioDeviceInfo.TYPE_HDMI_EARC -> "HDMI_EARC" + AudioDeviceInfo.TYPE_DOCK_ANALOG -> "DOCK_ANALOG" + else -> "unknown@${type}" + } +} + +data class RecordingSettings( + val preferBluetoothMic: Boolean, + val requestAudioFocus: Boolean +) + data class AudioRecognizerSettings( val modelRunConfiguration: MultiModelRunConfiguration, - val decodingConfiguration: DecodingConfiguration + val decodingConfiguration: DecodingConfiguration, + val recordingConfiguration: RecordingSettings ) class ModelDoesNotExistException(val models: List) : Throwable() @@ -66,6 +103,69 @@ class AudioRecognizer( private var modelJob: Job? = null private var loadModelJob: Job? = null + private var focusRequest: AudioFocusRequest? = null + + private var communicationDevice = "unknown" + + private fun focusAudio() { + unfocusAudio() + + if(!settings.recordingConfiguration.requestAudioFocus) return + + try { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager + focusRequest = + AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE) + .build() + audioManager.requestAudioFocus(focusRequest!!) + } + }catch(e: Exception) { + e.printStackTrace() + } + } + + private fun unfocusAudio() { + try { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager + if (focusRequest != null) { + audioManager.abandonAudioFocusRequest(focusRequest!!) + } + focusRequest = null + } + }catch(e: Exception) { + e.printStackTrace() + } + } + + private fun setCommunicationDevice() { + communicationDevice = "Unset" + if(!settings.recordingConfiguration.preferBluetoothMic) return + + try { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager + val devices = audioManager.availableCommunicationDevices + val tgtDevice = devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO } ?: devices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_MIC } ?: devices.first() + + if (!audioManager.setCommunicationDevice(tgtDevice)) { + audioManager.clearCommunicationDevice() + } else { + communicationDevice = + tgtDevice.productName.toString() + " (${getRecordingDeviceKind(tgtDevice.type)})" + } + } + } catch(_: Exception) {} + } + + private fun clearCommunicationDevice() { + val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + audioManager.clearCommunicationDevice() + } + } + @Throws(ModelDoesNotExistException::class) private fun verifyModelsExist() { val modelsThatDoNotExist = mutableListOf() @@ -100,6 +200,10 @@ class AudioRecognizer( isRecording = false modelRunner.cancelAll() + + unfocusAudio() + + clearCommunicationDevice() } fun finish() { @@ -144,10 +248,17 @@ class AudioRecognizer( } } + @Throws(SecurityException::class) private fun createAudioRecorder(): AudioRecord { + val purpose = if(settings.recordingConfiguration.preferBluetoothMic) { + MediaRecorder.AudioSource.VOICE_COMMUNICATION + } else { + MediaRecorder.AudioSource.VOICE_RECOGNITION + } + val recorder = AudioRecord( - MediaRecorder.AudioSource.VOICE_RECOGNITION, + purpose, 16000, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, @@ -160,8 +271,6 @@ class AudioRecognizer( recorder.setPreferredMicrophoneDirection(MicrophoneDirection.MIC_DIRECTION_TOWARDS_USER) } - recorder.startRecording() - return recorder } @@ -310,14 +419,32 @@ class AudioRecognizer( throw IllegalStateException("Start recording when already recording") } + setCommunicationDevice() + val recorder = try { createAudioRecorder() } catch (e: SecurityException) { // It's possible we may have lost permission, so let's just ask for permission again + clearCommunicationDevice() requestPermission() return } + focusAudio() + + if(communicationDevice == "Unset") { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) { + communicationDevice = recorder.activeMicrophones.joinToString { + getRecordingDeviceKind(it.type) + } + " (may be stale)" + } + } + + listener.recordingStarted(communicationDevice) + + + recorder.startRecording() + this.recorder = recorder isRecording = true @@ -336,7 +463,6 @@ class AudioRecognizer( } } - listener.recordingStarted() } private val runnerCallback: ModelInferenceCallback = object : ModelInferenceCallback { diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt index 848b31726..d1c973299 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/RecognizerView.kt @@ -23,13 +23,14 @@ data class RecognizerViewSettings( val shouldShowInlinePartialResult: Boolean, val modelRunConfiguration: MultiModelRunConfiguration, - val decodingConfiguration: DecodingConfiguration + val decodingConfiguration: DecodingConfiguration, + val recordingConfiguration: RecordingSettings ) private val VerboseAnnotations = hashMapOf( InferenceState.ExtractingMel to R.string.extracting_features, InferenceState.LoadingModel to R.string.loading_model, - InferenceState.Encoding to R.string.encoding, + InferenceState.Encoding to R.string.processing, InferenceState.DecodingLanguage to R.string.decoding, InferenceState.SwitchingModel to R.string.switching_model, InferenceState.DecodingStarted to R.string.decoding @@ -47,7 +48,7 @@ private val DefaultAnnotations = hashMapOf( interface RecognizerViewListener { fun cancelled() - fun recordingStarted() + fun recordingStarted(device: String) fun finished(result: String) @@ -75,6 +76,8 @@ class RecognizerView( private val partialDecodingText = mutableStateOf("") private val currentViewState = mutableStateOf(CurrentView.LoadingCircle) + private val currentDeviceState = mutableStateOf("Recording not started") + @Composable fun Content() { when (currentViewState.value) { @@ -93,7 +96,8 @@ class RecognizerView( CurrentView.InnerRecognize -> { InnerRecognize( magnitude = magnitudeState, - state = statusState + state = statusState, + device = if(settings.shouldShowVerboseFeedback) { currentDeviceState } else { null } ) } @@ -168,9 +172,10 @@ class RecognizerView( } } - override fun recordingStarted() { + override fun recordingStarted(device: String) { updateMagnitude(0.0f, MagnitudeState.NOT_TALKED_YET) - listener.recordingStarted() + currentDeviceState.value = device + listener.recordingStarted(device) } override fun updateMagnitude(magnitude: Float, state: MagnitudeState) { @@ -192,7 +197,8 @@ class RecognizerView( listener = audioRecognizerListener, settings = AudioRecognizerSettings( modelRunConfiguration = settings.modelRunConfiguration, - decodingConfiguration = settings.decodingConfiguration + decodingConfiguration = settings.decodingConfiguration, + recordingConfiguration = settings.recordingConfiguration ) ) diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt index 15a4dccef..6c59ff167 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/types/AudioRecognizerListener.kt @@ -14,7 +14,7 @@ interface AudioRecognizerListener { fun loading() fun needPermission(onResult: (Boolean) -> Unit) - fun recordingStarted() + fun recordingStarted(device: String) fun updateMagnitude(magnitude: Float, state: MagnitudeState) fun processing() diff --git a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt index 69a121c2d..ec8787c00 100644 --- a/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt +++ b/voiceinput-shared/src/main/java/org/futo/voiceinput/shared/ui/RecognizeViews.kt @@ -54,7 +54,8 @@ fun AnimatedRecognizeCircle(magnitude: MutableFloatState = mutableFloatStateOf(0 @Composable fun InnerRecognize( magnitude: MutableFloatState = mutableFloatStateOf(0.5f), - state: MutableState = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED) + state: MutableState = mutableStateOf(MagnitudeState.MIC_MAY_BE_BLOCKED), + device: MutableState? = mutableStateOf("") ) { Box(modifier = Modifier.fillMaxSize(), contentAlignment = Alignment.Center) { AnimatedRecognizeCircle(magnitude = magnitude) @@ -78,6 +79,16 @@ fun InnerRecognize( textAlign = TextAlign.Center, color = MaterialTheme.colorScheme.onSurface ) + + if(device != null) { + Text( + "Device: ${device.value}", + style = Typography.labelSmall, + modifier = Modifier.fillMaxWidth().offset(x = 0.dp, y = 64.dp), + textAlign = TextAlign.Center, + color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.66f) + ) + } } }