From e3bf95e4fe48bc1011d0d8ab7b11d24f519459e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BE=90=E5=8B=A4=E6=B0=91?=
Date: Thu, 16 Apr 2026 23:01:31 +0800
Subject: [PATCH] =?UTF-8?q?feat(speech):=20=E9=9B=86=E6=88=90TTS=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=E5=B9=B6=E4=BC=98=E5=8C=96=E8=AF=AD=E9=9F=B3=E9=85=8D?=
 =?UTF-8?q?=E7=BD=AE=E7=AE=A1=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在gradle.properties中添加测试环境默认配置参数
- 通过BuildConfig统一管理语音服务相关配置信息
- 集成TtsClient实现文本转语音功能
- 添加TTS连接状态管理和回调处理
- 实现唤醒词触发后的TTS响应播报
- 优化ASR和TTS的连接与关闭流程
- 添加TTS播放完成后的麦克风自动启动逻辑
---
 app/build.gradle                              |  8 ++
 .../com/nova/brain/glass/helper/AsrHelper.kt  | 83 ++++++++++++++++---
 gradle.properties                             | 11 ++-
 3 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/app/build.gradle b/app/build.gradle
index c7bbe0d..9cb986a 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -20,6 +20,14 @@ android {
         ]
         buildConfigField("String", "APP_Name", "\"" + apps.applicationName + "\"")
 
+        buildConfigField("String", "SPEECH_DOMAIN", "\"" + (project.findProperty("online.demo.domain") ?: "api-test.rokid.com") + "\"")
+        buildConfigField("String", "SPEECH_AK", "\"" + (project.findProperty("online.demo.ak") ?: "") + "\"")
+        buildConfigField("String", "SPEECH_SK", "\"" + (project.findProperty("online.demo.sk") ?: "") + "\"")
+        buildConfigField("String", "SPEECH_UID", "\"" + (project.findProperty("online.demo.uid") ?: "demo-user") + "\"")
+        buildConfigField("String", "SPEECH_DEVICE_ID", "\"" + (project.findProperty("online.demo.deviceId") ?: "demo-device") + "\"")
+        buildConfigField("String", "SPEECH_ASR_PATH", "\"" + (project.findProperty("online.demo.asrPath") ?: "/ar/audio/api/ws/asr/streaming") + "\"")
+        buildConfigField("String", "SPEECH_TTS_PATH", "\"" + (project.findProperty("online.demo.ttsPath") ?: "/ar/audio/api/ws/tts") + "\"")
+
         flavorDimensions "versioncode"
     }
     buildTypes {
diff --git a/app/src/main/java/com/nova/brain/glass/helper/AsrHelper.kt b/app/src/main/java/com/nova/brain/glass/helper/AsrHelper.kt
index 61b2f19..8a249e6 100644
--- a/app/src/main/java/com/nova/brain/glass/helper/AsrHelper.kt
+++ b/app/src/main/java/com/nova/brain/glass/helper/AsrHelper.kt
@@ -1,10 +1,13 @@
 package com.nova.brain.glass.helper
 
 import android.util.Log
+import com.nova.brain.glass.BuildConfig
 import com.nova.brain.glass.model.RecognizeAction
 import com.rokid.online.speech.AsrClient
 import com.rokid.online.speech.OnlineSpeechSdk
 import com.rokid.online.speech.OnlineSpeechSdkConfig
+import com.rokid.online.speech.TtsClient
+import com.rokid.online.speech.open.AndroidPcmTtsStreamPlayer
 import com.rokid.online.speech.open.OpenSdkAudioSource
 import com.xuqm.base.extensions.showMessage
 
@@ -12,14 +15,14 @@ object AsrHelper : OfflineCmdListener {
 
     private const val TAG = "AsrHelper"
 
-    // 配置信息,从 online-speech-sdk-demo 参考项目复制
-    private const val DOMAIN = "api-test.rokid.com"
-    private const val ASR_PATH = "/ar/audio/api/ws/asr/streaming"
-    private const val TTS_PATH = "/ar/audio/api/ws/tts"
-    private const val AK = ""
-    private const val SK = ""
-    private const val UID = "demo-user"
-    private const val DEVICE_ID = "demo-device"
+    // Config comes from BuildConfig (maintained via buildConfigField in app/build.gradle)
+    private val DOMAIN get() = BuildConfig.SPEECH_DOMAIN
+    private val AK get() = BuildConfig.SPEECH_AK
+    private val SK get() = BuildConfig.SPEECH_SK
+    private val UID get() = BuildConfig.SPEECH_UID
+    private val DEVICE_ID get() = BuildConfig.SPEECH_DEVICE_ID
+    private val ASR_PATH get() = BuildConfig.SPEECH_ASR_PATH
+    private val TTS_PATH get() = BuildConfig.SPEECH_TTS_PATH
 
     // 唤醒词:Nova Nova
     private const val WAKE_WORD = "Nova Nova"
@@ -27,10 +30,15 @@ object AsrHelper : OfflineCmdListener {
 
     private var sdk: OnlineSpeechSdk? = null
     private var asr: AsrClient? = null
+    private var tts: TtsClient? = null
     private val audioSource = OpenSdkAudioSource()
+    private val ttsPlayer = AndroidPcmTtsStreamPlayer()
 
     private var isConnected = false
     private var isMicRunning = false
+    private var isTtsConnected = false
+
+    private const val WAKE_RESPONSE = "在呢,您请说"
 
     // 拼接每次识别会话中的中间结果
     private var currentPartial = ""
@@ -63,13 +71,15 @@ object AsrHelper : OfflineCmdListener {
         sdk = OnlineSpeechSdk(cfg)
 
         asr = sdk!!.createAsrClient().attachAudioSource(audioSource).also { setupAsrCallbacks(it) }
+        tts = sdk!!.createTtsClient().attachStreamPlayer(ttsPlayer).also { setupTtsCallbacks(it) }
 
         // 注册离线关键词 Nova Nova,GlassSdk 触发后启动 ASR
         OfflineCmdServiceHelper.registerAsrWakeWord()
         OfflineCmdServiceHelper.addOnLineListener(this)
 
-        // 自动建立 ASR 连接
+        // Automatically establish the ASR / TTS connections
         asrConnect()
+        tts?.connect()
 
         Log.d(TAG, "AsrHelper init done")
     }
@@ -150,11 +160,55 @@ object AsrHelper : OfflineCmdListener {
         })
     }
 
-    // 离线关键词回调:匹配唤醒词时启动麦克风
+    // Set when the wake response is handed to TTS; cleared once TTS finishes, fails or closes.
+    private var isWakeResponsePending = false
+
+    // Starts the mic only while a wake response is in flight, so unrelated TTS events
+    // (e.g. a failed connect during init) never open the mic without a wake word.
+    private fun startMicIfWakeResponsePending() {
+        if (!isWakeResponsePending) return
+        isWakeResponsePending = false
+        asrStartMic()
+    }
+
+    private fun setupTtsCallbacks(ttsClient: TtsClient) {
+        ttsClient.setListener(object : TtsClient.Listener {
+            override fun onOpen() {
+                isTtsConnected = true
+                Log.d(TAG, "TTS websocket open")
+            }
+
+            override fun onFinished(taskId: String) {
+                Log.d(TAG, "TTS ended: $taskId")
+                startMicIfWakeResponsePending()
+            }
+
+            override fun onError(code: Int, message: String) {
+                Log.e(TAG, "TTS error code=$code msg=$message")
+                startMicIfWakeResponsePending()
+            }
+
+            override fun onClosed(code: Int, reason: String) {
+                isTtsConnected = false
+                Log.d(TAG, "TTS closed code=$code reason=$reason")
+                // A close mid-utterance may never deliver onFinished; fall back to the mic.
+                startMicIfWakeResponsePending()
+            }
+        })
+    }
+
+    // Offline keyword callback: on the wake word, play the TTS response first, then start the mic once playback ends
     override fun onOfflineCmd(cmd: String) {
         if (cmd == WAKE_WORD) {
-            Log.d(TAG, "Wake word triggered, starting mic")
-            asrStartMic()
+            Log.d(TAG, "Wake word triggered")
+            val ttsClient = tts
+            if (isTtsConnected && ttsClient != null) {
+                isWakeResponsePending = true
+                ttsClient.speak(WAKE_RESPONSE)
+            } else {
+                Log.w(TAG, "TTS not connected, starting mic directly")
+                asrStartMic()
+            }
         }
     }
 
@@ -165,10 +219,15 @@ object AsrHelper : OfflineCmdListener {
             isMicRunning = false
         }
         asr?.close()
+        // Clear first so the onClosed callback triggered by tts.close() cannot restart the mic.
+        isWakeResponsePending = false
+        tts?.close()
         sdk?.close()
         asr = null
+        tts = null
         sdk = null
         isConnected = false
+        isTtsConnected = false
         Log.d(TAG, "AsrHelper closed")
     }
 }
\ No newline at end of file
diff --git a/gradle.properties b/gradle.properties
index 4d15d01..64a34d3 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -18,4 +18,13 @@ android.useAndroidX=true
 # Automatically convert third-party libraries to use AndroidX
 android.enableJetifier=true
 # Kotlin code style for this project: "official" or "obsolete":
-kotlin.code.style=official
\ No newline at end of file
+kotlin.code.style=official
+
+# Demo-Android defaults (test environment). Keep credentials out of VCS: set online.demo.ak / online.demo.sk in ~/.gradle/gradle.properties or pass -Ponline.demo.ak=... -Ponline.demo.sk=...
+online.demo.domain=api-test.rokid.com
+online.demo.ak=
+online.demo.sk=
+online.demo.uid=demo-user
+online.demo.deviceId=demo-device
+online.demo.asrPath=/ar/audio/api/ws/asr/streaming
+online.demo.ttsPath=/ar/audio/api/ws/tts
\ No newline at end of file