diff --git a/main/manager-api/src/main/resources/db/changelog/202511221450.sql b/main/manager-api/src/main/resources/db/changelog/202511221450.sql
new file mode 100644
index 0000000000..5125b562fd
--- /dev/null
+++ b/main/manager-api/src/main/resources/db/changelog/202511221450.sql
@@ -0,0 +1,17 @@
+-- Update the HuoshanDoubleStreamTTS provider config: add the option to enable WebSocket connection reuse
+UPDATE `ai_model_provider`
+SET fields = '[{"key": "ws_url", "type": "string", "label": "WebSocket地址"}, {"key": "appid", "type": "string", "label": "应用ID"}, {"key": "access_token", "type": "string", "label": "访问令牌"}, {"key": "resource_id", "type": "string", "label": "资源ID"}, {"key": "speaker", "type": "string", "label": "默认音色"}, {"key": "enable_ws_reuse", "type": "boolean", "label": "是否开启链接复用", "default": true}, {"key": "speech_rate", "type": "number", "label": "语速(-50~100)"}, {"key": "loudness_rate", "type": "number", "label": "音量(-50~100)"}, {"key": "pitch", "type": "number", "label": "音高(-12~12)"}]'
+WHERE id = 'SYSTEM_TTS_HSDSTTS';
+
+UPDATE `ai_model_config` SET
+`doc_link` = 'https://console.volcengine.com/speech/service/10007',
+`remark` = '火山引擎语音合成服务配置说明:
+1. 访问 https://www.volcengine.com/ 注册并开通火山引擎账号
+2. 访问 https://console.volcengine.com/speech/service/10007 开通语音合成大模型,购买音色
+3. 在页面底部获取appid和access_token
+4. 资源ID固定为:volc.service_type.10029(大模型语音合成及混音)
+5. 链接复用:开启WebSocket连接复用,默认true减少链接损耗(注意:复用后设备处于聆听状态时空闲链接会占并发数)
+6. 语速:-50~100,可不填,正常默认值0,可填-50~100
+7. 音量:-50~100,可不填,正常默认值0,可填-50~100
+8. 音高:-12~12,可不填,正常默认值0,可填-12~12
+9. 填入配置文件中' WHERE `id` = 'TTS_HuoshanDoubleStreamTTS';
\ No newline at end of file
diff --git a/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml b/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml
index 3c14eb7f0c..57c9ea072d 100755
--- a/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml
+++ b/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml
@@ -423,3 +423,10 @@ databaseChangeLog:
         - sqlFile:
             encoding: utf8
             path: classpath:db/changelog/202511131023.sql
+  - changeSet:
+      id: 202511221450
+      author: RanChen
+      changes:
+        - sqlFile:
+            encoding: utf8
+            path: classpath:db/changelog/202511221450.sql
diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
index a408739520..de804ac11e 100644
--- a/main/xiaozhi-server/config.yaml
+++ b/main/xiaozhi-server/config.yaml
@@ -669,6 +669,8 @@ TTS:
     access_token: 你的火山引擎语音合成服务access_token
     resource_id: volc.service_type.10029
     speaker: zh_female_wanwanxiaohe_moon_bigtts
+    # Enable WebSocket connection reuse (on by default). Note: with reuse on, an idle connection still counts against the concurrency quota while the device is listening.
+    enable_ws_reuse: True
     speech_rate: 0
     loudness_rate: 0
     pitch: 0
diff --git a/main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py b/main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py
index 4c761ac2ef..6135fa175d 100644
--- a/main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py
+++ b/main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py
@@ -163,7 +163,10 @@ def __init__(self, config, delete_audio_file):
         self.ws_url = config.get("ws_url")
         self.authorization = config.get("authorization")
         self.header = {"Authorization": f"{self.authorization}{self.access_token}"}
-        self.enable_two_way = True
+        # The value may be a real bool or its string form; only an explicit
+        # "false" (any case) disables connection reuse.
+        enable_ws_reuse_value = config.get("enable_ws_reuse", True)
+        self.enable_ws_reuse = str(enable_ws_reuse_value).strip().lower() != "false"
         self.tts_text = ""
         self.opus_encoder = opus_encoder_utils.OpusEncoderUtils(
             sample_rate=16000, channels=1, frame_size_ms=60
@@ -184,8 +187,13 @@ async def _ensure_connection(self):
         """建立新的WebSocket连接,并启动监听任务(仅第一次)"""
         try:
             if self.ws:
-                logger.bind(tag=TAG).info(f"使用已有链接...")
-                return self.ws
+                if self.enable_ws_reuse:
+                    logger.bind(tag=TAG).info("使用已有链接...")
+                    return self.ws
+                # Reuse disabled: ask the server to finish the stale connection
+                # before opening a new one. finish_connection() is best-effort
+                # and logs its own failures, so no extra guard is needed here.
+                await self.finish_connection()
             logger.bind(tag=TAG).debug("开始建立新连接...")
             ws_header = {
                 "X-Api-App-Key": self.appId,
@@ -208,6 +216,29 @@ async def _ensure_connection(self):
             logger.bind(tag=TAG).error(f"建立连接失败: {str(e)}")
             self.ws = None
             raise
+
+    async def finish_connection(self):
+        """Send a FinishConnection event on the current WebSocket.
+
+        Best-effort and fire-and-forget: the server's ConnectionFinished
+        reply is handled by the response monitor task, not awaited here.
+        Send failures are logged and swallowed so callers never fail on
+        teardown; task cancellation is still propagated.
+        """
+        if not self.ws:
+            return
+        try:
+            logger.bind(tag=TAG).debug("开始关闭连接...")
+            header = Header(
+                message_type=FULL_CLIENT_REQUEST,
+                message_type_specific_flags=MsgTypeFlagWithEvent,
+                serial_method=JSON,
+            ).as_bytes()
+            optional = Optional(event=EVENT_FinishConnection).as_bytes()
+            payload = str.encode("{}")
+            await self.send_event(self.ws, header, optional, payload)
+        except Exception as e:
+            logger.bind(tag=TAG).warning(f"发送FinishConnection事件失败: {str(e)}")
 
     def tts_text_priority_thread(self):
         """火山引擎双流式TTS的文本处理线程"""
@@ -224,10 +255,16 @@ def tts_text_priority_thread(self):
                 if self.conn.client_abort:
                     try:
                         logger.bind(tag=TAG).info("收到打断信息,终止TTS文本处理线程")
-                        asyncio.run_coroutine_threadsafe(
-                            self.cancel_session(self.conn.sentence_id),
-                            loop=self.conn.loop,
-                        )
+                        if self.enable_ws_reuse:
+                            asyncio.run_coroutine_threadsafe(
+                                self.cancel_session(self.conn.sentence_id),
+                                loop=self.conn.loop,
+                            )
+                        else:
+                            asyncio.run_coroutine_threadsafe(
+                                self.finish_connection(),
+                                loop=self.conn.loop,
+                            )
                         continue
                     except Exception as e:
                         logger.bind(tag=TAG).error(f"取消TTS会话失败: {str(e)}")
@@ -432,6 +469,11 @@ async def _start_monitor_tts_response(self):
                     res = self.parser_response(msg)
                     self.print_response(res, "send_text res:")
 
+                    # Handle connection-level events before any per-session filtering
+                    if res.optional.event == EVENT_ConnectionFinished:
+                        logger.bind(tag=TAG).debug("链接关闭成功~~")
+                        break
+
                     # 只处理当前活跃会话的响应
                     if res.optional.sessionId and self.conn.sentence_id != res.optional.sessionId:
                         # 如果是会话结束相关事件,即使会话ID不匹配也要重置状态
@@ -461,6 +503,9 @@ async def _start_monitor_tts_response(self):
                         logger.bind(tag=TAG).debug(f"会话结束~~")
                         self.activate_session = False
                         self._process_before_stop_play_files()
+                        # In non-reuse mode, send FinishConnection once the session has finished
+                        if not self.enable_ws_reuse:
+                            await self.finish_connection()
                 except websockets.ConnectionClosed:
                     logger.bind(tag=TAG).warning("WebSocket连接已关闭")
                     break
@@ -479,6 +524,8 @@ async def _start_monitor_tts_response(self):
                 self.ws = None
         # 监听任务退出时清理引用
         finally:
+            # Never leave a session flagged as active once the monitor exits
+            self.activate_session = False
             self._monitor_task = None
 
     async def send_event(