Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions main/manager-api/src/main/resources/db/changelog/202511221450.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Update the HuoshanDoubleStreamTTS provider's form-field definition:
-- adds the boolean `enable_ws_reuse` field (WebSocket connection reuse, default true).
UPDATE `ai_model_provider`
SET fields = '[{"key": "ws_url", "type": "string", "label": "WebSocket地址"}, {"key": "appid", "type": "string", "label": "应用ID"}, {"key": "access_token", "type": "string", "label": "访问令牌"}, {"key": "resource_id", "type": "string", "label": "资源ID"}, {"key": "speaker", "type": "string", "label": "默认音色"}, {"key": "enable_ws_reuse", "type": "boolean", "label": "是否开启链接复用", "default": true}, {"key": "speech_rate", "type": "number", "label": "语速(-50~100)"}, {"key": "loudness_rate", "type": "number", "label": "音量(-50~100)"}, {"key": "pitch", "type": "number", "label": "音高(-12~12)"}]'
WHERE id = 'SYSTEM_TTS_HSDSTTS';

-- Refresh the documentation link and the user-facing setup notes of the
-- HuoshanDoubleStreamTTS model config, including the connection-reuse option.
-- The steps are numbered consecutively (the previous text skipped step 4).
UPDATE `ai_model_config` SET
`doc_link` = 'https://console.volcengine.com/speech/service/10007',
`remark` = '火山引擎语音合成服务配置说明:
1. 访问 https://www.volcengine.com/ 注册并开通火山引擎账号
2. 访问 https://console.volcengine.com/speech/service/10007 开通语音合成大模型,购买音色
3. 在页面底部获取appid和access_token
4. 资源ID固定为:volc.service_type.10029(大模型语音合成及混音)
5. 链接复用:开启WebSocket连接复用,默认true减少链接损耗(注意:复用后设备处于聆听状态时空闲链接会占并发数)
6. 语速:-50~100,可不填,正常默认值0,可填-50~100
7. 音量:-50~100,可不填,正常默认值0,可填-50~100
8. 音高:-12~12,可不填,正常默认值0,可填-12~12
9. 填入配置文件中' WHERE `id` = 'TTS_HuoshanDoubleStreamTTS';
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,10 @@ databaseChangeLog:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202511131023.sql
- changeSet:
id: 202511221450
author: RanChen
changes:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202511221450.sql
2 changes: 2 additions & 0 deletions main/xiaozhi-server/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,8 @@ TTS:
access_token: 你的火山引擎语音合成服务access_token
resource_id: volc.service_type.10029
speaker: zh_female_wanwanxiaohe_moon_bigtts
# 开启WebSocket连接复用,默认复用(注意:复用后设备处于聆听状态时空闲链接会占并发数)
enable_ws_reuse: True
speech_rate: 0
loudness_rate: 0
pitch: 0
Expand Down
52 changes: 45 additions & 7 deletions main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def __init__(self, config, delete_audio_file):
self.ws_url = config.get("ws_url")
self.authorization = config.get("authorization")
self.header = {"Authorization": f"{self.authorization}{self.access_token}"}
self.enable_two_way = True
enable_ws_reuse_value = config.get("enable_ws_reuse", True)
self.enable_ws_reuse = False if str(enable_ws_reuse_value).lower() in ('false', 'False') else True
self.tts_text = ""
self.opus_encoder = opus_encoder_utils.OpusEncoderUtils(
sample_rate=16000, channels=1, frame_size_ms=60
Expand All @@ -184,8 +185,14 @@ async def _ensure_connection(self):
"""建立新的WebSocket连接,并启动监听任务(仅第一次)"""
try:
if self.ws:
logger.bind(tag=TAG).info(f"使用已有链接...")
return self.ws
if self.enable_ws_reuse:
logger.bind(tag=TAG).info(f"使用已有链接...")
return self.ws
else:
try:
await self.finish_connection()
except:
pass
logger.bind(tag=TAG).debug("开始建立新连接...")
ws_header = {
"X-Api-App-Key": self.appId,
Expand All @@ -208,6 +215,22 @@ async def _ensure_connection(self):
logger.bind(tag=TAG).error(f"建立连接失败: {str(e)}")
self.ws = None
raise

async def finish_connection(self):
"""发送 FinishConnection 事件,等待服务端返回 EVENT_ConnectionFinished"""
try:
if self.ws:
logger.bind(tag=TAG).debug("开始关闭连接...")
header = Header(
message_type=FULL_CLIENT_REQUEST,
message_type_specific_flags=MsgTypeFlagWithEvent,
serial_method=JSON,
).as_bytes()
optional = Optional(event=EVENT_FinishConnection).as_bytes()
payload = str.encode("{}")
await self.send_event(self.ws, header, optional, payload)
except:
pass

def tts_text_priority_thread(self):
"""火山引擎双流式TTS的文本处理线程"""
Expand All @@ -224,10 +247,16 @@ def tts_text_priority_thread(self):
if self.conn.client_abort:
try:
logger.bind(tag=TAG).info("收到打断信息,终止TTS文本处理线程")
asyncio.run_coroutine_threadsafe(
self.cancel_session(self.conn.sentence_id),
loop=self.conn.loop,
)
if self.enable_ws_reuse:
asyncio.run_coroutine_threadsafe(
self.cancel_session(self.conn.sentence_id),
loop=self.conn.loop,
)
else:
asyncio.run_coroutine_threadsafe(
self.finish_connection(),
loop=self.conn.loop,
)
continue
except Exception as e:
logger.bind(tag=TAG).error(f"取消TTS会话失败: {str(e)}")
Expand Down Expand Up @@ -432,6 +461,11 @@ async def _start_monitor_tts_response(self):
res = self.parser_response(msg)
self.print_response(res, "send_text res:")

# 优先处理连接级别事件
if res.optional.event == EVENT_ConnectionFinished:
logger.bind(tag=TAG).debug(f"链接关闭成功~~")
break

# 只处理当前活跃会话的响应
if res.optional.sessionId and self.conn.sentence_id != res.optional.sessionId:
# 如果是会话结束相关事件,即使会话ID不匹配也要重置状态
Expand Down Expand Up @@ -461,6 +495,9 @@ async def _start_monitor_tts_response(self):
logger.bind(tag=TAG).debug(f"会话结束~~")
self.activate_session = False
self._process_before_stop_play_files()
# 非复用模式下,会话结束后发送 FinishConnection
if not self.enable_ws_reuse:
await self.finish_connection()
except websockets.ConnectionClosed:
logger.bind(tag=TAG).warning("WebSocket连接已关闭")
break
Expand All @@ -479,6 +516,7 @@ async def _start_monitor_tts_response(self):
self.ws = None
# 监听任务退出时清理引用
finally:
self.activate_session = False
self._monitor_task = None

async def send_event(
Expand Down