重构语音能力配置与逻辑，统一音频输入输出开关并优化语音回复判断

2026-06-07 08:41:16 +08:00 · 2026-04-29 18:15:34 +08:00
parent e4a7333b79
commit b7749c44fd
9 changed files with 159 additions and 76 deletions
--- a/app/agent/tools/impl/send_voice_message.py
+++ b/app/agent/tools/impl/send_voice_message.py
@@ -8,10 +8,8 @@ from pydantic import BaseModel, Field
 from app.agent.tools.base import MoviePilotTool, ToolChain
 from app.core.config import settings
 from app.helper.voice import VoiceHelper
-from app.helper.service import ServiceConfigHelper
 from app.log import logger
 from app.schemas import Notification, NotificationType
-from app.schemas.types import MessageChannel


 class SendVoiceMessageInput(BaseModel):
@@ -43,18 +41,6 @@ class SendVoiceMessageTool(MoviePilotTool):
            message = message[:40] + "..."
        return f"发送语音回复: {message}"

-    def _supports_real_voice_reply(self) -> bool:
-        channel = self._channel or ""
-        if channel == MessageChannel.Telegram.value:
-            return True
-        if channel != MessageChannel.Wechat.value:
-            return False
-        for config in ServiceConfigHelper.get_notification_configs():
-            if config.name != self._source:
-                continue
-            return (config.config or {}).get("WECHAT_MODE", "app") != "bot"
-        return False
-
    async def run(self, message: str, **kwargs) -> str:
        if not message:
            return "语音回复内容不能为空"
@@ -62,11 +48,23 @@ class SendVoiceMessageTool(MoviePilotTool):
        voice_path = None
        used_voice = False
        channel = self._channel or ""
-        if self._supports_real_voice_reply() and VoiceHelper.is_available("tts"):
+        reply_mode = VoiceHelper.resolve_reply_mode(
+            channel=channel,
+            source=self._source,
+        )
+        fallback_reason = "当前渠道不支持语音回复"
+        if not VoiceHelper.is_enabled():
+            fallback_reason = "当前未启用音频输入输出"
+        if (
+            reply_mode == VoiceHelper.REPLY_MODE_NATIVE
+            and VoiceHelper.is_available("tts")
+        ):
            voice_file = await asyncio.to_thread(VoiceHelper.synthesize_speech, message)
            if voice_file:
                voice_path = str(voice_file)
                used_voice = True
+        elif reply_mode == VoiceHelper.REPLY_MODE_NATIVE:
+            fallback_reason = "当前未配置可用的语音合成能力"

        logger.info(
            "执行工具: %s, channel=%s, use_voice=%s, text_len=%s",
@@ -85,7 +83,11 @@ class SendVoiceMessageTool(MoviePilotTool):
                username=self._username,
                text=message,
                voice_path=voice_path,
-                voice_caption=message if settings.AI_VOICE_REPLY_WITH_TEXT else None,
+                voice_caption=(
+                    message
+                    if voice_path and settings.AI_VOICE_REPLY_WITH_TEXT
+                    else None
+                ),
            )
        )
        self._agent_context["user_reply_sent"] = True
@@ -93,4 +95,4 @@ class SendVoiceMessageTool(MoviePilotTool):

        if used_voice:
            return "语音回复已发送"
-        return "当前未使用语音通道，已自动回退为文字回复"
+        return f"{fallback_reason}，已自动回退为文字回复"