fix(agent): enable voice replies for supported channels

2026-08-03 20:27:13 +08:00 · 2026-05-26 20:14:56 +08:00
parent db8363fee1
commit 896631d63e
5 changed files with 208 additions and 23 deletions
--- a/app/agent/llm/capability.py
+++ b/app/agent/llm/capability.py
@@ -734,29 +734,61 @@ class AgentCapabilityManager:
        return cls.REPLY_MODE_TEXT

    @classmethod
-    def supports_native_voice_reply(
-        cls, channel: Optional[str], source: Optional[str]
-    ) -> bool:
-        """判断当前渠道是否支持原生语音消息发送。"""
+    def _parse_message_channel(cls, channel: Optional[Any]):
+        """将渠道入参归一化为消息渠道枚举。"""
        if not channel:
+            return None
+
+        from app.schemas.types import MessageChannel
+
+        if isinstance(channel, MessageChannel):
+            return channel
+
+        channel_text = str(channel).strip()
+        if not channel_text:
+            return None
+        lowered_channel = channel_text.lower()
+        for channel_item in MessageChannel:
+            aliases = {
+                channel_item.value.lower(),
+                channel_item.name.lower(),
+                f"{MessageChannel.__name__}.{channel_item.name}".lower(),
+            }
+            if lowered_channel in aliases:
+                return channel_item
+        return None
+
+    @staticmethod
+    def _is_wechat_app_mode(source: Optional[str]) -> bool:
+        """判断企业微信来源是否为自建应用模式。"""
+        if not source:
            return False

        from app.helper.service import ServiceConfigHelper
-        from app.schemas.types import MessageChannel

-        try:
-            channel_enum = MessageChannel(channel)
-        except (TypeError, ValueError):
-            return False
-
-        if channel_enum == MessageChannel.Telegram:
-            return True
-        if channel_enum != MessageChannel.Wechat:
-            return False
-
-        # 企业微信 bot 模式不支持发送语音，只有应用模式可用。
        for config in ServiceConfigHelper.get_notification_configs():
            if config.name != source:
                continue
            return (config.config or {}).get("WECHAT_MODE", "app") != "bot"
        return False
+
+    @classmethod
+    def supports_native_voice_reply(
+            cls, channel: Optional[str], source: Optional[str]
+    ) -> bool:
+        """判断当前渠道是否支持原生语音消息发送。"""
+        from app.schemas.message import ChannelCapability, ChannelCapabilityManager
+        from app.schemas.types import MessageChannel
+
+        channel_enum = cls._parse_message_channel(channel)
+        if not channel_enum:
+            return False
+
+        if not ChannelCapabilityManager.supports_capability(
+                channel_enum, ChannelCapability.AUDIO_OUTPUT
+        ):
+            return False
+
+        if channel_enum == MessageChannel.Wechat:
+            return cls._is_wechat_app_mode(source)
+        return True
--- a/app/agent/tools/impl/send_voice_message.py
+++ b/app/agent/tools/impl/send_voice_message.py
@@ -15,8 +15,10 @@ from app.schemas import Notification, NotificationType
 class SendVoiceMessageInput(BaseModel):
    """发送语音消息工具输入。"""

-    explanation: Optional[str] = Field(None,
-        description="Clear explanation of why a voice reply is the best fit in the current context",)
+    explanation: Optional[str] = Field(
+        None,
+        description="Clear explanation of why a voice reply is the best fit in the current context",
+    )
    message: str = Field(
        ...,
        description="The spoken content to send back to the user",
@@ -24,6 +26,8 @@ class SendVoiceMessageInput(BaseModel):


 class SendVoiceMessageTool(MoviePilotTool):
+    """发送 Agent 语音回复的工具。"""
+
    name: str = "send_voice_message"
    sends_message: bool = True
    description: str = (
@@ -36,12 +40,14 @@ class SendVoiceMessageTool(MoviePilotTool):
    require_admin: bool = False

    def get_tool_message(self, **kwargs) -> Optional[str]:
+        """生成语音回复工具的执行提示。"""
        message = kwargs.get("message") or ""
        if len(message) > 40:
            message = message[:40] + "..."
        return f"发送语音回复: {message}"

    async def run(self, message: str, **kwargs) -> str:
+        """合成语音并发送到当前对话渠道，不支持时回退为文字。"""
        if not message:
            return "语音回复内容不能为空"

@@ -69,11 +75,8 @@ class SendVoiceMessageTool(MoviePilotTool):
            fallback_reason = "当前未配置可用的语音合成能力"

        logger.info(
-            "执行工具: %s, channel=%s, use_voice=%s, text_len=%s",
-            self.name,
-            channel,
-            used_voice,
-            len(message),
+            f"执行工具: {self.name}, channel={channel}, "
+            f"use_voice={used_voice}, text_len={len(message)}"
        )

        await ToolChain().async_post_message(
--- a/app/schemas/message.py
+++ b/app/schemas/message.py
@@ -273,6 +273,8 @@ class ChannelCapability(Enum):
    IMAGES = "images"
    # 支持链接
    LINKS = "links"
+    # 支持原生语音输出
+    AUDIO_OUTPUT = "audio_output"
    # 支持文件发送
    FILE_SENDING = "file_sending"
    # 支持可收口的消息处理状态提示，如 reaction 或 typing
@@ -313,6 +315,7 @@ class ChannelCapabilityManager:
                ChannelCapability.RICH_TEXT,
                ChannelCapability.IMAGES,
                ChannelCapability.LINKS,
+                ChannelCapability.AUDIO_OUTPUT,
                ChannelCapability.FILE_SENDING,
                ChannelCapability.PROCESSING_STATUS,
            },
@@ -327,6 +330,7 @@ class ChannelCapabilityManager:
            capabilities={
                ChannelCapability.IMAGES,
                ChannelCapability.LINKS,
+                ChannelCapability.AUDIO_OUTPUT,
                ChannelCapability.MENU_COMMANDS,
            },
            fallback_enabled=True,
@@ -341,6 +345,7 @@ class ChannelCapabilityManager:
                ChannelCapability.RICH_TEXT,
                ChannelCapability.IMAGES,
                ChannelCapability.LINKS,
+                ChannelCapability.AUDIO_OUTPUT,
                ChannelCapability.FILE_SENDING,
                ChannelCapability.PROCESSING_STATUS,
            },