fix(agent): enable voice replies for supported channels

This commit is contained in:
jxxghp
2026-05-26 20:14:56 +08:00
parent db8363fee1
commit 896631d63e
5 changed files with 208 additions and 23 deletions

View File

@@ -734,29 +734,61 @@ class AgentCapabilityManager:
return cls.REPLY_MODE_TEXT
@classmethod
def supports_native_voice_reply(
cls, channel: Optional[str], source: Optional[str]
) -> bool:
"""判断当前渠道是否支持原生语音消息发送。"""
def _parse_message_channel(cls, channel: Optional[Any]):
"""将渠道入参归一化为消息渠道枚举。"""
if not channel:
return None
from app.schemas.types import MessageChannel
if isinstance(channel, MessageChannel):
return channel
channel_text = str(channel).strip()
if not channel_text:
return None
lowered_channel = channel_text.lower()
for channel_item in MessageChannel:
aliases = {
channel_item.value.lower(),
channel_item.name.lower(),
f"{MessageChannel.__name__}.{channel_item.name}".lower(),
}
if lowered_channel in aliases:
return channel_item
return None
@staticmethod
def _is_wechat_app_mode(source: Optional[str]) -> bool:
"""判断企业微信来源是否为自建应用模式。"""
if not source:
return False
from app.helper.service import ServiceConfigHelper
from app.schemas.types import MessageChannel
try:
channel_enum = MessageChannel(channel)
except (TypeError, ValueError):
return False
if channel_enum == MessageChannel.Telegram:
return True
if channel_enum != MessageChannel.Wechat:
return False
# 企业微信 bot 模式不支持发送语音,只有应用模式可用。
for config in ServiceConfigHelper.get_notification_configs():
if config.name != source:
continue
return (config.config or {}).get("WECHAT_MODE", "app") != "bot"
return False
@classmethod
def supports_native_voice_reply(
    cls, channel: Optional[str], source: Optional[str]
) -> bool:
    """Decide whether the current channel can send a native voice message.

    Args:
        channel: Channel identifier, resolved via ``_parse_message_channel``.
        source: Notification source name; only consulted for the Wechat
            channel.

    Returns:
        ``False`` when the channel cannot be resolved or does not declare the
        ``AUDIO_OUTPUT`` capability. For Wechat, the result of
        ``_is_wechat_app_mode(source)``. ``True`` for every other channel that
        declares the capability.
    """
    from app.schemas.message import ChannelCapability, ChannelCapabilityManager
    from app.schemas.types import MessageChannel

    resolved = cls._parse_message_channel(channel)
    if not resolved:
        return False

    has_audio_output = ChannelCapabilityManager.supports_capability(
        resolved, ChannelCapability.AUDIO_OUTPUT
    )
    if not has_audio_output:
        return False

    # Wechat declares the capability, but the final answer for it is
    # delegated to the per-source mode check.
    return (
        cls._is_wechat_app_mode(source)
        if resolved == MessageChannel.Wechat
        else True
    )

View File

@@ -15,8 +15,10 @@ from app.schemas import Notification, NotificationType
class SendVoiceMessageInput(BaseModel):
"""发送语音消息工具输入。"""
explanation: Optional[str] = Field(None,
description="Clear explanation of why a voice reply is the best fit in the current context",)
explanation: Optional[str] = Field(
None,
description="Clear explanation of why a voice reply is the best fit in the current context",
)
message: str = Field(
...,
description="The spoken content to send back to the user",
@@ -24,6 +26,8 @@ class SendVoiceMessageInput(BaseModel):
class SendVoiceMessageTool(MoviePilotTool):
"""发送 Agent 语音回复的工具。"""
name: str = "send_voice_message"
sends_message: bool = True
description: str = (
@@ -36,12 +40,14 @@ class SendVoiceMessageTool(MoviePilotTool):
require_admin: bool = False
def get_tool_message(self, **kwargs) -> Optional[str]:
    """Build the execution hint shown for the voice-reply tool.

    Reads the ``message`` keyword argument (a missing or falsy value is
    treated as an empty string), keeps at most its first 40 characters
    followed by ``"..."`` when it is longer than that, and returns it
    behind a fixed prefix.
    """
    preview = kwargs.get("message") or ""
    is_too_long = len(preview) > 40
    shown = preview[:40] + "..." if is_too_long else preview
    return f"发送语音回复: {shown}"
async def run(self, message: str, **kwargs) -> str:
"""合成语音并发送到当前对话渠道,不支持时回退为文字。"""
if not message:
return "语音回复内容不能为空"
@@ -69,11 +75,8 @@ class SendVoiceMessageTool(MoviePilotTool):
fallback_reason = "当前未配置可用的语音合成能力"
logger.info(
"执行工具: %s, channel=%s, use_voice=%s, text_len=%s",
self.name,
channel,
used_voice,
len(message),
f"执行工具: {self.name}, channel={channel}, "
f"use_voice={used_voice}, text_len={len(message)}"
)
await ToolChain().async_post_message(

View File

@@ -273,6 +273,8 @@ class ChannelCapability(Enum):
IMAGES = "images"
# 支持链接
LINKS = "links"
# 支持原生语音输出
AUDIO_OUTPUT = "audio_output"
# 支持文件发送
FILE_SENDING = "file_sending"
# 支持可收口的消息处理状态提示,如 reaction 或 typing
@@ -313,6 +315,7 @@ class ChannelCapabilityManager:
ChannelCapability.RICH_TEXT,
ChannelCapability.IMAGES,
ChannelCapability.LINKS,
ChannelCapability.AUDIO_OUTPUT,
ChannelCapability.FILE_SENDING,
ChannelCapability.PROCESSING_STATUS,
},
@@ -327,6 +330,7 @@ class ChannelCapabilityManager:
capabilities={
ChannelCapability.IMAGES,
ChannelCapability.LINKS,
ChannelCapability.AUDIO_OUTPUT,
ChannelCapability.MENU_COMMANDS,
},
fallback_enabled=True,
@@ -341,6 +345,7 @@ class ChannelCapabilityManager:
ChannelCapability.RICH_TEXT,
ChannelCapability.IMAGES,
ChannelCapability.LINKS,
ChannelCapability.AUDIO_OUTPUT,
ChannelCapability.FILE_SENDING,
ChannelCapability.PROCESSING_STATUS,
},