fix(agent): enable voice replies for supported channels

This commit is contained in:
jxxghp
2026-05-26 20:14:56 +08:00
parent db8363fee1
commit 896631d63e
5 changed files with 208 additions and 23 deletions

View File

@@ -11,6 +11,8 @@ sys.modules.setdefault("psutil", Mock())
sys.modules.setdefault("pyquery", Mock())
from app.core.config import settings
from app.schemas.message import ChannelCapability, ChannelCapabilityManager
from app.schemas.types import MessageChannel
module_path = Path(__file__).resolve().parents[1] / "app" / "agent" / "llm" / "capability.py"
spec = importlib.util.spec_from_file_location("test_agent_llm_capability_module", module_path)
@@ -157,6 +159,73 @@ class AgentCapabilityManagerTest(unittest.TestCase):
self.assertEqual(result, Path("/tmp/reply.opus"))
provider.synthesize_speech.assert_called_once_with(text="你好")
def test_native_voice_reply_supports_channels_with_audio_output(self):
    """Check native voice-reply support across common channel spellings.

    Telegram and Feishu are passed both as string literals ("telegram",
    "Feishu") and as ``MessageChannel`` enum values and are expected to be
    accepted; "Slack" is expected to be rejected. The second argument is
    always ``None``.
    """
    supports_voice = AgentCapabilityManager.supports_native_voice_reply

    # Spellings that must be recognised as voice-capable.
    self.assertTrue(supports_voice("telegram", None))
    self.assertTrue(supports_voice(MessageChannel.Telegram.value, None))
    self.assertTrue(supports_voice(MessageChannel.Feishu.value, None))
    self.assertTrue(supports_voice("Feishu", None))

    # Slack is expected not to qualify for native voice replies.
    self.assertFalse(supports_voice("Slack", None))
def test_native_voice_reply_respects_wechat_mode(self):
    """Check that WeChat voice-reply support depends on the configured mode.

    ``ServiceConfigHelper.get_notification_configs`` is patched to return
    one config with ``WECHAT_MODE`` "app" and one with "bot". Only the
    "app" source is expected to support native voice replies; the "bot"
    source and a source name absent from the configs are expected to be
    rejected.
    """
    app_mode = SimpleNamespace(name="wechat-app", config={"WECHAT_MODE": "app"})
    bot_mode = SimpleNamespace(name="wechat-bot", config={"WECHAT_MODE": "bot"})
    supports_voice = AgentCapabilityManager.supports_native_voice_reply
    wechat = MessageChannel.Wechat.value

    config_lookup = patch(
        "app.helper.service.ServiceConfigHelper.get_notification_configs",
        return_value=[app_mode, bot_mode],
    )
    with config_lookup:
        self.assertTrue(supports_voice(wechat, "wechat-app"))
        self.assertFalse(supports_voice(wechat, "wechat-bot"))
        # "missing" matches neither of the patched notification configs.
        self.assertFalse(supports_voice(wechat, "missing"))
def test_channel_capability_marks_voice_output_channels(self):
    """Check which channels declare the ``AUDIO_OUTPUT`` capability.

    Telegram, Feishu and Wechat are expected to report
    ``ChannelCapability.AUDIO_OUTPUT`` through
    ``ChannelCapabilityManager.supports_capability``; Slack is expected
    not to.
    """
    voice_channels = (
        MessageChannel.Telegram,
        MessageChannel.Feishu,
        MessageChannel.Wechat,
    )
    for channel in voice_channels:
        # subTest labels a failure with the offending channel and keeps
        # checking the remaining channels instead of stopping at the first.
        with self.subTest(channel=channel):
            self.assertTrue(
                ChannelCapabilityManager.supports_capability(
                    channel, ChannelCapability.AUDIO_OUTPUT
                )
            )

    self.assertFalse(
        ChannelCapabilityManager.supports_capability(
            MessageChannel.Slack, ChannelCapability.AUDIO_OUTPUT
        )
    )
def test_mimo_tts_uses_chat_completions_audio_payload(self):
provider = MiMoAudioProvider()
fake_client = Mock()

View File

@@ -1,5 +1,6 @@
import asyncio
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, patch
import langchain.agents as langchain_agents
@@ -9,6 +10,7 @@ if not hasattr(langchain_agents, "create_agent"):
from app.agent.callback import StreamingHandler
from app.agent.tools.base import MoviePilotTool
from app.agent.tools.impl.send_voice_message import SendVoiceMessageTool
from app.api.endpoints.openai import _OpenAIStreamingHandler
from app.core.config import settings
from app.schemas.message import MessageResponse
@@ -397,6 +399,80 @@ class TestAgentToolStreaming(unittest.TestCase):
send_tool_message.assert_awaited_once_with("前置内容\n\n⚙️ => run test tool")
self.assertEqual(buffered_message, "")
def test_send_voice_message_uses_native_voice_for_supported_channels(self):
    """Check that voice-capable channels receive a native voice message.

    For Telegram and Feishu, with audio output enabled and speech synthesis
    patched to return ``/tmp/reply.opus``, the tool is expected to return
    the success text and post a notification carrying the voice path and
    the spoken text as caption.
    """

    async def _run(channel: MessageChannel):
        """Run the voice tool for ``channel`` and return the result and mocks."""
        tool = SendVoiceMessageTool(session_id="session-1", user_id="10001")
        tool.set_message_attr(
            channel=channel.value, source=f"{channel.name.lower()}-main", username="tester"
        )
        with (
            patch.object(settings, "LLM_SUPPORT_AUDIO_OUTPUT", True),
            patch.object(settings, "AUDIO_OUTPUT_INCLUDE_TEXT", True),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.is_audio_output_available",
                return_value=True,
            ),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.synthesize_speech",
                return_value=Path("/tmp/reply.opus"),
            ) as synthesize_speech,
            patch(
                "app.agent.tools.impl.send_voice_message.ToolChain.async_post_message",
                new_callable=AsyncMock,
            ) as async_post_message,
        ):
            result = await tool.run("你好")
        return result, synthesize_speech, async_post_message

    for channel in (MessageChannel.Telegram, MessageChannel.Feishu):
        # subTest labels a failure with the channel that produced it.
        with self.subTest(channel=channel):
            result, synthesize_speech, async_post_message = asyncio.run(_run(channel))

            # Fail with a clear assertion rather than an AttributeError on
            # ``await_args`` being None when nothing was posted.
            async_post_message.assert_awaited()
            notification = async_post_message.await_args.args[0]

            self.assertEqual(result, "语音回复已发送")
            synthesize_speech.assert_called_once_with("你好")
            self.assertEqual(notification.channel, channel)
            self.assertEqual(notification.voice_path, "/tmp/reply.opus")
            self.assertEqual(notification.voice_caption, "你好")
def test_send_voice_message_falls_back_for_unsupported_channels(self):
    """Check that Slack falls back to a plain text message.

    With audio output enabled but the message channel set to Slack, the
    tool is expected to return the fallback text, never call speech
    synthesis, and post a notification that carries the text and no voice
    path.
    """

    async def _run():
        """Run the voice tool on the Slack channel and return the result and mocks."""
        tool = SendVoiceMessageTool(session_id="session-1", user_id="10001")
        tool.set_message_attr(
            channel=MessageChannel.Slack.value, source="slack-main", username="tester"
        )
        with (
            patch.object(settings, "LLM_SUPPORT_AUDIO_OUTPUT", True),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.is_audio_output_available",
                return_value=True,
            ),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.synthesize_speech"
            ) as synthesize_speech,
            patch(
                "app.agent.tools.impl.send_voice_message.ToolChain.async_post_message",
                new_callable=AsyncMock,
            ) as async_post_message,
        ):
            result = await tool.run("你好")
        return result, synthesize_speech, async_post_message

    result, synthesize_speech, async_post_message = asyncio.run(_run())

    # Fail with a clear assertion rather than an AttributeError on
    # ``await_args`` being None when no fallback message was posted.
    async_post_message.assert_awaited()
    notification = async_post_message.await_args.args[0]

    self.assertEqual(result, "当前渠道不支持语音回复,已自动回退为文字回复")
    synthesize_speech.assert_not_called()
    self.assertEqual(notification.text, "你好")
    self.assertIsNone(notification.voice_path)
# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()