mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-06 20:42:43 +08:00
fix: default to text replies for voice input
This commit is contained in:
@@ -34,7 +34,6 @@ from app.chain import ChainBase
|
|||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.db.transferhistory_oper import TransferHistoryOper
|
from app.db.transferhistory_oper import TransferHistoryOper
|
||||||
from app.helper.llm import LLMHelper
|
from app.helper.llm import LLMHelper
|
||||||
from app.helper.voice import VoiceHelper
|
|
||||||
from app.log import logger
|
from app.log import logger
|
||||||
from app.schemas import Notification, NotificationType
|
from app.schemas import Notification, NotificationType
|
||||||
from app.schemas.message import ChannelCapabilityManager, ChannelCapability
|
from app.schemas.message import ChannelCapabilityManager, ChannelCapability
|
||||||
@@ -170,7 +169,6 @@ class MoviePilotAgent:
|
|||||||
self.channel = channel
|
self.channel = channel
|
||||||
self.source = source
|
self.source = source
|
||||||
self.username = username
|
self.username = username
|
||||||
self.reply_with_voice = False
|
|
||||||
self._tool_context: Dict[str, object] = {}
|
self._tool_context: Dict[str, object] = {}
|
||||||
self.output_callback: Optional[Callable[[str], None]] = None
|
self.output_callback: Optional[Callable[[str], None]] = None
|
||||||
self.force_streaming = False
|
self.force_streaming = False
|
||||||
@@ -280,8 +278,6 @@ class MoviePilotAgent:
|
|||||||
"""
|
"""
|
||||||
if self.is_background:
|
if self.is_background:
|
||||||
return self.force_streaming or callable(self.output_callback)
|
return self.force_streaming or callable(self.output_callback)
|
||||||
if self.reply_with_voice:
|
|
||||||
return False
|
|
||||||
if self.force_streaming or callable(self.output_callback):
|
if self.force_streaming or callable(self.output_callback):
|
||||||
return True
|
return True
|
||||||
# 啰嗦模式下始终需要流式输出来捕获工具调用前的 Agent 文字
|
# 啰嗦模式下始终需要流式输出来捕获工具调用前的 Agent 文字
|
||||||
@@ -379,10 +375,7 @@ class MoviePilotAgent:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 系统提示词
|
# 系统提示词
|
||||||
system_prompt = prompt_manager.get_agent_prompt(
|
system_prompt = prompt_manager.get_agent_prompt(channel=self.channel)
|
||||||
channel=self.channel,
|
|
||||||
prefer_voice_reply=self.reply_with_voice,
|
|
||||||
)
|
|
||||||
|
|
||||||
# LLM 模型(用于 agent 执行)
|
# LLM 模型(用于 agent 执行)
|
||||||
llm = self._initialize_llm(streaming=streaming)
|
llm = self._initialize_llm(streaming=streaming)
|
||||||
@@ -460,7 +453,6 @@ class MoviePilotAgent:
|
|||||||
f"images={len(images) if images else 0}, files={len(files) if files else 0}"
|
f"images={len(images) if images else 0}, files={len(files) if files else 0}"
|
||||||
)
|
)
|
||||||
self._tool_context = {
|
self._tool_context = {
|
||||||
"incoming_voice": self.reply_with_voice,
|
|
||||||
"user_reply_sent": False,
|
"user_reply_sent": False,
|
||||||
"reply_mode": None,
|
"reply_mode": None,
|
||||||
}
|
}
|
||||||
@@ -678,22 +670,6 @@ class MoviePilotAgent:
|
|||||||
"""
|
"""
|
||||||
通过原渠道发送消息给用户
|
通过原渠道发送消息给用户
|
||||||
"""
|
"""
|
||||||
voice_path = None
|
|
||||||
if (
|
|
||||||
self.reply_with_voice
|
|
||||||
and VoiceHelper.resolve_reply_mode(
|
|
||||||
channel=self.channel,
|
|
||||||
source=self.source,
|
|
||||||
)
|
|
||||||
== VoiceHelper.REPLY_MODE_NATIVE
|
|
||||||
and VoiceHelper.is_available("tts")
|
|
||||||
):
|
|
||||||
# 当用户本轮发来语音且 Agent 未主动调用 send_voice_message 时,
|
|
||||||
# 这里补一层自动语音回复兜底,避免最终仍只返回纯文字。
|
|
||||||
voice_file = await asyncio.to_thread(VoiceHelper.synthesize_speech, message)
|
|
||||||
if voice_file:
|
|
||||||
voice_path = str(voice_file)
|
|
||||||
|
|
||||||
await AgentChain().async_post_message(
|
await AgentChain().async_post_message(
|
||||||
Notification(
|
Notification(
|
||||||
channel=self.channel,
|
channel=self.channel,
|
||||||
@@ -703,12 +679,6 @@ class MoviePilotAgent:
|
|||||||
username=self.username,
|
username=self.username,
|
||||||
title=title,
|
title=title,
|
||||||
text=message,
|
text=message,
|
||||||
voice_path=voice_path,
|
|
||||||
voice_caption=(
|
|
||||||
message
|
|
||||||
if voice_path and settings.AI_VOICE_REPLY_WITH_TEXT
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -753,7 +723,6 @@ class _MessageTask:
|
|||||||
channel: Optional[str] = None
|
channel: Optional[str] = None
|
||||||
source: Optional[str] = None
|
source: Optional[str] = None
|
||||||
username: Optional[str] = None
|
username: Optional[str] = None
|
||||||
reply_with_voice: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
class AgentManager:
|
class AgentManager:
|
||||||
@@ -851,7 +820,6 @@ class AgentManager:
|
|||||||
channel: str = None,
|
channel: str = None,
|
||||||
source: str = None,
|
source: str = None,
|
||||||
username: str = None,
|
username: str = None,
|
||||||
reply_with_voice: bool = False,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
处理用户消息:将消息放入会话队列,按顺序依次处理。
|
处理用户消息:将消息放入会话队列,按顺序依次处理。
|
||||||
@@ -866,7 +834,6 @@ class AgentManager:
|
|||||||
channel=channel,
|
channel=channel,
|
||||||
source=source,
|
source=source,
|
||||||
username=username,
|
username=username,
|
||||||
reply_with_voice=reply_with_voice,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 获取或创建会话队列
|
# 获取或创建会话队列
|
||||||
@@ -964,7 +931,6 @@ class AgentManager:
|
|||||||
agent.source = task.source
|
agent.source = task.source
|
||||||
if task.username:
|
if task.username:
|
||||||
agent.username = task.username
|
agent.username = task.username
|
||||||
agent.reply_with_voice = task.reply_with_voice
|
|
||||||
|
|
||||||
return await agent.process(task.message, images=task.images, files=task.files)
|
return await agent.process(task.message, images=task.images, files=task.files)
|
||||||
|
|
||||||
|
|||||||
@@ -87,13 +87,10 @@ class PromptManager:
|
|||||||
logger.error(f"加载提示词失败: {prompt_name}, 错误: {e}")
|
logger.error(f"加载提示词失败: {prompt_name}, 错误: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def get_agent_prompt(
|
def get_agent_prompt(self, channel: str = None) -> str:
|
||||||
self, channel: str = None, prefer_voice_reply: bool = False
|
|
||||||
) -> str:
|
|
||||||
"""
|
"""
|
||||||
获取智能体提示词
|
获取智能体提示词
|
||||||
:param channel: 消息渠道(Telegram、微信、Slack等)
|
:param channel: 消息渠道(Telegram、微信、Slack等)
|
||||||
:param prefer_voice_reply: 是否优先使用语音回复
|
|
||||||
:return: 提示词内容
|
:return: 提示词内容
|
||||||
"""
|
"""
|
||||||
# 基础提示词只保留 MoviePilot 运行时和渠道能力相关约束。
|
# 基础提示词只保留 MoviePilot 运行时和渠道能力相关约束。
|
||||||
@@ -132,9 +129,7 @@ class PromptManager:
|
|||||||
|
|
||||||
# MoviePilot系统信息
|
# MoviePilot系统信息
|
||||||
moviepilot_info = self._get_moviepilot_info()
|
moviepilot_info = self._get_moviepilot_info()
|
||||||
voice_reply_spec = self._generate_voice_reply_instructions(
|
voice_reply_spec = self._generate_voice_reply_instructions()
|
||||||
prefer_voice_reply=prefer_voice_reply
|
|
||||||
)
|
|
||||||
|
|
||||||
# 始终替换占位符,避免后续 .format() 时因残留花括号报 KeyError
|
# 始终替换占位符,避免后续 .format() 时因残留花括号报 KeyError
|
||||||
base_prompt = base_prompt.format(
|
base_prompt = base_prompt.format(
|
||||||
@@ -326,17 +321,11 @@ class PromptManager:
|
|||||||
return "\n".join(instructions)
|
return "\n".join(instructions)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _generate_voice_reply_instructions(prefer_voice_reply: bool) -> str:
|
def _generate_voice_reply_instructions() -> str:
|
||||||
if not prefer_voice_reply:
|
|
||||||
return (
|
|
||||||
"- Voice replies: Use normal text replies by default. "
|
|
||||||
"Only call `send_voice_message` when spoken playback is clearly better than plain text."
|
|
||||||
)
|
|
||||||
return (
|
return (
|
||||||
"- Current message context: The user sent a voice message.\n"
|
"- Voice replies: Use normal text replies by default. "
|
||||||
"- Reply preference: Prioritize calling `send_voice_message` for the main user-facing reply.\n"
|
"Only call `send_voice_message` when the user explicitly asks for a voice reply "
|
||||||
"- Fallback: If native voice is unavailable on the current channel, `send_voice_message` will fall back to text.\n"
|
"or spoken playback is clearly better than plain text."
|
||||||
"- Do not repeat the same full reply again after calling `send_voice_message`."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@@ -28,9 +28,10 @@ class SendVoiceMessageInput(BaseModel):
|
|||||||
class SendVoiceMessageTool(MoviePilotTool):
|
class SendVoiceMessageTool(MoviePilotTool):
|
||||||
name: str = "send_voice_message"
|
name: str = "send_voice_message"
|
||||||
description: str = (
|
description: str = (
|
||||||
"Send a voice reply to the current user. Prefer this when the user sent a voice message "
|
"Send a voice reply to the current user. Use this only when the user explicitly asks for "
|
||||||
"or when spoken playback is more natural. On channels without voice support or when TTS "
|
"a voice reply or when spoken playback is clearly better than plain text. On channels "
|
||||||
"is unavailable, it automatically falls back to sending the same content as plain text."
|
"without voice support or when TTS is unavailable, it automatically falls back to sending "
|
||||||
|
"the same content as plain text."
|
||||||
)
|
)
|
||||||
args_schema: Type[BaseModel] = SendVoiceMessageInput
|
args_schema: Type[BaseModel] = SendVoiceMessageInput
|
||||||
require_admin: bool = False
|
require_admin: bool = False
|
||||||
|
|||||||
@@ -109,8 +109,8 @@ class MessageChain(ChainBase):
|
|||||||
"""
|
"""
|
||||||
images = CommingMessage.MessageImage.normalize_list(images)
|
images = CommingMessage.MessageImage.normalize_list(images)
|
||||||
|
|
||||||
# 识别语音为文本
|
# 语音输入只用于转写为文本,不默认改变回复形式。
|
||||||
reply_with_voice = bool(audio_refs)
|
has_audio_input = bool(audio_refs)
|
||||||
if audio_refs:
|
if audio_refs:
|
||||||
transcript = self._transcribe_audio_refs(audio_refs, channel, source)
|
transcript = self._transcribe_audio_refs(audio_refs, channel, source)
|
||||||
merged_parts = []
|
merged_parts = []
|
||||||
@@ -198,13 +198,12 @@ class MessageChain(ChainBase):
|
|||||||
username=username,
|
username=username,
|
||||||
images=images,
|
images=images,
|
||||||
files=files,
|
files=files,
|
||||||
reply_with_voice=reply_with_voice,
|
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
if (
|
if (
|
||||||
settings.AI_AGENT_ENABLE
|
settings.AI_AGENT_ENABLE
|
||||||
and (settings.AI_AGENT_GLOBAL or images or files or reply_with_voice)
|
and (settings.AI_AGENT_GLOBAL or images or files or has_audio_input)
|
||||||
):
|
):
|
||||||
self._handle_ai_message(
|
self._handle_ai_message(
|
||||||
text=text,
|
text=text,
|
||||||
@@ -214,7 +213,6 @@ class MessageChain(ChainBase):
|
|||||||
username=username,
|
username=username,
|
||||||
images=images,
|
images=images,
|
||||||
files=files,
|
files=files,
|
||||||
reply_with_voice=reply_with_voice,
|
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -866,7 +864,6 @@ class MessageChain(ChainBase):
|
|||||||
username: str,
|
username: str,
|
||||||
images: Optional[List[CommingMessage.MessageImage]] = None,
|
images: Optional[List[CommingMessage.MessageImage]] = None,
|
||||||
files: Optional[List[CommingMessage.MessageAttachment]] = None,
|
files: Optional[List[CommingMessage.MessageAttachment]] = None,
|
||||||
reply_with_voice: bool = False,
|
|
||||||
session_id: Optional[str] = None,
|
session_id: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -978,7 +975,6 @@ class MessageChain(ChainBase):
|
|||||||
channel=channel.value if channel else None,
|
channel=channel.value if channel else None,
|
||||||
source=source,
|
source=source,
|
||||||
username=username,
|
username=username,
|
||||||
reply_with_voice=reply_with_voice,
|
|
||||||
),
|
),
|
||||||
global_vars.loop,
|
global_vars.loop,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import base64
|
|||||||
import json
|
import json
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from unittest.mock import AsyncMock, Mock, patch
|
from unittest.mock import AsyncMock, Mock, patch
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
@@ -15,7 +14,6 @@ from app.agent import MoviePilotAgent, AgentChain
|
|||||||
from app.chain.message import MessageChain
|
from app.chain.message import MessageChain
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.helper.llm import LLMHelper
|
from app.helper.llm import LLMHelper
|
||||||
from app.helper.voice import VoiceHelper
|
|
||||||
from app.modules.discord import DiscordModule
|
from app.modules.discord import DiscordModule
|
||||||
from app.modules.qqbot import QQBotModule
|
from app.modules.qqbot import QQBotModule
|
||||||
from app.modules.slack import SlackModule
|
from app.modules.slack import SlackModule
|
||||||
@@ -218,7 +216,7 @@ class AgentImageSupportTest(unittest.TestCase):
|
|||||||
|
|
||||||
handle_ai_message.assert_called_once()
|
handle_ai_message.assert_called_once()
|
||||||
|
|
||||||
def test_audio_message_routes_to_agent_with_voice_reply_flag(self):
|
def test_audio_message_routes_to_agent_without_forcing_voice_reply(self):
|
||||||
chain = MessageChain()
|
chain = MessageChain()
|
||||||
|
|
||||||
with patch.object(chain, "load_cache", return_value={}), patch.object(
|
with patch.object(chain, "load_cache", return_value={}), patch.object(
|
||||||
@@ -240,7 +238,7 @@ class AgentImageSupportTest(unittest.TestCase):
|
|||||||
|
|
||||||
handle_ai_message.assert_called_once()
|
handle_ai_message.assert_called_once()
|
||||||
self.assertEqual(handle_ai_message.call_args.kwargs["text"], "帮我推荐一部电影")
|
self.assertEqual(handle_ai_message.call_args.kwargs["text"], "帮我推荐一部电影")
|
||||||
self.assertTrue(handle_ai_message.call_args.kwargs["reply_with_voice"])
|
self.assertNotIn("reply_with_voice", handle_ai_message.call_args.kwargs)
|
||||||
|
|
||||||
def test_file_message_routes_to_agent_even_when_global_agent_is_disabled(self):
|
def test_file_message_routes_to_agent_even_when_global_agent_is_disabled(self):
|
||||||
chain = MessageChain()
|
chain = MessageChain()
|
||||||
@@ -322,7 +320,7 @@ class AgentImageSupportTest(unittest.TestCase):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_agent_send_agent_message_auto_converts_to_voice_when_supported(self):
|
def test_agent_send_agent_message_keeps_default_text_reply(self):
|
||||||
agent = MoviePilotAgent(
|
agent = MoviePilotAgent(
|
||||||
session_id="session-1",
|
session_id="session-1",
|
||||||
user_id="user-1",
|
user_id="user-1",
|
||||||
@@ -330,17 +328,8 @@ class AgentImageSupportTest(unittest.TestCase):
|
|||||||
source="telegram-test",
|
source="telegram-test",
|
||||||
username="tester",
|
username="tester",
|
||||||
)
|
)
|
||||||
agent.reply_with_voice = True
|
|
||||||
|
|
||||||
with patch.object(
|
with patch.object(
|
||||||
VoiceHelper,
|
|
||||||
"resolve_reply_mode",
|
|
||||||
return_value=VoiceHelper.REPLY_MODE_NATIVE,
|
|
||||||
), patch.object(
|
|
||||||
VoiceHelper, "is_available", return_value=True
|
|
||||||
), patch.object(
|
|
||||||
VoiceHelper, "synthesize_speech", return_value=Path("/tmp/reply.opus")
|
|
||||||
), patch.object(
|
|
||||||
AgentChain, "async_post_message", new_callable=AsyncMock
|
AgentChain, "async_post_message", new_callable=AsyncMock
|
||||||
) as async_post_message:
|
) as async_post_message:
|
||||||
import asyncio
|
import asyncio
|
||||||
@@ -348,7 +337,8 @@ class AgentImageSupportTest(unittest.TestCase):
|
|||||||
asyncio.run(agent.send_agent_message("这是语音回复"))
|
asyncio.run(agent.send_agent_message("这是语音回复"))
|
||||||
|
|
||||||
notification = async_post_message.await_args.args[0]
|
notification = async_post_message.await_args.args[0]
|
||||||
self.assertEqual(notification.voice_path, "/tmp/reply.opus")
|
self.assertIsNone(notification.voice_path)
|
||||||
|
self.assertIsNone(notification.voice_caption)
|
||||||
self.assertEqual(notification.text, "这是语音回复")
|
self.assertEqual(notification.text, "这是语音回复")
|
||||||
|
|
||||||
def test_agent_process_wraps_request_as_structured_json(self):
|
def test_agent_process_wraps_request_as_structured_json(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user