Normalize Telegram HTML send_message responses

This commit is contained in:
jxxghp
2026-06-21 11:39:53 +08:00
parent 5c1b303908
commit d483b805d8
3 changed files with 196 additions and 1 deletions

View File

@@ -1,5 +1,7 @@
"""发送消息工具"""
import html as html_utils
import re
from typing import Optional, Type
from pydantic import BaseModel, Field, model_validator
@@ -17,6 +19,41 @@ SEND_MESSAGE_PARSE_MODE_ALIASES = {
"mdv2": SEND_MESSAGE_PARSE_MODE_MARKDOWN,
"html": SEND_MESSAGE_PARSE_MODE_HTML,
}
# Tag names that may remain in an HTML message body; any other tag that is
# still present after normalization is rejected with a ValueError.
SEND_MESSAGE_HTML_ALLOWED_TAGS = {
    # Bold, italic and underline.
    "b", "strong", "i", "em", "u", "ins",
    # Strikethrough.
    "s", "strike", "del",
    # Spoilers.
    "span", "tg-spoiler",
    # Links, quotes and code.
    "a", "blockquote", "code", "pre",
}
# Ordered (compiled pattern, replacement) pairs applied one after another to
# rewrite common block-level HTML into newlines, bullets and <b> tags.
# All patterns are case-insensitive and tolerate whitespace inside the tag.
SEND_MESSAGE_HTML_NORMALIZATION_RULES = (
    # <br> and <br/> become a line break.
    (re.compile(r"<\s*br\s*/?\s*>", re.IGNORECASE), "\n"),
    # Paragraphs and divs: drop the opening tag, end the line on the closing tag.
    (re.compile(r"<\s*/\s*p\s*>", re.IGNORECASE), "\n"),
    (re.compile(r"<\s*p(?:\s+[^>]*)?>", re.IGNORECASE), ""),
    (re.compile(r"<\s*/\s*div\s*>", re.IGNORECASE), "\n"),
    (re.compile(r"<\s*div(?:\s+[^>]*)?>", re.IGNORECASE), ""),
    # List items: each item becomes its own bulleted line. The opening tag
    # must emit the bullet, otherwise items are indistinguishable from
    # ordinary paragraphs once the <ul>/<ol> wrapper is removed.
    (re.compile(r"<\s*/\s*li\s*>", re.IGNORECASE), "\n"),
    (re.compile(r"<\s*li(?:\s+[^>]*)?>", re.IGNORECASE), "• "),
    # The list wrappers themselves carry no text and are removed.
    (re.compile(r"<\s*/?\s*(?:ul|ol)(?:\s+[^>]*)?>", re.IGNORECASE), ""),
    # Headings are rendered as a bold line.
    (re.compile(r"<\s*h[1-6](?:\s+[^>]*)?>", re.IGNORECASE), "<b>"),
    (re.compile(r"<\s*/\s*h[1-6]\s*>", re.IGNORECASE), "</b>\n"),
)
# Matches a single HTML tag. Group 1 is "/" for a closing tag (empty
# otherwise), group 2 is the tag name, group 3 is the raw attribute text.
SEND_MESSAGE_HTML_TAG_PATTERN = re.compile(
    r"<\s*(/?)\s*"          # "<" plus an optional closing slash
    r"([a-zA-Z][\w:-]*)\b"  # tag name
    r"([^>]*)>"             # everything up to the closing ">"
)
# Regex template for reading one attribute out of a tag's raw attribute text.
# Fill in ``attr_name`` (regex-escaped) with str.format. Exactly one of the
# capture groups holds the value: group 1 for a double-quoted value, group 2
# for a single-quoted value, group 3 for an unquoted value.
#
# The leading lookbehind requires that the name is not preceded by another
# attribute-name character. A plain ``\b`` is not enough because it also
# matches after "-", which would let ``data-href="..."`` be read as ``href``.
SEND_MESSAGE_HTML_ATTR_PATTERN_TEMPLATE = (
    r"""(?<![\w:.-]){attr_name}\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))"""
)
class SendMessageInput(BaseModel):
@@ -52,6 +89,8 @@ class SendMessageInput(BaseModel):
if not self.message and not self.title and not self.image_url:
raise ValueError("message、title、image_url 至少需要提供一个")
self.parse_mode = SendMessageTool.normalize_parse_mode(self.parse_mode)
if self.parse_mode == SEND_MESSAGE_PARSE_MODE_HTML:
self.message = SendMessageTool.normalize_html_message(self.message)
return self
@@ -63,13 +102,17 @@ class SendMessageTool(MoviePilotTool):
ToolTag.Write,
ToolTag.Message,
ToolTag.Admin,
ToolTag.TerminalResponse,
]
sends_message: bool = True
return_direct: bool = True
# Description text for this tool. The adjacent string literals are joined
# into one string, so every fragment except the last must end with a space
# and no sentence may appear twice.
description: str = (
    "Send notification message to the user through configured notification channels "
    "(Telegram, Slack, WeChat, etc.). Supports optional image_url on channels that can "
    "send images. For Telegram, the optional parse_mode parameter controls message body "
    "rendering. Supported values are HTML and MarkdownV2; leave it empty for default. "
    "This is a terminal response tool: after it sends the user-facing message, do not "
    "send another final text reply with the same content."
)
args_schema: Type[BaseModel] = SendMessageInput
require_admin: bool = True
@@ -86,6 +129,72 @@ class SendMessageTool(MoviePilotTool):
raise ValueError("parse_mode 仅支持 MarkdownV2 或 HTML")
return normalized
@staticmethod
def _extract_html_attr(attrs: str, attr_name: str) -> Optional[str]:
    """
    Return the value of ``attr_name`` from a tag's raw attribute text.

    The search is case-insensitive and accepts double-quoted,
    single-quoted and unquoted values. ``attrs`` may be empty or ``None``.
    Returns ``None`` when the attribute is not present.
    """
    attr_regex = SEND_MESSAGE_HTML_ATTR_PATTERN_TEMPLATE.format(
        attr_name=re.escape(attr_name)
    )
    found = re.search(attr_regex, attrs or "", re.IGNORECASE)
    if found is None:
        return None
    # Only the alternative that actually matched (double-quoted,
    # single-quoted or unquoted) has a non-None group.
    for captured in found.groups():
        if captured is not None:
            return captured
    return None
@staticmethod
def _normalize_html_tag(match: re.Match) -> str:
    """
    Rewrite one matched HTML tag into a canonical, attribute-stripped form.

    ``match`` is a match of ``SEND_MESSAGE_HTML_TAG_PATTERN`` whose groups
    are the closing slash, the tag name and the raw attribute text.

    Only these attributes are kept: ``href`` on ``<a>``,
    ``class="tg-spoiler"`` on ``<span>``, ``expandable`` on
    ``<blockquote>`` and a ``language-*`` class on ``<code>``. All other
    attributes are dropped.

    Raises:
        ValueError: if the tag is not in ``SEND_MESSAGE_HTML_ALLOWED_TAGS``,
            if ``<a>`` has no ``href``, or if ``<span>`` is not a
            ``tg-spoiler`` span.
    """
    closing, tag_name, attrs = match.groups()
    tag_name = tag_name.lower()
    if tag_name not in SEND_MESSAGE_HTML_ALLOWED_TAGS:
        raise ValueError(f"HTML 标签 <{tag_name}> 不受 Telegram 支持")

    if closing:
        return f"</{tag_name}>"

    if tag_name == "a":
        href = SendMessageTool._extract_html_attr(attrs, "href")
        if not href:
            raise ValueError("HTML 标签 <a> 必须包含 href 属性")
        # The href is taken verbatim from the source markup, so it may
        # already contain entities such as "&amp;". Escaping every "&"
        # again would produce "&amp;amp;" and corrupt the URL, so only
        # bare ampersands (not part of &amp; &lt; &gt; &quot; or a numeric
        # entity) are escaped.
        escaped_href = re.sub(
            r"&(?!(?:amp|lt|gt|quot|#[0-9]+|#[xX][0-9a-fA-F]+);)",
            "&amp;",
            href,
        )
        # Same replacements html.escape(..., quote=True) performs for the
        # remaining special characters.
        escaped_href = (
            escaped_href.replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
            .replace("'", "&#x27;")
        )
        return f'<a href="{escaped_href}">'

    if tag_name == "span":
        class_name = SendMessageTool._extract_html_attr(attrs, "class")
        if class_name != "tg-spoiler":
            raise ValueError('HTML 标签 <span> 仅支持 class="tg-spoiler"')
        return '<span class="tg-spoiler">'

    if tag_name == "blockquote":
        # "expandable" is a bare boolean attribute, so look for the word
        # itself rather than a name=value pair.
        if re.search(r"(^|\s)expandable(\s|/|$)", attrs or "", re.IGNORECASE):
            return "<blockquote expandable>"
        return "<blockquote>"

    if tag_name == "code":
        class_name = SendMessageTool._extract_html_attr(attrs, "class")
        if class_name and class_name.startswith("language-"):
            escaped_class = html_utils.escape(class_name, quote=True)
            return f'<code class="{escaped_class}">'
        return "<code>"

    # Every other allowed tag is emitted bare, without attributes.
    return f"<{tag_name}>"
@staticmethod
def normalize_html_message(message: Optional[str]) -> Optional[str]:
    """
    Normalize an agent-generated HTML message body.

    First applies every rule in ``SEND_MESSAGE_HTML_NORMALIZATION_RULES``
    in order, then passes each remaining tag through
    ``_normalize_html_tag``. Empty or ``None`` input is returned unchanged.

    Raises:
        ValueError: propagated from ``_normalize_html_tag`` when a
            remaining tag is rejected.
    """
    if not message:
        return message

    text = message
    for rule_pattern, rule_replacement in SEND_MESSAGE_HTML_NORMALIZATION_RULES:
        text = rule_pattern.sub(rule_replacement, text)

    tag_rewriter = SendMessageTool._normalize_html_tag
    return SEND_MESSAGE_HTML_TAG_PATTERN.sub(tag_rewriter, text)
def get_tool_message(self, **kwargs) -> Optional[str]:
"""根据消息参数生成友好的提示消息"""
message = kwargs.get("message", "") or ""
@@ -117,6 +226,8 @@ class SendMessageTool(MoviePilotTool):
text = message or ""
try:
parse_mode = self.normalize_parse_mode(parse_mode)
if parse_mode == SEND_MESSAGE_PARSE_MODE_HTML:
text = self.normalize_html_message(text) or ""
except ValueError as e:
return str(e)
@@ -138,6 +249,8 @@ class SendMessageTool(MoviePilotTool):
parse_mode=parse_mode,
)
)
self._agent_context["user_reply_sent"] = True
self._agent_context["reply_mode"] = "send_message"
return "消息已发送"
except Exception as e:
logger.error(f"发送消息失败: {e}")

View File

@@ -578,6 +578,28 @@ class AgentImageSupportTest(unittest.TestCase):
self.assertEqual(payload.parse_mode, "HTML")
def test_send_message_input_normalizes_common_html_tags(self):
    # Headings, paragraphs, line breaks and list items in an HTML payload
    # should be rewritten when the input model is validated.
    raw_html = "<h1>标题</h1><p>第一行<br>第二行</p><ul><li>A</li></ul>"
    expected_body = "<b>标题</b>\n第一行\n第二行\n• A\n"

    parsed = SendMessageInput(
        explanation="send html notice",
        message=raw_html,
        parse_mode="HTML",
    )

    self.assertEqual(parsed.message, expected_body)
def test_send_message_input_rejects_unsupported_html_tags(self):
    # A tag outside the allowed set must fail validation with a message
    # that names the offending tag.
    unsupported_html = "<table><tr><td>A</td></tr></table>"

    with self.assertRaises(ValueError) as error:
        SendMessageInput(
            explanation="send html notice",
            message=unsupported_html,
            parse_mode="HTML",
        )

    raised_text = str(error.exception)
    self.assertIn("HTML 标签 <table> 不受 Telegram 支持", raised_text)
def test_send_message_tool_uses_regular_notification_type(self):
"""发送消息工具应按普通通知消息登记。"""
@@ -613,6 +635,58 @@ class AgentImageSupportTest(unittest.TestCase):
self.assertEqual(notification.image, "https://example.com/poster.png")
self.assertEqual(notification.parse_mode, "HTML")
def test_send_message_tool_marks_reply_sent_after_dispatch(self):
    """After a successful send, the tool should end the current reply turn."""
    agent_context = {}

    async def _send_once():
        sender = SendMessageTool(session_id="session-1", user_id="10001")
        sender.set_agent_context(agent_context)
        sender.set_message_attr(
            channel=MessageChannel.Telegram.value,
            source="telegram-test",
            username="tester",
        )
        # Replace the outbound post so nothing is actually delivered.
        with patch(
            "app.agent.tools.base.ToolChain.async_post_message",
            new_callable=AsyncMock,
        ):
            return await sender.run(message="<b>处理完成</b>", parse_mode="HTML")

    result = asyncio.run(_send_once())

    self.assertEqual(result, "消息已发送")
    self.assertTrue(agent_context["user_reply_sent"])
    self.assertEqual(agent_context["reply_mode"], "send_message")
def test_send_message_tool_rejects_unsupported_html_before_dispatch(self):
    """Unsupported HTML should be rejected before the message is posted."""
    unsupported_html = "<table><tr><td>A</td></tr></table>"

    async def _attempt_send():
        sender = SendMessageTool(session_id="session-1", user_id="10001")
        sender.set_message_attr(
            channel=MessageChannel.Telegram.value,
            source="telegram-test",
            username="tester",
        )
        with patch(
            "app.agent.tools.base.ToolChain.async_post_message",
            new_callable=AsyncMock,
        ) as post_mock:
            outcome = await sender.run(
                message=unsupported_html,
                parse_mode="HTML",
            )
        return outcome, post_mock

    result, async_post_message = asyncio.run(_attempt_send())

    self.assertIn("HTML 标签 <table> 不受 Telegram 支持", result)
    # The post coroutine must never have been awaited.
    async_post_message.assert_not_awaited()
def test_send_message_tool_rejects_invalid_parse_mode(self):
"""发送消息工具应拒绝不支持的格式类型。"""

View File

@@ -8,6 +8,7 @@ from app.agent.tools.impl.ask_user_choice import (
AskUserChoiceTool,
UserChoiceOptionInput,
)
from app.agent.tools.impl.send_message import SendMessageTool
from app.helper.interaction import (
AgentInteractionOption,
agent_interaction_manager,
@@ -89,6 +90,13 @@ class TestAgentInteraction(unittest.TestCase):
self.assertTrue(tool.return_direct)
self.assertIn("terminal interaction tool", tool.description)
def test_send_message_tool_returns_direct_after_sending_message(self):
    """Once a user-visible message is sent, the current agent turn should end."""
    sender = SendMessageTool(session_id="session-1", user_id="10001")

    # The tool must be flagged as returning directly and say so in its
    # description.
    self.assertTrue(sender.return_direct)
    self.assertIn("terminal response tool", sender.description)
def test_choice_tool_sends_buttons_and_registers_pending_request(self):
tool = AskUserChoiceTool(session_id="session-1", user_id="10001")
tool.set_message_attr(