Enhance Telegram message formatting: add detailed guidelines for MarkdownV2 usage, including support for strikethrough, headings, and lists. Implement smart escaping for Markdown to preserve formatting while avoiding API errors.

2026-06-28 11:12:00 +08:00 · 2025-11-17 13:49:56 +08:00
parent 6a492198a8
commit 6e329b17a9
2 changed files with 99 additions and 3 deletions
--- a/app/agent/prompt/init.py
+++ b/app/agent/prompt/init.py
@@ -68,12 +68,28 @@ class PromptManager:
        if "telegram" in channel_lower:
            return """Messages are being sent through the **Telegram** channel. You must follow these format requirements:

+**Supported Formatting:**
 - **Bold text**: Use `*text*` (single asterisk, not double asterisks)
 - **Italic text**: Use `_text_` (underscore)
 - **Code**: Use `` `text` `` (backtick)
 - **Links**: Use `[text](url)` format
- **Important**: Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
- **Best practice**: Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
+- **Strikethrough**: Use `~text~` (tilde)
+
+**IMPORTANT - Headings and Lists:**
+- **DO NOT use heading syntax** (`#`, `##`, `###`) - Telegram MarkdownV2 does NOT support it
+- **Instead, use bold text for headings**: `*Heading Text*` followed by a blank line
+- **DO NOT use list syntax** (`-`, `*`, `+` at line start) - these will be escaped and won't display as lists
+- **For lists**, use plain text with line breaks, or use bold for list item labels: `*Item 1:* description`
+
+**Examples:**
+- ❌ Wrong heading: `# Main Title` or `## Subtitle`
+- ✅ Correct heading: `*Main Title*` (followed by blank line) or `*Subtitle*` (followed by blank line)
+- ❌ Wrong list: `- Item 1` or `* Item 2`
+- ✅ Correct list format: `*Item 1:* description` or use plain text with line breaks
+
+**Special Characters:**
+- Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
+- Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
        
        elif "wechat" in channel_lower or "微信" in channel:
            return """Messages are being sent through the **WeChat** channel. Please follow these format requirements:
--- a/app/modules/telegram/telegram.py
+++ b/app/modules/telegram/telegram.py
@@ -240,10 +240,15 @@ class Telegram:

        try:
            if title:
+                # 标题总是转义（因为通常标题不包含Markdown格式）
                title = self.escape_markdown(title)
            if text:
                if escape_markdown:
+                    # 完全转义模式：转义所有特殊字符
                    text = self.escape_markdown(text)
+                else:
+                    # 智能转义模式：保留Markdown格式，只转义普通文本中的特殊字符
+                    text = self.escape_markdown_smart(text)
                if title:
                    caption = f"*{title}*\n{text}"
                else:
@@ -610,4 +615,79 @@ class Telegram:
        # 按 Telegram MarkdownV2 规则转义特殊字符
        if not isinstance(text, str):
            return str(text) if text is not None else ""
-        return self._markdown_escape_pattern.sub(r'\\\1', text)
+        return self._markdown_escape_pattern.sub(r'\\\1', text)
+
+    def escape_markdown_smart(self, text: str) -> str:
+        """
+        Escape text for Telegram MarkdownV2 while keeping recognised inline markup.
+
+        Links, inline code, *bold*, _italic_ and ~strikethrough~ spans are detected
+        in that order; a candidate overlapping an accepted span is ignored. Only the
+        delimiters of an accepted span are emitted verbatim: bold, italic,
+        strikethrough and link-text contents are escaped recursively, because
+        MarkdownV2 still requires reserved characters inside them to be escaped
+        (an unescaped '.' inside *bold* is rejected by the Bot API). Inside code
+        spans and link URLs only the backslash is escaped, as MarkdownV2 requires
+        for those contexts.
+
+        Heading, list and quote markers ('#', '-', '+', '>') are not MarkdownV2
+        syntax, so outside any span they are escaped and shown literally. Use
+        *bold text* to emulate a heading.
+
+        :param text: text to escape; None yields "", other non-str values str(text)
+        :return: the escaped text
+        """
+        if not isinstance(text, str):
+            return str(text) if text is not None else ""
+
+        # Fast path: no reserved character means nothing to escape
+        if not any(char in self._escape_chars for char in text):
+            return text
+
+        # Highest priority first. Code precedes bold/italic so that '_' or '*' inside
+        # `snake_case` is not taken for markup, which used to break the code span.
+        patterns = (
+            ('link', r'\[([^\]]*)\]\(([^)]*)\)'),
+            ('code', r'`([^`]+)`'),
+            ('bold', r'(?<!\*)\*(?!\*)([^*]+?)(?<!\*)\*(?!\*)'),
+            ('italic', r'(?<!_)_(?!_)([^_]+?)(?<!_)_(?!_)'),
+            ('strike', r'~([^~]+)~'),
+        )
+
+        # claimed[i] is True once position i belongs to an accepted span
+        claimed = [False] * len(text)
+        spans = []
+        for kind, pattern in patterns:
+            for match in re.finditer(pattern, text):
+                start, end = match.span()
+                if any(claimed[start:end]):
+                    continue
+                claimed[start:end] = [True] * (end - start)
+                spans.append((start, end, kind, match))
+
+        def escape_plain(chunk: str) -> str:
+            # Backslash-escape every reserved character of ordinary text
+            return ''.join('\\' + c if c in self._escape_chars else c for c in chunk)
+
+        def escape_raw(chunk: str) -> str:
+            # Code and URL contexts: only the backslash needs escaping here, the
+            # patterns above already exclude '`' and ')' from the captured content
+            return chunk.replace('\\', '\\\\')
+
+        # Emit the text left to right, alternating plain gaps and accepted spans
+        result, pos = [], 0
+        for start, end, kind, match in sorted(spans, key=lambda item: item[0]):
+            result.append(escape_plain(text[pos:start]))
+            inner = match.group(1)
+            if kind == 'code':
+                result.append('`' + escape_raw(inner) + '`')
+            elif kind == 'link':
+                label = self.escape_markdown_smart(inner)
+                result.append('[' + label + '](' + escape_raw(match.group(2)) + ')')
+            else:
+                # bold / italic / strikethrough share the same marker on both sides
+                marker = text[start]
+                result.append(marker + self.escape_markdown_smart(inner) + marker)
+            pos = end
+        result.append(escape_plain(text[pos:]))
+        return ''.join(result)