Enhance Telegram message formatting: add detailed guidelines for MarkdownV2 usage, including strikethrough support and instructions to replace unsupported heading and list syntax with bold text and plain line breaks. Implement smart escaping for Markdown to preserve formatting while avoiding API errors.

This commit is contained in:
jxxghp
2025-11-17 13:49:56 +08:00
parent 6a492198a8
commit 6e329b17a9
2 changed files with 99 additions and 3 deletions

View File

@@ -68,12 +68,28 @@ class PromptManager:
if "telegram" in channel_lower:
return """Messages are being sent through the **Telegram** channel. You must follow these format requirements:
**Supported Formatting:**
- **Bold text**: Use `*text*` (single asterisk, not double asterisks)
- **Italic text**: Use `_text_` (underscore)
- **Code**: Use `` `text` `` (backtick)
- **Links**: Use `[text](url)` format
- **Important**: Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
- **Best practice**: Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
- **Strikethrough**: Use `~text~` (tilde)
**IMPORTANT - Headings and Lists:**
- **DO NOT use heading syntax** (`#`, `##`, `###`) - Telegram MarkdownV2 does NOT support it
- **Instead, use bold text for headings**: `*Heading Text*` followed by a blank line
- **DO NOT use list syntax** (`-`, `*`, `+` at line start) - these will be escaped and won't display as lists
- **For lists**, use plain text with line breaks, or use bold for list item labels: `*Item 1:* description`
**Examples:**
- ❌ Wrong heading: `# Main Title` or `## Subtitle`
- ✅ Correct heading: `*Main Title*` (followed by blank line) or `*Subtitle*` (followed by blank line)
- ❌ Wrong list: `- Item 1` or `* Item 2`
- ✅ Correct list format: `*Item 1:* description` or use plain text with line breaks
**Special Characters:**
- Avoid using special characters that need escaping in MarkdownV2: `_*[]()~`>#+-=|{}.!` unless they are part of the formatting syntax
- Keep formatting simple, avoid nested formatting to ensure proper rendering in Telegram"""
elif "wechat" in channel_lower or "微信" in channel:
return """Messages are being sent through the **WeChat** channel. Please follow these format requirements:

View File

@@ -240,10 +240,15 @@ class Telegram:
try:
if title:
# 标题总是转义因为通常标题不包含Markdown格式
title = self.escape_markdown(title)
if text:
if escape_markdown:
# 完全转义模式:转义所有特殊字符
text = self.escape_markdown(text)
else:
# 智能转义模式保留Markdown格式只转义普通文本中的特殊字符
text = self.escape_markdown_smart(text)
if title:
caption = f"*{title}*\n{text}"
else:
@@ -610,4 +615,79 @@ class Telegram:
# 按 Telegram MarkdownV2 规则转义特殊字符
if not isinstance(text, str):
return str(text) if text is not None else ""
return self._markdown_escape_pattern.sub(r'\\\1', text)
return self._markdown_escape_pattern.sub(r'\\\1', text)
def escape_markdown_smart(self, text: str) -> str:
    """
    Escape text for Telegram MarkdownV2 while keeping recognised inline formatting.

    Recognised entities, in matching priority order:

    1. inline code      `text`       - emitted entirely as-is
    2. links            [text](url)  - brackets, parentheses and the URL are
       emitted as-is; the link text is escaped like ordinary text
    3. bold             *text*       - only the two delimiters are kept
    4. italic           _text_       - only the two delimiters are kept
    5. strikethrough    ~text~       - only the two delimiters are kept

    Every other occurrence of a character from ``self._escape_chars`` is
    prefixed with a backslash. This includes reserved characters *inside*
    bold/italic/strikethrough/link text, because MarkdownV2 requires them to
    be escaped there as well. It also covers heading markers (#), list
    bullets (-, *, +) and quote markers (>), which MarkdownV2 does not
    support; use bold text to imitate a heading.

    A style delimiter only counts when it hugs non-whitespace text, so two
    line-start "* " bullets are never mistaken for one bold span. Entities
    may not overlap: a candidate that touches an already accepted entity is
    treated as plain text, so nested markers are escaped.

    :param text: text to escape; non-string values are stringified and None
        becomes an empty string
    :return: the escaped text
    """
    if not isinstance(text, str):
        return str(text) if text is not None else ""

    escape_chars = self._escape_chars
    # Fast path: nothing in the text needs escaping.
    if not any(char in escape_chars for char in text):
        return text

    size = len(text)
    # verbatim[i]: the character is Markdown syntax (or code/URL content) and
    # must be emitted untouched.
    verbatim = [False] * size
    # claimed[i]: the character belongs to an accepted entity; used only to
    # reject later candidates that would overlap it.
    claimed = [False] * size

    def claim(start: int, end: int, raw_positions) -> None:
        """Accept the entity text[start:end] unless it overlaps an earlier one."""
        if any(claimed[start:end]):
            return
        for i in range(start, end):
            claimed[i] = True
        for i in raw_positions:
            verbatim[i] = True

    # 1. Inline code goes first: markup-like characters inside code are
    #    literal and must not be picked up as bold/italic/link syntax.
    for match in re.finditer(r'`([^`]+)`', text):
        claim(match.start(), match.end(), range(match.start(), match.end()))

    # 2. Links: keep "[", and everything from "]" to the closing ")" (the URL
    #    pattern already excludes ")"). The link text is escaped normally.
    for match in re.finditer(r'\[([^\]]*)\]\(([^)]*)\)', text):
        raw = [match.start(), *range(match.end(1), match.end())]
        claim(match.start(), match.end(), raw)

    # 3-5. Bold, italic, strikethrough: keep only the delimiter pair so that
    #      reserved characters in the styled text still get escaped.
    inline_patterns = (
        r'(?<!\*)\*(?![*\s])([^*]+?)(?<![*\s])\*(?!\*)',  # *bold*, not **
        r'(?<!_)_(?![_\s])([^_]+?)(?<![_\s])_(?!_)',      # _italic_, not __
        r'~(?!\s)([^~]+?)(?<!\s)~',                       # ~strikethrough~
    )
    for pattern in inline_patterns:
        for match in re.finditer(pattern, text):
            claim(match.start(), match.end(), (match.start(), match.end() - 1))

    # Emit the text, escaping reserved characters that are not Markdown syntax.
    return ''.join(
        '\\' + char if not verbatim[i] and char in escape_chars else char
        for i, char in enumerate(text)
    )