feat(agent): add audio message extraction and download support for Slack, QQ, Discord, SynologyChat, and VoceChat

This commit is contained in:
jxxghp
2026-04-13 08:36:57 +08:00
parent 8d938c2273
commit e09f9ad009
7 changed files with 601 additions and 15 deletions

View File

@@ -1,4 +1,5 @@
import json
from urllib.parse import quote, unquote
from typing import Optional, Union, List, Tuple, Any
from app.core.context import MediaInfo, Context
@@ -6,6 +7,7 @@ from app.log import logger
from app.modules import _ModuleBase, _MessageBase
from app.schemas import MessageChannel, CommingMessage, Notification, MessageResponse
from app.schemas.types import ModuleType
from app.utils.http import RequestUtils
try:
from app.modules.discord.discord import Discord
@@ -25,6 +27,20 @@ class DiscordModule(_ModuleBase, _MessageBase[Discord]):
".tiff",
".svg",
)
_AUDIO_SUFFIXES = (
".mp3",
".m4a",
".wav",
".ogg",
".oga",
".opus",
".aac",
".amr",
".flac",
".mpga",
".mpeg",
".webm",
)
def init_module(self) -> None:
"""
@@ -142,10 +158,12 @@ class DiscordModule(_ModuleBase, _MessageBase[Discord]):
text = msg_json.get("text")
chat_id = msg_json.get("chat_id")
images = self._extract_images(msg_json)
if (text or images) and userid:
audio_refs = self._extract_audio_refs(msg_json)
if (text or images or audio_refs) and userid:
logger.info(
f"收到来自 {client_config.name} 的 Discord 消息:"
f"userid={userid}, username={username}, text={text}, images={len(images) if images else 0}"
f"userid={userid}, username={username}, text={text}, "
f"images={len(images) if images else 0}, audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(
channel=MessageChannel.Discord,
@@ -155,6 +173,7 @@ class DiscordModule(_ModuleBase, _MessageBase[Discord]):
text=text,
chat_id=str(chat_id) if chat_id else None,
images=images,
audio_refs=audio_refs,
)
return None
@@ -181,6 +200,39 @@ class DiscordModule(_ModuleBase, _MessageBase[Discord]):
images.append(url)
return images if images else None
@classmethod
def _extract_audio_refs(cls, msg_json: dict) -> Optional[List[str]]:
    """
    Collect audio attachment references from a Discord message payload.

    An attachment counts as audio when its ``content_type`` starts with
    ``audio/`` or its ``filename`` ends with one of ``cls._AUDIO_SUFFIXES``.
    Each hit is returned as ``discord://file/<percent-encoded URL>``.

    :param msg_json: parsed message payload; only its ``attachments`` entry is read
    :return: de-duplicated references in first-seen order, or None when nothing matches
    """
    attachments = msg_json.get("attachments") or []
    if not isinstance(attachments, list):
        return None
    audio_refs: List[str] = []
    for attachment in attachments:
        # Skip malformed entries instead of failing the whole message.
        if not isinstance(attachment, dict):
            continue
        url = attachment.get("url") or attachment.get("proxy_url")
        # quote() only accepts str/bytes, so ignore anything else.
        if not url or not isinstance(url, str):
            continue
        content_type = str(attachment.get("content_type") or "").lower()
        filename = str(attachment.get("filename") or "").lower()
        if content_type.startswith("audio/") or filename.endswith(cls._AUDIO_SUFFIXES):
            audio_refs.append(f"discord://file/{quote(url, safe='')}")
    # dict.fromkeys drops duplicates while keeping the first-seen order.
    return list(dict.fromkeys(audio_refs)) or None
def download_discord_file_bytes(self, file_ref: str, source: str) -> Optional[bytes]:
    """
    Fetch a Discord attachment and return its raw bytes.

    :param file_ref: reference of the form ``discord://file/<percent-encoded URL>``
    :param source: configuration name handed to ``self.get_config``
    :return: the response body, or None when the reference is not a Discord
             file reference, no configuration is found for ``source``, or the
             request yields no content
    """
    scheme = "discord://file/"
    if not (file_ref and file_ref.startswith(scheme)):
        return None
    if not self.get_config(source):
        return None
    # Drop the scheme prefix, then undo the percent-encoding to recover the URL.
    target_url = unquote(file_ref[len(scheme):])
    response = RequestUtils(timeout=30).get_res(target_url)
    if not response:
        return None
    body = response.content
    return body if body else None
def post_message(self, message: Notification, **kwargs) -> None:
"""
发送通知消息

View File

@@ -5,6 +5,7 @@ QQ Bot 通知模块
"""
import json
from urllib.parse import quote, unquote
from typing import Optional, List, Tuple, Union, Any
from app.core.context import MediaInfo, Context
@@ -13,6 +14,7 @@ from app.modules import _ModuleBase, _MessageBase
from app.modules.qqbot.qqbot import QQBot
from app.schemas import CommingMessage, MessageChannel, Notification
from app.schemas.types import ModuleType
from app.utils.http import RequestUtils
class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
@@ -28,6 +30,20 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
".tiff",
".svg",
)
_AUDIO_SUFFIXES = (
".mp3",
".m4a",
".wav",
".ogg",
".oga",
".opus",
".aac",
".amr",
".flac",
".mpga",
".mpeg",
".webm",
)
def init_module(self) -> None:
self.stop()
@@ -90,7 +106,8 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
msg_type = msg_body.get("type")
content = (msg_body.get("content") or "").strip()
images = self._extract_images(msg_body)
if not content and not images:
audio_refs = self._extract_audio_refs(msg_body)
if not content and not images and not audio_refs:
return None
if msg_type == "C2C_MESSAGE_CREATE":
@@ -100,7 +117,8 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
return None
logger.info(
f"收到 QQ 私聊消息: userid={user_openid}, "
f"text={(content or '')[:50]}..., images={len(images) if images else 0}"
f"text={(content or '')[:50]}..., images={len(images) if images else 0}, "
f"audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(
channel=MessageChannel.QQ,
@@ -109,6 +127,7 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
username=user_openid,
text=content,
images=images,
audio_refs=audio_refs,
)
elif msg_type == "GROUP_AT_MESSAGE_CREATE":
author = msg_body.get("author", {})
@@ -118,7 +137,8 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
userid = f"group:{group_openid}" if group_openid else member_openid
logger.info(
f"收到 QQ 群消息: group={group_openid}, userid={member_openid}, "
f"text={(content or '')[:50]}..., images={len(images) if images else 0}"
f"text={(content or '')[:50]}..., images={len(images) if images else 0}, "
f"audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(
channel=MessageChannel.QQ,
@@ -127,6 +147,7 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
username=member_openid or group_openid,
text=content,
images=images,
audio_refs=audio_refs,
)
return None
@@ -175,6 +196,50 @@ class QQBotModule(_ModuleBase, _MessageBase[QQBot]):
deduped.append(image)
return deduped or None
@classmethod
def _extract_audio_refs(cls, msg_body: dict) -> Optional[List[str]]:
    """
    Collect audio attachment references from a QQ message body.

    An attachment counts as audio when its ``content_type`` / ``mime_type``
    starts with ``audio/`` or its ``filename`` / ``name`` ends with one of
    ``cls._AUDIO_SUFFIXES``. Each hit is returned as
    ``qq://file/<percent-encoded URL>``.

    :param msg_body: parsed message body; only its ``attachments`` entry is read
    :return: de-duplicated references in first-seen order, or None when nothing matches
    """
    attachments = msg_body.get("attachments") or []
    if not isinstance(attachments, list):
        return None
    # A dict keeps insertion order and gives O(1) duplicate checks.
    found = {}
    for attachment in attachments:
        if not isinstance(attachment, dict):
            continue
        url = attachment.get("url") or attachment.get("proxy_url")
        # quote() only accepts str/bytes, so ignore anything else.
        if not url or not isinstance(url, str):
            continue
        content_type = str(
            attachment.get("content_type")
            or attachment.get("mime_type")
            or ""
        ).lower()
        filename = str(
            attachment.get("filename")
            or attachment.get("name")
            or ""
        ).lower()
        if content_type.startswith("audio/") or filename.endswith(cls._AUDIO_SUFFIXES):
            found.setdefault(f"qq://file/{quote(url, safe='')}", None)
    return list(found) or None
def download_qq_file_bytes(self, file_ref: str, source: str) -> Optional[bytes]:
    """
    Fetch a QQ audio attachment and return its raw bytes.

    :param file_ref: reference of the form ``qq://file/<percent-encoded URL>``
    :param source: configuration name handed to ``self.get_config``
    :return: the response body, or None when the reference is not a QQ file
             reference, no configuration is found for ``source``, or the
             request yields no content
    """
    scheme = "qq://file/"
    if not (file_ref and file_ref.startswith(scheme)):
        return None
    if not self.get_config(source):
        return None
    # Drop the scheme prefix, then undo the percent-encoding to recover the URL.
    target_url = unquote(file_ref[len(scheme):])
    response = RequestUtils(timeout=30).get_res(target_url)
    if not response:
        return None
    body = response.content
    return body if body else None
def post_message(self, message: Notification, **kwargs) -> None:
for conf in self.get_configs().values():
if not self.check_message(message, conf.name):

View File

@@ -1,5 +1,6 @@
import json
import re
from urllib.parse import quote, unquote
from typing import Optional, Union, List, Tuple, Any
from app.core.context import MediaInfo, Context
@@ -11,6 +12,21 @@ from app.schemas.types import ModuleType
class SlackModule(_ModuleBase, _MessageBase[Slack]):
_AUDIO_SUFFIXES = (
".mp3",
".m4a",
".wav",
".ogg",
".oga",
".opus",
".aac",
".amr",
".flac",
".mpga",
".mpeg",
".webm",
)
def init_module(self) -> None:
"""
初始化模块
@@ -204,11 +220,13 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
return None
if msg_json:
images = None
audio_refs = None
if msg_json.get("type") == "message":
userid = msg_json.get("user")
text = msg_json.get("text")
username = msg_json.get("user")
images = self._extract_images(msg_json)
audio_refs = self._extract_audio_refs(msg_json)
elif msg_json.get("type") == "block_actions":
userid = msg_json.get("user", {}).get("id")
callback_data = msg_json.get("actions")[0].get("value")
@@ -251,6 +269,7 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
).strip()
username = ""
images = self._extract_images(msg_json.get("event", {}))
audio_refs = self._extract_audio_refs(msg_json.get("event", {}))
elif msg_json.get("type") == "shortcut":
userid = msg_json.get("user", {}).get("id")
text = msg_json.get("callback_id")
@@ -262,7 +281,8 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
else:
return None
logger.info(
f"收到来自 {client_config.name} 的Slack消息userid={userid}, username={username}, text={text}, images={len(images) if images else 0}"
f"收到来自 {client_config.name} 的Slack消息userid={userid}, username={username}, "
f"text={text}, images={len(images) if images else 0}, audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(
channel=MessageChannel.Slack,
@@ -271,6 +291,7 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
username=username,
text=text,
images=images,
audio_refs=audio_refs,
)
return None
@@ -297,6 +318,29 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
images.append(url)
return images if images else None
@classmethod
def _extract_audio_refs(cls, msg_json: dict) -> Optional[List[str]]:
    """
    Collect audio file references from a Slack message payload.

    A file counts as audio when its ``type`` is ``audio``, its ``mimetype``
    starts with ``audio/``, or its ``filetype`` maps to one of
    ``cls._AUDIO_SUFFIXES``. The download URL (``url_private_download``,
    falling back to ``url_private``) is returned as
    ``slack://file/<percent-encoded URL>``.

    :param msg_json: parsed message or event payload; only its ``files`` entry is read
    :return: de-duplicated references in first-seen order, or None when nothing matches
    """
    files = msg_json.get("files") or []
    if not isinstance(files, list):
        return None
    audio_refs: List[str] = []
    for entry in files:
        # Skip malformed entries instead of failing the whole message.
        if not isinstance(entry, dict):
            continue
        file_type = str(entry.get("type", "")).lower()
        # Normalise "mp3" / ".mp3" to ".mp3" so it can be compared with the suffix table.
        file_ext = f".{str(entry.get('filetype', '')).lower().lstrip('.')}"
        mime_type = str(entry.get("mimetype", "")).lower()
        is_audio = (
            file_type == "audio"
            or mime_type.startswith("audio/")
            or file_ext in cls._AUDIO_SUFFIXES
        )
        if not is_audio:
            continue
        url = entry.get("url_private_download") or entry.get("url_private")
        # quote() only accepts str/bytes, so ignore anything else.
        if url and isinstance(url, str):
            audio_refs.append(f"slack://file/{quote(url, safe='')}")
    # dict.fromkeys drops duplicates while keeping the first-seen order.
    return list(dict.fromkeys(audio_refs)) or None
def download_slack_file_to_data_url(self, file_url: str, source: str) -> Optional[str]:
"""
下载Slack文件并转为data URL
@@ -318,6 +362,25 @@ class SlackModule(_ModuleBase, _MessageBase[Slack]):
return f"data:{mime_type};base64,{base64.b64encode(content).decode()}"
return None
def download_slack_file_bytes(self, file_ref: str, source: str) -> Optional[bytes]:
    """
    Fetch a Slack audio file through the configured client and return its raw bytes.

    :param file_ref: reference of the form ``slack://file/<percent-encoded URL>``
    :param source: configuration name handed to ``self.get_config``
    :return: the first element of the pair returned by the client's
             ``download_file``, or None when the reference is not a Slack file
             reference, no configuration or client instance is available, or
             the download yields nothing
    """
    scheme = "slack://file/"
    if not (file_ref and file_ref.startswith(scheme)):
        return None
    config = self.get_config(source)
    client = self.get_instance(config.name) if config else None
    if not client:
        return None
    # Drop the scheme prefix, then undo the percent-encoding to recover the URL.
    downloaded = client.download_file(unquote(file_ref[len(scheme):]))
    if not downloaded:
        return None
    payload, _ = downloaded
    return payload
def post_message(self, message: Notification, **kwargs) -> None:
"""
发送消息

View File

@@ -1,5 +1,6 @@
from typing import Optional, Union, List, Tuple, Any
import json
from typing import Optional, Union, List, Tuple, Any
from urllib.parse import quote, unquote
from app.core.context import MediaInfo, Context
from app.log import logger
@@ -7,6 +8,7 @@ from app.modules import _ModuleBase, _MessageBase
from app.modules.synologychat.synologychat import SynologyChat
from app.schemas import MessageChannel, CommingMessage, Notification
from app.schemas.types import ModuleType
from app.utils.http import RequestUtils
class SynologyChatModule(_ModuleBase, _MessageBase[SynologyChat]):
@@ -20,6 +22,20 @@ class SynologyChatModule(_ModuleBase, _MessageBase[SynologyChat]):
".tiff",
".svg",
)
_AUDIO_SUFFIXES = (
".mp3",
".m4a",
".wav",
".ogg",
".oga",
".opus",
".aac",
".amr",
".flac",
".mpga",
".mpeg",
".webm",
)
def init_module(self) -> None:
"""
@@ -108,14 +124,16 @@ class SynologyChatModule(_ModuleBase, _MessageBase[SynologyChat]):
# 获取用户名
user_name = message.get("username")
images = self._extract_images(message)
if (text or images) and user_id:
audio_refs = self._extract_audio_refs(message)
if (text or images or audio_refs) and user_id:
logger.info(
f"收到来自 {client_config.name} 的SynologyChat消息"
f"userid={user_id}, username={user_name}, text={text}, images={len(images) if images else 0}"
f"userid={user_id}, username={user_name}, text={text}, "
f"images={len(images) if images else 0}, audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(channel=MessageChannel.SynologyChat, source=client_config.name,
userid=user_id, username=user_name, text=text or "",
images=images)
images=images, audio_refs=audio_refs)
except Exception as err:
logger.debug(f"解析SynologyChat消息失败{str(err)}")
return None
@@ -151,6 +169,49 @@ class SynologyChatModule(_ModuleBase, _MessageBase[SynologyChat]):
deduped.append(image)
return deduped or None
@classmethod
def _extract_audio_refs(cls, message: dict) -> Optional[List[str]]:
    """
    Collect audio file references from a Synology Chat message.

    Two places are inspected:

    * the ``audio_url`` / ``voice_url`` / ``file_url`` fields, accepted when
      ``cls._looks_like_audio`` approves the string;
    * the ``attachments`` / ``files`` fields, which may be a JSON string, a
      single item or a list of items. String items go through
      ``cls._looks_like_audio``; dict items are accepted when their content
      type starts with ``audio/``, their URL looks like audio, or their name
      ends with one of ``cls._AUDIO_SUFFIXES``.

    Each hit is returned as ``synology://file/<percent-encoded URL>``.

    :param message: parsed incoming message
    :return: de-duplicated references in first-seen order, or None when nothing matches
    """
    audio_refs: List[str] = []

    for key in ("audio_url", "voice_url", "file_url"):
        value = message.get(key)
        if isinstance(value, str) and cls._looks_like_audio(value):
            audio_refs.append(f"synology://file/{quote(value, safe='')}")

    for key in ("attachments", "files"):
        raw_value = message.get(key)
        if not raw_value:
            continue
        parsed = raw_value
        if isinstance(raw_value, str):
            try:
                parsed = json.loads(raw_value)
            except (ValueError, RecursionError):
                # Not JSON: keep the raw string and treat it as a candidate URL.
                parsed = raw_value
        items = parsed if isinstance(parsed, list) else [parsed]
        for item in items:
            if isinstance(item, str):
                if cls._looks_like_audio(item):
                    audio_refs.append(f"synology://file/{quote(item, safe='')}")
                continue
            if not isinstance(item, dict):
                continue
            url = item.get("url") or item.get("file_url") or item.get("audio_url")
            # An empty or non-string URL cannot be downloaded later, so never emit a ref for it.
            if not url or not isinstance(url, str):
                continue
            content_type = str(
                item.get("content_type")
                or item.get("mime_type")
                or ""
            ).lower()
            name = str(
                item.get("name")
                or item.get("filename")
                or ""
            ).lower()
            if (
                content_type.startswith("audio/")
                or cls._looks_like_audio(url)
                or name.endswith(cls._AUDIO_SUFFIXES)
            ):
                audio_refs.append(f"synology://file/{quote(url, safe='')}")

    # dict.fromkeys drops duplicates while keeping the first-seen order.
    return list(dict.fromkeys(audio_refs)) or None
@classmethod
def _looks_like_image(cls, value: str) -> bool:
if not value or not isinstance(value, str):
@@ -160,6 +221,29 @@ class SynologyChatModule(_ModuleBase, _MessageBase[SynologyChat]):
suffix in lowered for suffix in cls._IMAGE_SUFFIXES
)
@classmethod
def _looks_like_audio(cls, value: str) -> bool:
    """
    Heuristically decide whether a string is an HTTP(S) URL pointing at audio.

    One of ``cls._AUDIO_SUFFIXES`` must occur in the URL's path, query or
    fragment. The host part is deliberately ignored so that a domain such as
    ``my.webmail.com`` is not mistaken for a ``.webm`` file.

    :param value: candidate URL
    :return: True when the value is an http/https URL that mentions an audio suffix
    """
    from urllib.parse import urlsplit

    if not value or not isinstance(value, str):
        return False
    lowered = value.lower()
    if not lowered.startswith(("http://", "https://")):
        return False
    try:
        parts = urlsplit(lowered)
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): not a usable URL.
        return False
    searchable = (parts.path, parts.query, parts.fragment)
    return any(
        suffix in section
        for section in searchable
        for suffix in cls._AUDIO_SUFFIXES
    )
def download_synologychat_file_bytes(self, file_ref: str, source: str) -> Optional[bytes]:
    """
    Fetch a Synology Chat audio file and return its raw bytes.

    :param file_ref: reference of the form ``synology://file/<percent-encoded URL>``
    :param source: configuration name handed to ``self.get_config``
    :return: the response body, or None when the reference is not a Synology
             file reference, no configuration is found for ``source``, or the
             request yields no content
    """
    scheme = "synology://file/"
    if not (file_ref and file_ref.startswith(scheme)):
        return None
    if not self.get_config(source):
        return None
    # Drop the scheme prefix, then undo the percent-encoding to recover the URL.
    target_url = unquote(file_ref[len(scheme):])
    response = RequestUtils(timeout=30).get_res(target_url)
    if not response:
        return None
    body = response.content
    return body if body else None
def post_message(self, message: Notification, **kwargs) -> None:
"""
发送消息

View File

@@ -21,6 +21,20 @@ class VoceChatModule(_ModuleBase, _MessageBase[VoceChat]):
".tiff",
".svg",
)
_AUDIO_SUFFIXES = (
".mp3",
".m4a",
".wav",
".ogg",
".oga",
".opus",
".aac",
".amr",
".flac",
".mpga",
".mpeg",
".webm",
)
def init_module(self) -> None:
"""
@@ -118,6 +132,7 @@ class VoceChatModule(_ModuleBase, _MessageBase[VoceChat]):
content_type = detail.get("content_type") or ""
content = detail.get("content")
images = self._extract_images(detail)
audio_refs = self._extract_audio_refs(detail)
text = None
if content_type in ("text/plain", "text/markdown") and isinstance(content, str):
text = content
@@ -132,14 +147,15 @@ class VoceChatModule(_ModuleBase, _MessageBase[VoceChat]):
userid = f"UID#{msg_body.get('from_uid')}"
# 处理消息内容
if (text or images) and userid:
if (text or images or audio_refs) and userid:
logger.info(
f"收到来自 {client_config.name} 的VoceChat消息"
f"userid={userid}, text={text}, images={len(images) if images else 0}"
f"userid={userid}, text={text}, images={len(images) if images else 0}, "
f"audios={len(audio_refs) if audio_refs else 0}"
)
return CommingMessage(channel=MessageChannel.VoceChat, source=client_config.name,
userid=userid, username=userid, text=text or "",
images=images)
images=images, audio_refs=audio_refs)
except Exception as err:
logger.error(f"VoceChat消息处理发生错误{str(err)}")
return None
@@ -182,6 +198,37 @@ class VoceChatModule(_ModuleBase, _MessageBase[VoceChat]):
return [f"vocechat://file/{quote(file_path, safe='')}"]
return None
@classmethod
def _extract_audio_refs(cls, detail: dict) -> Optional[List[str]]:
    """
    Build an audio file reference from a VoceChat message detail.

    Only details whose ``content_type`` is ``vocechat/file`` are considered.
    The file counts as audio when the MIME type found in ``properties``
    starts with ``audio/`` or the file name ends with one of
    ``cls._AUDIO_SUFFIXES``.

    :param detail: the message detail dict
    :return: a one-element list ``["vocechat://file/<percent-encoded path>"]``,
             or None when the detail is not an audio file or has no string path
    """
    if (detail.get("content_type") or "") != "vocechat/file":
        return None

    properties = detail.get("properties") or {}

    def first_set(*keys):
        # Return the first truthy property among the given keys, else None.
        for key in keys:
            candidate = properties.get(key)
            if candidate:
                return candidate
        return None

    mime_type = (first_set("content_type", "mime_type", "contentType") or "").lower()
    file_path = first_set("path", "file_path", "storage_path") or detail.get("content")
    # Without an explicit name, fall back to the last segment of the path.
    fallback_name = str(file_path).rsplit("/", 1)[-1] if file_path else ""
    file_name = (first_set("name", "filename") or fallback_name).lower()

    if not mime_type.startswith("audio/") and not file_name.endswith(cls._AUDIO_SUFFIXES):
        return None
    if not isinstance(file_path, str) or not file_path:
        return None
    return [f"vocechat://file/{quote(file_path, safe='')}"]
def post_message(self, message: Notification, **kwargs) -> None:
"""
发送消息
@@ -255,3 +302,22 @@ class VoceChatModule(_ModuleBase, _MessageBase[VoceChat]):
return None
file_path = unquote(image_ref.replace("vocechat://file/", "", 1))
return client.download_file_to_data_url(file_path)
def download_vocechat_file_bytes(self, file_ref: str, source: str) -> Optional[bytes]:
    """
    Fetch a VoceChat file through the configured client and return its raw bytes.

    :param file_ref: reference of the form ``vocechat://file/<percent-encoded path>``
    :param source: configuration name handed to ``self.get_config``
    :return: the first element of the pair returned by the client's
             ``download_file``, or None when the reference is not a VoceChat
             file reference, no configuration or client instance is available,
             or the download yields nothing
    """
    scheme = "vocechat://file/"
    if not (file_ref and file_ref.startswith(scheme)):
        return None
    client_config = self.get_config(source)
    client = self.get_instance(client_config.name) if client_config else None
    if not client:
        return None
    # Drop the scheme prefix, then undo the percent-encoding to recover the path.
    downloaded = client.download_file(unquote(file_ref[len(scheme):]))
    if not downloaded:
        return None
    payload, _ = downloaded
    return payload