refactor: clarify attachment data url handling

This commit is contained in:
jxxghp
2026-04-29 18:51:39 +08:00
parent 7299733960
commit 9ed5018cc2

View File

@@ -910,11 +910,13 @@ class MessageChain(ChainBase):
session_id = session_id or self._get_or_create_session_id(userid) session_id = session_id or self._get_or_create_session_id(userid)
self._bind_session_id(userid, session_id) self._bind_session_id(userid, session_id)
# 下载图片并转为base64 # 将可直接输入给 LLM 的附件统一转换为 data URL
original_images = images original_images = images
all_files = list(files or []) all_files = list(files or [])
if images and LLMHelper.supports_image_input(): if images and LLMHelper.supports_image_input():
images = self._download_images_to_base64(images, channel, source) images = self._download_attachments_to_data_urls(
images, channel, source
)
if original_images and not images and not user_message and not files: if original_images and not images and not user_message and not files:
self.post_message( self.post_message(
Notification( Notification(
@@ -922,7 +924,7 @@ class MessageChain(ChainBase):
source=source, source=source,
userid=userid, userid=userid,
username=username, username=username,
title="图片读取失败,请稍后重试", title="附件读取失败,请稍后重试",
) )
) )
return return
@@ -940,7 +942,7 @@ class MessageChain(ChainBase):
source=source, source=source,
userid=userid, userid=userid,
username=username, username=username,
title="图片读取失败,请稍后重试", title="附件读取失败,请稍后重试",
) )
) )
return return
@@ -1120,72 +1122,94 @@ class MessageChain(ChainBase):
return match.group(1) return match.group(1)
return default return default
def _download_images_to_base64( def _download_attachments_to_data_urls(
self, self,
images: List[CommingMessage.MessageImage], attachments: List[CommingMessage.MessageImage],
channel: MessageChannel, channel: MessageChannel,
source: str, source: str,
) -> List[str]: ) -> Optional[List[str]]:
""" """
下载图片并转为base64 下载可直接提供给 LLM 的附件内容,并统一转换为 data URL。
""" """
images = CommingMessage.MessageImage.normalize_list(images) attachments = CommingMessage.MessageImage.normalize_list(attachments)
if not images: if not attachments:
return None return None
base64_images = [] data_urls = []
for image in images: for attachment in attachments:
img = image.ref attachment_ref = attachment.ref
try: try:
if img.startswith("data:"): before_count = len(data_urls)
base64_images.append(img) if attachment_ref.startswith("data:"):
elif img.startswith("tg://file_id/"): data_urls.append(attachment_ref)
file_id = img.replace("tg://file_id/", "") elif attachment_ref.startswith("tg://file_id/"):
file_id = attachment_ref.replace("tg://file_id/", "")
base64_data = self.run_module( base64_data = self.run_module(
"download_telegram_file_to_base64", "download_telegram_file_to_base64",
file_id=file_id, file_id=file_id,
source=source, source=source,
) )
if base64_data: if base64_data:
base64_images.append(f"data:image/jpeg;base64,{base64_data}") data_urls.append(f"data:image/jpeg;base64,{base64_data}")
logger.info( elif attachment_ref.startswith(
"图片下载成功: channel=%s, source=%s, input=%s, output=data:image/jpeg;base64...(omitted)", "wxwork://media_id/"
channel.value if channel else None, ) or attachment_ref.startswith(
source,
img,
)
elif img.startswith("wxwork://media_id/") or img.startswith(
"wxbot://image/" "wxbot://image/"
): ):
data_url = self.run_module( data_url = self.run_module(
"download_wechat_image_to_data_url", "download_wechat_image_to_data_url",
image_ref=img, image_ref=attachment_ref,
source=source, source=source,
) )
if data_url: if data_url:
base64_images.append(data_url) data_urls.append(data_url)
elif channel == MessageChannel.Slack: elif channel == MessageChannel.Slack:
data_url = self.run_module( data_url = self.run_module(
"download_slack_file_to_data_url", file_url=img, source=source "download_slack_file_to_data_url",
) file_url=attachment_ref,
if data_url:
base64_images.append(data_url)
elif img.startswith("vocechat://file/"):
data_url = self.run_module(
"download_vocechat_image_to_data_url",
image_ref=img,
source=source, source=source,
) )
if data_url: if data_url:
base64_images.append(data_url) data_urls.append(data_url)
elif img.startswith("http"): elif attachment_ref.startswith("vocechat://file/"):
resp = RequestUtils(timeout=30).get_res(img) data_url = self.run_module(
"download_vocechat_image_to_data_url",
image_ref=attachment_ref,
source=source,
)
if data_url:
data_urls.append(data_url)
elif attachment_ref.startswith("http"):
resp = RequestUtils(timeout=30).get_res(attachment_ref)
if resp and resp.content: if resp and resp.content:
base64_data = base64.b64encode(resp.content).decode() base64_data = base64.b64encode(resp.content).decode()
mime_type = resp.headers.get("Content-Type", "image/jpeg") mime_type = resp.headers.get("Content-Type", "image/jpeg")
base64_images.append(f"data:{mime_type};base64,{base64_data}") data_urls.append(f"data:{mime_type};base64,{base64_data}")
except Exception as e: else:
logger.error(f"下载图片失败: {img}, error: {e}") logger.debug(
return base64_images if base64_images else None "暂不支持直接转换为 data URL 的附件引用: channel=%s, source=%s, ref=%s",
channel.value if channel else None,
source,
attachment_ref,
)
continue
if len(data_urls) > before_count:
logger.info(
"附件读取成功并已转换为 data URL: channel=%s, source=%s, ref=%s, mime_type=%s",
channel.value if channel else None,
source,
attachment_ref,
attachment.mime_type,
)
except Exception as err:
logger.error(
"附件读取失败,无法转换为 data URL: channel=%s, source=%s, ref=%s, error=%s",
channel.value if channel else None,
source,
attachment_ref,
err,
)
return data_urls if data_urls else None
def _build_image_attachments( def _build_image_attachments(
self, images: List[CommingMessage.MessageImage] self, images: List[CommingMessage.MessageImage]
@@ -1222,7 +1246,7 @@ class MessageChain(ChainBase):
source: str, source: str,
) -> Optional[List[dict]]: ) -> Optional[List[dict]]:
""" """
下载用户上传的文件,落盘到临时目录,并生成文本镜像供 Agent 使用 下载用户上传的附件,落盘到临时目录,并生成 Agent 可消费的文件描述
""" """
if not files: if not files:
return None return None
@@ -1259,7 +1283,7 @@ class MessageChain(ChainBase):
} }
) )
except Exception as err: except Exception as err:
logger.error(f"准备文件上下文失败: {attachment.ref}, error: {err}") logger.error(f"准备附件上下文失败: {attachment.ref}, error: {err}")
payload["error"] = str(err) payload["error"] = str(err)
prepared_files.append(payload) prepared_files.append(payload)
@@ -1269,7 +1293,7 @@ class MessageChain(ChainBase):
self, file_ref: str, channel: MessageChannel, source: str self, file_ref: str, channel: MessageChannel, source: str
) -> Optional[bytes]: ) -> Optional[bytes]:
""" """
下载消息附件的原始字节。 下载消息附件的原始字节内容
""" """
if not file_ref: if not file_ref:
return None return None
@@ -1331,7 +1355,7 @@ class MessageChain(ChainBase):
resp = RequestUtils(timeout=30).get_res(file_ref) resp = RequestUtils(timeout=30).get_res(file_ref)
return resp.content if resp and resp.content else None return resp.content if resp and resp.content else None
logger.debug( logger.debug(
"暂不支持的文件引用: channel=%s, source=%s, ref=%s", "暂不支持的附件引用: channel=%s, source=%s, ref=%s",
channel.value if channel else None, channel.value if channel else None,
source, source,
file_ref, file_ref,