feat: add user-friendly handling for unsupported image input errors in agent execution

2026-06-07 16:50:18 +08:00 · 2026-05-14 20:36:14 +08:00
parent 7b27b7fd16
commit 23784f614b
2 changed files with 187 additions and 1 deletions
--- a/app/agent/init.py
+++ b/app/agent/init.py
@@ -169,6 +169,7 @@ class ReplyMode(str, Enum):


 HEARTBEAT_SESSION_PREFIX = "__agent_heartbeat_"
+UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入，请更换支持图片输入的模型，或在系统设置中关闭图片输入支持后重试。"  # user-facing notice: the current model does not support image input; switch to a model that does, or turn off image input support in system settings and retry


 class MoviePilotAgent:
@@ -376,6 +377,92 @@ class MoviePilotAgent:
            return "".join(text_parts)
        return str(content)

+    @classmethod
+    def _has_image_input_content(cls, content: Any) -> bool:
+        """
+        Check whether the message content contains an image block that would actually be sent to the model; lists and dicts are searched recursively, any other value (e.g. plain text) yields False.
+        An "images" field inside structured JSON text is only a description for the Agent to read and must not be treated as image input.
+        """
+        if isinstance(content, list):
+            return any(cls._has_image_input_content(item) for item in content)
+        if not isinstance(content, dict):
+            return False
+
+        block_type = str(content.get("type") or "").lower()  # a missing or falsy "type" is normalized to ""
+        if block_type in {"image", "image_url", "input_image"}:
+            return True
+        if content.get("image_url") or content.get("image"):  # dict without a recognized type but with a truthy image payload key
+            return True
+        return any(cls._has_image_input_content(value) for value in content.values())  # otherwise descend into every nested value
+
+    @classmethod
+    def _messages_have_image_input(cls, messages: List[BaseMessage]) -> bool:
+        """Check whether the message list submitted to the model this turn contains image input; a falsy list counts as empty and a message without a content attribute is inspected as None."""
+        return any(
+            cls._has_image_input_content(getattr(message, "content", None))
+            for message in messages or []
+        )
+
+    @staticmethod
+    def _exception_detail_text(error: Exception) -> str:
+        """
+        Collect the text from an exception object that can be used for matching, joined by single spaces.
+        Error details from OpenAI-compatible endpoints may be hidden in attributes such as body/code/status_code.
+        """
+        parts = [str(error)]
+        for attr in ("message", "code", "status_code"):
+            value = getattr(error, attr, None)  # attributes are optional; absent ones are skipped
+            if value is not None:
+                parts.append(str(value))
+        body = getattr(error, "body", None)
+        if body is not None:
+            try:
+                parts.append(json.dumps(body, ensure_ascii=False))  # keep non-ASCII text unescaped so it stays matchable
+            except (TypeError, ValueError):
+                parts.append(str(body))  # body is not JSON-serializable; fall back to its string form
+        return " ".join(part for part in parts if part)  # empty fragments are dropped
+
+    @classmethod
+    def _is_unsupported_image_input_error(cls, error: Exception) -> bool:
+        """
+        Decide whether the model service is rejecting image input, by case-insensitive substring matching on the exception detail text.
+        Covers error wording commonly seen from OpenAI and OpenAI-compatible services, and avoids treating an ordinary 404 as an image capability problem.
+        """
+        detail = cls._exception_detail_text(error).lower()
+        if "no endpoints found that support image input" in detail:
+            return True
+        if "image input" not in detail and "images" not in detail:  # the text must mention images at all
+            return False
+        return any(  # ...and must also carry one of the "unsupported" markers below
+            marker in detail
+            for marker in (
+                "does not support",
+                "do not support",
+                "not support",  # NOTE(review): as a substring this already covers the two entries above and "not supported"
+                "not supported",
+                "unsupported",
+                "no endpoint",  # NOTE(review): as a substring this already covers "no endpoints"
+                "no endpoints",
+            )
+        )
+
+    async def _dispatch_execution_notice(self, message: str) -> None:
+        """
+        Turn an expected execution-layer failure into a user-readable notice; an empty message is a no-op.
+        Delivery follows the current reply mode so that background capture tasks do not bypass the CAPTURE_ONLY constraint.
+        """
+        if not message:
+            return
+        self._emit_output(message)  # always forwarded to the output callback, before the reply-sent check
+        if self._tool_context.get("user_reply_sent"):  # a reply was already flagged as sent; do not send or persist again
+            return
+
+        title = "MoviePilot助手" if self.is_background else ""
+        if self.should_dispatch_reply:
+            await self.send_agent_message(message, title=title)
+        elif self.persist_output_message:  # not dispatching: only store the message when persistence is enabled
+            await self._save_agent_message_to_db(message, title=title)
+
    def _emit_output(self, text: str):
        """
        输出当前流式文本到外部回调。
@@ -741,6 +828,12 @@ class MoviePilotAgent:
            logger.info(f"Agent执行被取消: session_id={self.session_id}")
            return "任务已取消", {}
        except Exception as e:
+            if self._messages_have_image_input(messages) and self._is_unsupported_image_input_error(e):
+                logger.warning(
+                    f"当前模型不支持图片输入，已向用户发送友好提示: {e}"
+                )
+                await self._dispatch_execution_notice(UNSUPPORTED_IMAGE_INPUT_MESSAGE)
+                return UNSUPPORTED_IMAGE_INPUT_MESSAGE, {}
            logger.error(f"Agent执行失败: {e} - {traceback.format_exc()}")
            return str(e), {}
        finally: