mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-15 04:32:09 +08:00
feat: add captcha recognition agent tool
This commit is contained in:
@@ -37,6 +37,7 @@ from app.agent.tools.impl.query_media_detail import QueryMediaDetailTool
|
||||
from app.agent.tools.impl.search_torrents import SearchTorrentsTool
|
||||
from app.agent.tools.impl.get_search_results import GetSearchResultsTool
|
||||
from app.agent.tools.impl.search_web import SearchWebTool
|
||||
from app.agent.tools.impl.recognize_captcha import RecognizeCaptchaTool
|
||||
from app.agent.tools.impl.send_message import SendMessageTool
|
||||
from app.agent.tools.impl.ask_user_choice import AskUserChoiceTool
|
||||
from app.agent.tools.impl.send_local_file import SendLocalFileTool
|
||||
@@ -165,6 +166,7 @@ class MoviePilotToolFactory:
|
||||
SearchTorrentsTool,
|
||||
GetSearchResultsTool,
|
||||
SearchWebTool,
|
||||
RecognizeCaptchaTool,
|
||||
AddDownloadTool,
|
||||
QuerySubscribesTool,
|
||||
QuerySubscribeSharesTool,
|
||||
|
||||
167
app/agent/tools/impl/recognize_captcha.py
Normal file
167
app/agent/tools/impl/recognize_captcha.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""识别图形验证码工具。"""
|
||||
|
||||
import json
|
||||
from typing import Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
from app.agent.tools.tags import ToolTag
|
||||
from app.helper.browser import BrowserSessionHelper
|
||||
from app.helper.ocr import OcrHelper
|
||||
from app.log import logger
|
||||
|
||||
|
||||
class RecognizeCaptchaInput(BaseModel):
    """Input schema for the graphic captcha recognition tool."""

    # Free-form reason supplied by the agent; not used for recognition itself.
    explanation: Optional[str] = Field(
        default=None,
        description="Clear explanation of why this captcha image needs to be recognized",
    )
    # The only required argument: where the captcha image comes from.
    image_url: str = Field(
        default=...,
        description=(
            "Captcha image URL obtained from the browser page, usually an img.src value. "
            "Supports http/https URLs and data:image/...;base64,... URLs."
        ),
    )
    # Optional request headers used when the image has to be downloaded.
    cookie: Optional[str] = Field(
        default=None,
        description=(
            "Optional Cookie header used to download the captcha image when the image URL "
            "requires the same authenticated browser session."
        ),
    )
    user_agent: Optional[str] = Field(
        default=None,
        description="Optional User-Agent used when downloading the captcha image.",
    )
    # Opt-in switch for local / private network image addresses.
    allow_private_network: bool = Field(
        default=False,
        description="Allow captcha image URLs on localhost, loopback, private, or link-local addresses.",
    )
|
||||
|
||||
|
||||
class RecognizeCaptchaTool(MoviePilotTool):
    """
    Graphic captcha recognition tool.

    Lets the agent read the text of a captcha image while it automates a
    browser login flow.
    """

    name: str = "recognize_captcha"
    tags: list[str] = [
        ToolTag.Read,
        ToolTag.Web,
    ]
    description: str = (
        "Recognize a graphic captcha image and return the captcha text. "
        "Use this after browser automation extracts a captcha img.src from the page. "
        "Pass cookie and user_agent when the image URL requires the current browser session. "
        "Supports http/https image URLs and data:image/...;base64,... URLs. "
        "For safety, localhost and private network URLs are blocked by default unless "
        "allow_private_network is true."
    )
    args_schema: Type[BaseModel] = RecognizeCaptchaInput

    @staticmethod
    def _is_data_image_url(image_url: Optional[str]) -> bool:
        """Return True when *image_url* is an inline ``data:image/...`` URL."""
        prefix = "data:image/"
        # Only the prefix matters; avoid lower-casing a multi-kilobyte payload.
        return str(image_url or "").lstrip()[:len(prefix)].lower() == prefix

    @staticmethod
    def _describe_image_url(image_url: Optional[str]) -> str:
        """
        Return a short description of the image source for messages and logs.

        Inline data URLs are replaced by a fixed placeholder so the embedded
        base64 payload never ends up in user messages or log files.
        """
        if RecognizeCaptchaTool._is_data_image_url(image_url):
            return "data image"
        return str(image_url or "")

    @staticmethod
    def _build_result(success: bool, captcha_text: str, message: str) -> str:
        """Serialize a recognition outcome to the JSON string returned to the agent."""
        return json.dumps(
            {
                "success": success,
                "captcha_text": captcha_text,
                "message": message,
            },
            ensure_ascii=False,
        )

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """Build a friendly progress message from the captcha image argument."""
        return f"识别图形验证码: {self._describe_image_url(kwargs.get('image_url'))}"

    @staticmethod
    def _recognize_captcha_sync(
        image_url: str,
        cookie: Optional[str] = None,
        user_agent: Optional[str] = None,
        allow_private_network: bool = False,
    ) -> str:
        """
        Validate the image address and recognize the captcha (blocking).

        Intended to be executed through ``run_blocking``.

        :param image_url: captcha image address
        :param cookie: Cookie header used when downloading the image
        :param user_agent: User-Agent used when downloading the image
        :param allow_private_network: whether local / private addresses are allowed
        :return: captcha text, or an empty string when nothing was recognized
        :raises ValueError: only when URL validation rejects the address
        :raises RuntimeError: when the OCR step fails with a ValueError
        """
        clean_url = (image_url or "").strip()
        if not clean_url:
            return ""
        # Inline data URLs carry the image themselves, so no network target to validate.
        if not RecognizeCaptchaTool._is_data_image_url(clean_url):
            BrowserSessionHelper.validate_url(
                clean_url,
                allow_private_network=allow_private_network,
            )
        try:
            return OcrHelper().get_captcha_text(
                image_url=clean_url,
                cookie=cookie,
                ua=user_agent,
            )
        except ValueError as err:
            # A ValueError here (e.g. a malformed JSON body from the OCR service)
            # is not a URL validation problem; re-raise it as a different type so
            # run() does not report it as an address validation failure.
            raise RuntimeError(f"OCR 服务响应解析失败: {err}") from err

    async def run(
        self,
        image_url: str,
        cookie: Optional[str] = None,
        user_agent: Optional[str] = None,
        allow_private_network: bool = False,
        **kwargs,
    ) -> str:
        """
        Recognize the graphic captcha text found at the given image address.

        :param image_url: captcha image address
        :param cookie: Cookie header used when downloading the image
        :param user_agent: User-Agent used when downloading the image
        :param allow_private_network: whether local / private addresses are allowed
        :return: JSON string with ``success``, ``captcha_text`` and ``message``
        """
        # Log a summary only: a data URL would otherwise dump its whole payload.
        logger.info(
            f"执行工具: {self.name}, 参数: image_url={self._describe_image_url(image_url)}"
        )

        try:
            captcha_text = await self.run_blocking(
                "web",
                self._recognize_captcha_sync,
                image_url,
                cookie,
                user_agent,
                allow_private_network,
            )
            if captcha_text:
                return self._build_result(True, captcha_text, "验证码识别成功")
            return self._build_result(False, "", "验证码识别失败或未返回内容")
        except ValueError as err:
            # Raised by URL validation (blocked scheme / private address, ...).
            logger.warning(f"验证码图片地址校验失败: {str(err)}")
            return self._build_result(False, "", str(err))
        except Exception as err:
            logger.error(f"识别图形验证码失败: {str(err)}", exc_info=True)
            return self._build_result(
                False, "", f"识别图形验证码时发生错误: {str(err)}"
            )
|
||||
@@ -6,31 +6,63 @@ from app.utils.http import RequestUtils
|
||||
|
||||
|
||||
class OcrHelper:
    """
    OCR helper that obtains a captcha image and asks the OCR service for its text.
    """

    _ocr_b64_url = f"{settings.OCR_HOST}/captcha/base64"

    def get_captcha_text(
        self,
        image_url: Optional[str] = None,
        image_b64: Optional[str] = None,
        cookie: Optional[str] = None,
        ua: Optional[str] = None,
    ) -> str:
        """
        Fetch the captcha image for the given address and recognize its content.

        :param image_url: image address; ``data:image/...;base64,...`` URLs are
            decoded locally instead of being downloaded
        :param image_b64: image as base64, used when no image could be obtained
            from ``image_url``
        :param cookie: cookie used when downloading the image
        :param ua: User-Agent used when downloading the image
        :return: recognized captcha text, or an empty string on failure
        """
        image_b64 = self._normalize_image_base64(image_b64)
        if image_url:
            data_url_b64 = self._extract_data_url_base64(image_url)
            if data_url_b64:
                image_b64 = data_url_b64
            elif image_url.strip().lower().startswith("data:"):
                # A data URL without a base64 image payload cannot be downloaded;
                # skip the HTTP attempt and fall back to image_b64 (if any).
                pass
            else:
                ret = RequestUtils(ua=ua,
                                   cookies=cookie).get_res(image_url)
                if ret is not None:
                    image_bin = ret.content
                    if not image_bin:
                        return ""
                    image_b64 = base64.b64encode(image_bin).decode()
        if not image_b64:
            return ""
        ret = RequestUtils(content_type="application/json").post_res(
            url=self._ocr_b64_url,
            json={"base64_img": image_b64})
        if not ret:
            return ""
        try:
            payload = ret.json()
        except ValueError:
            # Body is not valid JSON; honour the "empty string on failure" contract.
            return ""
        if not isinstance(payload, dict):
            return ""
        result = payload.get("result")
        # Coerce to str so the declared return type holds for any truthy result.
        return str(result) if result else ""

    @staticmethod
    def _normalize_image_base64(image_b64: Optional[str]) -> str:
        """Normalize externally supplied image base64 (plain or data-URL form)."""
        if not image_b64:
            return ""
        return OcrHelper._extract_data_url_base64(image_b64) or image_b64.strip()

    @staticmethod
    def _extract_data_url_base64(image_url: Optional[str]) -> str:
        """
        Extract the bare base64 payload from a ``data:image/...;base64,...`` URL.

        Returns an empty string when the value is not such a URL.
        """
        image_url = (image_url or "").strip()
        if not image_url.lower().startswith("data:image/"):
            return ""
        metadata, separator, data = image_url.partition(",")
        if not separator or ";base64" not in metadata.lower():
            return ""
        return data.strip()
|
||||
|
||||
@@ -222,6 +222,20 @@ MoviePilot 也提供普通 REST API 给前端和自动化客户端使用。所
|
||||
|
||||
`browse_webpage` 使用持久浏览器会话,默认以当前 Agent 会话作为 `session_key`。`goto`、`snapshot`、`click`、`click_ref`、`fill`、`fill_ref`、`select`、`select_ref`、`wait` 等动作会返回页面快照,快照中的 `interactive_elements[].ref` 可用于后续 `*_ref` 操作。支持 `list_tabs`、`open_tab`、`focus_tab`、`close_tab` 管理标签页,支持 `close_session` 释放会话。出于安全考虑,默认拒绝访问 localhost、环回地址、私网地址和链路本地地址;确需访问可信内网或本机页面时,可显式传入 `allow_private_network: true`。
|
||||
|
||||
**`recognize_captcha` 图形验证码识别示例**:
|
||||
```json
|
||||
{
|
||||
"tool_name": "recognize_captcha",
|
||||
"arguments": {
|
||||
"image_url": "https://example.com/captcha.png",
|
||||
"cookie": "sid=...",
|
||||
"user_agent": "Mozilla/5.0 ..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`recognize_captcha` 用于浏览器自动化登录时识别普通图形验证码。智能体可先通过 `browse_webpage` 的 `evaluate` 动作从页面元素中提取 `img.src`,再把图片地址传给该工具;支持 `http/https` 图片地址和 `data:image/...;base64,...`。当验证码图片依赖当前浏览器会话时,可传入 Cookie 与 User-Agent。出于安全考虑,默认拒绝访问 localhost、环回地址、私网地址和链路本地地址;确需访问可信内网或本机验证码图片时,可显式传入 `allow_private_network: true`。
|
||||
|
||||
### 3. 获取工具详情
|
||||
|
||||
**GET** `/api/v1/mcp/tools/{tool_name}`
|
||||
|
||||
@@ -8,7 +8,7 @@ description: >-
|
||||
interaction, such as checking a site page, confirming a JavaScript-rendered
|
||||
result, testing login state, capturing visible errors, or updating and
|
||||
validating tracker site cookies.
|
||||
allowed-tools: browse_webpage search_web query_sites update_site_cookie test_site update_site
|
||||
allowed-tools: browse_webpage recognize_captcha search_web query_sites update_site_cookie test_site update_site
|
||||
---
|
||||
|
||||
# Browser Use
|
||||
@@ -41,6 +41,9 @@ dedicated tool can complete the task more directly and safely.
|
||||
`get_content`, `screenshot`, `click`, `click_ref`, `fill`, `fill_ref`,
|
||||
`select`, `select_ref`, `evaluate`, `wait`, `list_tabs`, `open_tab`,
|
||||
`focus_tab`, `close_tab`, `close_session`.
|
||||
- `recognize_captcha` - Recognize graphic captcha text from an image URL or
|
||||
`data:image/...;base64,...` value extracted from the page. Pass Cookie and
|
||||
User-Agent when the image requires the current browser session.
|
||||
- `search_web` - Find current pages or official references before opening a
|
||||
target URL. It supports DDGS-backed `search_engine` (`auto`, `duckduckgo`,
|
||||
`google`, `brave`, etc.) and `site_url` for limiting results to a specified
|
||||
@@ -174,6 +177,28 @@ update_site_cookie site_identifier=<id> username="..." password="..." two_step_c
|
||||
Ask for missing username, password, or two-step code only when required for the
|
||||
operation. Do not expose secrets in the final answer.
|
||||
|
||||
### Login Page With A Graphic Captcha
|
||||
|
||||
When a user explicitly asks to complete a login flow that contains a normal
|
||||
graphic captcha:
|
||||
|
||||
1. Open the login page and inspect the form with `snapshot`.
|
||||
2. Extract the captcha image URL with `evaluate`, for example:
|
||||
|
||||
```text
|
||||
browse_webpage action="evaluate" script="() => document.querySelector('img[src*=\"captcha\"], img[alt*=\"验证码\"], img[title*=\"验证码\"]')?.src || ''"
|
||||
```
|
||||
|
||||
3. If the captcha image needs session cookies, extract `document.cookie` and the
|
||||
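current `navigator.userAgent` with `evaluate`. `HttpOnly` cookies are not exposed through `document.cookie`, so some session cookies may be missing.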
|
||||
4. Call `recognize_captcha image_url="<img.src>"` and pass `cookie` /
|
||||
`user_agent` when needed.
|
||||
5. Fill the returned `captcha_text`, submit the form, and verify the login
|
||||
result.
|
||||
|
||||
If recognition fails, refresh the captcha once and retry. Stop after a second
|
||||
failure and tell the user manual input is needed.
|
||||
|
||||
### Inspect A Tracker Page
|
||||
|
||||
When the user asks what is visible on a site page:
|
||||
@@ -187,8 +212,9 @@ When the user asks what is visible on a site page:
|
||||
|
||||
- Ask before submitting forms that create, delete, purchase, publish, or change
|
||||
account/security settings.
|
||||
- Never solve captchas, bypass access controls, or scrape private content beyond
|
||||
the user's explicit task.
|
||||
- Solve graphic captchas only for a user-requested login flow. Do not use this
|
||||
to bypass access controls, defeat anti-bot challenges, or scrape private
|
||||
content beyond the user's explicit task.
|
||||
- Do not print passwords, tokens, cookies, two-step secrets, or full session
|
||||
headers in the response.
|
||||
- Localhost, loopback, private, and link-local URLs are blocked by default. Set
|
||||
|
||||
134
tests/test_agent_recognize_captcha_tool.py
Normal file
134
tests/test_agent_recognize_captcha_tool.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
from unittest.mock import patch
|
||||
|
||||
from app.agent.tools.factory import MoviePilotToolFactory
|
||||
from app.agent.tools.impl.recognize_captcha import RecognizeCaptchaTool
|
||||
from app.agent.tools.manager import MoviePilotToolsManager
|
||||
from app.helper.ocr import OcrHelper
|
||||
|
||||
|
||||
class _FakeResponse:
    """Minimal stand-in for ``requests.Response`` used by these tests."""

    def __init__(self, content: bytes = b"", payload: dict = None):
        """Store the raw body bytes and the JSON payload to hand back."""
        self.content = content
        # A missing (or empty) payload is represented as an empty dict.
        self.payload = payload if payload else {}

    def json(self) -> dict:
        """Return the JSON payload configured for the test."""
        return self.payload

    def __bool__(self) -> bool:
        """Always truthy, like a successful ``requests.Response``."""
        return True
|
||||
|
||||
|
||||
def test_factory_registers_recognize_captcha_tool():
    """The tool factory should register the graphic captcha recognition tool."""
    # Plugin-provided tools are stubbed out so only built-in tools are created.
    plugin_tools_patch = patch(
        "app.agent.tools.factory.PluginManager.get_plugin_agent_tools",
        return_value=[],
    )
    with plugin_tools_patch:
        created_tools = MoviePilotToolFactory.create_tools(
            session_id="captcha-session",
            user_id="10001",
        )

    assert "recognize_captcha" in {created.name for created in created_tools}
|
||||
|
||||
|
||||
def test_mcp_tool_manager_exposes_recognize_captcha_schema():
    """The MCP tool manager should expose the captcha tool's parameters."""
    captcha_tool = RecognizeCaptchaTool(session_id="captcha-session", user_id="10001")

    # The factory is patched so the manager only ever sees the captcha tool.
    with patch(
        "app.agent.tools.manager.MoviePilotToolFactory.create_tools",
        return_value=[captcha_tool],
    ):
        manager = MoviePilotToolsManager(is_admin=True)
        definitions = manager.list_tools()

    names = [definition.name for definition in definitions]
    schema = definitions[0].input_schema

    assert names == ["recognize_captcha"]
    assert "image_url" in schema["required"]
    for optional_field in ("cookie", "user_agent", "allow_private_network"):
        assert optional_field in schema["properties"]
|
||||
|
||||
|
||||
def test_ocr_helper_extracts_data_url_base64_without_downloading_image():
    """A data:image URL should be decoded locally and its base64 sent to OCR."""
    encoded = base64.b64encode(b"captcha-image").decode()
    data_url = f"data:image/png;base64,{encoded}"

    with patch("app.helper.ocr.RequestUtils") as request_utils:
        http_client = request_utils.return_value
        http_client.post_res.return_value = _FakeResponse(
            payload={"result": "a8k2"}
        )

        recognized = OcrHelper().get_captcha_text(image_url=data_url)

    assert recognized == "a8k2"
    # No download may happen; exactly one OCR request carries the payload.
    http_client.get_res.assert_not_called()
    http_client.post_res.assert_called_once()
    posted_json = http_client.post_res.call_args.kwargs["json"]
    assert posted_json == {"base64_img": encoded}
|
||||
|
||||
|
||||
def test_recognize_captcha_tool_returns_captcha_text_from_ocr_helper():
    """The tool should return a structured result the agent can use to fill the form."""
    captcha_tool = RecognizeCaptchaTool(session_id="captcha-session", user_id="10001")
    ocr_target = "app.agent.tools.impl.recognize_captcha.OcrHelper.get_captcha_text"

    async def _invoke():
        """Run the tool once with the OCR helper mocked out."""
        with patch(ocr_target, return_value="x7p9") as ocr_mock:
            raw = await captcha_tool.run(
                image_url="https://example.com/captcha.png",
                cookie="sid=abc",
                user_agent="MoviePilotTest/1.0",
            )
        return raw, ocr_mock

    raw_result, ocr_mock = asyncio.run(_invoke())

    expected_payload = {
        "success": True,
        "captcha_text": "x7p9",
        "message": "验证码识别成功",
    }
    assert json.loads(raw_result) == expected_payload
    ocr_mock.assert_called_once_with(
        image_url="https://example.com/captcha.png",
        cookie="sid=abc",
        ua="MoviePilotTest/1.0",
    )
|
||||
|
||||
|
||||
def test_recognize_captcha_tool_blocks_private_network_by_default():
    """By default the tool should reject loopback and private image addresses."""
    captcha_tool = RecognizeCaptchaTool(session_id="captcha-session", user_id="10001")
    ocr_target = "app.agent.tools.impl.recognize_captcha.OcrHelper.get_captcha_text"

    with patch(ocr_target, return_value="x7p9") as ocr_mock:
        raw_result = asyncio.run(
            captcha_tool.run(image_url="http://127.0.0.1/captcha.png")
        )

    outcome = json.loads(raw_result)

    assert outcome["success"] is False
    assert outcome["captcha_text"] == ""
    assert "默认不允许访问本机或私网地址" in outcome["message"]
    # Validation must fail before the OCR helper is ever reached.
    ocr_mock.assert_not_called()
|
||||
Reference in New Issue
Block a user