From 8f17b5246641364ceb509669f3aa31d65cf1daa1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:16:09 +0000 Subject: [PATCH 1/4] Initial plan From ca18705d88d3ffacecc9ef103d370f11b5f83998 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:20:06 +0000 Subject: [PATCH 2/4] Reimplemented SearchWebTool using duckduckgo-search library Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com> --- app/agent/tools/impl/search_web.py | 113 +++++++++++++---------------- requirements.in | 1 + 2 files changed, 51 insertions(+), 63 deletions(-) diff --git a/app/agent/tools/impl/search_web.py b/app/agent/tools/impl/search_web.py index 2642b6d2..9a0e41c0 100644 --- a/app/agent/tools/impl/search_web.py +++ b/app/agent/tools/impl/search_web.py @@ -9,7 +9,6 @@ from pydantic import BaseModel, Field from app.agent.tools.base import MoviePilotTool from app.core.config import settings from app.log import logger -from app.utils.http import AsyncRequestUtils class SearchWebInput(BaseModel): @@ -47,8 +46,8 @@ class SearchWebTool(MoviePilotTool): # 限制最大结果数 max_results = min(max(1, max_results or 5), 10) - # 使用DuckDuckGo API进行搜索 - search_results = await self._search_duckduckgo_api(query, max_results) + # 使用 duckduckgo-search 库进行搜索 + search_results = await self._search_duckduckgo(query, max_results) if not search_results: return f"未找到与 '{query}' 相关的搜索结果" @@ -65,9 +64,9 @@ class SearchWebTool(MoviePilotTool): return error_message @staticmethod - async def _search_duckduckgo_api(query: str, max_results: int) -> list: + async def _search_duckduckgo(query: str, max_results: int) -> list: """ - 使用DuckDuckGo API进行搜索 + 使用 duckduckgo-search 库进行搜索 Args: query: 搜索查询 @@ -77,71 +76,59 @@ class SearchWebTool(MoviePilotTool): 搜索结果列表 """ try: - # DuckDuckGo Instant Answer API - api_url = "https://api.duckduckgo.com/" - params = { - "q": query, - "format": "json", - "no_html": "1", - "skip_disambig": "1" - } + from duckduckgo_search import DDGS + import asyncio - # 使用代理(如果配置了) - http_utils = AsyncRequestUtils( - proxies=settings.PROXY, - timeout=10 - ) - - data = await http_utils.get_json(api_url, params=params) - - results = [] - - if data: - # 处理AbstractText(摘要) - if data.get("AbstractText"): - results.append({ - "title": data.get("Heading", query), - "snippet": data.get("AbstractText", ""), - "url": data.get("AbstractURL", ""), - "source": "DuckDuckGo Abstract" - }) - - # 处理RelatedTopics(相关主题) - related_topics = data.get("RelatedTopics", []) - for topic in related_topics[:max_results - len(results)]: - if isinstance(topic, dict): - text = topic.get("Text", "") - first_url = topic.get("FirstURL", "") - if text and first_url: - # 提取标题(通常在" - "之前) - title = text.split(" - ")[0] if " - " in text else text[:100] - snippet = text - + # duckduckgo-search 是同步库,需要在 executor 中运行 + def sync_search(): + results = [] + try: + # 使用代理(如果配置了) + ddgs_kwargs = {} + if settings.PROXY: + # duckduckgo-search 支持代理配置 + if isinstance(settings.PROXY, dict): + proxy_url = settings.PROXY.get('http') or settings.PROXY.get('https') + else: + proxy_url = settings.PROXY + + if proxy_url: + ddgs_kwargs['proxy'] = proxy_url + + # 设置超时 + ddgs_kwargs['timeout'] = 20 + + with DDGS(**ddgs_kwargs) as ddgs: + # 使用 text 方法进行搜索 + search_results = ddgs.text( + keywords=query, + max_results=max_results + ) + + for result in search_results: results.append({ - "title": title.strip(), - "snippet": snippet, - "url": first_url, - "source": "DuckDuckGo Related" + 'title': result.get('title', ''), + 'snippet': result.get('body', ''), + 'url': result.get('href', ''), + 'source': 'DuckDuckGo' }) + + except Exception as e: + logger.warning(f"duckduckgo-search 搜索失败: {e}") + raise - # 处理Results(搜索结果) - api_results = data.get("Results", []) - for result in api_results[:max_results - len(results)]: - if isinstance(result, dict): - title = result.get("Text", "") - url = result.get("FirstURL", "") - if title and url: - results.append({ - "title": title, - "snippet": result.get("Text", ""), - "url": url, - "source": "DuckDuckGo Results" - }) + return results - return results[:max_results] + # 在线程池中运行同步搜索 + loop = asyncio.get_event_loop() + results = await loop.run_in_executor(None, sync_search) + return results + except ImportError: + logger.error("duckduckgo-search 库未安装,请运行: pip install duckduckgo-search") + return [] except Exception as e: - logger.warning(f"DuckDuckGo API搜索失败: {e}") + logger.warning(f"DuckDuckGo 搜索失败: {e}") return [] @staticmethod diff --git a/requirements.in b/requirements.in index 6eb374f3..451e4cda 100644 --- a/requirements.in +++ b/requirements.in @@ -91,3 +91,4 @@ langchain-deepseek~=0.1.4 langchain-experimental~=0.3.4 openai~=1.108.2 google-generativeai~=0.8.5 +duckduckgo-search~=7.2.1 From eca339d107d4117a59a0204fc889e50a1d62c2ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:22:45 +0000 Subject: [PATCH 3/4] Address code review comments: improve code organization and use modern asyncio Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com> --- app/agent/tools/impl/search_web.py | 43 ++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/app/agent/tools/impl/search_web.py b/app/agent/tools/impl/search_web.py index 9a0e41c0..9bafd28b 100644 --- a/app/agent/tools/impl/search_web.py +++ b/app/agent/tools/impl/search_web.py @@ -1,15 +1,20 @@ """搜索网络内容工具""" +import asyncio import json import re from typing import Optional, Type +from duckduckgo_search import DDGS from pydantic import BaseModel, Field from app.agent.tools.base import MoviePilotTool from app.core.config import settings from app.log import logger +# 搜索超时时间(秒) +SEARCH_TIMEOUT = 20 + class SearchWebInput(BaseModel): """搜索网络内容工具的输入参数模型""" @@ -63,6 +68,25 @@ class SearchWebTool(MoviePilotTool): logger.error(f"搜索网络内容失败: {e}", exc_info=True) return error_message + @staticmethod + def _get_proxy_url(proxy_setting) -> Optional[str]: + """ + 从代理设置中提取代理URL + + Args: + proxy_setting: 代理设置,可以是字符串或字典 + + Returns: + 代理URL字符串,如果没有配置则返回None + """ + if not proxy_setting: + return None + + if isinstance(proxy_setting, dict): + return proxy_setting.get('http') or proxy_setting.get('https') + + return proxy_setting + @staticmethod async def _search_duckduckgo(query: str, max_results: int) -> list: """ @@ -76,27 +100,18 @@ class SearchWebTool(MoviePilotTool): 搜索结果列表 """ try: - from duckduckgo_search import DDGS - import asyncio - # duckduckgo-search 是同步库,需要在 executor 中运行 def sync_search(): results = [] try: # 使用代理(如果配置了) ddgs_kwargs = {} - if settings.PROXY: - # duckduckgo-search 支持代理配置 - if isinstance(settings.PROXY, dict): - proxy_url = settings.PROXY.get('http') or settings.PROXY.get('https') - else: - proxy_url = settings.PROXY - - if proxy_url: - ddgs_kwargs['proxy'] = proxy_url + proxy_url = SearchWebTool._get_proxy_url(settings.PROXY) + if proxy_url: + ddgs_kwargs['proxy'] = proxy_url # 设置超时 - ddgs_kwargs['timeout'] = 20 + ddgs_kwargs['timeout'] = SEARCH_TIMEOUT with DDGS(**ddgs_kwargs) as ddgs: # 使用 text 方法进行搜索 @@ -120,7 +135,7 @@ class SearchWebTool(MoviePilotTool): return results # 在线程池中运行同步搜索 - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() results = await loop.run_in_executor(None, sync_search) return results From 9641d33040ee7228a04ce08dc91246c247420a37 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 23 Jan 2026 15:23:52 +0000 Subject: [PATCH 4/4] Fix generator handling and update error message to reference requirements.in Co-authored-by: jxxghp <51039935+jxxghp@users.noreply.github.com> --- app/agent/tools/impl/search_web.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/agent/tools/impl/search_web.py b/app/agent/tools/impl/search_web.py index 9bafd28b..02f0430e 100644 --- a/app/agent/tools/impl/search_web.py +++ b/app/agent/tools/impl/search_web.py @@ -115,10 +115,10 @@ class SearchWebTool(MoviePilotTool): with DDGS(**ddgs_kwargs) as ddgs: # 使用 text 方法进行搜索 - search_results = ddgs.text( + search_results = list(ddgs.text( keywords=query, max_results=max_results - ) + )) for result in search_results: results.append({ @@ -140,7 +140,7 @@ class SearchWebTool(MoviePilotTool): return results except ImportError: - logger.error("duckduckgo-search 库未安装,请运行: pip install duckduckgo-search") + logger.error("duckduckgo-search 库未安装,请在 requirements.in 中添加依赖后重新构建") return [] except Exception as e: logger.warning(f"DuckDuckGo 搜索失败: {e}")