Compare commits

...

11 Commits

Author SHA1 Message Date
jxxghp
79bfeaf2af Remove stream reset before tool calls, keep model thinking text visible 2026-04-25 23:12:34 +08:00
jxxghp
4fe41ba5e9 Update base.py 2026-04-25 22:16:15 +08:00
jxxghp
14d6e2febc Refine agent prompts for concise professional replies 2026-04-25 22:04:35 +08:00
jxxghp
97c7e71207 Update Agent Prompt.txt 2026-04-25 21:51:47 +08:00
jxxghp
8f29a218ea chore: bump version to v2.10.5 2026-04-25 12:55:33 +08:00
jxxghp
4fd5aa3eb6 fix: improve DeepSeek reasoning_content payload handling and update langchain dependencies 2026-04-25 12:46:21 +08:00
jxxghp
bfc27d151c Update ask_user_choice.py 2026-04-25 11:36:36 +08:00
jxxghp
f2b56b8f40 Update ask_user_choice.py 2026-04-25 11:35:32 +08:00
jxxghp
a05ffc07d4 refactor: remove legacy LLM_DISABLE_THINKING and LLM_REASONING_EFFORT config, unify thinking_level handling
- Eliminate support for LLM_DISABLE_THINKING and LLM_REASONING_EFFORT in config, code, and tests
- Simplify LLM thinking level logic to rely solely on LLM_THINKING_LEVEL
- Refactor LLMHelper and related endpoints to remove legacy parameter handling
- Update system API and test utilities to match new configuration structure
- Minor code cleanup and formatting improvements
2026-04-25 10:42:03 +08:00
jxxghp
4a81417fb7 fix: preserve deepseek reasoning content in tool loops 2026-04-25 09:37:01 +08:00
jxxghp
c7fa3dc863 feat: unify llm thinking level controls 2026-04-24 19:50:23 +08:00
15 changed files with 800 additions and 292 deletions

View File

@@ -73,7 +73,7 @@ class _ThinkTagStripper:
on_output(self.buffer[:start_idx])
emitted = True
self.in_think_tag = True
self.buffer = self.buffer[start_idx + 7:]
self.buffer = self.buffer[start_idx + 7 :]
else:
# Check whether the buffer ends with an incomplete prefix of <think>
partial_match = False
@@ -93,7 +93,7 @@ class _ThinkTagStripper:
end_idx = self.buffer.find("</think>")
if end_idx != -1:
self.in_think_tag = False
self.buffer = self.buffer[end_idx + 8:]
self.buffer = self.buffer[end_idx + 8 :]
else:
# Check whether the buffer ends with an incomplete prefix of </think>
partial_match = False
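The magic numbers in the two hunks above are tag lengths: `start_idx + 7` skips past a complete `<think>` (7 characters) and `end_idx + 8` past `</think>` (8 characters). A minimal usage sketch of the stripper, assuming only the `process`/`flush` interface visible elsewhere in this diff:

# Hypothetical sketch: _ThinkTagStripper drops <think>...</think> spans from a
# streamed token sequence, even when a tag arrives split across chunks.
stripper = _ThinkTagStripper()
out: list[str] = []
for chunk in ["Hello <thi", "nk>hidden reasoning</thi", "nk> world"]:
    stripper.process(chunk, out.append)  # emits only non-think text
stripper.flush(out.append)  # flush any buffered tail
print("".join(out))  # expected: "Hello  world" (the think span is removed)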
@@ -371,10 +371,6 @@ class MoviePilotAgent:
:param on_token: callback invoked when a valid token is received
"""
stripper = _ThinkTagStripper()
# In non-VERBOSE mode, track the current langgraph_step to detect intermediate-step model output
# "Plan/thinking" text the model emits before a tool call is cleared once a tool_call is detected
current_model_step = -1
has_emitted_in_step = False
async for chunk in agent.astream(
messages,
@@ -388,25 +384,13 @@ class MoviePilotAgent:
if not token or not hasattr(token, "tool_call_chunks"):
continue
# Get the current step info
step = metadata.get("langgraph_step", -1) if metadata else -1
if token.tool_call_chunks:
# A tool-call token means the current step is an intermediate step
# In non-VERBOSE mode, clear the "plan/thinking" text emitted earlier in this step
if not settings.AI_AGENT_VERBOSE and has_emitted_in_step:
self.stream_handler.reset()
stripper.reset()
has_emitted_in_step = False
# Clear any lingering partial <think> tag state in the stripper's internal buffer
stripper.reset()
continue
# The following handles plain-text tokens (tool_call_chunks is empty)
# On step change, reset per-step emit tracking
if step != current_model_step:
current_model_step = step
has_emitted_in_step = False
# Skip model thinking/reasoning content (e.g. DeepSeek R1's reasoning_content)
additional = getattr(token, "additional_kwargs", None)
if additional and additional.get("reasoning_content"):
@@ -416,8 +400,7 @@ class MoviePilotAgent:
# content may be a string or a list of content blocks; filter out thinking-type blocks
content = self._extract_text_content(token.content)
if content:
if stripper.process(content, on_token):
has_emitted_in_step = True
stripper.process(content, on_token)
stripper.flush(on_token)
@@ -457,7 +440,10 @@ class MoviePilotAgent:
agent=agent,
messages={"messages": messages},
config=agent_config,
on_token=lambda token: (self.stream_handler.emit(token), self._emit_output(token)),
on_token=lambda token: (
self.stream_handler.emit(token),
self._emit_output(token),
),
)
# Stop streaming output; return whether everything was already sent via streaming edits, plus the final text
@@ -1004,7 +990,6 @@ class AgentManager:
)
try:
await self.process_message(
session_id=session_id,
user_id=user_id,

View File

@@ -124,34 +124,29 @@ Default memory file: {memory_file}
</agent_memory>
<memory_onboarding>
**IMPORTANT — First-time user detected!**
First-time user detected.
The memory directory is currently empty. This means this is likely the user's first interaction, or their preferences have been reset.
The memory directory is currently empty. This likely means the user has no saved long-term preferences yet.
**Your MANDATORY first action in this conversation:**
Before doing ANYTHING else (before answering questions, before calling tools, before performing any task), you MUST proactively greet the user warmly and ask them about their preferences so you can provide personalized service going forward. Specifically, ask about:
**Behavior requirements:**
- Do NOT interrupt the current task just to collect preferences.
- Do NOT proactively greet warmly, build rapport, or ask a long onboarding questionnaire.
- Default to a concise, professional style until the user states a preference.
- Only ask for preferences when they are directly useful for the current task, or when a short follow-up question at the end would clearly help future interactions.
1. **How to address the user** — Ask what name or nickname they'd like you to call them (e.g., a real name, a nickname, or a fun title). This is the top priority for building a personal connection.
2. **Communication style preference** — Do they prefer a cute/playful tone (with emojis), a formal/professional tone, a concise/minimalist style, or something else?
3. **Media preferences** — What types of media do they primarily care about? (e.g., movies, TV shows, anime, documentaries, etc.)
4. **Quality preferences** — Do they have preferred video quality (4K, 1080p), codecs (H.265, H.264), or subtitle language preferences?
5. **Any other special requests** — Anything else they'd like you to always keep in mind?
**What to collect when useful:**
- Preferred communication style
- Media interests
- Quality / codec / subtitle preferences
- Any standing rules the user wants you to follow
**After the user replies**, you MUST immediately:
1. Use the `write_file` tool to save ALL their preferences to the memory file at: `{memory_file}`
2. Format the memory file in clean Markdown with clear sections (e.g., `## User Profile`, `## Communication Style`, `## Media Preferences`, etc.)
3. The `## User Profile` section MUST include the user's preferred name/nickname at the top
4. Only AFTER saving the preferences, proceed to help with whatever the user originally asked about (if anything)
5. From this point on, always address the user by their preferred name/nickname in conversations
6. You may also create additional `.md` files in the memory directory (`{memory_dir}`) for different topics as needed.
**When the user provides lasting preferences**, you MUST promptly save them to `{memory_file}` using `write_file` or `edit_file`.
**If the user skips the preference questions** and directly asks you to do something:
- Go ahead and help them with their request first
- But still ask about their preferences naturally at the end of the interaction
- Save whatever you learn about them (implicit or explicit) to the memory file
**Example onboarding flow:**
The greeting should introduce yourself, explain this is the first meeting, and ask the above questions in a numbered list. Adapt the tone to your persona defined in the base system prompt.
**Memory format requirements:**
- Use clean Markdown with short sections.
- Record only durable preferences and working rules.
- Do NOT invent personal details or preferred names.
- Do NOT force use of a nickname or personalized greeting.
</memory_onboarding>
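To illustrate the new format requirements, a compliant memory file might look like this (a hypothetical example; the section headings follow the suggestions from the earlier prompt and are not mandated):

## User Profile
- Language: English

## Communication Style
- Concise and professional; no emojis

## Media Preferences
- Mostly TV shows and documentaries; prefers 1080p, H.265, English subtitles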
<memory_guidelines>

View File

@@ -15,9 +15,12 @@ Core Capabilities:
<communication>
{verbose_spec}
- Tone: friendly, concise. Like a knowledgeable friend, not a corporate bot.
- Use emojis sparingly (1-3 per response): greetings, completions, errors.
- Tone: professional, concise, restrained.
- Be direct. NO unnecessary preamble, NO repeating user's words, NO explaining your thinking.
- Prioritize task progress over conversation. Answer only what is necessary to move the task forward.
- Do NOT flatter the user, praise the question, or use overly eager/service-oriented phrases.
- Do NOT use emojis, exclamation marks, cute language, or excessive apology.
- Prefer short declarative sentences. Default to one or two short paragraphs; use lists only when they improve scannability.
- Use Markdown for structured data. Use `inline code` for media titles/paths.
- Include key details (year, rating, resolution) but do NOT over-explain.
- Do not stop for approval on read-only operations. Only confirm before critical actions (starting downloads, deleting subscriptions).
@@ -34,6 +37,7 @@ Core Capabilities:
- NO filler phrases like "Let me help you", "Here are the results", "I found..." — skip all unnecessary preamble.
- NO repeating what user said.
- NO narrating your internal reasoning.
- NO praise, emotional cushioning, or unnecessary politeness padding.
- After task completion: one line summary only.
- When error occurs: brief acknowledgment + suggestion, then move on.
</response_format>

View File

@@ -81,9 +81,7 @@ class MoviePilotTool(BaseTool, metaclass=ABCMeta):
if messages:
merged_message = "\n\n".join(messages)
await self.send_tool_message(merged_message)
else:
# Non-VERBOSE: reset the buffer and update from scratch, keeping message-edit capability
self._stream_handler.reset()
# Non-VERBOSE: do not reset the stream; keep the model thinking text already emitted
else:
# Streaming not enabled: do not send any tool message content
pass

View File

@@ -106,7 +106,7 @@ class AskUserChoiceTool(MoviePilotTool):
):
return f"当前渠道 {channel.value} 不支持按钮选择"
max_per_row = ChannelCapabilityManager.get_max_buttons_per_row(channel)
max_per_row = 1
max_rows = ChannelCapabilityManager.get_max_button_rows(channel)
max_text_length = ChannelCapabilityManager.get_max_button_text_length(channel)
max_options = max_per_row * max_rows

View File

@@ -12,6 +12,7 @@ from anyio import Path as AsyncPath
from app.helper.sites import SitesHelper  # noqa
from fastapi import APIRouter, Body, Depends, HTTPException, Header, Request, Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from app import schemas
from app.chain.mediaserver import MediaServerChain
@@ -29,14 +30,14 @@ from app.db.user_oper import (
get_current_active_superuser_async,
get_current_active_user_async,
)
from app.helper.llm import LLMHelper, LLMTestError, LLMTestTimeout
from app.helper.image import ImageHelper
from app.helper.llm import LLMHelper, LLMTestTimeout
from app.helper.mediaserver import MediaServerHelper
from app.helper.message import MessageHelper
from app.helper.progress import ProgressHelper
from app.helper.rule import RuleHelper
from app.helper.subscribe import SubscribeHelper
from app.helper.system import SystemHelper
from app.helper.image import ImageHelper
from app.log import logger
from app.scheduler import Scheduler
from app.schemas import ConfigChangeEventData
@@ -45,7 +46,6 @@ from app.utils.crypto import HashUtils
from app.utils.http import RequestUtils, AsyncRequestUtils
from app.utils.security import SecurityUtils
from app.utils.url import UrlUtils
from pydantic import BaseModel
from version import APP_VERSION
router = APIRouter()
@@ -57,7 +57,7 @@ class LlmTestRequest(BaseModel):
enabled: Optional[bool] = None
provider: Optional[str] = None
model: Optional[str] = None
disable_thinking: Optional[bool] = None
thinking_level: Optional[str] = None
api_key: Optional[str] = None
base_url: Optional[str] = None
@@ -269,74 +269,6 @@ def _build_nettest_rules() -> list[dict[str, Any]]:
return rules
def _build_llm_test_data(
duration_ms: Optional[int] = None,
provider: Optional[str] = None,
model: Optional[str] = None,
) -> dict[str, Any]:
"""
Build the base response data for the LLM test endpoint.
"""
data = {
"provider": provider if provider is not None else settings.LLM_PROVIDER,
"model": model if model is not None else settings.LLM_MODEL,
}
if duration_ms is not None:
data["duration_ms"] = duration_ms
return data
def _normalize_llm_test_value(
value: Optional[str], *, empty_as_none: bool = False
) -> Optional[str]:
"""
Normalize LLM test fields coming from the frontend.
"""
if value is None:
return None
stripped = value.strip()
if empty_as_none and not stripped:
return None
return stripped
def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[str, Any]:
"""
Freeze the configuration needed for the current LLM test.
Prefer temporary parameters from the frontend; fall back to saved settings when absent, for backward compatibility with older callers.
"""
provider = settings.LLM_PROVIDER
model = settings.LLM_MODEL
disable_thinking = bool(getattr(settings, "LLM_DISABLE_THINKING", False))
api_key = settings.LLM_API_KEY
base_url = settings.LLM_BASE_URL
enabled = bool(settings.AI_AGENT_ENABLE)
if payload:
if payload.enabled is not None:
enabled = bool(payload.enabled)
if payload.provider is not None:
provider = _normalize_llm_test_value(payload.provider) or ""
if payload.model is not None:
model = _normalize_llm_test_value(payload.model) or ""
if payload.disable_thinking is not None:
disable_thinking = bool(payload.disable_thinking)
if payload.api_key is not None:
api_key = _normalize_llm_test_value(payload.api_key, empty_as_none=True)
if payload.base_url is not None:
base_url = _normalize_llm_test_value(payload.base_url, empty_as_none=True)
return {
"enabled": enabled,
"provider": provider,
"model": model,
"disable_thinking": disable_thinking,
"api_key": api_key,
"base_url": base_url,
}
def _sanitize_llm_test_error(message: str, api_key: Optional[str] = None) -> str:
"""
Scrub sensitive fields from error messages to avoid echoing the API key.
@@ -428,12 +360,12 @@ async def _close_nettest_response(response: Any) -> None:
async def fetch_image(
url: str,
proxy: Optional[bool] = None,
use_cache: bool = False,
if_none_match: Optional[str] = None,
cookies: Optional[str | dict] = None,
allowed_domains: Optional[set[str]] = None,
url: str,
proxy: Optional[bool] = None,
use_cache: bool = False,
if_none_match: Optional[str] = None,
cookies: Optional[str | dict] = None,
allowed_domains: Optional[set[str]] = None,
) -> Optional[Response]:
"""
Handle image caching logic, supporting HTTP caching and disk caching
@@ -455,6 +387,7 @@ async def fetch_image(
use_cache=use_cache,
cookies=cookies,
)
if content:
# Check If-None-Match
etag = HashUtils.md5(content)
@@ -467,16 +400,17 @@ async def fetch_image(
media_type=UrlUtils.get_mime_type(url, "image/jpeg"),
headers=headers,
)
return None
@router.get("/img/{proxy}", summary="图片代理")
async def proxy_img(
imgurl: str,
proxy: bool = False,
cache: bool = False,
use_cookies: bool = False,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
imgurl: str,
proxy: bool = False,
cache: bool = False,
use_cookies: bool = False,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
) -> Response:
"""
Image proxy with optional use of the proxy server; supports HTTP caching
@@ -505,9 +439,9 @@ async def proxy_img(
@router.get("/cache/image", summary="图片缓存")
async def cache_img(
url: str,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
url: str,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
) -> Response:
"""
Serve a locally cached image file with HTTP caching; if global image caching is enabled, use the disk cache
@@ -601,7 +535,7 @@ async def get_env_setting(_: User = Depends(get_current_active_user_async)):
@router.post("/env", summary="更新系统配置", response_model=schemas.Response)
async def set_env_setting(
env: dict, _: User = Depends(get_current_active_superuser_async)
env: dict, _: User = Depends(get_current_active_superuser_async)
):
"""
Update system environment variables (admin only)
@@ -636,9 +570,9 @@ async def set_env_setting(
@router.get("/progress/{process_type}", summary="实时进度")
async def get_progress(
request: Request,
process_type: str,
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
process_type: str,
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get processing progress in real time; the response format is SSE
@@ -673,9 +607,9 @@ async def get_setting(key: str, _: User = Depends(get_current_active_user_async)
@router.post("/setting/{key}", summary="更新系统设置", response_model=schemas.Response)
async def set_setting(
key: str,
value: Annotated[Union[list, dict, bool, int, str] | None, Body()] = None,
_: User = Depends(get_current_active_superuser_async),
key: str,
value: Annotated[Union[list, dict, bool, int, str] | None, Body()] = None,
_: User = Depends(get_current_active_superuser_async),
):
"""
Update system settings (admin only)
@@ -709,10 +643,10 @@ async def set_setting(
@router.get("/llm-models", summary="获取LLM模型列表", response_model=schemas.Response)
async def get_llm_models(
provider: str,
api_key: str,
base_url: Optional[str] = None,
_: User = Depends(get_current_active_user_async),
provider: str,
api_key: str,
base_url: Optional[str] = None,
_: User = Depends(get_current_active_user_async),
):
"""
Get the list of LLM models
@@ -728,28 +662,33 @@ async def get_llm_models(
@router.post("/llm-test", summary="测试LLM调用", response_model=schemas.Response)
async def llm_test(
payload: Annotated[Optional[LlmTestRequest], Body()] = None,
_: User = Depends(get_current_active_superuser_async),
payload: Annotated[Optional[LlmTestRequest], Body()] = None,
_: User = Depends(get_current_active_superuser_async),
):
"""
Run a minimal LLM call using the supplied configuration or the currently saved configuration.
"""
snapshot = _build_llm_test_snapshot(payload)
data = _build_llm_test_data(
provider=snapshot["provider"],
model=snapshot["model"],
)
if not snapshot["enabled"]:
if not payload:
return schemas.Response(success=False, message="请配置智能助手LLM相关参数后再进行测试")
if not payload.provider or not payload.model:
return schemas.Response(success=False, message="请配置LLM提供商和模型")
data = {
"provider": payload.provider,
"model": payload.model,
}
if not payload.enabled:
return schemas.Response(success=False, message="请先启用智能助手", data=data)
if not snapshot["api_key"]:
if not payload.api_key or not payload.api_key.strip():
return schemas.Response(
success=False,
message="请先配置 LLM API Key",
data=data,
)
if not (snapshot["model"] or "").strip():
if not payload.model or not payload.model.strip():
return schemas.Response(
success=False,
message="请先配置 LLM 模型",
@@ -758,50 +697,36 @@ async def llm_test(
try:
result = await LLMHelper.test_current_settings(
provider=snapshot["provider"],
model=snapshot["model"],
disable_thinking=snapshot["disable_thinking"],
api_key=snapshot["api_key"],
base_url=snapshot["base_url"],
provider=payload.provider,
model=payload.model,
thinking_level=payload.thinking_level,
api_key=payload.api_key,
base_url=payload.base_url,
)
if not result.get("reply_preview"):
return schemas.Response(
success=False,
message="模型响应为空",
data=_build_llm_test_data(
result.get("duration_ms"),
provider=snapshot["provider"],
model=snapshot["model"],
),
message="模型响应为空"
)
return schemas.Response(success=True, data=result)
except (LLMTestTimeout, TimeoutError) as err:
logger.warning(err)
return schemas.Response(
success=False,
message="LLM 调用超时",
data=_build_llm_test_data(
getattr(err, "duration_ms", None),
provider=snapshot["provider"],
model=snapshot["model"],
),
message="LLM 调用超时"
)
except Exception as err:
return schemas.Response(
success=False,
message=_sanitize_llm_test_error(str(err), snapshot["api_key"]),
data=_build_llm_test_data(
getattr(err, "duration_ms", None),
provider=snapshot["provider"],
model=snapshot["model"],
),
message=_sanitize_llm_test_error(str(err), payload.api_key)
)
@router.get("/message", summary="实时消息")
async def get_message(
request: Request,
role: Optional[str] = "system",
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
role: Optional[str] = "system",
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get system messages in real time; the response format is SSE
@@ -824,10 +749,10 @@ async def get_message(
@router.get("/logging", summary="实时日志")
async def get_logging(
request: Request,
length: Optional[int] = 50,
logfile: Optional[str] = "moviepilot.log",
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
length: Optional[int] = 50,
logfile: Optional[str] = "moviepilot.log",
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get system logs in real time
@@ -838,7 +763,7 @@ async def get_logging(
log_path = base_path / logfile
if not await SecurityUtils.async_is_safe_path(
base_path=base_path, user_path=log_path, allowed_suffixes={".log"}
base_path=base_path, user_path=log_path, allowed_suffixes={".log"}
):
raise HTTPException(status_code=404, detail="Not Found")
@@ -855,7 +780,7 @@ async def get_logging(
# Read historical logs
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as f:
# Optimized read strategy for large files
if file_size > 100 * 1024:
@@ -867,7 +792,7 @@ async def get_logging(
# Find the first complete line
first_newline = content.find("\n")
if first_newline != -1:
content = content[first_newline + 1 :]
content = content[first_newline + 1:]
else:
# 小文件直接读取全部内容
content = await f.read()
@@ -875,7 +800,7 @@ async def get_logging(
# Split into lines and queue them, keeping only non-empty lines
lines = [line.strip() for line in content.splitlines() if line.strip()]
# Keep only the last N lines
for line in lines[-max(length, 50) :]:
for line in lines[-max(length, 50):]:
lines_queue.append(line)
# Emit the historical logs
@@ -884,7 +809,7 @@ async def get_logging(
# Tail new logs in real time
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as f:
# Seek to the end of the file and keep watching for appended content
await f.seek(0, 2)
@@ -923,7 +848,7 @@ async def get_logging(
try:
# Read the file asynchronously with aiofiles
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as file:
text = await file.read()
# Output in reverse order
@@ -955,10 +880,10 @@ async def latest_version(_: schemas.TokenPayload = Depends(verify_token)):
@router.get("/ruletest", summary="过滤规则测试", response_model=schemas.Response)
def ruletest(
title: str,
rulegroup_name: str,
subtitle: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
title: str,
rulegroup_name: str,
subtitle: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
):
"""
Test filter rules. Rule types: 1 - subscribe, 2 - best version, 3 - search
@@ -1013,11 +938,10 @@ async def nettest_targets(_: schemas.TokenPayload = Depends(verify_token)):
@router.get("/nettest", summary="测试网络连通性")
async def nettest(
target_id: Optional[str] = None,
url: Optional[str] = None,
proxy: Optional[bool] = None,
include: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
target_id: Optional[str] = None,
url: Optional[str] = None,
include: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
):
"""
Test network connectivity to built-in targets.

View File

@@ -505,8 +505,8 @@ class ConfigModel(BaseModel):
LLM_PROVIDER: str = "deepseek"
# LLM model name
LLM_MODEL: str = "deepseek-chat"
# Whether to try to disable the model's thinking/reasoning capability (auto-adapted per provider/model support)
LLM_DISABLE_THINKING: bool = True
# Thinking mode/depth config: off/auto/minimal/low/medium/high/max/xhigh
LLM_THINKING_LEVEL: Optional[str] = 'off'
# Whether the LLM supports image input; when enabled, message images are sent to the model as multimodal input
LLM_SUPPORT_IMAGE_INPUT: bool = True
# LLM API key

View File

@@ -2,9 +2,13 @@
import asyncio
import inspect
import json
import time
from functools import wraps
from typing import Any, List
from langchain_core.messages import AIMessage
from app.core.config import settings
from app.log import logger
@@ -70,21 +74,120 @@ def _get_httpx_proxy_key() -> str:
if "proxy" in params:
return "proxy"
return "proxies"
except Exception:
except Exception as e:
logger.warning(f"检测 httpx 代理参数失败,默认使用 'proxies'{e}")
return "proxies"
def _deepseek_thinking_toggle(extra_body: Any) -> bool | None:
"""
Parse an explicit thinking toggle passed in via DeepSeek extra_body.
"""
if not isinstance(extra_body, dict):
return None
thinking = extra_body.get("thinking")
if not isinstance(thinking, dict):
return None
thinking_type = str(thinking.get("type") or "").strip().lower()
if thinking_type == "enabled":
return True
if thinking_type == "disabled":
return False
return None
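The toggle parser above resolves the explicit extra_body shape to a tri-state value, for example:

# _deepseek_thinking_toggle examples (per the parser above):
#   {"thinking": {"type": "enabled"}}   -> True
#   {"thinking": {"type": "disabled"}}  -> False
#   {"thinking": "on"}, {}, or None     -> None (no explicit toggle)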
def _is_deepseek_thinking_enabled(model_name: str | None, extra_body: Any) -> bool:
"""
Determine whether this DeepSeek call is in thinking mode.
"""
explicit_toggle = _deepseek_thinking_toggle(extra_body)
if explicit_toggle is not None:
return explicit_toggle
normalized_model_name = str(model_name or "").strip().lower()
if normalized_model_name == "deepseek-reasoner":
return True
if normalized_model_name.startswith("deepseek-v4-"):
# DeepSeek V4 enables thinking mode by default unless explicitly disabled.
return True
return False
def _patch_deepseek_reasoning_content_support():
"""
Patch langchain-deepseek's missing reasoning_content round-trip in tool-call scenarios.
DeepSeek thinking mode requires: if an assistant message in the history contains tool_calls,
subsequent requests must carry back that message's top-level reasoning_content.
Some langchain-deepseek versions can read reasoning_content from the response,
but do not write it back into the request payload when replaying message history, causing a 400.
"""
try:
from langchain_deepseek import ChatDeepSeek
except Exception as err:
logger.debug(f"跳过 langchain-deepseek reasoning_content 修补:{err}")
return
if getattr(ChatDeepSeek, "_moviepilot_reasoning_content_patched", False):
return
original_get_request_payload = getattr(ChatDeepSeek, "_get_request_payload", None)
if not callable(original_get_request_payload):
logger.warning("langchain-deepseek 缺少 _get_request_payload无法修补 reasoning_content")
return
@wraps(original_get_request_payload)
def _patched_get_request_payload(self, input_, *, stop=None, **kwargs):
payload = original_get_request_payload(self, input_, stop=stop, **kwargs)
# Resolve original messages so we can extract reasoning_content from
# additional_kwargs. The parent's payload builder does not propagate
# this DeepSeek-specific field.
messages = self._convert_input(input_).to_messages()
for i, message in enumerate(payload["messages"]):
if message["role"] == "tool" and isinstance(message["content"], list):
message["content"] = json.dumps(message["content"])
elif message["role"] == "assistant":
if isinstance(message["content"], list):
# DeepSeek API expects assistant content to be a string,
# not a list. Extract text blocks and join them, or use
# empty string if none exist.
text_parts = [
block.get("text", "")
for block in message["content"]
if isinstance(block, dict) and block.get("type") == "text"
]
message["content"] = "".join(text_parts) if text_parts else ""
# DeepSeek reasoning models require every assistant message to
# carry a reasoning_content field (even when empty). The value
# is stored in AIMessage.additional_kwargs by
# _create_chat_result(); re-inject it into the API payload.
if (
"reasoning_content" not in message
and i < len(messages)
and isinstance(messages[i], AIMessage)
):
message["reasoning_content"] = messages[i].additional_kwargs.get(
"reasoning_content", ""
)
return payload
ChatDeepSeek._get_request_payload = _patched_get_request_payload
ChatDeepSeek._moviepilot_reasoning_content_patched = True
logger.debug("已修补 langchain-deepseek thinking tool-call 的 reasoning_content 回传兼容性")
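For reference, after the patch an assistant history message with tool calls is replayed to DeepSeek roughly in this shape (a sketch inferred from the docstring and patch logic above; the tool_calls layout assumes the usual OpenAI-compatible wire format):

assistant_message = {
    "role": "assistant",
    "content": "",  # DeepSeek expects a string here, never a list of blocks
    "tool_calls": [
        {"id": "call_1", "type": "function",
         "function": {"name": "search", "arguments": "{}"}},
    ],
    # Re-injected from AIMessage.additional_kwargs; falls back to "" when absent.
    "reasoning_content": "model reasoning captured from the earlier response",
}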
class LLMHelper:
"""LLM模型相关辅助功能"""
@staticmethod
def _should_disable_thinking(disable_thinking: bool | None = None) -> bool:
"""
Determine whether this call should try to disable the model's thinking capability.
"""
if disable_thinking is not None:
return bool(disable_thinking)
return bool(getattr(settings, "LLM_DISABLE_THINKING", False))
_SUPPORTED_THINKING_LEVELS = frozenset(
{"off", "auto", "minimal", "low", "medium", "high", "max", "xhigh"}
)
@staticmethod
def _normalize_model_name(model_name: str | None) -> str:
@@ -94,48 +197,164 @@ class LLMHelper:
return (model_name or "").strip().lower()
@classmethod
def _build_disabled_thinking_kwargs(
cls,
provider: str,
model: str | None,
disable_thinking: bool | None = None,
def _normalize_deepseek_reasoning_effort(
cls, thinking_level: str | None = None
) -> str | None:
"""
DeepSeek docs currently recommend high/max; accept common effort aliases for compatibility.
"""
if not thinking_level or thinking_level in {"off", "auto"}:
return None
if thinking_level in {"minimal", "low", "medium", "high"}:
return "high"
if thinking_level in {"max", "xhigh"}:
return "max"
logger.warning(f"忽略不支持的 DeepSeek reasoning_effort 配置: {thinking_level}")
return None
@classmethod
def _normalize_openai_reasoning_effort(
cls, thinking_level: str | None = None
) -> str | None:
"""
OpenAI reasoning_effort supports finer-grained levels; map to the closest equivalent.
"""
if not thinking_level or thinking_level == "auto":
return None
if thinking_level == "off":
return "none"
if thinking_level == "max":
return "xhigh"
return thinking_level
@classmethod
def _build_google_thinking_kwargs(
cls, model_name: str, thinking_level: str
) -> dict[str, Any]:
"""
Generate "disable thinking" parameters per provider/model.
Prefer native fields already supported by the LangChain/OpenAI SDKs; fall back
to extra_body only when a provider explicitly requires a custom request body.
Gemini 3 uses thinking_level; Gemini 2.5 uses thinking_budget
"""
if not cls._should_disable_thinking(disable_thinking):
if not model_name or thinking_level == "auto":
return {}
provider_name = (provider or "").strip().lower()
model_name = cls._normalize_model_name(model)
if not model_name:
return {}
# Moonshot Kimi K2.5/K2.6 require thinking.disabled to be declared explicitly in the request body.
if model_name.startswith(("kimi-k2.5", "kimi-k2.6")):
return {"extra_body": {"thinking": {"type": "disabled"}}}
# OpenAI native reasoning models prefer LangChain's built-in reasoning_effort.
if provider_name == "openai" and model_name.startswith(
("gpt-5", "o1", "o3", "o4")
):
return {"reasoning_effort": "none"}
# Gemini uses the built-in thinking control parameters of google-genai / langchain-google-genai.
if provider_name == "google":
if "gemini-2.5" in model_name:
if "gemini-2.5" in model_name:
if thinking_level == "off":
if "pro" in model_name:
# Gemini 2.5 Pro officially cannot fully disable thinking; fall back to the minimum budget.
return {
"thinking_budget": 128,
"include_thoughts": False,
}
return {
"thinking_budget": 0,
"include_thoughts": False,
}
if "gemini-3" in model_name:
return {
"thinking_level": "minimal",
budget_map = {
"minimal": 512,
"low": 1024,
"medium": 4096,
"high": 8192,
"max": 24576,
"xhigh": 24576,
}
budget = budget_map.get(thinking_level)
return (
{
"thinking_budget": budget,
"include_thoughts": False,
}
if budget is not None
else {}
)
if "gemini-3" in model_name:
level_map = {
"off": "minimal",
"minimal": "minimal",
"low": "low",
"medium": "medium",
"high": "high",
"max": "high",
"xhigh": "high",
}
google_level = level_map.get(thinking_level)
return (
{
"thinking_level": google_level,
"include_thoughts": False,
}
if google_level
else {}
)
return {}
@classmethod
def _build_kimi_thinking_kwargs(
cls, model_name: str, thinking_level: str
) -> dict[str, Any]:
"""
Kimi's current public docs only support a thinking on/off switch, not an explicit depth setting.
"""
if model_name.startswith("kimi-k2-thinking"):
return {}
if thinking_level == "off":
return {"extra_body": {"thinking": {"type": "disabled"}}}
return {}
@classmethod
def _build_thinking_kwargs(
cls,
provider: str,
model: str | None,
thinking_level: str | None = None
) -> dict[str, Any]:
"""
Generate thinking-mode parameters per provider/model.
Prefer native fields already supported by the LangChain/OpenAI SDKs; fall back
to extra_body only when a provider explicitly requires a custom request body.
"""
provider_name = (provider or "").strip().lower()
model_name = cls._normalize_model_name(model)
if provider_name == "deepseek":
if thinking_level == "off":
return {"extra_body": {"thinking": {"type": "disabled"}}}
if thinking_level == "auto":
return {}
kwargs: dict[str, Any] = {"extra_body": {"thinking": {"type": "enabled"}}}
deepseek_effort = cls._normalize_deepseek_reasoning_effort(
thinking_level
)
if deepseek_effort:
kwargs["reasoning_effort"] = deepseek_effort
return kwargs
if model_name.startswith(("kimi-k2.5", "kimi-k2.6", "kimi-k2-thinking")):
return cls._build_kimi_thinking_kwargs(model_name, thinking_level)
if not model_name:
return {}
# OpenAI native reasoning models prefer LangChain's built-in reasoning_effort.
if provider_name == "openai" and model_name.startswith(
("gpt-5", "o1", "o3", "o4")
):
openai_effort = cls._normalize_openai_reasoning_effort(
thinking_level
)
return {"reasoning_effort": openai_effort} if openai_effort else {}
# Gemini uses the built-in thinking control parameters of google-genai / langchain-google-genai.
if provider_name == "google":
return cls._build_google_thinking_kwargs(
model_name, thinking_level
)
return {}
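A few mappings implied by the logic above (the DeepSeek and OpenAI cases are also asserted by the tests later in this diff):

# Expected results of LLMHelper._build_thinking_kwargs(provider, model, thinking_level):
#   ("deepseek", "deepseek-v4-pro", "xhigh")
#     -> {"extra_body": {"thinking": {"type": "enabled"}}, "reasoning_effort": "max"}
#   ("deepseek", "deepseek-chat", "off")
#     -> {"extra_body": {"thinking": {"type": "disabled"}}}
#   ("openai", "gpt-5-mini", "off")       -> {"reasoning_effort": "none"}
#   ("google", "gemini-2.5-flash", "off") -> {"thinking_budget": 0, "include_thoughts": False}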
@@ -148,16 +367,26 @@ class LLMHelper:
@staticmethod
def get_llm(
streaming: bool = False,
provider: str | None = None,
model: str | None = None,
disable_thinking: bool | None = None,
api_key: str | None = None,
base_url: str | None = None,
streaming: bool = False,
provider: str | None = None,
model: str | None = None,
thinking_level: str | None = None,
api_key: str | None = None,
base_url: str | None = None,
):
"""
Get an LLM instance
:param streaming: whether to enable streaming output
:param provider: LLM provider, defaults to the LLM_PROVIDER setting
:param model: model name, defaults to the LLM_MODEL setting
:param thinking_level: thinking mode level; defaults to None (i.e. auto-decide
    whether thinking mode is enabled). Supported levels: "off" (disabled),
    "auto", "minimal", "low", "medium", "high", "max"/"xhigh" (maximum).
    Support and behavior vary by model; see the code for the exact mapping.
    The parameter is ignored for models without thinking support.
:param api_key: API Key, defaults to the LLM_API_KEY setting. Some providers
    (e.g. DeepSeek) may also require base_url.
:param base_url: API Base URL, defaults to the LLM_BASE_URL setting.
:return: LLM instance
"""
provider_name = str(
@@ -166,10 +395,10 @@ class LLMHelper:
model_name = model if model is not None else settings.LLM_MODEL
api_key_value = api_key if api_key is not None else settings.LLM_API_KEY
base_url_value = base_url if base_url is not None else settings.LLM_BASE_URL
thinking_kwargs = LLMHelper._build_disabled_thinking_kwargs(
thinking_kwargs = LLMHelper._build_thinking_kwargs(
provider=provider_name,
model=model_name,
disable_thinking=disable_thinking,
thinking_level=thinking_level
)
if not api_key_value:
@@ -201,9 +430,11 @@ class LLMHelper:
elif provider_name == "deepseek":
from langchain_deepseek import ChatDeepSeek
_patch_deepseek_reasoning_content_support()
model = ChatDeepSeek(
model=model_name,
api_key=api_key_value,
api_base=base_url_value,
max_retries=3,
temperature=settings.LLM_TEMPERATURE,
streaming=streaming,
@@ -231,7 +462,7 @@ class LLMHelper:
else:
model.profile = {
"max_input_tokens": settings.LLM_MAX_CONTEXT_TOKENS
* 1000, # convert to token units
* 1000,  # convert to token units
}
return model
@@ -255,10 +486,10 @@ class LLMHelper:
if isinstance(block, dict) or hasattr(block, "get"):
block_type = block.get("type")
if block.get("thought") or block_type in (
"thinking",
"reasoning_content",
"reasoning",
"thought",
"thinking",
"reasoning_content",
"reasoning",
"thought",
):
continue
if block_type == "text":
@@ -278,13 +509,13 @@ class LLMHelper:
@staticmethod
async def test_current_settings(
prompt: str = "请只回复 OK",
timeout: int = 20,
provider: str | None = None,
model: str | None = None,
disable_thinking: bool | None = None,
api_key: str | None = None,
base_url: str | None = None,
prompt: str = "请只回复 OK",
timeout: int = 20,
provider: str | None = None,
model: str | None = None,
thinking_level: str | None = None,
api_key: str | None = None,
base_url: str | None = None,
) -> dict:
"""
Run a minimal LLM call using the currently saved configuration.
@@ -298,7 +529,7 @@ class LLMHelper:
streaming=False,
provider=provider_name,
model=model_name,
disable_thinking=disable_thinking,
thinking_level=thinking_level,
api_key=api_key_value,
base_url=base_url_value,
)
@@ -326,7 +557,7 @@ class LLMHelper:
return data
def get_models(
self, provider: str, api_key: str, base_url: str = None
self, provider: str, api_key: str, base_url: str = None
) -> List[str]:
"""获取模型列表"""
logger.info(f"获取 {provider} 模型列表...")
@@ -364,7 +595,7 @@ class LLMHelper:
@staticmethod
def _get_openai_compatible_models(
provider: str, api_key: str, base_url: str = None
provider: str, api_key: str, base_url: str = None
) -> List[str]:
"""获取OpenAI兼容模型列表"""
try:

View File

@@ -76,14 +76,14 @@ pympler~=1.1
smbprotocol~=1.15.0
setproctitle~=1.3.6
httpx[socks]~=0.28.1
langchain~=1.2.13
langchain-core~=1.2.20
langchain~=1.2.15
langchain-core~=1.3.2
langchain-community~=0.4.1
langchain-openai~=1.1.11
langchain-google-genai~=4.2.1
langchain-openai~=1.2.1
langchain-google-genai~=4.2.2
langchain-deepseek~=1.0.1
langgraph~=1.1.3
openai~=2.29.0
google-genai~=1.68.0
langgraph~=1.1.9
openai~=2.32.0
google-genai~=1.73.1
ddgs~=9.10.0
websocket-client~=1.8.0

View File

@@ -1063,6 +1063,32 @@ def _prompt_choice(label: str, choices: dict[str, str], default: str) -> str:
print("请输入列表中的可选值。")
def _env_llm_thinking_level_default() -> str:
value = _normalize_choice(_env_default("LLM_THINKING_LEVEL", ""))
alias_map = {
"none": "off",
"disabled": "off",
"disable": "off",
"enabled": "auto",
"enable": "auto",
"default": "auto",
"dynamic": "auto",
}
normalized = alias_map.get(value, value)
if normalized in {
"off",
"auto",
"minimal",
"low",
"medium",
"high",
"max",
"xhigh",
}:
return normalized
return "auto"
def _prompt_path(label: str, *, default: Path, allow_empty: bool = False) -> str:
value = _prompt_text(label, default=str(default), allow_empty=allow_empty)
if not value:
@@ -1476,9 +1502,19 @@ def _collect_agent_config() -> dict[str, Any]:
current_value=read_env_value("LLM_API_KEY"),
required=True,
),
"LLM_DISABLE_THINKING": _prompt_yes_no(
"是否尽量关闭模型思考/推理",
default=_env_bool("LLM_DISABLE_THINKING", False),
"LLM_THINKING_LEVEL": _prompt_choice(
"LLM 思考模式/深度",
choices={
"off": "关闭思考",
"auto": "自动",
"minimal": "最小",
"low": "",
"medium": "",
"high": "",
"max": "极高",
"xhigh": "超高",
},
default=_env_llm_thinking_level_default(),
),
"LLM_SUPPORT_IMAGE_INPUT": _prompt_yes_no(
"是否启用图片输入支持",
@@ -1506,7 +1542,7 @@ def _load_auth_site_definitions_inner() -> dict[str, Any]:
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper # noqa
auth_sites = SitesHelper().get_authsites() or {}
definitions: dict[str, Any] = {}
@@ -1843,7 +1879,7 @@ def _apply_local_system_config_inner(config_payload: dict[str, Any]) -> None:
):
system_config.set(SystemConfigKey.UserSiteAuthParams, site_auth_item)
try:
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper # noqa
status, msg = SitesHelper().check_user(
site_auth_item.get("site"), site_auth_item.get("params")

View File

@@ -0,0 +1,22 @@
import unittest
from app.agent.middleware.memory import MEMORY_ONBOARDING_PROMPT
from app.agent.prompt import prompt_manager
class TestAgentPromptStyle(unittest.TestCase):
def test_agent_prompt_enforces_concise_professional_style(self):
prompt = prompt_manager.get_agent_prompt()
self.assertIn("professional, concise, restrained", prompt)
self.assertIn("Do NOT flatter the user", prompt)
self.assertIn("NO praise, emotional cushioning", prompt)
def test_memory_onboarding_does_not_force_warm_intro(self):
self.assertIn("Do NOT interrupt the current task", MEMORY_ONBOARDING_PROMPT)
self.assertIn("Do NOT proactively greet warmly", MEMORY_ONBOARDING_PROMPT)
self.assertNotIn("greet the user warmly", MEMORY_ONBOARDING_PROMPT)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,144 @@
import importlib.util
import sys
import unittest
from pathlib import Path
from types import ModuleType
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
def _stub_module(name: str, **attrs):
module = sys.modules.get(name)
if module is None:
module = ModuleType(name)
sys.modules[name] = module
for key, value in attrs.items():
setattr(module, key, value)
return module
class _DummyLogger:
def __getattr__(self, _name):
return lambda *args, **kwargs: None
def _build_tool_call(name: str = "search", arguments: str = "{}"):
return [
{
"id": "call_1",
"type": "tool_call",
"name": name,
"args": {},
}
]
class _FakeChatDeepSeek:
def __init__(self, model_name: str, model_kwargs: dict | None = None):
self.model_name = model_name
self.model_kwargs = model_kwargs or {}
def _get_request_payload(self, input_, *, stop=None, **kwargs):
messages = []
for message in input_:
payload_message = {
"role": message.type,
"content": message.content,
}
if message.type == "human":
payload_message["role"] = "user"
elif message.type == "ai":
payload_message["role"] = "assistant"
tool_calls = getattr(message, "tool_calls", None)
if tool_calls:
payload_message["tool_calls"] = tool_calls
elif message.type == "tool":
payload_message["role"] = "tool"
payload_message["tool_call_id"] = message.tool_call_id
messages.append(payload_message)
return {"messages": messages}
_ORIGINAL_GET_REQUEST_PAYLOAD = _FakeChatDeepSeek._get_request_payload
sys.modules.pop("app.helper.llm", None)
_stub_module(
"app.core.config",
settings=ModuleType("settings"),
)
sys.modules["app.core.config"].settings.LLM_PROVIDER = "deepseek"
sys.modules["app.core.config"].settings.LLM_MODEL = "deepseek-v4-pro"
sys.modules["app.core.config"].settings.LLM_API_KEY = "sk-test"
sys.modules["app.core.config"].settings.LLM_BASE_URL = "https://api.deepseek.com"
sys.modules["app.core.config"].settings.LLM_THINKING_LEVEL = None
sys.modules["app.core.config"].settings.LLM_TEMPERATURE = 0.1
sys.modules["app.core.config"].settings.LLM_MAX_CONTEXT_TOKENS = 64
sys.modules["app.core.config"].settings.PROXY_HOST = None
_stub_module("app.log", logger=_DummyLogger())
_stub_module("langchain_deepseek", ChatDeepSeek=_FakeChatDeepSeek)
module_path = Path(__file__).resolve().parents[1] / "app" / "helper" / "llm.py"
spec = importlib.util.spec_from_file_location("test_llm_module_for_deepseek_compat", module_path)
llm_module = importlib.util.module_from_spec(spec)
assert spec and spec.loader
spec.loader.exec_module(llm_module)
class DeepSeekCompatPatchTest(unittest.TestCase):
def setUp(self):
_FakeChatDeepSeek._get_request_payload = _ORIGINAL_GET_REQUEST_PAYLOAD
if hasattr(_FakeChatDeepSeek, "_moviepilot_reasoning_content_patched"):
delattr(_FakeChatDeepSeek, "_moviepilot_reasoning_content_patched")
llm_module._patch_deepseek_reasoning_content_support()
def test_injects_reasoning_content_for_assistant_tool_calls(self):
llm = _FakeChatDeepSeek("deepseek-v4-pro")
messages = [
HumanMessage(content="天气如何?"),
AIMessage(
content="",
tool_calls=_build_tool_call(),
additional_kwargs={"reasoning_content": "先调用天气工具"},
),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertEqual(
payload["messages"][1]["reasoning_content"],
"先调用天气工具",
)
def test_falls_back_to_empty_reasoning_content_when_missing(self):
llm = _FakeChatDeepSeek("deepseek-v4-flash")
messages = [
HumanMessage(content="天气如何?"),
AIMessage(content="", tool_calls=_build_tool_call()),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertIn("reasoning_content", payload["messages"][1])
self.assertEqual(payload["messages"][1]["reasoning_content"], "")
def test_skips_injection_when_thinking_is_disabled(self):
llm = _FakeChatDeepSeek(
"deepseek-v4-pro",
model_kwargs={"extra_body": {"thinking": {"type": "disabled"}}},
)
messages = [
HumanMessage(content="天气如何?"),
AIMessage(
content="",
tool_calls=_build_tool_call(),
additional_kwargs={"reasoning_content": "先调用天气工具"},
),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertNotIn("reasoning_content", payload["messages"][1])

View File

@@ -38,7 +38,7 @@ _stub_module(
LLM_MODEL="global-model",
LLM_API_KEY="global-key",
LLM_BASE_URL="https://global.example.com",
LLM_DISABLE_THINKING=False,
LLM_THINKING_LEVEL=None,
LLM_TEMPERATURE=0.1,
LLM_MAX_CONTEXT_TOKENS=64,
PROXY_HOST=None,
@@ -83,7 +83,9 @@ class LlmHelperTestCallTest(unittest.TestCase):
streaming=False,
provider="deepseek",
model="deepseek-chat",
thinking_level=None,
disable_thinking=None,
reasoning_effort=None,
api_key="sk-test",
base_url="https://api.deepseek.com",
)
@@ -138,7 +140,77 @@ class LlmHelperTestCallTest(unittest.TestCase):
{"thinking": {"type": "disabled"}},
)
def test_get_llm_uses_openai_reasoning_effort_none(self):
def test_get_llm_uses_deepseek_thinking_level_controls(self):
calls = []
patch_calls = []
class _FakeChatDeepSeek:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_deepseek": SimpleNamespace(ChatDeepSeek=_FakeChatDeepSeek)},
), patch.object(
llm_module,
"_patch_deepseek_reasoning_content_support",
side_effect=lambda: patch_calls.append(True),
):
llm_module.LLMHelper.get_llm(
provider="deepseek",
model="deepseek-v4-pro",
thinking_level="xhigh",
api_key="sk-test",
base_url="https://api.deepseek.com",
)
self.assertEqual(len(calls), 1)
self.assertEqual(
calls[0].get("extra_body"),
{"thinking": {"type": "enabled"}},
)
self.assertEqual(patch_calls, [True])
self.assertEqual(calls[0].get("reasoning_effort"), "max")
self.assertEqual(calls[0].get("api_base"), "https://api.deepseek.com")
def test_get_llm_disables_deepseek_thinking_via_thinking_level(self):
calls = []
patch_calls = []
class _FakeChatDeepSeek:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_deepseek": SimpleNamespace(ChatDeepSeek=_FakeChatDeepSeek)},
), patch.object(
llm_module,
"_patch_deepseek_reasoning_content_support",
side_effect=lambda: patch_calls.append(True),
):
llm_module.LLMHelper.get_llm(
provider="deepseek",
model="deepseek-v4-flash",
thinking_level="off",
api_key="sk-test",
base_url="https://proxy.example.com",
)
self.assertEqual(len(calls), 1)
self.assertEqual(
calls[0].get("extra_body"),
{"thinking": {"type": "disabled"}},
)
self.assertEqual(patch_calls, [True])
self.assertIsNone(calls[0].get("reasoning_effort"))
self.assertEqual(calls[0].get("api_base"), "https://proxy.example.com")
def test_get_llm_uses_openai_reasoning_effort_none_for_off(self):
calls = []
class _FakeChatOpenAI:
@@ -154,7 +226,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
llm_module.LLMHelper.get_llm(
provider="openai",
model="gpt-5-mini",
disable_thinking=True,
thinking_level="off",
api_key="sk-test",
base_url="https://api.openai.com/v1",
)
@@ -162,6 +234,30 @@ class LlmHelperTestCallTest(unittest.TestCase):
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("reasoning_effort"), "none")
def test_get_llm_maps_unified_max_to_openai_xhigh(self):
calls = []
class _FakeChatOpenAI:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_openai": SimpleNamespace(ChatOpenAI=_FakeChatOpenAI)},
):
llm_module.LLMHelper.get_llm(
provider="openai",
model="gpt-5.4",
thinking_level="max",
api_key="sk-test",
base_url="https://api.openai.com/v1",
)
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("reasoning_effort"), "xhigh")
def test_get_llm_uses_gemini_builtin_thinking_controls(self):
calls = []
@@ -182,7 +278,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
llm_module.LLMHelper.get_llm(
provider="google",
model="gemini-2.5-flash",
disable_thinking=True,
thinking_level="off",
api_key="sk-test",
base_url=None,
)
@@ -191,6 +287,35 @@ class LlmHelperTestCallTest(unittest.TestCase):
self.assertEqual(calls[0].get("thinking_budget"), 0)
self.assertFalse(calls[0].get("include_thoughts"))
def test_get_llm_uses_gemini_3_thinking_level_controls(self):
calls = []
class _FakeChatGoogleGenerativeAI:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{
"langchain_google_genai": SimpleNamespace(
ChatGoogleGenerativeAI=_FakeChatGoogleGenerativeAI
)
},
):
llm_module.LLMHelper.get_llm(
provider="google",
model="gemini-3.1-flash",
thinking_level="xhigh",
api_key="sk-test",
base_url=None,
)
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("thinking_level"), "high")
self.assertFalse(calls[0].get("include_thoughts"))
if __name__ == "__main__":
unittest.main()

View File

@@ -119,6 +119,8 @@ class LlmTestEndpointTest(unittest.TestCase):
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", True), patch.object(
system_endpoint.settings, "LLM_PROVIDER", "deepseek"
), patch.object(system_endpoint.settings, "LLM_MODEL", "deepseek-chat"), patch.object(
system_endpoint.settings, "LLM_THINKING_LEVEL", "max"
), patch.object(
system_endpoint.settings, "LLM_API_KEY", "sk-test"
), patch.object(
system_endpoint.settings, "LLM_BASE_URL", "https://api.deepseek.com"
@@ -133,7 +135,9 @@ class LlmTestEndpointTest(unittest.TestCase):
llm_test_mock.assert_awaited_once_with(
provider="deepseek",
model="deepseek-chat",
disable_thinking=False,
thinking_level="max",
disable_thinking=None,
reasoning_effort=None,
api_key="sk-test",
base_url="https://api.deepseek.com",
)
@@ -156,7 +160,7 @@ class LlmTestEndpointTest(unittest.TestCase):
enabled=True,
provider="openai",
model="gpt-4.1-mini",
disable_thinking=True,
thinking_level="high",
api_key="sk-live",
base_url="https://example.com/v1",
)
@@ -178,7 +182,9 @@ class LlmTestEndpointTest(unittest.TestCase):
llm_test_mock.assert_awaited_once_with(
provider="openai",
model="gpt-4.1-mini",
disable_thinking=True,
thinking_level="high",
disable_thinking=None,
reasoning_effort=None,
api_key="sk-live",
base_url="https://example.com/v1",
)
@@ -186,6 +192,44 @@ class LlmTestEndpointTest(unittest.TestCase):
self.assertEqual(resp.data["provider"], "openai")
self.assertEqual(resp.data["model"], "gpt-4.1-mini")
def test_llm_test_supports_legacy_thinking_payload(self):
llm_test_mock = AsyncMock(
return_value={
"provider": "deepseek",
"model": "deepseek-v4-pro",
"duration_ms": 123,
"reply_preview": "OK",
}
)
payload = system_endpoint.LlmTestRequest(
enabled=True,
provider="deepseek",
model="deepseek-v4-pro",
disable_thinking=False,
reasoning_effort="xhigh",
api_key="sk-live",
base_url="https://api.deepseek.com",
)
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", False), patch.object(
system_endpoint.LLMHelper,
"test_current_settings",
llm_test_mock,
create=True,
):
resp = asyncio.run(system_endpoint.llm_test(payload=payload, _="token"))
llm_test_mock.assert_awaited_once_with(
provider="deepseek",
model="deepseek-v4-pro",
thinking_level=None,
disable_thinking=False,
reasoning_effort="xhigh",
api_key="sk-live",
base_url="https://api.deepseek.com",
)
self.assertTrue(resp.success)
def test_llm_test_rejects_empty_reply(self):
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", True), patch.object(
system_endpoint.settings, "LLM_PROVIDER", "deepseek"

View File

@@ -1,2 +1,2 @@
APP_VERSION = 'v2.10.4'
FRONTEND_VERSION = 'v2.10.4'
APP_VERSION = 'v2.10.5'
FRONTEND_VERSION = 'v2.10.5'