Compare commits

...

11 Commits

Author SHA1 Message Date
jxxghp
79bfeaf2af Remove stream reset before tool calls, keep model thinking text visible 2026-04-25 23:12:34 +08:00
jxxghp
4fe41ba5e9 Update base.py 2026-04-25 22:16:15 +08:00
jxxghp
14d6e2febc Refine agent prompts for concise professional replies 2026-04-25 22:04:35 +08:00
jxxghp
97c7e71207 Update Agent Prompt.txt 2026-04-25 21:51:47 +08:00
jxxghp
8f29a218ea chore: bump version to v2.10.5 2026-04-25 12:55:33 +08:00
jxxghp
4fd5aa3eb6 fix: improve DeepSeek reasoning_content payload handling and update langchain dependencies 2026-04-25 12:46:21 +08:00
jxxghp
bfc27d151c Update ask_user_choice.py 2026-04-25 11:36:36 +08:00
jxxghp
f2b56b8f40 Update ask_user_choice.py 2026-04-25 11:35:32 +08:00
jxxghp
a05ffc07d4 refactor: remove legacy LLM_DISABLE_THINKING and LLM_REASONING_EFFORT config, unify thinking_level handling
- Eliminate support for LLM_DISABLE_THINKING and LLM_REASONING_EFFORT in config, code, and tests
- Simplify LLM thinking level logic to rely solely on LLM_THINKING_LEVEL
- Refactor LLMHelper and related endpoints to remove legacy parameter handling
- Update system API and test utilities to match new configuration structure
- Minor code cleanup and formatting improvements
2026-04-25 10:42:03 +08:00
jxxghp
4a81417fb7 fix: preserve deepseek reasoning content in tool loops 2026-04-25 09:37:01 +08:00
jxxghp
c7fa3dc863 feat: unify llm thinking level controls 2026-04-24 19:50:23 +08:00
15 changed files with 800 additions and 292 deletions

View File

@@ -73,7 +73,7 @@ class _ThinkTagStripper:
on_output(self.buffer[:start_idx])
emitted = True
self.in_think_tag = True
self.buffer = self.buffer[start_idx + 7:]
self.buffer = self.buffer[start_idx + 7 :]
else:
# Check whether the buffer ends with an incomplete prefix of <think>
partial_match = False
@@ -93,7 +93,7 @@ class _ThinkTagStripper:
end_idx = self.buffer.find("</think>")
if end_idx != -1:
self.in_think_tag = False
self.buffer = self.buffer[end_idx + 8:]
self.buffer = self.buffer[end_idx + 8 :]
else:
# Check whether the buffer ends with an incomplete prefix of </think>
partial_match = False
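The magic numbers in the two hunks above are tag lengths: `start_idx + 7` skips past a complete `<think>` (7 characters) and `end_idx + 8` past `</think>` (8 characters). A minimal usage sketch of the stripper, assuming only the `process`/`flush` interface visible elsewhere in this diff:

# Hypothetical sketch: _ThinkTagStripper drops <think>...</think> spans from a
# streamed token sequence, even when a tag arrives split across chunks.
stripper = _ThinkTagStripper()
out: list[str] = []
for chunk in ["Hello <thi", "nk>hidden reasoning</thi", "nk> world"]:
    stripper.process(chunk, out.append)  # emits only non-think text
stripper.flush(out.append)  # flush any buffered tail
print("".join(out))  # expected: "Hello  world" (the think span is removed)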
@@ -371,10 +371,6 @@ class MoviePilotAgent:
:param on_token: callback invoked when a valid token is received
"""
stripper = _ThinkTagStripper()
# In non-VERBOSE mode, track the current langgraph_step to detect intermediate-step model output
# "Plan/thinking" text the model emits before a tool call is cleared once a tool_call is detected
current_model_step = -1
has_emitted_in_step = False
async for chunk in agent.astream(
messages,
@@ -388,25 +384,13 @@ class MoviePilotAgent:
if not token or not hasattr(token, "tool_call_chunks"):
continue
# Get the current step info
step = metadata.get("langgraph_step", -1) if metadata else -1
if token.tool_call_chunks:
# A tool-call token means the current step is an intermediate step
# In non-VERBOSE mode, clear the "plan/thinking" text emitted earlier in this step
if not settings.AI_AGENT_VERBOSE and has_emitted_in_step:
self.stream_handler.reset()
stripper.reset()
has_emitted_in_step = False
# Clear any lingering partial <think> tag state in the stripper's internal buffer
stripper.reset()
continue
# The following handles plain-text tokens (tool_call_chunks is empty)
# On step change, reset per-step emit tracking
if step != current_model_step:
current_model_step = step
has_emitted_in_step = False
# Skip model thinking/reasoning content (e.g. DeepSeek R1's reasoning_content)
additional = getattr(token, "additional_kwargs", None)
if additional and additional.get("reasoning_content"):
@@ -416,8 +400,7 @@ class MoviePilotAgent:
# content may be a string or a list of content blocks; filter out thinking-type blocks
content = self._extract_text_content(token.content)
if content:
if stripper.process(content, on_token):
has_emitted_in_step = True
stripper.process(content, on_token)
stripper.flush(on_token)
@@ -457,7 +440,10 @@ class MoviePilotAgent:
agent=agent,
messages={"messages": messages},
config=agent_config,
on_token=lambda token: (self.stream_handler.emit(token), self._emit_output(token)),
on_token=lambda token: (
self.stream_handler.emit(token),
self._emit_output(token),
),
)
# Stop streaming output; return whether everything was already sent via streaming edits, plus the final text
@@ -1004,7 +990,6 @@ class AgentManager:
)
try:
await self.process_message(
session_id=session_id,
user_id=user_id,

View File

@@ -124,34 +124,29 @@ Default memory file: {memory_file}
</agent_memory>
<memory_onboarding>
**IMPORTANT — First-time user detected!**
First-time user detected.
The memory directory is currently empty. This means this is likely the user's first interaction, or their preferences have been reset.
The memory directory is currently empty. This likely means the user has no saved long-term preferences yet.
**Your MANDATORY first action in this conversation:**
Before doing ANYTHING else (before answering questions, before calling tools, before performing any task), you MUST proactively greet the user warmly and ask them about their preferences so you can provide personalized service going forward. Specifically, ask about:
**Behavior requirements:**
- Do NOT interrupt the current task just to collect preferences.
- Do NOT proactively greet warmly, build rapport, or ask a long onboarding questionnaire.
- Default to a concise, professional style until the user states a preference.
- Only ask for preferences when they are directly useful for the current task, or when a short follow-up question at the end would clearly help future interactions.
1. **How to address the user** — Ask what name or nickname they'd like you to call them (e.g., a real name, a nickname, or a fun title). This is the top priority for building a personal connection.
2. **Communication style preference** — Do they prefer a cute/playful tone (with emojis), a formal/professional tone, a concise/minimalist style, or something else?
3. **Media preferences** — What types of media do they primarily care about? (e.g., movies, TV shows, anime, documentaries, etc.)
4. **Quality preferences** — Do they have preferred video quality (4K, 1080p), codecs (H.265, H.264), or subtitle language preferences?
5. **Any other special requests** — Anything else they'd like you to always keep in mind?
**What to collect when useful:**
- Preferred communication style
- Media interests
- Quality / codec / subtitle preferences
- Any standing rules the user wants you to follow
**After the user replies**, you MUST immediately:
1. Use the `write_file` tool to save ALL their preferences to the memory file at: `{memory_file}`
2. Format the memory file in clean Markdown with clear sections (e.g., `## User Profile`, `## Communication Style`, `## Media Preferences`, etc.)
3. The `## User Profile` section MUST include the user's preferred name/nickname at the top
4. Only AFTER saving the preferences, proceed to help with whatever the user originally asked about (if anything)
5. From this point on, always address the user by their preferred name/nickname in conversations
6. You may also create additional `.md` files in the memory directory (`{memory_dir}`) for different topics as needed.
**When the user provides lasting preferences**, you MUST promptly save them to `{memory_file}` using `write_file` or `edit_file`.
**If the user skips the preference questions** and directly asks you to do something:
- Go ahead and help them with their request first
- But still ask about their preferences naturally at the end of the interaction
- Save whatever you learn about them (implicit or explicit) to the memory file
**Example onboarding flow:**
The greeting should introduce yourself, explain this is the first meeting, and ask the above questions in a numbered list. Adapt the tone to your persona defined in the base system prompt.
**Memory format requirements:**
- Use clean Markdown with short sections.
- Record only durable preferences and working rules.
- Do NOT invent personal details or preferred names.
- Do NOT force use of a nickname or personalized greeting.
</memory_onboarding>
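To illustrate the new format requirements, a compliant memory file might look like this (a hypothetical example; the section headings follow the suggestions from the earlier prompt and are not mandated):

## User Profile
- Language: English

## Communication Style
- Concise and professional; no emojis

## Media Preferences
- Mostly TV shows and documentaries; prefers 1080p, H.265, English subtitles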
<memory_guidelines>

View File

@@ -15,9 +15,12 @@ Core Capabilities:
<communication>
{verbose_spec}
- Tone: friendly, concise. Like a knowledgeable friend, not a corporate bot.
- Use emojis sparingly (1-3 per response): greetings, completions, errors.
- Tone: professional, concise, restrained.
- Be direct. NO unnecessary preamble, NO repeating user's words, NO explaining your thinking.
- Prioritize task progress over conversation. Answer only what is necessary to move the task forward.
- Do NOT flatter the user, praise the question, or use overly eager/service-oriented phrases.
- Do NOT use emojis, exclamation marks, cute language, or excessive apology.
- Prefer short declarative sentences. Default to one or two short paragraphs; use lists only when they improve scannability.
- Use Markdown for structured data. Use `inline code` for media titles/paths.
- Include key details (year, rating, resolution) but do NOT over-explain.
- Do not stop for approval on read-only operations. Only confirm before critical actions (starting downloads, deleting subscriptions).
@@ -34,6 +37,7 @@ Core Capabilities:
- NO filler phrases like "Let me help you", "Here are the results", "I found..." — skip all unnecessary preamble.
- NO repeating what user said.
- NO narrating your internal reasoning.
- NO praise, emotional cushioning, or unnecessary politeness padding.
- After task completion: one line summary only.
- When error occurs: brief acknowledgment + suggestion, then move on.
</response_format>

View File

@@ -81,9 +81,7 @@ class MoviePilotTool(BaseTool, metaclass=ABCMeta):
if messages:
merged_message = "\n\n".join(messages)
await self.send_tool_message(merged_message)
else:
# Non-VERBOSE: reset the buffer and update from scratch, keeping message-edit capability
self._stream_handler.reset()
# Non-VERBOSE: do not reset the stream; keep the model thinking text already emitted
else:
# Streaming not enabled: do not send any tool message content
pass

View File

@@ -106,7 +106,7 @@ class AskUserChoiceTool(MoviePilotTool):
):
return f"当前渠道 {channel.value} 不支持按钮选择"
max_per_row = ChannelCapabilityManager.get_max_buttons_per_row(channel)
max_per_row = 1
max_rows = ChannelCapabilityManager.get_max_button_rows(channel)
max_text_length = ChannelCapabilityManager.get_max_button_text_length(channel)
max_options = max_per_row * max_rows

View File

@@ -12,6 +12,7 @@ from anyio import Path as AsyncPath
from app.helper.sites import SitesHelper  # noqa
from fastapi import APIRouter, Body, Depends, HTTPException, Header, Request, Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from app import schemas
from app.chain.mediaserver import MediaServerChain
@@ -29,14 +30,14 @@ from app.db.user_oper import (
get_current_active_superuser_async,
get_current_active_user_async,
)
from app.helper.llm import LLMHelper, LLMTestError, LLMTestTimeout
from app.helper.image import ImageHelper
from app.helper.llm import LLMHelper, LLMTestTimeout
from app.helper.mediaserver import MediaServerHelper
from app.helper.message import MessageHelper
from app.helper.progress import ProgressHelper
from app.helper.rule import RuleHelper
from app.helper.subscribe import SubscribeHelper
from app.helper.system import SystemHelper
from app.helper.image import ImageHelper
from app.log import logger
from app.scheduler import Scheduler
from app.schemas import ConfigChangeEventData
@@ -45,7 +46,6 @@ from app.utils.crypto import HashUtils
from app.utils.http import RequestUtils, AsyncRequestUtils
from app.utils.security import SecurityUtils
from app.utils.url import UrlUtils
from pydantic import BaseModel
from version import APP_VERSION
router = APIRouter()
@@ -57,7 +57,7 @@ class LlmTestRequest(BaseModel):
enabled: Optional[bool] = None
provider: Optional[str] = None
model: Optional[str] = None
disable_thinking: Optional[bool] = None
thinking_level: Optional[str] = None
api_key: Optional[str] = None
base_url: Optional[str] = None
@@ -269,74 +269,6 @@ def _build_nettest_rules() -> list[dict[str, Any]]:
return rules
def _build_llm_test_data(
duration_ms: Optional[int] = None,
provider: Optional[str] = None,
model: Optional[str] = None,
) -> dict[str, Any]:
"""
Build the base response data for the LLM test endpoint.
"""
data = {
"provider": provider if provider is not None else settings.LLM_PROVIDER,
"model": model if model is not None else settings.LLM_MODEL,
}
if duration_ms is not None:
data["duration_ms"] = duration_ms
return data
def _normalize_llm_test_value(
value: Optional[str], *, empty_as_none: bool = False
) -> Optional[str]:
"""
Normalize LLM test fields coming from the frontend.
"""
if value is None:
return None
stripped = value.strip()
if empty_as_none and not stripped:
return None
return stripped
def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[str, Any]:
"""
Freeze the configuration needed for the current LLM test.
Prefer temporary parameters from the frontend; fall back to saved settings when absent, for backward compatibility with older callers.
"""
provider = settings.LLM_PROVIDER
model = settings.LLM_MODEL
disable_thinking = bool(getattr(settings, "LLM_DISABLE_THINKING", False))
api_key = settings.LLM_API_KEY
base_url = settings.LLM_BASE_URL
enabled = bool(settings.AI_AGENT_ENABLE)
if payload:
if payload.enabled is not None:
enabled = bool(payload.enabled)
if payload.provider is not None:
provider = _normalize_llm_test_value(payload.provider) or ""
if payload.model is not None:
model = _normalize_llm_test_value(payload.model) or ""
if payload.disable_thinking is not None:
disable_thinking = bool(payload.disable_thinking)
if payload.api_key is not None:
api_key = _normalize_llm_test_value(payload.api_key, empty_as_none=True)
if payload.base_url is not None:
base_url = _normalize_llm_test_value(payload.base_url, empty_as_none=True)
return {
"enabled": enabled,
"provider": provider,
"model": model,
"disable_thinking": disable_thinking,
"api_key": api_key,
"base_url": base_url,
}
def _sanitize_llm_test_error(message: str, api_key: Optional[str] = None) -> str:
"""
Scrub sensitive fields from error messages to avoid echoing the API key.
@@ -428,12 +360,12 @@ async def _close_nettest_response(response: Any) -> None:
async def fetch_image(
url: str,
proxy: Optional[bool] = None,
use_cache: bool = False,
if_none_match: Optional[str] = None,
cookies: Optional[str | dict] = None,
allowed_domains: Optional[set[str]] = None,
url: str,
proxy: Optional[bool] = None,
use_cache: bool = False,
if_none_match: Optional[str] = None,
cookies: Optional[str | dict] = None,
allowed_domains: Optional[set[str]] = None,
) -> Optional[Response]:
"""
Handle image caching logic, supporting HTTP caching and disk caching
@@ -455,6 +387,7 @@ async def fetch_image(
use_cache=use_cache,
cookies=cookies,
)
if content:
# Check If-None-Match
etag = HashUtils.md5(content)
@@ -467,16 +400,17 @@ async def fetch_image(
media_type=UrlUtils.get_mime_type(url, "image/jpeg"),
headers=headers,
)
return None
@router.get("/img/{proxy}", summary="图片代理")
async def proxy_img(
imgurl: str,
proxy: bool = False,
cache: bool = False,
use_cookies: bool = False,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
imgurl: str,
proxy: bool = False,
cache: bool = False,
use_cookies: bool = False,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
) -> Response:
"""
Image proxy with optional use of the proxy server; supports HTTP caching
@@ -505,9 +439,9 @@ async def proxy_img(
@router.get("/cache/image", summary="图片缓存")
async def cache_img(
url: str,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
url: str,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token),
) -> Response:
"""
Serve a locally cached image file with HTTP caching; if global image caching is enabled, use the disk cache
@@ -601,7 +535,7 @@ async def get_env_setting(_: User = Depends(get_current_active_user_async)):
@router.post("/env", summary="更新系统配置", response_model=schemas.Response)
async def set_env_setting(
env: dict, _: User = Depends(get_current_active_superuser_async)
env: dict, _: User = Depends(get_current_active_superuser_async)
):
"""
Update system environment variables (admin only)
@@ -636,9 +570,9 @@ async def set_env_setting(
@router.get("/progress/{process_type}", summary="实时进度")
async def get_progress(
request: Request,
process_type: str,
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
process_type: str,
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get processing progress in real time; the response format is SSE
@@ -673,9 +607,9 @@ async def get_setting(key: str, _: User = Depends(get_current_active_user_async)
@router.post("/setting/{key}", summary="更新系统设置", response_model=schemas.Response)
async def set_setting(
key: str,
value: Annotated[Union[list, dict, bool, int, str] | None, Body()] = None,
_: User = Depends(get_current_active_superuser_async),
key: str,
value: Annotated[Union[list, dict, bool, int, str] | None, Body()] = None,
_: User = Depends(get_current_active_superuser_async),
):
"""
Update system settings (admin only)
@@ -709,10 +643,10 @@ async def set_setting(
@router.get("/llm-models", summary="获取LLM模型列表", response_model=schemas.Response)
async def get_llm_models(
provider: str,
api_key: str,
base_url: Optional[str] = None,
_: User = Depends(get_current_active_user_async),
provider: str,
api_key: str,
base_url: Optional[str] = None,
_: User = Depends(get_current_active_user_async),
):
"""
Get the list of LLM models
@@ -728,28 +662,33 @@ async def get_llm_models(
@router.post("/llm-test", summary="测试LLM调用", response_model=schemas.Response)
async def llm_test(
payload: Annotated[Optional[LlmTestRequest], Body()] = None,
_: User = Depends(get_current_active_superuser_async),
payload: Annotated[Optional[LlmTestRequest], Body()] = None,
_: User = Depends(get_current_active_superuser_async),
):
"""
Run a minimal LLM call using the supplied configuration or the currently saved configuration.
"""
snapshot = _build_llm_test_snapshot(payload)
data = _build_llm_test_data(
provider=snapshot["provider"],
model=snapshot["model"],
)
if not snapshot["enabled"]:
if not payload:
return schemas.Response(success=False, message="请配置智能助手LLM相关参数后再进行测试")
if not payload.provider or not payload.model:
return schemas.Response(success=False, message="请配置LLM提供商和模型")
data = {
"provider": payload.provider,
"model": payload.model,
}
if not payload.enabled:
return schemas.Response(success=False, message="请先启用智能助手", data=data)
if not snapshot["api_key"]:
if not payload.api_key or not payload.api_key.strip():
return schemas.Response(
success=False,
message="请先配置 LLM API Key",
data=data,
)
if not (snapshot["model"] or "").strip():
if not payload.model or not payload.model.strip():
return schemas.Response(
success=False,
message="请先配置 LLM 模型",
@@ -758,50 +697,36 @@ async def llm_test(
try:
result = await LLMHelper.test_current_settings(
provider=snapshot["provider"],
model=snapshot["model"],
disable_thinking=snapshot["disable_thinking"],
api_key=snapshot["api_key"],
base_url=snapshot["base_url"],
provider=payload.provider,
model=payload.model,
thinking_level=payload.thinking_level,
api_key=payload.api_key,
base_url=payload.base_url,
)
if not result.get("reply_preview"):
return schemas.Response(
success=False,
message="模型响应为空",
data=_build_llm_test_data(
result.get("duration_ms"),
provider=snapshot["provider"],
model=snapshot["model"],
),
message="模型响应为空"
)
return schemas.Response(success=True, data=result)
except (LLMTestTimeout, TimeoutError) as err:
logger.warning(err)
return schemas.Response(
success=False,
message="LLM 调用超时",
data=_build_llm_test_data(
getattr(err, "duration_ms", None),
provider=snapshot["provider"],
model=snapshot["model"],
),
message="LLM 调用超时"
)
except Exception as err:
return schemas.Response(
success=False,
message=_sanitize_llm_test_error(str(err), snapshot["api_key"]),
data=_build_llm_test_data(
getattr(err, "duration_ms", None),
provider=snapshot["provider"],
model=snapshot["model"],
),
message=_sanitize_llm_test_error(str(err), payload.api_key)
)
@router.get("/message", summary="实时消息")
async def get_message(
request: Request,
role: Optional[str] = "system",
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
role: Optional[str] = "system",
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get system messages in real time; the response format is SSE
@@ -824,10 +749,10 @@ async def get_message(
@router.get("/logging", summary="实时日志")
async def get_logging(
request: Request,
length: Optional[int] = 50,
logfile: Optional[str] = "moviepilot.log",
_: schemas.TokenPayload = Depends(verify_resource_token),
request: Request,
length: Optional[int] = 50,
logfile: Optional[str] = "moviepilot.log",
_: schemas.TokenPayload = Depends(verify_resource_token),
):
"""
Get system logs in real time
@@ -838,7 +763,7 @@ async def get_logging(
log_path = base_path / logfile
if not await SecurityUtils.async_is_safe_path(
base_path=base_path, user_path=log_path, allowed_suffixes={".log"}
base_path=base_path, user_path=log_path, allowed_suffixes={".log"}
):
raise HTTPException(status_code=404, detail="Not Found")
@@ -855,7 +780,7 @@ async def get_logging(
# Read historical logs
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as f:
# Optimized read strategy for large files
if file_size > 100 * 1024:
@@ -867,7 +792,7 @@ async def get_logging(
# Find the first complete line
first_newline = content.find("\n")
if first_newline != -1:
content = content[first_newline + 1 :]
content = content[first_newline + 1:]
else:
# 小文件直接读取全部内容
content = await f.read()
@@ -875,7 +800,7 @@ async def get_logging(
# Split into lines and queue them, keeping only non-empty lines
lines = [line.strip() for line in content.splitlines() if line.strip()]
# Keep only the last N lines
for line in lines[-max(length, 50) :]:
for line in lines[-max(length, 50):]:
lines_queue.append(line)
# Emit the historical logs
@@ -884,7 +809,7 @@ async def get_logging(
# Tail new logs in real time
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as f:
# Seek to the end of the file and keep watching for appended content
await f.seek(0, 2)
@@ -923,7 +848,7 @@ async def get_logging(
try:
# Read the file asynchronously with aiofiles
async with aiofiles.open(
log_path, mode="r", encoding="utf-8", errors="ignore"
log_path, mode="r", encoding="utf-8", errors="ignore"
) as file:
text = await file.read()
# Output in reverse order
@@ -955,10 +880,10 @@ async def latest_version(_: schemas.TokenPayload = Depends(verify_token)):
@router.get("/ruletest", summary="过滤规则测试", response_model=schemas.Response)
def ruletest(
title: str,
rulegroup_name: str,
subtitle: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
title: str,
rulegroup_name: str,
subtitle: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
):
"""
Test filter rules. Rule types: 1 - subscribe, 2 - best version, 3 - search
@@ -1013,11 +938,10 @@ async def nettest_targets(_: schemas.TokenPayload = Depends(verify_token)):
@router.get("/nettest", summary="测试网络连通性")
async def nettest(
target_id: Optional[str] = None,
url: Optional[str] = None,
proxy: Optional[bool] = None,
include: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
target_id: Optional[str] = None,
url: Optional[str] = None,
include: Optional[str] = None,
_: schemas.TokenPayload = Depends(verify_token),
):
"""
Test network connectivity to built-in targets.

View File

@@ -505,8 +505,8 @@ class ConfigModel(BaseModel):
LLM_PROVIDER: str = "deepseek"
# LLM model name
LLM_MODEL: str = "deepseek-chat"
# Whether to try to disable the model's thinking/reasoning capability (auto-adapted per provider/model support)
LLM_DISABLE_THINKING: bool = True
# Thinking mode/depth config: off/auto/minimal/low/medium/high/max/xhigh
LLM_THINKING_LEVEL: Optional[str] = 'off'
# Whether the LLM supports image input; when enabled, message images are sent to the model as multimodal input
LLM_SUPPORT_IMAGE_INPUT: bool = True
# LLM API key

View File

@@ -2,9 +2,13 @@
import asyncio
import inspect
import json
import time
from functools import wraps
from typing import Any, List
from langchain_core.messages import AIMessage
from app.core.config import settings
from app.log import logger
@@ -70,21 +74,120 @@ def _get_httpx_proxy_key() -> str:
if "proxy" in params:
return "proxy"
return "proxies"
except Exception:
except Exception as e:
logger.warning(f"检测 httpx 代理参数失败,默认使用 'proxies'{e}")
return "proxies"
def _deepseek_thinking_toggle(extra_body: Any) -> bool | None:
"""
Parse an explicit thinking toggle passed in via DeepSeek extra_body.
"""
if not isinstance(extra_body, dict):
return None
thinking = extra_body.get("thinking")
if not isinstance(thinking, dict):
return None
thinking_type = str(thinking.get("type") or "").strip().lower()
if thinking_type == "enabled":
return True
if thinking_type == "disabled":
return False
return None
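The toggle parser above resolves the explicit extra_body shape to a tri-state value, for example:

# _deepseek_thinking_toggle examples (per the parser above):
#   {"thinking": {"type": "enabled"}}   -> True
#   {"thinking": {"type": "disabled"}}  -> False
#   {"thinking": "on"}, {}, or None     -> None (no explicit toggle)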
def _is_deepseek_thinking_enabled(model_name: str | None, extra_body: Any) -> bool:
"""
Determine whether this DeepSeek call is in thinking mode.
"""
explicit_toggle = _deepseek_thinking_toggle(extra_body)
if explicit_toggle is not None:
return explicit_toggle
normalized_model_name = str(model_name or "").strip().lower()
if normalized_model_name == "deepseek-reasoner":
return True
if normalized_model_name.startswith("deepseek-v4-"):
# DeepSeek V4 enables thinking mode by default unless explicitly disabled.
return True
return False
def _patch_deepseek_reasoning_content_support():
"""
Patch langchain-deepseek's missing reasoning_content round-trip in tool-call scenarios.
DeepSeek thinking mode requires: if an assistant message in the history contains tool_calls,
subsequent requests must carry back that message's top-level reasoning_content.
Some langchain-deepseek versions can read reasoning_content from the response,
but do not write it back into the request payload when replaying message history, causing a 400.
"""
try:
from langchain_deepseek import ChatDeepSeek
except Exception as err:
logger.debug(f"跳过 langchain-deepseek reasoning_content 修补:{err}")
return
if getattr(ChatDeepSeek, "_moviepilot_reasoning_content_patched", False):
return
original_get_request_payload = getattr(ChatDeepSeek, "_get_request_payload", None)
if not callable(original_get_request_payload):
logger.warning("langchain-deepseek 缺少 _get_request_payload无法修补 reasoning_content")
return
@wraps(original_get_request_payload)
def _patched_get_request_payload(self, input_, *, stop=None, **kwargs):
payload = original_get_request_payload(self, input_, stop=stop, **kwargs)
# Resolve original messages so we can extract reasoning_content from
# additional_kwargs. The parent's payload builder does not propagate
# this DeepSeek-specific field.
messages = self._convert_input(input_).to_messages()
for i, message in enumerate(payload["messages"]):
if message["role"] == "tool" and isinstance(message["content"], list):
message["content"] = json.dumps(message["content"])
elif message["role"] == "assistant":
if isinstance(message["content"], list):
# DeepSeek API expects assistant content to be a string,
# not a list. Extract text blocks and join them, or use
# empty string if none exist.
text_parts = [
block.get("text", "")
for block in message["content"]
if isinstance(block, dict) and block.get("type") == "text"
]
message["content"] = "".join(text_parts) if text_parts else ""
# DeepSeek reasoning models require every assistant message to
# carry a reasoning_content field (even when empty). The value
# is stored in AIMessage.additional_kwargs by
# _create_chat_result(); re-inject it into the API payload.
if (
"reasoning_content" not in message
and i < len(messages)
and isinstance(messages[i], AIMessage)
):
message["reasoning_content"] = messages[i].additional_kwargs.get(
"reasoning_content", ""
)
return payload
ChatDeepSeek._get_request_payload = _patched_get_request_payload
ChatDeepSeek._moviepilot_reasoning_content_patched = True
logger.debug("已修补 langchain-deepseek thinking tool-call 的 reasoning_content 回传兼容性")
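For reference, after the patch an assistant history message with tool calls is replayed to DeepSeek roughly in this shape (a sketch inferred from the docstring and patch logic above; the tool_calls layout assumes the usual OpenAI-compatible wire format):

assistant_message = {
    "role": "assistant",
    "content": "",  # DeepSeek expects a string here, never a list of blocks
    "tool_calls": [
        {"id": "call_1", "type": "function",
         "function": {"name": "search", "arguments": "{}"}},
    ],
    # Re-injected from AIMessage.additional_kwargs; falls back to "" when absent.
    "reasoning_content": "model reasoning captured from the earlier response",
}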
class LLMHelper:
"""LLM模型相关辅助功能"""
@staticmethod
def _should_disable_thinking(disable_thinking: bool | None = None) -> bool:
"""
Determine whether this call should try to disable the model's thinking capability.
"""
if disable_thinking is not None:
return bool(disable_thinking)
return bool(getattr(settings, "LLM_DISABLE_THINKING", False))
_SUPPORTED_THINKING_LEVELS = frozenset(
{"off", "auto", "minimal", "low", "medium", "high", "max", "xhigh"}
)
@staticmethod
def _normalize_model_name(model_name: str | None) -> str:
@@ -94,48 +197,164 @@ class LLMHelper:
return (model_name or "").strip().lower()
@classmethod
def _build_disabled_thinking_kwargs(
cls,
provider: str,
model: str | None,
disable_thinking: bool | None = None,
def _normalize_deepseek_reasoning_effort(
cls, thinking_level: str | None = None
) -> str | None:
"""
DeepSeek docs currently recommend high/max; accept common effort aliases for compatibility.
"""
if not thinking_level or thinking_level in {"off", "auto"}:
return None
if thinking_level in {"minimal", "low", "medium", "high"}:
return "high"
if thinking_level in {"max", "xhigh"}:
return "max"
logger.warning(f"忽略不支持的 DeepSeek reasoning_effort 配置: {thinking_level}")
return None
@classmethod
def _normalize_openai_reasoning_effort(
cls, thinking_level: str | None = None
) -> str | None:
"""
OpenAI reasoning_effort supports finer-grained levels; map to the closest equivalent.
"""
if not thinking_level or thinking_level == "auto":
return None
if thinking_level == "off":
return "none"
if thinking_level == "max":
return "xhigh"
return thinking_level
@classmethod
def _build_google_thinking_kwargs(
cls, model_name: str, thinking_level: str
) -> dict[str, Any]:
"""
Generate "disable thinking" parameters per provider/model.
Prefer native fields already supported by the LangChain/OpenAI SDKs; fall back
to extra_body only when a provider explicitly requires a custom request body.
Gemini 3 uses thinking_level; Gemini 2.5 uses thinking_budget
"""
if not cls._should_disable_thinking(disable_thinking):
if not model_name or thinking_level == "auto":
return {}
provider_name = (provider or "").strip().lower()
model_name = cls._normalize_model_name(model)
if not model_name:
return {}
# Moonshot Kimi K2.5/K2.6 require thinking.disabled to be declared explicitly in the request body.
if model_name.startswith(("kimi-k2.5", "kimi-k2.6")):
return {"extra_body": {"thinking": {"type": "disabled"}}}
# OpenAI native reasoning models prefer LangChain's built-in reasoning_effort.
if provider_name == "openai" and model_name.startswith(
("gpt-5", "o1", "o3", "o4")
):
return {"reasoning_effort": "none"}
# Gemini uses the built-in thinking control parameters of google-genai / langchain-google-genai.
if provider_name == "google":
if "gemini-2.5" in model_name:
if "gemini-2.5" in model_name:
if thinking_level == "off":
if "pro" in model_name:
# Gemini 2.5 Pro officially cannot fully disable thinking; fall back to the minimum budget.
return {
"thinking_budget": 128,
"include_thoughts": False,
}
return {
"thinking_budget": 0,
"include_thoughts": False,
}
if "gemini-3" in model_name:
return {
"thinking_level": "minimal",
budget_map = {
"minimal": 512,
"low": 1024,
"medium": 4096,
"high": 8192,
"max": 24576,
"xhigh": 24576,
}
budget = budget_map.get(thinking_level)
return (
{
"thinking_budget": budget,
"include_thoughts": False,
}
if budget is not None
else {}
)
if "gemini-3" in model_name:
level_map = {
"off": "minimal",
"minimal": "minimal",
"low": "low",
"medium": "medium",
"high": "high",
"max": "high",
"xhigh": "high",
}
google_level = level_map.get(thinking_level)
return (
{
"thinking_level": google_level,
"include_thoughts": False,
}
if google_level
else {}
)
return {}
@classmethod
def _build_kimi_thinking_kwargs(
cls, model_name: str, thinking_level: str
) -> dict[str, Any]:
"""
Kimi's current public docs only support a thinking on/off switch, not an explicit depth setting.
"""
if model_name.startswith("kimi-k2-thinking"):
return {}
if thinking_level == "off":
return {"extra_body": {"thinking": {"type": "disabled"}}}
return {}
@classmethod
def _build_thinking_kwargs(
cls,
provider: str,
model: str | None,
thinking_level: str | None = None
) -> dict[str, Any]:
"""
Generate thinking-mode parameters per provider/model.
Prefer native fields already supported by the LangChain/OpenAI SDKs; fall back
to extra_body only when a provider explicitly requires a custom request body.
"""
provider_name = (provider or "").strip().lower()
model_name = cls._normalize_model_name(model)
if provider_name == "deepseek":
if thinking_level == "off":
return {"extra_body": {"thinking": {"type": "disabled"}}}
if thinking_level == "auto":
return {}
kwargs: dict[str, Any] = {"extra_body": {"thinking": {"type": "enabled"}}}
deepseek_effort = cls._normalize_deepseek_reasoning_effort(
thinking_level
)
if deepseek_effort:
kwargs["reasoning_effort"] = deepseek_effort
return kwargs
if model_name.startswith(("kimi-k2.5", "kimi-k2.6", "kimi-k2-thinking")):
return cls._build_kimi_thinking_kwargs(model_name, thinking_level)
if not model_name:
return {}
# OpenAI native reasoning models prefer LangChain's built-in reasoning_effort.
if provider_name == "openai" and model_name.startswith(
("gpt-5", "o1", "o3", "o4")
):
openai_effort = cls._normalize_openai_reasoning_effort(
thinking_level
)
return {"reasoning_effort": openai_effort} if openai_effort else {}
# Gemini uses the built-in thinking control parameters of google-genai / langchain-google-genai.
if provider_name == "google":
return cls._build_google_thinking_kwargs(
model_name, thinking_level
)
return {}
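A few mappings implied by the logic above (the DeepSeek and OpenAI cases are also asserted by the tests later in this diff):

# Expected results of LLMHelper._build_thinking_kwargs(provider, model, thinking_level):
#   ("deepseek", "deepseek-v4-pro", "xhigh")
#     -> {"extra_body": {"thinking": {"type": "enabled"}}, "reasoning_effort": "max"}
#   ("deepseek", "deepseek-chat", "off")
#     -> {"extra_body": {"thinking": {"type": "disabled"}}}
#   ("openai", "gpt-5-mini", "off")       -> {"reasoning_effort": "none"}
#   ("google", "gemini-2.5-flash", "off") -> {"thinking_budget": 0, "include_thoughts": False}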
@@ -148,16 +367,26 @@ class LLMHelper:
@staticmethod
def get_llm(
streaming: bool = False,
provider: str | None = None,
model: str | None = None,
disable_thinking: bool | None = None,
api_key: str | None = None,
base_url: str | None = None,
streaming: bool = False,
provider: str | None = None,
model: str | None = None,
thinking_level: str | None = None,
api_key: str | None = None,
base_url: str | None = None,
):
"""
Get an LLM instance
:param streaming: whether to enable streaming output
:param provider: LLM provider, defaults to the LLM_PROVIDER setting
:param model: model name, defaults to the LLM_MODEL setting
:param thinking_level: thinking mode level; defaults to None (i.e. auto-decide
    whether thinking mode is enabled). Supported levels: "off" (disabled),
    "auto", "minimal", "low", "medium", "high", "max"/"xhigh" (maximum).
    Support and behavior vary by model; see the code for the exact mapping.
    The parameter is ignored for models without thinking support.
:param api_key: API Key, defaults to the LLM_API_KEY setting. Some providers
    (e.g. DeepSeek) may also require base_url.
:param base_url: API Base URL, defaults to the LLM_BASE_URL setting.
:return: LLM instance
"""
provider_name = str(
@@ -166,10 +395,10 @@ class LLMHelper:
model_name = model if model is not None else settings.LLM_MODEL
api_key_value = api_key if api_key is not None else settings.LLM_API_KEY
base_url_value = base_url if base_url is not None else settings.LLM_BASE_URL
thinking_kwargs = LLMHelper._build_disabled_thinking_kwargs(
thinking_kwargs = LLMHelper._build_thinking_kwargs(
provider=provider_name,
model=model_name,
disable_thinking=disable_thinking,
thinking_level=thinking_level
)
if not api_key_value:
@@ -201,9 +430,11 @@ class LLMHelper:
elif provider_name == "deepseek":
from langchain_deepseek import ChatDeepSeek
_patch_deepseek_reasoning_content_support()
model = ChatDeepSeek(
model=model_name,
api_key=api_key_value,
api_base=base_url_value,
max_retries=3,
temperature=settings.LLM_TEMPERATURE,
streaming=streaming,
@@ -231,7 +462,7 @@ class LLMHelper:
else:
model.profile = {
"max_input_tokens": settings.LLM_MAX_CONTEXT_TOKENS
* 1000, # convert to token units
* 1000,  # convert to token units
}
return model
@@ -255,10 +486,10 @@ class LLMHelper:
if isinstance(block, dict) or hasattr(block, "get"):
block_type = block.get("type")
if block.get("thought") or block_type in (
"thinking",
"reasoning_content",
"reasoning",
"thought",
"thinking",
"reasoning_content",
"reasoning",
"thought",
):
continue
if block_type == "text":
@@ -278,13 +509,13 @@ class LLMHelper:
@staticmethod
async def test_current_settings(
prompt: str = "请只回复 OK",
timeout: int = 20,
provider: str | None = None,
model: str | None = None,
disable_thinking: bool | None = None,
api_key: str | None = None,
base_url: str | None = None,
prompt: str = "请只回复 OK",
timeout: int = 20,
provider: str | None = None,
model: str | None = None,
thinking_level: str | None = None,
api_key: str | None = None,
base_url: str | None = None,
) -> dict:
"""
Run a minimal LLM call using the currently saved configuration.
@@ -298,7 +529,7 @@ class LLMHelper:
streaming=False,
provider=provider_name,
model=model_name,
disable_thinking=disable_thinking,
thinking_level=thinking_level,
api_key=api_key_value,
base_url=base_url_value,
)
@@ -326,7 +557,7 @@ class LLMHelper:
return data
def get_models(
self, provider: str, api_key: str, base_url: str = None
self, provider: str, api_key: str, base_url: str = None
) -> List[str]:
"""获取模型列表"""
logger.info(f"获取 {provider} 模型列表...")
@@ -364,7 +595,7 @@ class LLMHelper:
@staticmethod
def _get_openai_compatible_models(
provider: str, api_key: str, base_url: str = None
provider: str, api_key: str, base_url: str = None
) -> List[str]:
"""获取OpenAI兼容模型列表"""
try:

View File

@@ -76,14 +76,14 @@ pympler~=1.1
smbprotocol~=1.15.0
setproctitle~=1.3.6
httpx[socks]~=0.28.1
langchain~=1.2.13
langchain-core~=1.2.20
langchain~=1.2.15
langchain-core~=1.3.2
langchain-community~=0.4.1
langchain-openai~=1.1.11
langchain-google-genai~=4.2.1
langchain-openai~=1.2.1
langchain-google-genai~=4.2.2
langchain-deepseek~=1.0.1
langgraph~=1.1.3
openai~=2.29.0
google-genai~=1.68.0
langgraph~=1.1.9
openai~=2.32.0
google-genai~=1.73.1
ddgs~=9.10.0
websocket-client~=1.8.0

View File

@@ -1063,6 +1063,32 @@ def _prompt_choice(label: str, choices: dict[str, str], default: str) -> str:
print("请输入列表中的可选值。")
def _env_llm_thinking_level_default() -> str:
value = _normalize_choice(_env_default("LLM_THINKING_LEVEL", ""))
alias_map = {
"none": "off",
"disabled": "off",
"disable": "off",
"enabled": "auto",
"enable": "auto",
"default": "auto",
"dynamic": "auto",
}
normalized = alias_map.get(value, value)
if normalized in {
"off",
"auto",
"minimal",
"low",
"medium",
"high",
"max",
"xhigh",
}:
return normalized
return "auto"
def _prompt_path(label: str, *, default: Path, allow_empty: bool = False) -> str:
value = _prompt_text(label, default=str(default), allow_empty=allow_empty)
if not value:
@@ -1476,9 +1502,19 @@ def _collect_agent_config() -> dict[str, Any]:
current_value=read_env_value("LLM_API_KEY"),
required=True,
),
"LLM_DISABLE_THINKING": _prompt_yes_no(
"是否尽量关闭模型思考/推理",
default=_env_bool("LLM_DISABLE_THINKING", False),
"LLM_THINKING_LEVEL": _prompt_choice(
"LLM 思考模式/深度",
choices={
"off": "关闭思考",
"auto": "自动",
"minimal": "最小",
"low": "",
"medium": "",
"high": "",
"max": "极高",
"xhigh": "超高",
},
default=_env_llm_thinking_level_default(),
),
"LLM_SUPPORT_IMAGE_INPUT": _prompt_yes_no(
"是否启用图片输入支持",
@@ -1506,7 +1542,7 @@ def _load_auth_site_definitions_inner() -> dict[str, Any]:
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper # noqa
auth_sites = SitesHelper().get_authsites() or {}
definitions: dict[str, Any] = {}
@@ -1843,7 +1879,7 @@ def _apply_local_system_config_inner(config_payload: dict[str, Any]) -> None:
):
system_config.set(SystemConfigKey.UserSiteAuthParams, site_auth_item)
try:
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper # noqa
status, msg = SitesHelper().check_user(
site_auth_item.get("site"), site_auth_item.get("params")

View File

@@ -0,0 +1,22 @@
import unittest
from app.agent.middleware.memory import MEMORY_ONBOARDING_PROMPT
from app.agent.prompt import prompt_manager
class TestAgentPromptStyle(unittest.TestCase):
def test_agent_prompt_enforces_concise_professional_style(self):
prompt = prompt_manager.get_agent_prompt()
self.assertIn("professional, concise, restrained", prompt)
self.assertIn("Do NOT flatter the user", prompt)
self.assertIn("NO praise, emotional cushioning", prompt)
def test_memory_onboarding_does_not_force_warm_intro(self):
self.assertIn("Do NOT interrupt the current task", MEMORY_ONBOARDING_PROMPT)
self.assertIn("Do NOT proactively greet warmly", MEMORY_ONBOARDING_PROMPT)
self.assertNotIn("greet the user warmly", MEMORY_ONBOARDING_PROMPT)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,144 @@
import importlib.util
import sys
import unittest
from pathlib import Path
from types import ModuleType
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
def _stub_module(name: str, **attrs):
module = sys.modules.get(name)
if module is None:
module = ModuleType(name)
sys.modules[name] = module
for key, value in attrs.items():
setattr(module, key, value)
return module
class _DummyLogger:
def __getattr__(self, _name):
return lambda *args, **kwargs: None
def _build_tool_call(name: str = "search", arguments: str = "{}"):
return [
{
"id": "call_1",
"type": "tool_call",
"name": name,
"args": {},
}
]
class _FakeChatDeepSeek:
def __init__(self, model_name: str, model_kwargs: dict | None = None):
self.model_name = model_name
self.model_kwargs = model_kwargs or {}
def _get_request_payload(self, input_, *, stop=None, **kwargs):
messages = []
for message in input_:
payload_message = {
"role": message.type,
"content": message.content,
}
if message.type == "human":
payload_message["role"] = "user"
elif message.type == "ai":
payload_message["role"] = "assistant"
tool_calls = getattr(message, "tool_calls", None)
if tool_calls:
payload_message["tool_calls"] = tool_calls
elif message.type == "tool":
payload_message["role"] = "tool"
payload_message["tool_call_id"] = message.tool_call_id
messages.append(payload_message)
return {"messages": messages}
_ORIGINAL_GET_REQUEST_PAYLOAD = _FakeChatDeepSeek._get_request_payload
sys.modules.pop("app.helper.llm", None)
_stub_module(
"app.core.config",
settings=ModuleType("settings"),
)
sys.modules["app.core.config"].settings.LLM_PROVIDER = "deepseek"
sys.modules["app.core.config"].settings.LLM_MODEL = "deepseek-v4-pro"
sys.modules["app.core.config"].settings.LLM_API_KEY = "sk-test"
sys.modules["app.core.config"].settings.LLM_BASE_URL = "https://api.deepseek.com"
sys.modules["app.core.config"].settings.LLM_THINKING_LEVEL = None
sys.modules["app.core.config"].settings.LLM_TEMPERATURE = 0.1
sys.modules["app.core.config"].settings.LLM_MAX_CONTEXT_TOKENS = 64
sys.modules["app.core.config"].settings.PROXY_HOST = None
_stub_module("app.log", logger=_DummyLogger())
_stub_module("langchain_deepseek", ChatDeepSeek=_FakeChatDeepSeek)
module_path = Path(__file__).resolve().parents[1] / "app" / "helper" / "llm.py"
spec = importlib.util.spec_from_file_location("test_llm_module_for_deepseek_compat", module_path)
llm_module = importlib.util.module_from_spec(spec)
assert spec and spec.loader
spec.loader.exec_module(llm_module)
class DeepSeekCompatPatchTest(unittest.TestCase):
def setUp(self):
_FakeChatDeepSeek._get_request_payload = _ORIGINAL_GET_REQUEST_PAYLOAD
if hasattr(_FakeChatDeepSeek, "_moviepilot_reasoning_content_patched"):
delattr(_FakeChatDeepSeek, "_moviepilot_reasoning_content_patched")
llm_module._patch_deepseek_reasoning_content_support()
def test_injects_reasoning_content_for_assistant_tool_calls(self):
llm = _FakeChatDeepSeek("deepseek-v4-pro")
messages = [
HumanMessage(content="天气如何?"),
AIMessage(
content="",
tool_calls=_build_tool_call(),
additional_kwargs={"reasoning_content": "先调用天气工具"},
),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertEqual(
payload["messages"][1]["reasoning_content"],
"先调用天气工具",
)
def test_falls_back_to_empty_reasoning_content_when_missing(self):
llm = _FakeChatDeepSeek("deepseek-v4-flash")
messages = [
HumanMessage(content="天气如何?"),
AIMessage(content="", tool_calls=_build_tool_call()),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertIn("reasoning_content", payload["messages"][1])
self.assertEqual(payload["messages"][1]["reasoning_content"], "")
def test_skips_injection_when_thinking_is_disabled(self):
llm = _FakeChatDeepSeek(
"deepseek-v4-pro",
model_kwargs={"extra_body": {"thinking": {"type": "disabled"}}},
)
messages = [
HumanMessage(content="天气如何?"),
AIMessage(
content="",
tool_calls=_build_tool_call(),
additional_kwargs={"reasoning_content": "先调用天气工具"},
),
ToolMessage(content="晴天", tool_call_id="call_1"),
]
payload = llm._get_request_payload(messages)
self.assertNotIn("reasoning_content", payload["messages"][1])

View File

@@ -38,7 +38,7 @@ _stub_module(
LLM_MODEL="global-model",
LLM_API_KEY="global-key",
LLM_BASE_URL="https://global.example.com",
LLM_DISABLE_THINKING=False,
LLM_THINKING_LEVEL=None,
LLM_TEMPERATURE=0.1,
LLM_MAX_CONTEXT_TOKENS=64,
PROXY_HOST=None,
@@ -83,7 +83,9 @@ class LlmHelperTestCallTest(unittest.TestCase):
streaming=False,
provider="deepseek",
model="deepseek-chat",
thinking_level=None,
disable_thinking=None,
reasoning_effort=None,
api_key="sk-test",
base_url="https://api.deepseek.com",
)
@@ -138,7 +140,77 @@ class LlmHelperTestCallTest(unittest.TestCase):
{"thinking": {"type": "disabled"}},
)
def test_get_llm_uses_openai_reasoning_effort_none(self):
def test_get_llm_uses_deepseek_thinking_level_controls(self):
calls = []
patch_calls = []
class _FakeChatDeepSeek:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_deepseek": SimpleNamespace(ChatDeepSeek=_FakeChatDeepSeek)},
), patch.object(
llm_module,
"_patch_deepseek_reasoning_content_support",
side_effect=lambda: patch_calls.append(True),
):
llm_module.LLMHelper.get_llm(
provider="deepseek",
model="deepseek-v4-pro",
thinking_level="xhigh",
api_key="sk-test",
base_url="https://api.deepseek.com",
)
self.assertEqual(len(calls), 1)
self.assertEqual(
calls[0].get("extra_body"),
{"thinking": {"type": "enabled"}},
)
self.assertEqual(patch_calls, [True])
self.assertEqual(calls[0].get("reasoning_effort"), "max")
self.assertEqual(calls[0].get("api_base"), "https://api.deepseek.com")
def test_get_llm_disables_deepseek_thinking_via_thinking_level(self):
calls = []
patch_calls = []
class _FakeChatDeepSeek:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_deepseek": SimpleNamespace(ChatDeepSeek=_FakeChatDeepSeek)},
), patch.object(
llm_module,
"_patch_deepseek_reasoning_content_support",
side_effect=lambda: patch_calls.append(True),
):
llm_module.LLMHelper.get_llm(
provider="deepseek",
model="deepseek-v4-flash",
thinking_level="off",
api_key="sk-test",
base_url="https://proxy.example.com",
)
self.assertEqual(len(calls), 1)
self.assertEqual(
calls[0].get("extra_body"),
{"thinking": {"type": "disabled"}},
)
self.assertEqual(patch_calls, [True])
self.assertIsNone(calls[0].get("reasoning_effort"))
self.assertEqual(calls[0].get("api_base"), "https://proxy.example.com")
def test_get_llm_uses_openai_reasoning_effort_none_for_off(self):
calls = []
class _FakeChatOpenAI:
@@ -154,7 +226,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
llm_module.LLMHelper.get_llm(
provider="openai",
model="gpt-5-mini",
disable_thinking=True,
thinking_level="off",
api_key="sk-test",
base_url="https://api.openai.com/v1",
)
@@ -162,6 +234,30 @@ class LlmHelperTestCallTest(unittest.TestCase):
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("reasoning_effort"), "none")
def test_get_llm_maps_unified_max_to_openai_xhigh(self):
calls = []
class _FakeChatOpenAI:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{"langchain_openai": SimpleNamespace(ChatOpenAI=_FakeChatOpenAI)},
):
llm_module.LLMHelper.get_llm(
provider="openai",
model="gpt-5.4",
thinking_level="max",
api_key="sk-test",
base_url="https://api.openai.com/v1",
)
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("reasoning_effort"), "xhigh")
def test_get_llm_uses_gemini_builtin_thinking_controls(self):
calls = []
@@ -182,7 +278,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
llm_module.LLMHelper.get_llm(
provider="google",
model="gemini-2.5-flash",
disable_thinking=True,
thinking_level="off",
api_key="sk-test",
base_url=None,
)
@@ -191,6 +287,35 @@ class LlmHelperTestCallTest(unittest.TestCase):
self.assertEqual(calls[0].get("thinking_budget"), 0)
self.assertFalse(calls[0].get("include_thoughts"))
def test_get_llm_uses_gemini_3_thinking_level_controls(self):
calls = []
class _FakeChatGoogleGenerativeAI:
def __init__(self, **kwargs):
calls.append(kwargs)
self.model = kwargs["model"]
self.profile = None
with patch.dict(
sys.modules,
{
"langchain_google_genai": SimpleNamespace(
ChatGoogleGenerativeAI=_FakeChatGoogleGenerativeAI
)
},
):
llm_module.LLMHelper.get_llm(
provider="google",
model="gemini-3.1-flash",
thinking_level="xhigh",
api_key="sk-test",
base_url=None,
)
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0].get("thinking_level"), "high")
self.assertFalse(calls[0].get("include_thoughts"))
if __name__ == "__main__":
unittest.main()

View File

@@ -119,6 +119,8 @@ class LlmTestEndpointTest(unittest.TestCase):
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", True), patch.object(
system_endpoint.settings, "LLM_PROVIDER", "deepseek"
), patch.object(system_endpoint.settings, "LLM_MODEL", "deepseek-chat"), patch.object(
system_endpoint.settings, "LLM_THINKING_LEVEL", "max"
), patch.object(
system_endpoint.settings, "LLM_API_KEY", "sk-test"
), patch.object(
system_endpoint.settings, "LLM_BASE_URL", "https://api.deepseek.com"
@@ -133,7 +135,9 @@ class LlmTestEndpointTest(unittest.TestCase):
llm_test_mock.assert_awaited_once_with(
provider="deepseek",
model="deepseek-chat",
disable_thinking=False,
thinking_level="max",
disable_thinking=None,
reasoning_effort=None,
api_key="sk-test",
base_url="https://api.deepseek.com",
)
@@ -156,7 +160,7 @@ class LlmTestEndpointTest(unittest.TestCase):
enabled=True,
provider="openai",
model="gpt-4.1-mini",
disable_thinking=True,
thinking_level="high",
api_key="sk-live",
base_url="https://example.com/v1",
)
@@ -178,7 +182,9 @@ class LlmTestEndpointTest(unittest.TestCase):
llm_test_mock.assert_awaited_once_with(
provider="openai",
model="gpt-4.1-mini",
disable_thinking=True,
thinking_level="high",
disable_thinking=None,
reasoning_effort=None,
api_key="sk-live",
base_url="https://example.com/v1",
)
@@ -186,6 +192,44 @@ class LlmTestEndpointTest(unittest.TestCase):
self.assertEqual(resp.data["provider"], "openai")
self.assertEqual(resp.data["model"], "gpt-4.1-mini")
def test_llm_test_supports_legacy_thinking_payload(self):
llm_test_mock = AsyncMock(
return_value={
"provider": "deepseek",
"model": "deepseek-v4-pro",
"duration_ms": 123,
"reply_preview": "OK",
}
)
payload = system_endpoint.LlmTestRequest(
enabled=True,
provider="deepseek",
model="deepseek-v4-pro",
disable_thinking=False,
reasoning_effort="xhigh",
api_key="sk-live",
base_url="https://api.deepseek.com",
)
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", False), patch.object(
system_endpoint.LLMHelper,
"test_current_settings",
llm_test_mock,
create=True,
):
resp = asyncio.run(system_endpoint.llm_test(payload=payload, _="token"))
llm_test_mock.assert_awaited_once_with(
provider="deepseek",
model="deepseek-v4-pro",
thinking_level=None,
disable_thinking=False,
reasoning_effort="xhigh",
api_key="sk-live",
base_url="https://api.deepseek.com",
)
self.assertTrue(resp.success)
def test_llm_test_rejects_empty_reply(self):
with patch.object(system_endpoint.settings, "AI_AGENT_ENABLE", True), patch.object(
system_endpoint.settings, "LLM_PROVIDER", "deepseek"

View File

@@ -1,2 +1,2 @@
APP_VERSION = 'v2.10.4'
FRONTEND_VERSION = 'v2.10.4'
APP_VERSION = 'v2.10.5'
FRONTEND_VERSION = 'v2.10.5'