From 7c63438c0ea08510484ef2b3caaee36c61662f7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=B4=E5=A4=A9?= <keh5@vip.qq.com>
Date: Mon, 20 Apr 2026 13:02:05 +0800
Subject: [PATCH] =?UTF-8?q?feat(assistant):=20=E8=AF=86=E5=88=AB=E6=9C=AC?=
 =?UTF-8?q?=E5=9C=B0=20LLM=20=E6=9C=8D=E5=8A=A1=E7=AB=AF=E5=B8=B8=E8=A7=81?=
 =?UTF-8?q?=E9=94=99=E8=AF=AF=E5=B9=B6=E7=BB=99=E5=87=BA=E4=BF=AE=E5=A4=8D?=
 =?UTF-8?q?=E6=8C=87=E5=BC=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

用户反馈：切本地 vLLM（Qwen/Qwen3-30B-A3B）后在助手里调用工具报错：
  "auto" tool choice requires --enable-auto-tool-choice and
  --tool-call-parser to be set

这是 vLLM 0.6+ 的默认安全策略 —— 必须在启动参数显式开启工具调用
才允许客户端在 body 里带 tools 字段。ClawPanel 发的请求符合 OpenAI
规范，不是我们的 bug；但用户面对这个原始报错，从字面上看不出是 vLLM
配置问题，也不知道该怎么修。

## 解决

新增 src/lib/model-error-diagnosis.js，提供 enhanceModelCallError：
保留原始错误文本 + 附加中文修复指引。目前覆盖 5 类常见本地部署错误：

1. **vLLM tool choice 限制**（本次用户实际踩的）
   - 给出 --enable-auto-tool-choice + --tool-call-parser 启动命令
   - Qwen / Mistral / Llama 各系列推荐的 parser 值
   - 建议临时切到"聊天"模式规避

2. **llama.cpp / LM Studio 旧版本不支持 tools**
3. **Ollama 模型不支持 tools**
4. **模型 ID 不存在 / 404**
5. **上下文超长 / token limit**

在 assistant.js 的 5 个错误抛出点统一接入：
- callChatCompletions（OpenAI 聊天模式）
- callResponsesAPI（新 /v1/responses 接口）
- callAnthropicMessages（Claude）
- callGeminiGenerate（Gemini）
- callAIWithTools（工具模式，就是用户踩坑的那条路径）

## 验证

- npm run build 通过
- assistant chunk 从 153.98KB → 156.24KB（gzip +0.86KB），合理
- 所有增强都发生在 try { JSON.parse(...) } catch 解析错误体之后，不会影响原有错误处理路径

## 相关

- #Compat-5 系列的一部分（运行时错误诊断）
- 用户场景：vLLM + Qwen3 MoE，切换到本地模型后调工具
- 用户侧实际修复命令：
  vllm serve <model> --enable-auto-tool-choice --tool-call-parser hermes
---
 src/lib/model-error-diagnosis.js | 114 +++++++++++++++++++++++++++++++
 src/pages/assistant.js           |  17 +++--
 2 files changed, 126 insertions(+), 5 deletions(-)
 create mode 100644 src/lib/model-error-diagnosis.js
diff --git a/src/lib/model-error-diagnosis.js b/src/lib/model-error-diagnosis.js
new file mode 100644
index 0000000..d14fe6c
--- /dev/null
+++ b/src/lib/model-error-diagnosis.js
@@ -0,0 +1,114 @@
+/**
+ * Runtime error diagnosis for model calls.
+ *
+ * Targets common local-deployment backends (vLLM / llama.cpp / Ollama / LM Studio, etc.)
+ * and turns cryptic server-side error text into remediation hints the user can act on.
+ *
+ * How this differs from error-diagnosis.js:
+ * - error-diagnosis.js covers install-time errors (npm install / upgrade, etc.)
+ * - this file covers runtime model-call errors (400/422, tools not supported, etc.)
+ */
+
+/**
+ * Recognize known model-call errors and return the original text with a Chinese-language diagnosis and fix hint appended.
+ *
+ * @param {string|Error|unknown} err - Raw error: a string is used as-is; otherwise `err.message`, falling back to `String(err)` when that is falsy.
+ * @returns {string} The message unchanged when no pattern matches; otherwise the message, a blank line, then the hint. Checks are case-insensitive substring tests and the first matching category wins.
+ */
+export function enhanceModelCallError(err) {
+  const msg = typeof err === 'string' ? err : (err?.message || String(err))
+  const s = msg.toLowerCase()
+
+  // ── vLLM: tool calling requires server launch flags ──
+  // Typical error messages:
+  //   `"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be set`
+  //   `--enable-auto-tool-choice must be set`
+  if (
+    s.includes('enable-auto-tool-choice') ||
+    s.includes('tool-call-parser') ||
+    (s.includes('tool choice') && s.includes('requires'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 这是 vLLM 服务端配置限制（不是 ClawPanel / OpenClaw 的 bug）：\n' +
+      'vLLM 默认禁用工具调用，必须在启动时显式开启。请以如下方式重启 vLLM：\n\n' +
+      '  vllm serve <your-model> \\\n' +
+      '    --enable-auto-tool-choice \\\n' +
+      '    --tool-call-parser hermes\n\n' +
+      '不同模型系列建议的 parser：\n' +
+      '  • Qwen2.5 / Qwen3 / Hermes 系列 → --tool-call-parser hermes\n' +
+      '  • Mistral / Mixtral 系列 → --tool-call-parser mistral\n' +
+      '  • Llama 3 / 3.1 / 3.2 系列 → --tool-call-parser llama3_json\n\n' +
+      '或者在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── llama.cpp / LM Studio: older versions do not support tool calling ──
+  if (
+    (s.includes('grammar') && s.includes('tools')) ||
+    (s.includes('llama') && s.includes('tools') && s.includes('not supported'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 当前 llama.cpp / LM Studio 版本可能不支持原生工具调用。\n' +
+      '请升级到支持 --chat-template-kwargs 的新版本，\n' +
+      '或在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── Ollama: the selected model does not support tools ──
+  // Typical error: `registry.ollama.ai/library/llama2:latest does not support tools`
+  if (
+    s.includes('does not support tools') ||
+    (s.includes('model') && s.includes('does not support') && s.includes('tool'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 当前 Ollama 模型不支持工具调用。\n' +
+      '请换成支持 tools 的模型，推荐：\n' +
+      '  • qwen2.5（各 size）\n' +
+      '  • llama3.1 / llama3.2\n' +
+      '  • mistral-nemo / mixtral\n\n' +
+      '或在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── Model ID does not exist / 404 ──
+  // Limit false positives: match only when the text contains both "model" and an explicit not-found signal
+  if (
+    s.includes('model') &&
+    (s.includes('not found') || s.includes('does not exist') || s.includes('no such model'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 服务端找不到指定的模型 ID。\n' +
+      '请到模型配置页确认：\n' +
+      '  1. 模型 ID 是否与服务端实际加载的一致（大小写敏感）\n' +
+      '  2. 服务端是否已加载该模型（vLLM/Ollama 都需要预加载）'
+    )
+  }
+
+  // ── Context too long ──
+  if (
+    s.includes('context length') ||
+    s.includes('maximum context') ||
+    s.includes('token limit') ||
+    (s.includes('too many tokens') && s.includes('context'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 消息长度超过模型的上下文窗口。\n' +
+      '可以：\n' +
+      '  1. 在助手里点「新会话」开启新对话\n' +
+      '  2. 换一个更大窗口的模型\n' +
+      '  3. vLLM 启动时用 --max-model-len 指定更大窗口'
+    )
+  }
+
+  return msg
+}
diff --git a/src/pages/assistant.js b/src/pages/assistant.js
index 8889222..52c0f99 100644
--- a/src/pages/assistant.js
+++ b/src/pages/assistant.js
@@ -12,6 +12,7 @@ import { icon, statusIcon } from '../lib/icons.js'
 import { QTCOOL, PROVIDER_PRESETS, API_TYPES as SHARED_API_TYPES, fetchQtcoolModels } from '../lib/model-presets.js'
 import { t } from '../lib/i18n.js'
 import { getActiveEngineId } from '../lib/engine-manager.js'
+import { enhanceModelCallError } from '../lib/model-error-diagnosis.js'
 
 // ── 常量 ──
 const STORAGE_KEY = 'clawpanel-assistant'
@@ -1910,7 +1911,8 @@ async function callChatCompletions(base, messages, onChunk) {
     } catch {
       if (errText) errMsg += `: ${errText.slice(0, 200)}`
     }
-    throw new Error(errMsg)
+    // #Compat-5: 识别 vLLM/Ollama 等本地服务端的常见拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
   }
 
   // 检测响应是否为 SSE 流式
@@ -2004,7 +2006,8 @@ async function callResponsesAPI(base, messages, onChunk) {
     } catch {
       if (errText) errMsg += `: ${errText.slice(0, 200)}`
     }
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
   }
 
   await readSSEStream(resp, (json) => {
@@ -2063,7 +2066,8 @@ async function callAnthropicMessages(base, messages, onChunk) {
     } catch {
       if (errText) errMsg += `: ${errText.slice(0, 200)}`
     }
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
   }
 
   _lastDebugInfo.streaming = true
@@ -2131,7 +2135,8 @@ async function callGeminiGenerate(base, messages, onChunk) {
     const errText = await resp.text().catch(() => '')
     let errMsg = `API 错误 ${resp.status}`
     try { errMsg = JSON.parse(errText).error?.message || errMsg } catch {}
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
   }
 
   _lastDebugInfo.streaming = true
@@ -2580,7 +2585,9 @@ async function callAIWithTools(messages, onStatus, onToolProgress, onChunk) {
       const errText = await resp.text().catch(() => '')
       let errMsg = `API 错误 ${resp.status}`
       try { errMsg = JSON.parse(errText).error?.message || errMsg } catch {}
-      throw new Error(errMsg)
+      // #Compat-5: callAIWithTools 场景下 tools 带进 body 最容易踩 vLLM tool choice 限制，
+      // 识别并给出启动参数指引，避免用户一脸懵
+      throw new Error(enhanceModelCallError(errMsg))
     }
 
     // 流式累积状态