feat(assistant): 识别本地 LLM 服务端常见错误并给出修复指引

用户反馈：切本地 vLLM（Qwen/Qwen3-30B-A3B）后在助手里调用工具报错：

    "auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be set

这是 vLLM 0.6+ 的默认安全策略 —— 必须在启动参数显式开启工具调用才允许客户端在 body 里带 tools 字段。ClawPanel 发的请求符合 OpenAI 规范，不是我们的 bug，但用户面对这个原始报错，字面上看不出是 vLLM 配置问题，也不知道怎么修。

## 解决

新增 src/lib/model-error-diagnosis.js，提供 enhanceModelCallError：保留原始错误文本 + 附加中文修复指引。目前覆盖 5 类常见本地部署错误：

1. **vLLM tool choice 限制**（本次用户实际踩的）
   - 给出 --enable-auto-tool-choice + --tool-call-parser 启动命令
   - Qwen / Mistral / Llama 各系列推荐的 parser 值
   - 建议临时切到"聊天"模式规避
2. **llama.cpp / LM Studio 旧版本不支持 tools**
3. **Ollama 模型不支持 tools**
4. **模型 ID 不存在 / 404**
5. **上下文超长 / token limit**

在 assistant.js 的 5 个错误抛出点统一接入：

- callChatCompletions（OpenAI 聊天模式）
- callResponsesAPI（新 /v1/responses 接口）
- callAnthropicMessages（Claude）
- callGeminiGenerate（Gemini）
- callAIWithTools（工具模式，就是用户踩坑的那条路径）

## 验证

- npm run build 通过
- assistant chunk 从 153.98KB → 156.24KB（gzip +0.86KB），合理
- 所有增强都走 try { parseJSON } 之后，不会影响原有错误处理路径

## 相关

- #Compat-5 系列的一部分（运行时错误诊断）
- 用户场景：vLLM + Qwen3 MoE，切换到本地模型后调工具
- 用户侧实际修复命令：

      vllm serve <model> --enable-auto-tool-choice --tool-call-parser hermes
2026-06-26 10:11:36 +08:00 · 2026-04-20 13:02:05 +08:00
parent e39233f2c1
commit 7c63438c0e
2 changed files with 126 additions and 5 deletions
--- a/src/lib/model-error-diagnosis.js
+++ b/src/lib/model-error-diagnosis.js
@@ -0,0 +1,114 @@
+/**
+ * 模型调用运行时错误诊断
+ *
+ * 针对常见本地部署场景（vLLM / llama.cpp / Ollama / LM Studio 等）
+ * 把晦涩的服务端报错转成用户可操作的修复指引。
+ *
+ * 与 error-diagnosis.js 的区别：
+ * - error-diagnosis.js 针对 npm install / upgrade 等安装期错误
+ * - 本文件针对模型调用运行时错误（400/422/不支持 tools 等）
+ */
+
+/**
+ * Recognize a model-call error and enrich its message: the original text is kept and a diagnosis plus fix hints (written in Chinese) are appended.
+ *
+ * @param {string|Error|unknown} err - The original error (a string or an Error)
+ * @returns {string} The enriched message (either the original text unchanged, or the original text followed by fix guidance)
+ */
+export function enhanceModelCallError(err) {
+  const msg = typeof err === 'string' ? err : (err?.message || String(err)) // non-strings: use .message, else String(err)
+  const s = msg.toLowerCase() // lowercased copy so the substring checks below are case-insensitive
+
+  // ── vLLM: tool calling requires server startup flags ──
+  // Typical error messages:
+  //   `"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be set`
+  //   `--enable-auto-tool-choice must be set`
+  if (
+    s.includes('enable-auto-tool-choice') ||
+    s.includes('tool-call-parser') ||
+    (s.includes('tool choice') && s.includes('requires'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 这是 vLLM 服务端配置限制（不是 ClawPanel / OpenClaw 的 bug）：\n' +
+      'vLLM 默认禁用工具调用，必须在启动时显式开启。请以如下方式重启 vLLM：\n\n' +
+      '  vllm serve <your-model> \\\n' +
+      '    --enable-auto-tool-choice \\\n' +
+      '    --tool-call-parser hermes\n\n' +
+      '不同模型系列建议的 parser：\n' +
+      '  • Qwen2.5 / Qwen3 / Hermes 系列 → --tool-call-parser hermes\n' +
+      '  • Mistral / Mixtral 系列 → --tool-call-parser mistral\n' +
+      '  • Llama 3 / 3.1 / 3.2 系列 → --tool-call-parser llama3_json\n\n' +
+      '或者在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── llama.cpp / LM Studio: older versions do not support tool calling ──
+  if (
+    (s.includes('grammar') && s.includes('tools')) ||
+    (s.includes('llama') && s.includes('tools') && s.includes('not supported'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 当前 llama.cpp / LM Studio 版本可能不支持原生工具调用。\n' +
+      '请升级到支持 --chat-template-kwargs 的新版本，\n' +
+      '或在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── Ollama: this model does not support tools ──
+  // Typical error: `registry.ollama.ai/library/llama2:latest does not support tools`
+  if (
+    s.includes('does not support tools') ||
+    (s.includes('model') && s.includes('does not support') && s.includes('tool'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 当前 Ollama 模型不支持工具调用。\n' +
+      '请换成支持 tools 的模型，推荐：\n' +
+      '  • qwen2.5（各 size）\n' +
+      '  • llama3.1 / llama3.2\n' +
+      '  • mistral-nemo / mixtral\n\n' +
+      '或在助手右上角切换到「聊天」模式（不带工具）临时规避。'
+    )
+  }
+
+  // ── Model ID does not exist / 404 ──
+  // Limit false positives: match only when the text contains both "model" and an explicit not-found signal
+  if (
+    s.includes('model') &&
+    (s.includes('not found') || s.includes('does not exist') || s.includes('no such model'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 服务端找不到指定的模型 ID。\n' +
+      '请到模型配置页确认：\n' +
+      '  1. 模型 ID 是否与服务端实际加载的一致（大小写敏感）\n' +
+      '  2. 服务端是否已加载该模型（vLLM/Ollama 都需要预加载）'
+    )
+  }
+
+  // ── Context too long ──
+  if (
+    s.includes('context length') ||
+    s.includes('maximum context') ||
+    s.includes('token limit') ||
+    (s.includes('too many tokens') && s.includes('context'))
+  ) {
+    return (
+      msg +
+      '\n\n' +
+      '💡 消息长度超过模型的上下文窗口。\n' +
+      '可以：\n' +
+      '  1. 在助手里点「新会话」开启新对话\n' +
+      '  2. 换一个更大窗口的模型\n' +
+      '  3. vLLM 启动时用 --max-model-len 指定更大窗口'
+    )
+  }
+
+  return msg // no known pattern matched: return the message text unchanged
+}
--- a/src/pages/assistant.js
+++ b/src/pages/assistant.js
@@ -12,6 +12,7 @@ import { icon, statusIcon } from '../lib/icons.js'
 import { QTCOOL, PROVIDER_PRESETS, API_TYPES as SHARED_API_TYPES, fetchQtcoolModels } from '../lib/model-presets.js'
 import { t } from '../lib/i18n.js'
 import { getActiveEngineId } from '../lib/engine-manager.js'
+import { enhanceModelCallError } from '../lib/model-error-diagnosis.js'

 // ── 常量 ──
 const STORAGE_KEY = 'clawpanel-assistant'
@@ -1910,7 +1911,8 @@ async function callChatCompletions(base, messages, onChunk) {
    } catch {
      if (errText) errMsg += `: ${errText.slice(0, 200)}`
    }
-    throw new Error(errMsg)
+    // #Compat-5: 识别 vLLM/Ollama 等本地服务端的常见拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
  }

  // 检测响应是否为 SSE 流式
@@ -2004,7 +2006,8 @@ async function callResponsesAPI(base, messages, onChunk) {
    } catch {
      if (errText) errMsg += `: ${errText.slice(0, 200)}`
    }
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
  }

  await readSSEStream(resp, (json) => {
@@ -2063,7 +2066,8 @@ async function callAnthropicMessages(base, messages, onChunk) {
    } catch {
      if (errText) errMsg += `: ${errText.slice(0, 200)}`
    }
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
  }

  _lastDebugInfo.streaming = true
@@ -2131,7 +2135,8 @@ async function callGeminiGenerate(base, messages, onChunk) {
    const errText = await resp.text().catch(() => '')
    let errMsg = `API 错误 ${resp.status}`
    try { errMsg = JSON.parse(errText).error?.message || errMsg } catch {}
-    throw new Error(errMsg)
+    // #Compat-5: 识别本地服务端拒绝消息，附加修复指引
+    throw new Error(enhanceModelCallError(errMsg))
  }

  _lastDebugInfo.streaming = true
@@ -2580,7 +2585,9 @@ async function callAIWithTools(messages, onStatus, onToolProgress, onChunk) {
      const errText = await resp.text().catch(() => '')
      let errMsg = `API 错误 ${resp.status}`
      try { errMsg = JSON.parse(errText).error?.message || errMsg } catch {}
-      throw new Error(errMsg)
+      // #Compat-5: callAIWithTools 场景下 tools 带进 body 最容易踩 vLLM tool choice 限制，
+      // 识别并给出启动参数指引，避免用户一脸懵
+      throw new Error(enhanceModelCallError(errMsg))
    }

    // 流式累积状态