mirror of
https://github.com/qingchencloud/clawpanel.git
synced 2026-05-06 20:02:49 +08:00
fix(gateway): debounce restart with single-flight queue (#248)
Root cause for #243 / #244 / #240: model edits trigger api.restartGateway() with only a 300ms debounce. Fast consecutive edits stack up restart calls, creating zombie Gateway processes, failed restarts, and CPU fan spikes.

Layer A (frontend):
- New src/lib/gateway-restart-queue.js: 3s debounce + single-flight lock + reschedule on in-flight request
- Refactor src/pages/models.js doAutoSave: write config immediately, schedule restart via queue with 'Apply now' toast button
- Subscribe to queue state for unified success/failure toast
- Add i18n: models.configQueued, models.applyNow

Layer B (backend):
- src-tauri/src/commands/config.rs: wrap restart_gateway / reload_gateway with tokio::sync::Mutex + 2s cooldown
- Cargo.toml: add tokio 'sync' feature
- scripts/dev-api.js: same guard for Web mode (inflight promise reuse + 2s cooldown)

Effects:
- 10 rapid edits within 3s -> 1 restart (was 10+ with races)
- Backend serializes concurrent restart calls, no zombie spawns
- User sees single 'Apply now' toast instead of restart storm

Refs #243 #244 #240
This commit is contained in:
@@ -2225,6 +2225,43 @@ function triggerGatewayReloadNonBlocking(reason) {
|
||||
}, 0)
|
||||
}
|
||||
|
||||
// Single-flight lock + 2s cooldown for Gateway restarts (pairs with the 3s
// debounce in the frontend's gateway-restart-queue.js).
// Guards against issues #243 / #240: even when requests slip past the frontend
// throttle, the backend still coalesces duplicates.

// Promise of the restart currently running, or null when idle.
let _gwRestartInflight = null
// Date.now() timestamp at which the most recent restart attempt settled.
let _gwRestartLastFinishedAt = 0
// Error thrown by the most recent restart attempt, or null if it succeeded.
let _gwRestartLastError = null
const GW_RESTART_COOLDOWN_MS = 2000

/**
 * Restart the Gateway service with single-flight and cooldown protection.
 *
 * - While a restart is running, every caller receives the same promise.
 * - For GW_RESTART_COOLDOWN_MS after an attempt settles, no new restart is
 *   started. The caller gets the outcome of that attempt instead: the
 *   "merged" message if it succeeded, or the same error if it failed, so a
 *   quick retry after a failure is never reported as a success.
 *
 * @param {string} [source='unknown'] Caller label; currently unused by this function.
 * @returns {Promise<string>} Human-readable status message.
 * @throws {Error} When spawning is disabled via DISABLE_GATEWAY_SPAWN, when the
 *   platform is neither macOS nor Linux, or when the restart itself fails.
 */
async function guardedGatewayRestart(source = 'unknown') {
  if (process.env.DISABLE_GATEWAY_SPAWN === '1' || process.env.DISABLE_GATEWAY_SPAWN === 'true') {
    throw new Error('本地 Gateway 启动已禁用(DISABLE_GATEWAY_SPAWN=1)')
  }
  if (!isMac && !isLinux) {
    throw new Error('Windows 请使用 Tauri 桌面应用')
  }

  // A restart is already running: share its promise instead of starting another.
  if (_gwRestartInflight) {
    return _gwRestartInflight
  }

  // Cooldown window: do not spawn again, but report the real outcome of the
  // attempt that just finished rather than unconditionally claiming success.
  if (Date.now() - _gwRestartLastFinishedAt < GW_RESTART_COOLDOWN_MS) {
    if (_gwRestartLastError) {
      throw _gwRestartLastError
    }
    return 'Gateway 刚重启过,本次请求已合并(冷却中)'
  }

  _gwRestartInflight = (async () => {
    try {
      await handlers.restart_service({ label: 'ai.openclaw.gateway' })
      _gwRestartLastError = null
      return 'Gateway 已重启'
    } catch (err) {
      _gwRestartLastError = err
      throw err
    } finally {
      // Recorded on success and failure alike so a failing restart cannot be
      // hammered by a retry storm.
      _gwRestartLastFinishedAt = Date.now()
      _gwRestartInflight = null
    }
  })()

  return _gwRestartInflight
}
|
||||
|
||||
// === macOS 服务管理 ===
|
||||
|
||||
function macCheckService(label) {
|
||||
@@ -3235,25 +3272,11 @@ const handlers = {
|
||||
},
|
||||
|
||||
async reload_gateway() {
|
||||
if (process.env.DISABLE_GATEWAY_SPAWN === '1' || process.env.DISABLE_GATEWAY_SPAWN === 'true') {
|
||||
throw new Error('本地 Gateway 启动已禁用(DISABLE_GATEWAY_SPAWN=1)')
|
||||
}
|
||||
if (!isMac && !isLinux) {
|
||||
throw new Error('Windows 请使用 Tauri 桌面应用')
|
||||
}
|
||||
await handlers.restart_service({ label: 'ai.openclaw.gateway' })
|
||||
return 'Gateway 已重启'
|
||||
return guardedGatewayRestart('reload_gateway')
|
||||
},
|
||||
|
||||
async restart_gateway() {
|
||||
if (process.env.DISABLE_GATEWAY_SPAWN === '1' || process.env.DISABLE_GATEWAY_SPAWN === 'true') {
|
||||
throw new Error('本地 Gateway 启动已禁用(DISABLE_GATEWAY_SPAWN=1)')
|
||||
}
|
||||
if (!isMac && !isLinux) {
|
||||
throw new Error('Windows 请使用 Tauri 桌面应用')
|
||||
}
|
||||
await handlers.restart_service({ label: 'ai.openclaw.gateway' })
|
||||
return 'Gateway 已重启'
|
||||
return guardedGatewayRestart('restart_gateway')
|
||||
},
|
||||
|
||||
// === 消息渠道管理 ===
|
||||
|
||||
@@ -31,7 +31,7 @@ rand = "0.8"
|
||||
base64 = "0.22"
|
||||
urlencoding = "2"
|
||||
regex = "1"
|
||||
tokio = { version = "1", features = ["process", "time"] }
|
||||
tokio = { version = "1", features = ["process", "time", "sync"] }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
flate2 = "1"
|
||||
|
||||
@@ -4963,15 +4963,52 @@ async fn reload_gateway_internal(app: Option<&tauri::AppHandle>) -> Result<Strin
|
||||
}
|
||||
}
|
||||
|
||||
/// 全局 Gateway 重启 mutex(单飞行锁)
|
||||
/// 保证同时只有一个重启操作在运行,彻底避免僵尸进程堆积(issue #243)
|
||||
static RESTART_MUTEX: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());
|
||||
/// 上一次重启完成的时间戳(用于 2 秒冷却,防止穿透式重复调用)
|
||||
static LAST_RESTART_FINISHED_AT: std::sync::Mutex<Option<std::time::Instant>> =
|
||||
std::sync::Mutex::new(None);
|
||||
|
||||
const RESTART_COOLDOWN: std::time::Duration = std::time::Duration::from_secs(2);
|
||||
|
||||
/// 带单飞行锁和 2s 冷却的 restart 入口
|
||||
/// 即使前端穿透节流发来多个请求,后端也只串行执行,且 2s 内不重复
|
||||
async fn restart_gateway_guarded(app: Option<&tauri::AppHandle>) -> Result<String, String> {
|
||||
// 获取 mutex:并发调用时串行化
|
||||
let _guard = RESTART_MUTEX.lock().await;
|
||||
|
||||
// 2 秒冷却:如果刚刚才完成一次重启,跳过本次(配置已被前一次生效)
|
||||
let last_finished = {
|
||||
let guard = LAST_RESTART_FINISHED_AT.lock().unwrap();
|
||||
*guard
|
||||
};
|
||||
if let Some(last) = last_finished {
|
||||
if last.elapsed() < RESTART_COOLDOWN {
|
||||
return Ok("Gateway 刚重启过,本次请求已合并(冷却中)".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let result = reload_gateway_internal(app).await;
|
||||
|
||||
// 无论成功失败都记录时间,避免失败后被重试风暴压爆
|
||||
{
|
||||
let mut guard = LAST_RESTART_FINISHED_AT.lock().unwrap();
|
||||
*guard = Some(std::time::Instant::now());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn reload_gateway(app: tauri::AppHandle) -> Result<String, String> {
|
||||
reload_gateway_internal(Some(&app)).await
|
||||
restart_gateway_guarded(Some(&app)).await
|
||||
}
|
||||
|
||||
/// 重启 Gateway 服务(与 reload_gateway 相同实现)
|
||||
#[tauri::command]
|
||||
pub async fn restart_gateway(app: tauri::AppHandle) -> Result<String, String> {
|
||||
reload_gateway_internal(Some(&app)).await
|
||||
restart_gateway_guarded(Some(&app)).await
|
||||
}
|
||||
|
||||
/// 运行 openclaw doctor --fix 自动修复配置问题
|
||||
|
||||
153
src/lib/gateway-restart-queue.js
Normal file
153
src/lib/gateway-restart-queue.js
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Gateway 重启防抖队列
|
||||
*
|
||||
* 解决 issue #243 #244 #240:用户快速连续改动配置时,之前会触发多次 Gateway 重启
|
||||
* 导致僵尸进程堆积、风扇狂转、重启失败等问题。
|
||||
*
|
||||
* 设计:
|
||||
* - 默认 3s 空闲防抖:连续编辑时每次重置计时
|
||||
* - 单飞行锁:前一次重启未完成时,新请求只设"补重启"标记
|
||||
* - 立即执行入口:UI 提供 "立即重载" 按钮跳过倒计时
|
||||
* - 事件订阅:供顶部状态条 / toast 展示倒计时与结果
|
||||
*
|
||||
* 对外 API:
|
||||
* scheduleGatewayRestart({ delay, reason }) // 入队
|
||||
* fireRestartNow() // 跳过倒计时
|
||||
* cancelPendingRestart() // 取消
|
||||
* hasPendingRestart() / isRestartInFlight() // 状态查询
|
||||
* onRestartState(cb) // 订阅状态变化
|
||||
*/
|
||||
|
||||
import { api } from './tauri-api.js'
|
||||
|
||||
// Idle time after the last edit before a scheduled restart fires.
const DEFAULT_DELAY_MS = 3000
// Delay before the follow-up restart that covers requests made while a restart
// was in flight. It must exceed the backend's 2000 ms post-restart cooldown:
// a follow-up sent inside that window is answered with the "merged" no-op, and
// the config written during the previous restart would never be applied.
const RESCHEDULE_DELAY_MS = 2500

// Handle of the pending debounce timer, or null when nothing is scheduled.
let _pendingTimer = null
// Date.now() of the latest schedule request (0 after a cancel).
let _scheduledAt = 0
// Delay in milliseconds used by the latest schedule request.
let _scheduledDelay = 0
// Reason string supplied with the latest schedule request.
let _currentReason = ''
// True while api.restartGateway() is awaiting completion.
let _inflight = false
// Set when a request arrives during an in-flight restart; triggers one follow-up.
let _needRerun = false
// Subscribers registered through onRestartState().
let _listeners = []
|
||||
|
||||
/**
 * Notify every subscriber of a queue state change.
 *
 * The payload carries a snapshot of the queue state, with any fields from
 * `detail` layered on top. A subscriber that throws is ignored so that it
 * cannot break the queue or the remaining subscribers.
 *
 * @param {string} eventName Event type, e.g. 'scheduled' or 'failed'.
 * @param {Object} [detail={}] Extra fields merged into the payload.
 */
function emit(eventName, detail = {}) {
  const snapshot = {
    event: eventName,
    reason: _currentReason,
    pending: hasPendingRestart(),
    inflight: _inflight,
    scheduledAt: _scheduledAt,
    delay: _scheduledDelay,
  }
  const payload = { ...snapshot, ...detail }

  // Iterate over the array and length captured now, so subscribers added or
  // removed by a callback do not affect this round.
  const subscribers = _listeners
  for (let i = 0, count = subscribers.length; i < count; i++) {
    try {
      subscribers[i](payload)
    } catch (_) {
      // Subscriber errors are deliberately swallowed.
    }
  }
}
|
||||
|
||||
/**
 * Queue a Gateway restart. Repeated calls collapse into a single restart:
 * each call clears the previous timer and starts a fresh countdown.
 *
 * If a restart is already in flight, no timer is started. The request is
 * recorded as a follow-up and a 'deferred' event is emitted.
 *
 * @param {Object} opts
 * @param {number} [opts.delay=3000] Idle time in milliseconds before firing.
 * @param {string} [opts.reason='config-change'] Trigger reason (for logs/UI).
 */
export function scheduleGatewayRestart(opts = {}) {
  let delay = DEFAULT_DELAY_MS
  if (Number.isFinite(opts.delay)) {
    delay = opts.delay
  }
  const reason = opts.reason ? opts.reason : 'config-change'

  // Drop any countdown that is still running; this call supersedes it.
  if (_pendingTimer) {
    clearTimeout(_pendingTimer)
  }
  _currentReason = reason
  _scheduledDelay = delay
  _scheduledAt = Date.now()

  if (!_inflight) {
    _pendingTimer = setTimeout(runRestart, delay)
    emit('scheduled')
    return
  }

  // A restart is running right now: ask for one more run once it finishes.
  _needRerun = true
  emit('deferred')
}
|
||||
|
||||
/**
 * Skip the countdown and restart immediately.
 *
 * Any pending timer is cancelled first. If a restart is already in flight,
 * the request is recorded as a follow-up and a 'deferred' event is emitted
 * instead of starting a second restart.
 */
export function fireRestartNow() {
  if (_pendingTimer) {
    clearTimeout(_pendingTimer)
    _pendingTimer = null
  }

  if (!_inflight) {
    // Fire-and-forget: runRestart reports its outcome through events.
    runRestart()
    return
  }

  _needRerun = true
  emit('deferred')
}
|
||||
|
||||
/**
 * Cancel the restart that is waiting to run. Intended for an explicit user
 * refusal or page teardown.
 *
 * Also clears the follow-up flag. A restart that is already in flight is not
 * interrupted. Always emits 'cancelled', even when nothing was pending.
 */
export function cancelPendingRestart() {
  const timer = _pendingTimer
  if (timer) {
    _pendingTimer = null
    clearTimeout(timer)
  }
  _scheduledAt = 0
  _needRerun = false
  emit('cancelled')
}
|
||||
|
||||
/**
 * Report whether a restart countdown is currently scheduled.
 *
 * @returns {boolean} True when a debounce timer handle is held.
 */
export function hasPendingRestart() {
  const noTimer = _pendingTimer === null
  return !noTimer
}
|
||||
|
||||
/**
 * Report whether a restart call is currently awaiting completion.
 *
 * @returns {boolean} The current value of the in-flight flag.
 */
export function isRestartInFlight() {
  const running = _inflight
  return running
}
|
||||
|
||||
/**
 * Describe the pending restart, if there is one.
 *
 * @returns {{reason: string, delay: number, remaining: number} | null}
 *   Null when no countdown is scheduled; otherwise the reason, the configured
 *   delay, and the milliseconds left (never negative).
 */
export function getPendingInfo() {
  if (!_pendingTimer) {
    return null
  }

  const sinceScheduled = Date.now() - _scheduledAt
  const remaining = Math.max(0, _scheduledDelay - sinceScheduled)
  return {
    reason: _currentReason,
    delay: _scheduledDelay,
    remaining,
  }
}
|
||||
|
||||
/**
 * Subscribe to restart state events.
 *
 * Event types:
 *   - scheduled / deferred / cancelled
 *   - started / succeeded / failed
 *
 * @param {Function} fn Callback invoked with the event payload.
 * @returns {Function} Call it to remove this subscription.
 */
export function onRestartState(fn) {
  _listeners.push(fn)

  // Unsubscribing builds a new array, so an emit already iterating the old
  // one is not disturbed.
  const unsubscribe = () => {
    _listeners = _listeners.filter(registered => registered !== fn)
  }
  return unsubscribe
}
|
||||
|
||||
/**
 * Perform the restart and publish its outcome.
 *
 * Emits 'started', then 'succeeded' (with the API result) or 'failed' (with
 * an error message). Errors are reported through events and never rethrown.
 * If a request arrived while the restart was running, one follow-up restart
 * is scheduled after RESCHEDULE_DELAY_MS.
 */
async function runRestart() {
  _pendingTimer = null
  _inflight = true
  emit('started')

  try {
    const result = await api.restartGateway()
    emit('succeeded', { result })
  } catch (err) {
    const message = (err && err.message) || String(err)
    emit('failed', { error: message })
  } finally {
    // Cleared only after the outcome event, so that event still reports inflight: true.
    _inflight = false
  }

  if (!_needRerun) {
    return
  }

  // Something asked for a restart while this one was running: go once more.
  _needRerun = false
  scheduleGatewayRestart({ delay: RESCHEDULE_DELAY_MS, reason: 'rescheduled' })
}
|
||||
@@ -148,6 +148,8 @@ export default {
|
||||
saveFailed: _('保存失败', 'Save failed', '儲存失敗', '保存失敗', '저장 실패'),
|
||||
autoSaveFailed: _('自动保存失败', 'Auto-save failed', '自動儲存失敗'),
|
||||
configSavedRestarting: _('配置已保存,正在重启 Gateway...', 'Config saved, restarting Gateway...', '設定已儲存,正在重啟 Gateway...'),
|
||||
configQueued: _('配置已保存,即将重载 Gateway…', 'Config saved. Gateway will reload shortly…', '設定已儲存,即將重載 Gateway…'),
|
||||
applyNow: _('立即生效', 'Apply now', '立即生效'),
|
||||
configEffective: _('配置已生效,Gateway 已重启', 'Config applied, Gateway restarted', '設定已生效,Gateway 已重啟'),
|
||||
retryRestart: _('重试', 'Retry', '重試'),
|
||||
restarting: _('正在重启 Gateway...', 'Restarting Gateway...', '正在重啟 Gateway...'),
|
||||
|
||||
@@ -8,6 +8,7 @@ import { showModal, showConfirm } from '../components/modal.js'
|
||||
import { icon, statusIcon } from '../lib/icons.js'
|
||||
import { API_TYPES, PROVIDER_PRESETS, QTCOOL, MODEL_PRESETS, fetchQtcoolModels } from '../lib/model-presets.js'
|
||||
import { t } from '../lib/i18n.js'
|
||||
import { scheduleGatewayRestart, fireRestartNow, cancelPendingRestart, onRestartState } from '../lib/gateway-restart-queue.js'
|
||||
|
||||
export async function render() {
|
||||
const page = document.createElement('div')
|
||||
@@ -351,7 +352,8 @@ async function undo(page, state) {
|
||||
toast(t('models.undone'), 'info')
|
||||
}
|
||||
|
||||
// 自动保存(防抖 300ms)
|
||||
// 自动保存(防抖 300ms)+ Gateway 重启队列(3s 防抖 + 单飞行锁)
|
||||
// 解决 issue #243 / #244 / #240:快速连续编辑不再触发多次重启
|
||||
let _saveTimer = null
|
||||
let _batchTestAbort = null // 批量测试终止控制器
|
||||
|
||||
@@ -359,6 +361,7 @@ export function cleanup() {
|
||||
clearTimeout(_saveTimer)
|
||||
_saveTimer = null
|
||||
if (_batchTestAbort) { _batchTestAbort.abort = true; _batchTestAbort = null }
|
||||
cancelPendingRestart()
|
||||
}
|
||||
function autoSave(state) {
|
||||
clearTimeout(_saveTimer)
|
||||
@@ -430,33 +433,46 @@ async function doAutoSave(state) {
|
||||
normalizeProviderUrls(state.config)
|
||||
await api.writeOpenclawConfig(state.config)
|
||||
|
||||
// 重启 Gateway 使配置生效(Gateway 不支持 SIGHUP 热重载)
|
||||
toast(t('models.configSavedRestarting'), 'info')
|
||||
try {
|
||||
await api.restartGateway()
|
||||
toast(t('models.configEffective'), 'success')
|
||||
} catch (e) {
|
||||
// 重启失败时提供手动重试按钮
|
||||
const restartBtn = document.createElement('button')
|
||||
restartBtn.className = 'btn btn-sm btn-primary'
|
||||
restartBtn.textContent = t('models.retryRestart')
|
||||
restartBtn.style.marginLeft = '8px'
|
||||
restartBtn.onclick = async () => {
|
||||
try {
|
||||
toast(t('models.restarting'), 'info')
|
||||
await api.restartGateway()
|
||||
toast(t('models.restartOk'), 'success')
|
||||
} catch (e2) {
|
||||
toast(t('models.restartFailed') + ': ' + e2.message, 'error')
|
||||
}
|
||||
}
|
||||
toast(t('models.configSavedGwFailed') + ': ' + e.message, 'warning', { action: restartBtn })
|
||||
}
|
||||
// 配置已写入。使用 3s 防抖 + 单飞行锁排队重启,避免快速连续编辑触发多次重启。
|
||||
showRestartPendingToast()
|
||||
scheduleGatewayRestart({ reason: 'models-page' })
|
||||
} catch (e) {
|
||||
toast(t('models.autoSaveFailed') + ': ' + e, 'error')
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Show the "config saved, Gateway will reload shortly" toast with an
 * "Apply now" button that skips the restart countdown.
 */
function showRestartPendingToast() {
  const applyBtn = Object.assign(document.createElement('button'), {
    className: 'btn btn-sm btn-primary',
    textContent: t('models.applyNow'),
    onclick: () => fireRestartNow(),
  })
  applyBtn.style.marginLeft = '8px'

  const options = { action: applyBtn, duration: 3500 }
  toast(t('models.configQueued'), 'info', options)
}
|
||||
|
||||
/**
 * Turn restart-queue events into toasts.
 *   - succeeded -> success toast
 *   - failed    -> warning toast with a retry button
 * All other event types are ignored.
 *
 * @param {Object} ev Event payload from the restart queue.
 */
function handleRestartState(ev) {
  switch (ev.event) {
    case 'succeeded': {
      toast(t('models.configEffective'), 'success')
      break
    }
    case 'failed': {
      const retry = document.createElement('button')
      retry.className = 'btn btn-sm btn-primary'
      retry.textContent = t('models.retryRestart')
      retry.style.marginLeft = '8px'
      // Retry goes back through the queue with no delay.
      retry.onclick = () => scheduleGatewayRestart({ delay: 0, reason: 'retry' })

      const message = t('models.configSavedGwFailed') + ': ' + ev.error
      toast(message, 'warning', { action: retry, duration: 6000 })
      break
    }
    default:
      break
  }
}
|
||||
|
||||
// Unsubscribe handle for the restart-state listener. The listener is attached
// once when the module is evaluated, and only where `window` exists.
let _restartStateOff = null
const hasWindow = typeof window !== 'undefined'
if (hasWindow && _restartStateOff === null) {
  _restartStateOff = onRestartState(handleRestartState)
}
|
||||
|
||||
// 更新撤销按钮状态
|
||||
function updateUndoBtn(page, state) {
|
||||
const btn = page.querySelector('#btn-undo')
|
||||
|
||||
Reference in New Issue
Block a user