feat(guardian): 强化Gateway守护与Windows启停安全

This commit is contained in:
晴天
2026-03-10 00:28:09 +08:00
parent 743af933bd
commit f32bce12cb
6 changed files with 720 additions and 127 deletions

View File

@@ -10,6 +10,23 @@ use std::process::Command;
use crate::models::types::VersionInfo;
/// RAII guard that keeps the backend guardian paused while a maintenance
/// operation (upgrade, install, uninstall) is in progress.
///
/// Creating the guard calls `guardian_pause`; dropping it calls
/// `guardian_resume` with the same reason, so the guardian is resumed on every
/// exit path of the enclosing function, including early returns.
#[must_use = "the guardian is resumed as soon as this guard is dropped; bind it to a named variable"]
struct GuardianPause {
    /// Reason passed to the guardian on pause and again on resume.
    reason: &'static str,
}

impl GuardianPause {
    /// Pauses the backend guardian and returns a guard that resumes it on drop.
    fn new(reason: &'static str) -> Self {
        crate::commands::service::guardian_pause(reason);
        Self { reason }
    }
}

impl Drop for GuardianPause {
    /// Resumes the backend guardian when the guard goes out of scope.
    fn drop(&mut self) {
        crate::commands::service::guardian_resume(self.reason);
    }
}
/// 预设 npm 源列表
const DEFAULT_REGISTRY: &str = "https://registry.npmmirror.com";
@@ -548,6 +565,7 @@ pub async fn upgrade_openclaw(
use std::io::{BufRead, BufReader};
use std::process::Stdio;
use tauri::Emitter;
let _guardian_pause = GuardianPause::new("upgrade");
let current_source = detect_installed_source();
let pkg_name = npm_package_name(&source);
@@ -715,6 +733,8 @@ pub async fn uninstall_openclaw(
use std::io::{BufRead, BufReader};
use std::process::Stdio;
use tauri::Emitter;
let _guardian_pause = GuardianPause::new("uninstall openclaw");
crate::commands::service::guardian_mark_manual_stop();
let source = detect_installed_source();
let pkg = npm_package_name(&source);
@@ -1362,6 +1382,7 @@ pub async fn list_remote_models(base_url: String, api_key: String) -> Result<Vec
#[tauri::command]
pub async fn install_gateway() -> Result<String, String> {
use crate::utils::openclaw_command_async;
let _guardian_pause = GuardianPause::new("install gateway");
// 先检测 openclaw CLI 是否可用
let cli_check = openclaw_command_async().arg("--version").output().await;
match cli_check {
@@ -1394,6 +1415,8 @@ pub async fn install_gateway() -> Result<String, String> {
/// Linux: pkill
#[tauri::command]
pub fn uninstall_gateway() -> Result<String, String> {
let _guardian_pause = GuardianPause::new("uninstall gateway");
crate::commands::service::guardian_mark_manual_stop();
#[cfg(target_os = "macos")]
{
let uid = get_uid()?;
@@ -1425,7 +1448,6 @@ pub fn uninstall_gateway() -> Result<String, String> {
.args(["-f", "openclaw.*gateway"])
.output();
}
Ok("Gateway 服务已卸载".to_string())
}

View File

@@ -1,9 +1,14 @@
//! Service management commands.
//! macOS: launchctl + LaunchAgents plist
//! Windows: openclaw CLI + process detection
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
use crate::models::types::ServiceStatus;
use serde::Serialize;
use tauri::Emitter;
/// OpenClaw 官方服务的友好名称映射
fn description_map() -> HashMap<&'static str, &'static str> {
@@ -13,6 +18,334 @@ fn description_map() -> HashMap<&'static str, &'static str> {
])
}
/// Heuristically decides whether a process command line belongs to the
/// OpenClaw Gateway.
///
/// The comparison is ASCII-case-insensitive and requires both the `openclaw`
/// and the `gateway` marker to appear somewhere in the command line.
fn looks_like_gateway_command_line(command_line: &str) -> bool {
    let lowered = command_line.to_ascii_lowercase();
    ["openclaw", "gateway"]
        .iter()
        .all(|marker| lowered.contains(*marker))
}
/// Extracts the PIDs of processes listening on `port` from `netstat -ano`
/// output.
///
/// Each candidate row is expected to have at least five whitespace-separated
/// columns: protocol, local address, foreign address, state, PID. A row counts
/// as a listening socket when either
///
/// * it carries the `LISTENING` / `侦听` state label, or
/// * it is a TCP row whose foreign address has port `0` (e.g. `0.0.0.0:0`,
///   `[::]:0`).
///
/// The second rule makes detection independent of the console language: the
/// state label is localized, and callers decode the output lossily, so a
/// non-UTF-8 code page turns the label into replacement characters.
///
/// The local address must end with `:<port>`, so port `1878` never matches
/// `:18789`. Returns the PIDs sorted ascending, without duplicates and
/// excluding PID 0.
fn parse_listening_pids_from_netstat(stdout: &str, port: u16) -> Vec<u32> {
    let port_suffix = format!(":{port}");

    let mut pids: Vec<u32> = stdout
        .lines()
        .filter_map(|line| {
            let trimmed = line.trim();
            let parts: Vec<&str> = trimmed.split_whitespace().collect();
            // UDP rows and header lines have fewer than five columns.
            if parts.len() < 5 {
                return None;
            }

            let labelled_listening = trimmed.contains("LISTENING") || trimmed.contains("侦听");
            let is_tcp = parts[0]
                .get(..3)
                .map_or(false, |proto| proto.eq_ignore_ascii_case("TCP"));
            // Only a listening socket has a foreign endpoint with port 0.
            let wildcard_peer = is_tcp && parts[2].ends_with(":0");
            if !(labelled_listening || wildcard_peer) {
                return None;
            }

            if !parts[1].ends_with(&port_suffix) {
                return None;
            }

            parts[4].parse::<u32>().ok().filter(|pid| *pid > 0)
        })
        .collect();

    // The same PID usually appears once per address family (IPv4 and IPv6).
    pids.sort_unstable();
    pids.dedup();
    pids
}
/// Pause between two consecutive guardian polling ticks.
const GUARDIAN_INTERVAL: Duration = Duration::from_secs(15);
/// Minimum time that must pass after an automatic restart before the guardian
/// attempts another one.
const GUARDIAN_RESTART_COOLDOWN: Duration = Duration::from_secs(60);
/// Continuous running time after which the automatic restart counter is
/// cleared.
const GUARDIAN_STABLE_WINDOW: Duration = Duration::from_secs(120);
/// Number of automatic restarts allowed before the guardian gives up.
const GUARDIAN_MAX_AUTO_RESTART: u32 = 3;
/// Mutable bookkeeping shared between the guardian loop and the service
/// commands.
#[derive(Debug, Default)]
struct GuardianRuntimeState {
// Running state observed by the previous tick; `None` until the first tick.
last_seen_running: Option<bool>,
// Instant at which the current run was first observed; `None` while stopped.
running_since: Option<Instant>,
// Automatic restarts performed since the counter was last cleared.
auto_restart_count: u32,
// Instant of the most recent automatic restart attempt, used for the cooldown.
last_restart_time: Option<Instant>,
// Set after a user-initiated stop; suppresses automatic restarts.
manual_hold: bool,
// `Some(reason)` while the guardian is paused by a maintenance operation.
pause_reason: Option<String>,
// Set once the restart limit is reached; suppresses further automatic restarts.
give_up: bool,
}
/// Guardian state snapshot returned to the frontend by `guardian_status`
/// (serialized with camelCase keys).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GuardianStatus {
// Always `true` in snapshots built by `guardian_snapshot`.
pub backend_managed: bool,
// `true` while a pause reason is set.
pub paused: bool,
// `true` after a user-initiated stop.
pub manual_hold: bool,
// `true` once the guardian has stopped restarting automatically.
pub give_up: bool,
// Current value of the automatic restart counter.
pub auto_restart_count: u32,
}
/// Payload of the `guardian-event` event emitted to the frontend
/// (serialized with camelCase keys).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct GuardianEventPayload {
// Event discriminator; the only value emitted in this file is "give_up".
kind: String,
// Restart count reported alongside the event.
auto_restart_count: u32,
// Human-readable description of the event.
message: String,
}
/// Lazily initialized, process-wide guardian state (see `guardian_state`).
static GUARDIAN_STATE: OnceLock<Arc<Mutex<GuardianRuntimeState>>> = OnceLock::new();
/// Set once the guardian loop has been spawned, so it is only started once.
static GUARDIAN_STARTED: AtomicBool = AtomicBool::new(false);
/// Returns the process-wide guardian state, creating it with default values
/// on first use.
fn guardian_state() -> &'static Arc<Mutex<GuardianRuntimeState>> {
    GUARDIAN_STATE.get_or_init(Arc::default)
}
/// Appends a timestamped line to `logs/guardian.log` inside the OpenClaw
/// directory.
///
/// Logging is best-effort: failures to create the directory, open the file or
/// write the line are silently ignored.
fn guardian_log(message: &str) {
    use std::io::Write;

    let logs_root = crate::commands::openclaw_dir().join("logs");
    let _ = std::fs::create_dir_all(&logs_root);
    let log_file = logs_root.join("guardian.log");

    let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
    let entry = format!("[{}] {}\n", timestamp, message);

    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(log_file);
    if let Ok(mut file) = opened {
        let _ = file.write_all(entry.as_bytes());
    }
}
/// Builds a serializable snapshot of the current guardian state.
///
/// Panics if the guardian state mutex is poisoned.
fn guardian_snapshot() -> GuardianStatus {
    let guard = guardian_state().lock().unwrap();
    let paused = guard.pause_reason.is_some();
    let (manual_hold, give_up, auto_restart_count) =
        (guard.manual_hold, guard.give_up, guard.auto_restart_count);

    GuardianStatus {
        backend_managed: true,
        paused,
        manual_hold,
        give_up,
        auto_restart_count,
    }
}
/// Records that the user stopped the Gateway on purpose.
///
/// Puts the guardian into manual hold (no automatic restarts) and clears the
/// give-up flag and all restart bookkeeping. Panics if the guardian state
/// mutex is poisoned.
pub(crate) fn guardian_mark_manual_stop() {
    let mut guard = guardian_state().lock().unwrap();
    let state = &mut *guard;

    // Forget the restart history first, then enter manual hold.
    state.running_since = None;
    state.last_restart_time = None;
    state.auto_restart_count = 0;
    state.give_up = false;
    state.manual_hold = true;

    guardian_log("用户主动停止 Gateway后端守护进入手动停机保持状态");
}
/// Records that the user started (or restarted) the Gateway on purpose.
///
/// Leaves manual hold, clears the give-up flag and resets all restart
/// bookkeeping. Panics if the guardian state mutex is poisoned.
pub(crate) fn guardian_mark_manual_start() {
    let mut guard = guardian_state().lock().unwrap();
    let state = &mut *guard;

    // Forget the restart history first, then leave manual hold.
    state.running_since = None;
    state.last_restart_time = None;
    state.auto_restart_count = 0;
    state.give_up = false;
    state.manual_hold = false;

    guardian_log("用户主动启动/恢复 Gateway后端守护已重置自动重启状态");
}
/// Pauses the guardian and records `reason` as the pause reason.
///
/// Also clears the give-up flag. Pauses do not nest: a second call overwrites
/// the stored reason. Panics if the guardian state mutex is poisoned.
pub(crate) fn guardian_pause(reason: &str) {
    let mut guard = guardian_state().lock().unwrap();
    guard.give_up = false;
    guard.pause_reason = Some(reason.to_owned());
    guardian_log(&format!("后端守护已暂停: {reason}"));
}
/// Resumes the guardian after a pause.
///
/// Clears the pause reason unconditionally (`reason` is only used for the log
/// line) and restarts the stable-run timer. Panics if the guardian state mutex
/// is poisoned.
// NOTE(review): because the stored reason is cleared regardless of `reason`,
// one operation finishing resumes the guardian even if another paused
// operation is still running — confirm whether overlapping pauses can occur.
pub(crate) fn guardian_resume(reason: &str) {
    let mut guard = guardian_state().lock().unwrap();
    guard.running_since = None;
    guard.pause_reason = None;
    guardian_log(&format!("后端守护已恢复: {reason}"));
}
/// Reports whether `openclaw.json` exists in the OpenClaw directory.
fn gateway_config_exists() -> bool {
    let config_path = crate::commands::openclaw_dir().join("openclaw.json");
    config_path.exists()
}
/// Fetches the status entry that describes the Gateway service.
///
/// Prefers the entry labelled `ai.openclaw.gateway`; when no such entry
/// exists, falls back to the first reported service. Returns `Ok(None)` when
/// no services are reported at all, and propagates errors from
/// `get_services_status`.
async fn gateway_service_status() -> Result<Option<ServiceStatus>, String> {
    let services = get_services_status().await?;
    // Index of the labelled entry, or 0 so that the first entry is the fallback.
    let chosen = services
        .iter()
        .position(|svc| svc.label == "ai.openclaw.gateway")
        .unwrap_or(0);
    Ok(services.into_iter().nth(chosen))
}
/// Runs one guardian polling step.
///
/// Reads the Gateway service status, updates the shared guardian state and,
/// when the Gateway went from running to stopped without a manual hold, pause
/// or give-up, attempts an automatic restart. Once the restart limit is
/// reached, sets the give-up flag and emits a `guardian-event` with kind
/// `give_up` on `app`.
///
/// The state mutex is only held inside the inner block, so it is released
/// before the restart is awaited. Panics if the mutex is poisoned.
// NOTE(review): restarts are edge-triggered — they only happen on the tick
// where the previous observation was "running". If a restart attempt fails,
// or the Gateway dies during the cooldown, `last_seen_running` is already
// `false` on the next tick and no further attempt is made until the Gateway
// is seen running again. Confirm this is intended.
async fn guardian_tick(app: &tauri::AppHandle) {
let snapshot = match gateway_service_status().await {
Ok(Some(svc)) => svc,
// No service entry reported: nothing to guard.
Ok(None) => return,
Err(err) => {
guardian_log(&format!("读取 Gateway 状态失败: {err}"));
return;
}
};
// The Gateway is only guarded when the CLI is installed and a config file exists.
let ready = snapshot.cli_installed && gateway_config_exists();
let running = snapshot.running;
let now = Instant::now();
// Decide what to do while holding the lock; perform the actions after releasing it.
let (restart_attempt, emit_give_up) = {
let mut state = guardian_state().lock().unwrap();
let mut restart_attempt = None::<u32>;
let mut emit_give_up = None::<String>;
// First tick: only record the baseline observation.
if state.last_seen_running.is_none() {
state.last_seen_running = Some(running);
state.running_since = running.then_some(now);
return;
}
// Not ready: track the observation but never restart.
if !ready {
state.last_seen_running = Some(running);
state.running_since = running.then_some(now);
return;
}
// Paused: track the observation, keeping an existing run start time.
if state.pause_reason.is_some() {
state.last_seen_running = Some(running);
state.running_since = if running {
state.running_since.or(Some(now))
} else {
None
};
return;
}
if running {
// Transition to running: leave manual hold / give-up and start the stable-run timer.
if state.last_seen_running != Some(true) {
if state.manual_hold || state.give_up {
state.manual_hold = false;
state.give_up = false;
state.auto_restart_count = 0;
state.last_restart_time = None;
guardian_log("检测到 Gateway 已重新运行,后端守护已退出手动停机/放弃状态");
}
state.running_since = Some(now);
}
// Clear the restart counter only after the Gateway has stayed up for the stable window.
if state.auto_restart_count > 0
&& state
.running_since
.map(|ts| now.duration_since(ts) >= GUARDIAN_STABLE_WINDOW)
.unwrap_or(false)
{
state.auto_restart_count = 0;
state.last_restart_time = None;
guardian_log("Gateway 已稳定运行,后端守护已清零自动重启计数");
}
state.last_seen_running = Some(true);
return;
}
// From here on the Gateway is not running.
let was_running = state.last_seen_running == Some(true);
state.last_seen_running = Some(false);
state.running_since = None;
// Only react to a running -> stopped transition that was not requested by the user.
if !was_running || state.manual_hold || state.give_up {
return;
}
// Respect the cooldown after the previous automatic restart.
if let Some(last) = state.last_restart_time {
if now.duration_since(last) < GUARDIAN_RESTART_COOLDOWN {
return;
}
}
if state.auto_restart_count >= GUARDIAN_MAX_AUTO_RESTART {
// Restart budget exhausted: stop restarting and notify the frontend.
state.give_up = true;
let message = format!(
"Gateway 连续自动重启 {} 次后仍异常,后端守护已停止自动拉起",
GUARDIAN_MAX_AUTO_RESTART
);
guardian_log(&message);
emit_give_up = Some(message);
(restart_attempt, emit_give_up)
} else {
// Consume one restart attempt and remember when it happened.
state.auto_restart_count += 1;
state.last_restart_time = Some(now);
restart_attempt = Some(state.auto_restart_count);
(restart_attempt, emit_give_up)
}
};
if let Some(attempt) = restart_attempt {
guardian_log(&format!(
"检测到 Gateway 异常退出,后端守护开始自动重启 ({attempt}/{GUARDIAN_MAX_AUTO_RESTART})"
));
// A failed restart is only logged; the attempt has already been counted.
if let Err(err) = start_service_impl_internal("ai.openclaw.gateway").await {
guardian_log(&format!("后端守护自动重启失败: {err}"));
}
}
if let Some(message) = emit_give_up {
let payload = GuardianEventPayload {
kind: "give_up".into(),
auto_restart_count: GUARDIAN_MAX_AUTO_RESTART,
message,
};
// Emission failures are ignored.
let _ = app.emit("guardian-event", payload);
}
}
/// Starts the service through the platform implementation.
///
/// The macOS implementation is called synchronously; on every other platform
/// it is awaited.
async fn start_service_impl_internal(label: &str) -> Result<(), String> {
    #[cfg(target_os = "macos")]
    let outcome = platform::start_service_impl(label);
    #[cfg(not(target_os = "macos"))]
    let outcome = platform::start_service_impl(label).await;
    outcome
}
/// Stops the service through the platform implementation.
///
/// The macOS implementation is called synchronously; on every other platform
/// it is awaited.
async fn stop_service_impl_internal(label: &str) -> Result<(), String> {
    #[cfg(target_os = "macos")]
    let outcome = platform::stop_service_impl(label);
    #[cfg(not(target_os = "macos"))]
    let outcome = platform::stop_service_impl(label).await;
    outcome
}
/// Restarts the service through the platform implementation.
///
/// The macOS implementation is called synchronously; on every other platform
/// it is awaited.
async fn restart_service_impl_internal(label: &str) -> Result<(), String> {
    #[cfg(target_os = "macos")]
    let outcome = platform::restart_service_impl(label);
    #[cfg(not(target_os = "macos"))]
    let outcome = platform::restart_service_impl(label).await;
    outcome
}
/// Spawns the backend guardian loop on the Tauri async runtime.
///
/// Calls after the first one are no-ops. The loop runs `guardian_tick` and
/// then sleeps for `GUARDIAN_INTERVAL`, forever.
pub fn start_backend_guardian(app: tauri::AppHandle) {
    // Flip the flag from false to true; failure means it was already set.
    let already_started = GUARDIAN_STARTED
        .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
        .is_err();
    if already_started {
        return;
    }

    guardian_log("后端守护循环已启动");

    let guardian_loop = async move {
        loop {
            guardian_tick(&app).await;
            tokio::time::sleep(GUARDIAN_INTERVAL).await;
        }
    };
    tauri::async_runtime::spawn(guardian_loop);
}
/// Tauri command returning the current guardian status snapshot.
///
/// Always returns `Ok`.
#[tauri::command]
pub fn guardian_status() -> Result<GuardianStatus, String> {
    let status = guardian_snapshot();
    Ok(status)
}
// ===== macOS 实现 =====
#[cfg(target_os = "macos")]
@@ -264,13 +597,17 @@ mod platform {
#[cfg(target_os = "windows")]
mod platform {
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::os::windows::process::CommandExt;
use std::process::Stdio;
use std::sync::Mutex;
use tokio::process::Command as TokioCommand;
/// Caches the result of `is_cli_installed` so that the 15-second polling does
/// not spawn cmd.exe on every tick.
static CLI_CACHE: Mutex<Option<(bool, std::time::Instant)>> = Mutex::new(None);
/// Presumably the lifetime of a `CLI_CACHE` entry — its use is not visible
/// here; confirm against `is_cli_installed`.
const CLI_CACHE_TTL: std::time::Duration = std::time::Duration::from_secs(60);
/// Process creation flag passed to `creation_flags` so that child processes
/// start without a console window.
const CREATE_NO_WINDOW: u32 = 0x08000000;
/// Windows 不需要 UID
pub fn current_uid() -> Result<u32, String> {
@@ -308,7 +645,6 @@ mod platform {
let mut cmd = std::process::Command::new("cmd");
cmd.args(["/c", "openclaw", "--version"]);
cmd.env("PATH", crate::commands::enhanced_path());
const CREATE_NO_WINDOW: u32 = 0x08000000;
cmd.creation_flags(CREATE_NO_WINDOW);
if let Ok(o) = cmd.output() {
if o.status.success() {
@@ -342,24 +678,124 @@ mod platform {
18789
}
/// Probes the port to detect Gateway state: runs `netstat -ano` without a
/// console window and returns the PIDs listening on `port`.
///
/// The output is decoded lossily as UTF-8. Returns an error only when
/// `netstat` cannot be launched; its exit status is not checked.
fn query_listening_pids(port: u16) -> Result<Vec<u32>, String> {
    let mut netstat = std::process::Command::new("netstat");
    netstat.args(["-ano"]).creation_flags(CREATE_NO_WINDOW);

    let captured = netstat
        .output()
        .map_err(|e| format!("netstat 失败: {e}"))?;
    let listing = String::from_utf8_lossy(&captured.stdout);

    Ok(super::parse_listening_pids_from_netstat(&listing, port))
}
/// Looks up the command line of process `pid` via PowerShell
/// (`Get-CimInstance Win32_Process`).
///
/// Returns `None` when PowerShell cannot be launched, exits unsuccessfully,
/// or prints nothing but whitespace.
fn query_process_command_line(pid: u32) -> Option<String> {
    let script = format!(
        r#"$p = Get-CimInstance Win32_Process -Filter "ProcessId = {pid}"; if ($p) {{ [Console]::Out.Write($p.CommandLine) }}"#,
    );

    let mut powershell = std::process::Command::new("powershell.exe");
    powershell
        .args(["-NoProfile", "-Command", &script])
        .creation_flags(CREATE_NO_WINDOW);

    let output = powershell.output().ok()?;
    if !output.status.success() {
        return None;
    }

    let command_line = String::from_utf8_lossy(&output.stdout).trim().to_string();
    Some(command_line).filter(|text| !text.is_empty())
}
/// Classifies the processes listening on `port`.
///
/// Returns `(gateway_pids, foreign_pids)`, both sorted and de-duplicated. A
/// PID counts as a Gateway only when its command line can be read and matches
/// `looks_like_gateway_command_line`; every other listener, including one
/// whose command line cannot be read, is reported as foreign.
fn inspect_port_owners(port: u16) -> Result<(Vec<u32>, Vec<u32>), String> {
    let (mut gateway_pids, mut foreign_pids): (Vec<u32>, Vec<u32>) =
        query_listening_pids(port)?.into_iter().partition(|&pid| {
            query_process_command_line(pid)
                .map_or(false, |cmd| super::looks_like_gateway_command_line(&cmd))
        });

    for pids in [&mut gateway_pids, &mut foreign_pids] {
        pids.sort_unstable();
        pids.dedup();
    }

    Ok((gateway_pids, foreign_pids))
}
/// Renders PIDs as a comma-separated list, e.g. `12, 345`.
///
/// An empty slice yields an empty string.
fn format_pid_list(pids: &[u32]) -> String {
    let mut joined = String::new();
    for (index, pid) in pids.iter().enumerate() {
        if index > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&pid.to_string());
    }
    joined
}
pub fn check_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
let port = read_gateway_port();
let addr = format!("127.0.0.1:{port}");
match std::net::TcpStream::connect_timeout(
&addr
.parse()
.unwrap_or_else(|_| "127.0.0.1:18789".parse().unwrap()),
std::time::Duration::from_millis(150),
) {
Ok(_) => (true, None),
match inspect_port_owners(port) {
Ok((gateway_pids, _)) => {
let pid = gateway_pids.first().copied();
(pid.is_some(), pid)
}
Err(_) => (false, None),
}
}
/// Force-kills any process tree whose window title equals
/// `GATEWAY_WINDOW_TITLE`.
///
/// Best-effort clean-up of the legacy visible terminal window; the outcome of
/// `taskkill` is ignored.
fn cleanup_legacy_gateway_window() {
    let title_filter = format!("WINDOWTITLE eq {GATEWAY_WINDOW_TITLE}");

    let mut taskkill = std::process::Command::new("taskkill");
    taskkill
        .args(["/f", "/t", "/fi", title_filter.as_str()])
        .creation_flags(CREATE_NO_WINDOW);

    let _ = taskkill.output();
}
/// Opens, in append mode, the files that receive the hidden Gateway's stdout
/// and stderr.
///
/// The files are `gateway.log` and `gateway.err.log` under
/// `<home>/.openclaw/logs`; the directory is created when missing. A start
/// marker line is written to the stdout log (a failed write is ignored).
/// Returns `(stdout_log, stderr_log)` or a message describing the failed step.
// NOTE(review): when the home directory cannot be determined the path becomes
// relative (`.openclaw/logs`). `guardian_log` uses
// `crate::commands::openclaw_dir()` instead — confirm whether both should
// share one location helper.
fn create_gateway_log_files() -> Result<(std::fs::File, std::fs::File), String> {
    let log_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".openclaw")
        .join("logs");
    fs::create_dir_all(&log_dir).map_err(|e| format!("创建日志目录失败: {e}"))?;

    // Opens (creating if needed) one log file for appending.
    let open_append = |file_name: &str, failure: &str| {
        OpenOptions::new()
            .create(true)
            .append(true)
            .open(log_dir.join(file_name))
            .map_err(|e| format!("{failure}: {e}"))
    };

    let mut stdout_log = open_append("gateway.log", "创建日志文件失败")?;
    let stderr_log = open_append("gateway.err.log", "创建错误日志文件失败")?;

    let started_at = chrono::Local::now().to_rfc3339();
    let _ = writeln!(
        stdout_log,
        "\n[{}] [ClawPanel] Hidden-start Gateway on Windows",
        started_at
    );

    Ok((stdout_log, stderr_log))
}
const GATEWAY_WINDOW_TITLE: &str = "OpenClaw Gateway";
/// 在可见终端窗口中启动 Gateway用户可直接看到输出
/// 在后台隐藏启动 Gateway避免守护重试时不断弹出终端窗口
pub async fn start_service_impl(_label: &str) -> Result<(), String> {
if !is_cli_installed() {
return Err(
@@ -367,39 +803,65 @@ mod platform {
.into(),
);
}
if check_service_status(0, "").0 {
let port = read_gateway_port();
let (gateway_pids, foreign_pids) = inspect_port_owners(port)?;
if !gateway_pids.is_empty() {
return Ok(());
}
if !foreign_pids.is_empty() {
return Err(format!(
"端口 {port} 已被非 Gateway 进程占用 (PID: {}),已阻止启动以避免无限重启",
format_pid_list(&foreign_pids)
));
}
let enhanced = crate::commands::enhanced_path();
// 用 cmd /c start 打开新的可见终端窗口运行 Gateway
// 父 cmd 用 CREATE_NO_WINDOW 避免自身闪窗,子窗口由 start 创建
const CREATE_NO_WINDOW: u32 = 0x08000000;
let start_cmd = format!(
"start \"{}\" cmd /k openclaw gateway",
GATEWAY_WINDOW_TITLE
);
let (stdout_log, stderr_log) = create_gateway_log_files()?;
std::process::Command::new("cmd")
.raw_arg(format!("/c {}", start_cmd))
.args(["/c", "openclaw", "gateway"])
.env("PATH", &enhanced)
.creation_flags(CREATE_NO_WINDOW)
.stdin(Stdio::null())
.stdout(stdout_log)
.stderr(stderr_log)
.spawn()
.map_err(|e| format!("启动 Gateway 失败: {e}"))?;
for _ in 0..25 {
for _ in 0..50 {
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
if check_service_status(0, "").0 {
return Ok(());
}
}
Err("Gateway 启动超时,请检查终端窗口中的错误信息".into())
let (_, foreign_pids_after) = inspect_port_owners(port)?;
if !foreign_pids_after.is_empty() {
return Err(format!(
"Gateway 启动失败,端口 {port} 已被其他进程占用 (PID: {})",
format_pid_list(&foreign_pids_after)
));
}
Err("Gateway 启动超时,请查看 gateway.err.log".into())
}
/// 关闭 Gateway(兼容旧版隐藏进程和新版可见终端)
/// 关闭 Gateway,只允许停止已确认的 Gateway 进程
pub async fn stop_service_impl(_label: &str) -> Result<(), String> {
const CREATE_NO_WINDOW: u32 = 0x08000000;
let port = read_gateway_port();
let (gateway_pids, foreign_pids) = inspect_port_owners(port)?;
if gateway_pids.is_empty() {
if !foreign_pids.is_empty() {
return Err(format!(
"端口 {port} 当前由非 Gateway 进程占用 (PID: {}),已拒绝停止以避免误杀",
format_pid_list(&foreign_pids)
));
}
cleanup_legacy_gateway_window();
return Ok(());
}
// 先尝试优雅停止
let _ = crate::utils::openclaw_command_async()
.args(["gateway", "stop"])
@@ -407,93 +869,38 @@ mod platform {
.await;
// 等一下看是否停了
for _ in 0..5 {
for _ in 0..10 {
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
if !check_service_status(0, "").0 {
// 关闭残留终端窗口
let _ = TokioCommand::new("cmd")
.args(["/c", "taskkill", "/f", "/t", "/fi", &format!("WINDOWTITLE eq {}", GATEWAY_WINDOW_TITLE)])
.creation_flags(CREATE_NO_WINDOW)
.output()
.await;
cleanup_legacy_gateway_window();
return Ok(());
}
}
// 优雅停止失败,按端口查找进程并强杀(最可靠)
let port = read_gateway_port();
let _ = kill_by_port(port).await;
// 等端口释放
for _ in 0..5 {
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
if !check_service_status(0, "").0 {
break;
}
}
// 关闭残留终端窗口(仅做清理,不影响进程停止)
let _ = TokioCommand::new("cmd")
.args(["/c", "taskkill", "/f", "/t", "/fi", &format!("WINDOWTITLE eq {}", GATEWAY_WINDOW_TITLE)])
.creation_flags(CREATE_NO_WINDOW)
.output()
.await;
Ok(())
}
/// 通过 netstat 查找占用端口的 PID 并强制杀掉(在 Rust 侧解析,避免 cmd for/f 引号问题)
async fn kill_by_port(port: u16) -> Result<(), String> {
const CREATE_NO_WINDOW: u32 = 0x08000000;
let output = TokioCommand::new("cmd")
.args(["/c", "netstat", "-ano"])
.creation_flags(CREATE_NO_WINDOW)
.output()
.await
.map_err(|e| format!("netstat 失败: {e}"))?;
let stdout = String::from_utf8_lossy(&output.stdout);
let port_pattern = format!(":{port}");
let mut pids = std::collections::HashSet::new();
for line in stdout.lines() {
let trimmed = line.trim();
if !trimmed.contains("LISTENING") || !trimmed.contains(&port_pattern) {
continue;
}
// 确认是本地地址端口精确匹配(避免 :1878 匹配 :18789
let parts: Vec<&str> = trimmed.split_whitespace().collect();
if parts.len() >= 5 {
if let Some(addr) = parts.get(1) {
if addr.ends_with(&port_pattern) {
if let Ok(pid) = parts[4].parse::<u32>() {
if pid > 0 {
pids.insert(pid);
}
}
}
}
}
}
for pid in pids {
let _ = TokioCommand::new("cmd")
.args(["/c", "taskkill", "/f", "/t", "/pid", &pid.to_string()])
// 优雅停止失败,只对已确认的 Gateway PID 做强制终止
for pid in gateway_pids {
let _ = TokioCommand::new("taskkill")
.args(["/f", "/t", "/pid", &pid.to_string()])
.creation_flags(CREATE_NO_WINDOW)
.output()
.await;
}
Ok(())
for _ in 0..10 {
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
if !check_service_status(0, "").0 {
cleanup_legacy_gateway_window();
return Ok(());
}
}
Err(format!(
"停止 Gateway 失败,端口 {port} 仍被 Gateway 进程占用"
))
}
pub async fn restart_service_impl(_label: &str) -> Result<(), String> {
let _ = stop_service_impl(_label).await;
for _ in 0..10 {
if !check_service_status(0, "").0 {
break;
}
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
}
stop_service_impl(_label).await?;
start_service_impl(_label).await
}
}
@@ -643,24 +1050,51 @@ pub async fn get_services_status() -> Result<Vec<ServiceStatus>, String> {
#[tauri::command]
pub async fn start_service(label: String) -> Result<(), String> {
#[cfg(target_os = "macos")]
return platform::start_service_impl(&label);
#[cfg(not(target_os = "macos"))]
platform::start_service_impl(&label).await
guardian_mark_manual_start();
start_service_impl_internal(&label).await
}
#[tauri::command]
pub async fn stop_service(label: String) -> Result<(), String> {
#[cfg(target_os = "macos")]
return platform::stop_service_impl(&label);
#[cfg(not(target_os = "macos"))]
platform::stop_service_impl(&label).await
guardian_mark_manual_stop();
stop_service_impl_internal(&label).await
}
#[tauri::command]
pub async fn restart_service(label: String) -> Result<(), String> {
#[cfg(target_os = "macos")]
return platform::restart_service_impl(&label);
#[cfg(not(target_os = "macos"))]
platform::restart_service_impl(&label).await
guardian_pause("manual restart");
guardian_mark_manual_start();
let result = restart_service_impl_internal(&label).await;
guardian_resume("manual restart");
result
}
#[cfg(test)]
mod tests {
    use super::{looks_like_gateway_command_line, parse_listening_pids_from_netstat};

    /// Only command lines mentioning both `openclaw` and `gateway` are
    /// recognised as Gateway processes.
    #[test]
    fn _openclaw_gateway_命令行识别为_gateway_进程() {
        let gateway = r#""C:\Program Files\nodejs\node.exe" "C:\Users\me\AppData\Roaming\npm\node_modules\@qingchencloud\openclaw-zh\bin\openclaw.js" gateway"#;
        assert!(looks_like_gateway_command_line(gateway));

        let unrelated = [
            r#""C:\Program Files\nodejs\node.exe" "C:\app\server.js""#,
            r#""C:\Program Files\SomeApp\someapp.exe" --port 18789"#,
        ];
        for command_line in unrelated {
            assert!(!looks_like_gateway_command_line(command_line));
        }
    }

    /// Only listeners on the exact port are returned, sorted ascending.
    #[test]
    fn _pid() {
        let netstat = r#"
TCP 0.0.0.0:18789 0.0.0.0:0 LISTENING 1234
TCP 127.0.0.1:18790 0.0.0.0:0 LISTENING 2222
TCP [::]:18789 [::]:0 LISTENING 3333
"#;
        let expected: Vec<u32> = vec![1234, 3333];
        assert_eq!(parse_listening_pids_from_netstat(netstat, 18789), expected);
    }
}

View File

@@ -55,6 +55,7 @@ pub fn run() {
}
})
.setup(|app| {
service::start_backend_guardian(app.handle().clone());
tray::setup_tray(app.handle())?;
Ok(())
})
@@ -101,6 +102,7 @@ pub fn run() {
service::start_service,
service::stop_service,
service::restart_service,
service::guardian_status,
// 日志
logs::read_log_tail,
logs::search_log,

View File

@@ -3,6 +3,12 @@
* 管理 openclaw 安装状态,供各组件查询
*/
import { api } from './tauri-api.js'
import {
evaluateAutoRestartAttempt,
shouldResetAutoRestartCount,
} from './gateway-guardian-policy.js'
const isTauri = !!window.__TAURI_INTERNALS__
let _openclawReady = false
let _gatewayRunning = false
@@ -17,8 +23,7 @@ let _isUpgrading = false // 升级/切换版本期间,阻止 setup 跳转
let _userStopped = false // 用户主动停止,不自动拉起
let _autoRestartCount = 0 // 自动重启次数
let _lastRestartTime = 0 // 上次重启时间
const MAX_AUTO_RESTART = 3 // 最大连续自动重启次数
const RESTART_COOLDOWN = 60000 // 重启冷却期 60s
let _gatewayRunningSince = 0 // Gateway 最近一次进入稳定运行状态的时间
let _guardianListeners = [] // 守护放弃时的回调
/** openclaw 是否就绪CLI 已安装 + 配置文件存在) */
@@ -36,7 +41,12 @@ export function isUpgrading() { return _isUpgrading }
export function setUserStopped(v) { _userStopped = !!v }
/** 重置自动重启计数(用户手动启动后重置) */
export function resetAutoRestart() { _autoRestartCount = 0; _userStopped = false }
export function resetAutoRestart() {
_autoRestartCount = 0
_lastRestartTime = 0
_gatewayRunningSince = 0
_userStopped = false
}
/** 监听守护放弃事件连续重启失败后触发UI 可弹出恢复选项) */
export function onGuardianGiveUp(fn) {
@@ -134,11 +144,14 @@ function _setGatewayRunning(val) {
_gatewayRunning = val
if (changed) {
if (val) {
// Gateway 恢复运行,重置计数
_autoRestartCount = 0
} else if (wasRunning && !_userStopped && !_isUpgrading && _openclawReady) {
// 仅记录恢复运行时间,避免短暂存活就把重启计数清零
_gatewayRunningSince = Date.now()
} else if (!isTauri && wasRunning && !_userStopped && !_isUpgrading && _openclawReady) {
_gatewayRunningSince = 0
// Gateway 意外停止,尝试自动重启
_tryAutoRestart()
} else if (!val) {
_gatewayRunningSince = 0
}
_gwListeners.forEach(fn => { try { fn(val) } catch {} })
}
@@ -146,16 +159,23 @@ function _setGatewayRunning(val) {
async function _tryAutoRestart() {
const now = Date.now()
// 冷却期内不重复重启
if (now - _lastRestartTime < RESTART_COOLDOWN) return
if (_autoRestartCount >= MAX_AUTO_RESTART) {
console.warn(`[guardian] Gateway 已连续自动重启 ${MAX_AUTO_RESTART} 次,停止守护,请手动检查`)
const decision = evaluateAutoRestartAttempt({
now,
lastRestartTime: _lastRestartTime,
autoRestartCount: _autoRestartCount,
})
if (decision.action === 'cooldown') return
if (decision.action === 'give_up') {
console.warn('[guardian] Gateway 已达到自动重启上限,停止守护,请手动检查')
_guardianListeners.forEach(fn => { try { fn() } catch {} })
return
}
_autoRestartCount++
_lastRestartTime = now
console.log(`[guardian] Gateway 意外停止,自动重启 (${_autoRestartCount}/${MAX_AUTO_RESTART})...`)
_autoRestartCount = decision.autoRestartCount
_lastRestartTime = decision.lastRestartTime
console.log(`[guardian] Gateway 意外停止,自动重启 (${_autoRestartCount}/3)...`)
try {
await api.startService('ai.openclaw.gateway')
console.log('[guardian] Gateway 自动重启成功')
@@ -173,7 +193,15 @@ export async function refreshGatewayStatus() {
const nowRunning = services[0]?.running === true
if (nowRunning) {
_gwStopCount = 0
_setGatewayRunning(true)
if (!_gatewayRunning) {
_setGatewayRunning(true)
} else if (shouldResetAutoRestartCount({
autoRestartCount: _autoRestartCount,
runningSince: _gatewayRunningSince,
now: Date.now(),
})) {
_autoRestartCount = 0
}
} else {
_gwStopCount++
if (_gwStopCount >= 2 || !_gatewayRunning) {

View File

@@ -0,0 +1,38 @@
/**
* Gateway 守护策略
* 纯函数,便于测试自动重启与计数重置规则
*/
/** Maximum number of automatic restarts before the guardian gives up. */
export const MAX_AUTO_RESTART = 3
/** Minimum time between two automatic restarts, in milliseconds. */
export const RESTART_COOLDOWN = 60000
/** Continuous running time, in milliseconds, after which the restart count may be reset. */
export const STABLE_RUNNING_MS = 120000
/**
 * Decides what the guardian should do when the Gateway is found stopped.
 *
 * @param {{now: number, lastRestartTime: number, autoRestartCount: number}} params
 *   Current timestamp, timestamp of the previous automatic restart, and the
 *   number of automatic restarts performed so far.
 * @returns {{action: 'cooldown'} | {action: 'give_up'} |
 *   {action: 'restart', autoRestartCount: number, lastRestartTime: number}}
 *   `cooldown` while the previous restart is too recent, `give_up` once the
 *   limit is reached, otherwise `restart` with the updated bookkeeping values.
 */
export function evaluateAutoRestartAttempt(params) {
  const { now, lastRestartTime, autoRestartCount } = params
  const sinceLastRestart = now - lastRestartTime

  // The cooldown is checked first, so it takes precedence over giving up.
  if (sinceLastRestart < RESTART_COOLDOWN) return { action: 'cooldown' }
  if (autoRestartCount >= MAX_AUTO_RESTART) return { action: 'give_up' }

  const nextCount = autoRestartCount + 1
  return { action: 'restart', autoRestartCount: nextCount, lastRestartTime: now }
}
/**
 * Tells whether the automatic restart counter may be cleared.
 *
 * @param {{autoRestartCount: number, runningSince: number, now: number}} params
 *   Current restart count, timestamp at which the Gateway started running
 *   (falsy when unknown), and the current timestamp.
 * @returns {boolean} `true` only when there is a non-zero count and the
 *   Gateway has been running for at least `STABLE_RUNNING_MS`.
 */
export function shouldResetAutoRestartCount({ autoRestartCount, runningSince, now }) {
  const nothingToReset = autoRestartCount <= 0
  if (nothingToReset || !runningSince) return false

  const runningFor = now - runningSince
  return runningFor >= STABLE_RUNNING_MS
}

View File

@@ -0,0 +1,69 @@
import test from 'node:test'
import assert from 'node:assert/strict'
import {
MAX_AUTO_RESTART,
RESTART_COOLDOWN,
STABLE_RUNNING_MS,
evaluateAutoRestartAttempt,
shouldResetAutoRestartCount,
} from '../src/lib/gateway-guardian-policy.js'
// A run one millisecond shorter than the stable window must not reset the count.
test('短暂恢复运行不应立即清零自动重启计数', () => {
assert.equal(
shouldResetAutoRestartCount({
autoRestartCount: 2,
runningSince: 10_000,
now: 10_000 + STABLE_RUNNING_MS - 1,
}),
false,
)
})
// A run lasting exactly the stable window is enough to reset the count.
test('稳定运行超过阈值后才允许清零自动重启计数', () => {
assert.equal(
shouldResetAutoRestartCount({
autoRestartCount: 2,
runningSince: 10_000,
now: 10_000 + STABLE_RUNNING_MS,
}),
true,
)
})
// Outside the cooldown, a count at the limit yields `give_up`.
test('达到最大自动重启次数后必须停止守护', () => {
assert.deepEqual(
evaluateAutoRestartAttempt({
now: 90_000,
lastRestartTime: 0,
autoRestartCount: MAX_AUTO_RESTART,
}),
{ action: 'give_up' },
)
})
// One millisecond before the cooldown ends, no restart is attempted.
test('冷却时间内不应重复自动重启', () => {
assert.deepEqual(
evaluateAutoRestartAttempt({
now: RESTART_COOLDOWN - 1,
lastRestartTime: 0,
autoRestartCount: 1,
}),
{ action: 'cooldown' },
)
})
// Past the cooldown and below the limit: restart, bump the count, record the time.
test('满足条件时应增加自动重启计数并记录重启时间', () => {
assert.deepEqual(
evaluateAutoRestartAttempt({
now: 120_000,
lastRestartTime: 0,
autoRestartCount: 1,
}),
{
action: 'restart',
autoRestartCount: 2,
lastRestartTime: 120_000,
},
)
})