mirror of
https://github.com/qingchencloud/clawpanel.git
synced 2026-05-07 05:32:47 +08:00
feat(guardian): 强化Gateway守护与Windows启停安全
This commit is contained in:
@@ -10,6 +10,23 @@ use std::process::Command;
|
||||
|
||||
use crate::models::types::VersionInfo;
|
||||
|
||||
struct GuardianPause {
|
||||
reason: &'static str,
|
||||
}
|
||||
|
||||
impl GuardianPause {
|
||||
fn new(reason: &'static str) -> Self {
|
||||
crate::commands::service::guardian_pause(reason);
|
||||
Self { reason }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for GuardianPause {
|
||||
fn drop(&mut self) {
|
||||
crate::commands::service::guardian_resume(self.reason);
|
||||
}
|
||||
}
|
||||
|
||||
/// 预设 npm 源列表
|
||||
const DEFAULT_REGISTRY: &str = "https://registry.npmmirror.com";
|
||||
|
||||
@@ -548,6 +565,7 @@ pub async fn upgrade_openclaw(
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::process::Stdio;
|
||||
use tauri::Emitter;
|
||||
let _guardian_pause = GuardianPause::new("upgrade");
|
||||
|
||||
let current_source = detect_installed_source();
|
||||
let pkg_name = npm_package_name(&source);
|
||||
@@ -715,6 +733,8 @@ pub async fn uninstall_openclaw(
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::process::Stdio;
|
||||
use tauri::Emitter;
|
||||
let _guardian_pause = GuardianPause::new("uninstall openclaw");
|
||||
crate::commands::service::guardian_mark_manual_stop();
|
||||
|
||||
let source = detect_installed_source();
|
||||
let pkg = npm_package_name(&source);
|
||||
@@ -1362,6 +1382,7 @@ pub async fn list_remote_models(base_url: String, api_key: String) -> Result<Vec
|
||||
#[tauri::command]
|
||||
pub async fn install_gateway() -> Result<String, String> {
|
||||
use crate::utils::openclaw_command_async;
|
||||
let _guardian_pause = GuardianPause::new("install gateway");
|
||||
// 先检测 openclaw CLI 是否可用
|
||||
let cli_check = openclaw_command_async().arg("--version").output().await;
|
||||
match cli_check {
|
||||
@@ -1394,6 +1415,8 @@ pub async fn install_gateway() -> Result<String, String> {
|
||||
/// Linux: pkill
|
||||
#[tauri::command]
|
||||
pub fn uninstall_gateway() -> Result<String, String> {
|
||||
let _guardian_pause = GuardianPause::new("uninstall gateway");
|
||||
crate::commands::service::guardian_mark_manual_stop();
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
let uid = get_uid()?;
|
||||
@@ -1425,7 +1448,6 @@ pub fn uninstall_gateway() -> Result<String, String> {
|
||||
.args(["-f", "openclaw.*gateway"])
|
||||
.output();
|
||||
}
|
||||
|
||||
Ok("Gateway 服务已卸载".to_string())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
/// 服务管理命令
|
||||
/// macOS: launchctl + LaunchAgents plist
|
||||
/// Windows: openclaw CLI + 进程检测
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use crate::models::types::ServiceStatus;
|
||||
use serde::Serialize;
|
||||
use tauri::Emitter;
|
||||
|
||||
/// OpenClaw 官方服务的友好名称映射
|
||||
fn description_map() -> HashMap<&'static str, &'static str> {
|
||||
@@ -13,6 +18,334 @@ fn description_map() -> HashMap<&'static str, &'static str> {
|
||||
])
|
||||
}
|
||||
|
||||
/// Heuristically decides whether a process command line belongs to the
/// OpenClaw Gateway.
///
/// The comparison is ASCII case-insensitive and succeeds only when both
/// `openclaw` and `gateway` occur somewhere in `command_line`.
fn looks_like_gateway_command_line(command_line: &str) -> bool {
    let lowered = command_line.to_ascii_lowercase();
    ["openclaw", "gateway"]
        .iter()
        .all(|needle| lowered.contains(*needle))
}
|
||||
|
||||
/// Extracts the PIDs that are listening on `port` from `netstat -ano` output.
///
/// A row is considered only when it contains `LISTENING` (or the localized
/// `侦听`), has at least five whitespace-separated columns, and its second
/// column (the local address) ends with `:<port>`. The fifth column is parsed
/// as the PID; rows whose PID does not parse or is `0` are ignored.
///
/// Returns the distinct PIDs in ascending order.
fn parse_listening_pids_from_netstat(stdout: &str, port: u16) -> Vec<u32> {
    let suffix = format!(":{port}");

    let unique: HashSet<u32> = stdout
        .lines()
        .map(str::trim)
        .filter(|row| row.contains("LISTENING") || row.contains("侦听"))
        .filter_map(|row| {
            let columns: Vec<&str> = row.split_whitespace().collect();
            match columns.as_slice() {
                // Columns: proto, local address, foreign address, state, pid.
                [_, local_addr, _, _, pid_text, ..] if local_addr.ends_with(&suffix) => {
                    pid_text.parse::<u32>().ok()
                }
                _ => None,
            }
        })
        .filter(|&pid| pid > 0)
        .collect();

    let mut sorted: Vec<u32> = unique.into_iter().collect();
    sorted.sort_unstable();
    sorted
}
|
||||
|
||||
/// Pause between two consecutive guardian ticks.
const GUARDIAN_INTERVAL: Duration = Duration::from_secs(15);
/// Minimum time that must pass after an automatic restart attempt before the
/// guardian attempts another one.
const GUARDIAN_RESTART_COOLDOWN: Duration = Duration::from_secs(60);
/// How long the Gateway must keep running before the automatic restart
/// counter is cleared.
const GUARDIAN_STABLE_WINDOW: Duration = Duration::from_secs(120);
/// Number of automatic restart attempts after which the guardian gives up.
const GUARDIAN_MAX_AUTO_RESTART: u32 = 3;
|
||||
|
||||
/// Mutable bookkeeping of the backend guardian, shared behind a mutex.
#[derive(Debug, Default)]
struct GuardianRuntimeState {
    /// Running flag observed on the previous tick; `None` until the first tick.
    last_seen_running: Option<bool>,
    /// Instant at which the current run of the Gateway was first observed.
    running_since: Option<Instant>,
    /// Automatic restart attempts made since the counter was last cleared.
    auto_restart_count: u32,
    /// Instant of the most recent automatic restart attempt (cooldown basis).
    last_restart_time: Option<Instant>,
    /// Set after a manual stop; while set, no automatic restart is attempted.
    manual_hold: bool,
    /// `Some(reason)` while the guardian is paused.
    pause_reason: Option<String>,
    /// Set once the automatic restart limit has been reached.
    give_up: bool,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GuardianStatus {
|
||||
pub backend_managed: bool,
|
||||
pub paused: bool,
|
||||
pub manual_hold: bool,
|
||||
pub give_up: bool,
|
||||
pub auto_restart_count: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct GuardianEventPayload {
|
||||
kind: String,
|
||||
auto_restart_count: u32,
|
||||
message: String,
|
||||
}
|
||||
|
||||
static GUARDIAN_STATE: OnceLock<Arc<Mutex<GuardianRuntimeState>>> = OnceLock::new();
|
||||
static GUARDIAN_STARTED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
fn guardian_state() -> &'static Arc<Mutex<GuardianRuntimeState>> {
|
||||
GUARDIAN_STATE.get_or_init(|| Arc::new(Mutex::new(GuardianRuntimeState::default())))
|
||||
}
|
||||
|
||||
fn guardian_log(message: &str) {
|
||||
let log_dir = crate::commands::openclaw_dir().join("logs");
|
||||
let _ = std::fs::create_dir_all(&log_dir);
|
||||
let path = log_dir.join("guardian.log");
|
||||
let line = format!(
|
||||
"[{}] {}\n",
|
||||
chrono::Local::now().format("%Y-%m-%d %H:%M:%S"),
|
||||
message
|
||||
);
|
||||
let _ = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(path)
|
||||
.and_then(|mut f| std::io::Write::write_all(&mut f, line.as_bytes()));
|
||||
}
|
||||
|
||||
fn guardian_snapshot() -> GuardianStatus {
|
||||
let state = guardian_state().lock().unwrap();
|
||||
GuardianStatus {
|
||||
backend_managed: true,
|
||||
paused: state.pause_reason.is_some(),
|
||||
manual_hold: state.manual_hold,
|
||||
give_up: state.give_up,
|
||||
auto_restart_count: state.auto_restart_count,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_mark_manual_stop() {
|
||||
let mut state = guardian_state().lock().unwrap();
|
||||
state.manual_hold = true;
|
||||
state.give_up = false;
|
||||
state.auto_restart_count = 0;
|
||||
state.last_restart_time = None;
|
||||
state.running_since = None;
|
||||
guardian_log("用户主动停止 Gateway,后端守护进入手动停机保持状态");
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_mark_manual_start() {
|
||||
let mut state = guardian_state().lock().unwrap();
|
||||
state.manual_hold = false;
|
||||
state.give_up = false;
|
||||
state.auto_restart_count = 0;
|
||||
state.last_restart_time = None;
|
||||
state.running_since = None;
|
||||
guardian_log("用户主动启动/恢复 Gateway,后端守护已重置自动重启状态");
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_pause(reason: &str) {
|
||||
let mut state = guardian_state().lock().unwrap();
|
||||
state.pause_reason = Some(reason.to_string());
|
||||
state.give_up = false;
|
||||
guardian_log(&format!("后端守护已暂停: {reason}"));
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_resume(reason: &str) {
|
||||
let mut state = guardian_state().lock().unwrap();
|
||||
state.pause_reason = None;
|
||||
state.running_since = None;
|
||||
guardian_log(&format!("后端守护已恢复: {reason}"));
|
||||
}
|
||||
|
||||
fn gateway_config_exists() -> bool {
|
||||
crate::commands::openclaw_dir()
|
||||
.join("openclaw.json")
|
||||
.exists()
|
||||
}
|
||||
|
||||
async fn gateway_service_status() -> Result<Option<ServiceStatus>, String> {
|
||||
let mut services = get_services_status().await?;
|
||||
if let Some(index) = services
|
||||
.iter()
|
||||
.position(|svc| svc.label == "ai.openclaw.gateway")
|
||||
{
|
||||
return Ok(Some(services.remove(index)));
|
||||
}
|
||||
Ok(services.into_iter().next())
|
||||
}
|
||||
|
||||
async fn guardian_tick(app: &tauri::AppHandle) {
|
||||
let snapshot = match gateway_service_status().await {
|
||||
Ok(Some(svc)) => svc,
|
||||
Ok(None) => return,
|
||||
Err(err) => {
|
||||
guardian_log(&format!("读取 Gateway 状态失败: {err}"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let ready = snapshot.cli_installed && gateway_config_exists();
|
||||
let running = snapshot.running;
|
||||
let now = Instant::now();
|
||||
let (restart_attempt, emit_give_up) = {
|
||||
let mut state = guardian_state().lock().unwrap();
|
||||
let mut restart_attempt = None::<u32>;
|
||||
let mut emit_give_up = None::<String>;
|
||||
|
||||
if state.last_seen_running.is_none() {
|
||||
state.last_seen_running = Some(running);
|
||||
state.running_since = running.then_some(now);
|
||||
return;
|
||||
}
|
||||
|
||||
if !ready {
|
||||
state.last_seen_running = Some(running);
|
||||
state.running_since = running.then_some(now);
|
||||
return;
|
||||
}
|
||||
|
||||
if state.pause_reason.is_some() {
|
||||
state.last_seen_running = Some(running);
|
||||
state.running_since = if running {
|
||||
state.running_since.or(Some(now))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
if running {
|
||||
if state.last_seen_running != Some(true) {
|
||||
if state.manual_hold || state.give_up {
|
||||
state.manual_hold = false;
|
||||
state.give_up = false;
|
||||
state.auto_restart_count = 0;
|
||||
state.last_restart_time = None;
|
||||
guardian_log("检测到 Gateway 已重新运行,后端守护已退出手动停机/放弃状态");
|
||||
}
|
||||
state.running_since = Some(now);
|
||||
}
|
||||
|
||||
if state.auto_restart_count > 0
|
||||
&& state
|
||||
.running_since
|
||||
.map(|ts| now.duration_since(ts) >= GUARDIAN_STABLE_WINDOW)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
state.auto_restart_count = 0;
|
||||
state.last_restart_time = None;
|
||||
guardian_log("Gateway 已稳定运行,后端守护已清零自动重启计数");
|
||||
}
|
||||
|
||||
state.last_seen_running = Some(true);
|
||||
return;
|
||||
}
|
||||
|
||||
let was_running = state.last_seen_running == Some(true);
|
||||
state.last_seen_running = Some(false);
|
||||
state.running_since = None;
|
||||
|
||||
if !was_running || state.manual_hold || state.give_up {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(last) = state.last_restart_time {
|
||||
if now.duration_since(last) < GUARDIAN_RESTART_COOLDOWN {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if state.auto_restart_count >= GUARDIAN_MAX_AUTO_RESTART {
|
||||
state.give_up = true;
|
||||
let message = format!(
|
||||
"Gateway 连续自动重启 {} 次后仍异常,后端守护已停止自动拉起",
|
||||
GUARDIAN_MAX_AUTO_RESTART
|
||||
);
|
||||
guardian_log(&message);
|
||||
emit_give_up = Some(message);
|
||||
(restart_attempt, emit_give_up)
|
||||
} else {
|
||||
state.auto_restart_count += 1;
|
||||
state.last_restart_time = Some(now);
|
||||
restart_attempt = Some(state.auto_restart_count);
|
||||
(restart_attempt, emit_give_up)
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(attempt) = restart_attempt {
|
||||
guardian_log(&format!(
|
||||
"检测到 Gateway 异常退出,后端守护开始自动重启 ({attempt}/{GUARDIAN_MAX_AUTO_RESTART})"
|
||||
));
|
||||
if let Err(err) = start_service_impl_internal("ai.openclaw.gateway").await {
|
||||
guardian_log(&format!("后端守护自动重启失败: {err}"));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(message) = emit_give_up {
|
||||
let payload = GuardianEventPayload {
|
||||
kind: "give_up".into(),
|
||||
auto_restart_count: GUARDIAN_MAX_AUTO_RESTART,
|
||||
message,
|
||||
};
|
||||
let _ = app.emit("guardian-event", payload);
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_service_impl_internal(label: &str) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
platform::start_service_impl(label)
|
||||
}
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
{
|
||||
platform::start_service_impl(label).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn stop_service_impl_internal(label: &str) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
platform::stop_service_impl(label)
|
||||
}
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
{
|
||||
platform::stop_service_impl(label).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn restart_service_impl_internal(label: &str) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
platform::restart_service_impl(label)
|
||||
}
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
{
|
||||
platform::restart_service_impl(label).await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start_backend_guardian(app: tauri::AppHandle) {
|
||||
if GUARDIAN_STARTED.swap(true, Ordering::SeqCst) {
|
||||
return;
|
||||
}
|
||||
|
||||
guardian_log("后端守护循环已启动");
|
||||
tauri::async_runtime::spawn(async move {
|
||||
loop {
|
||||
guardian_tick(&app).await;
|
||||
tokio::time::sleep(GUARDIAN_INTERVAL).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub fn guardian_status() -> Result<GuardianStatus, String> {
|
||||
Ok(guardian_snapshot())
|
||||
}
|
||||
|
||||
// ===== macOS 实现 =====
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
@@ -264,13 +597,17 @@ mod platform {
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
mod platform {
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::os::windows::process::CommandExt;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Mutex;
|
||||
use tokio::process::Command as TokioCommand;
|
||||
|
||||
/// 缓存 is_cli_installed 结果,避免每 15 秒 polling 都 spawn cmd.exe
|
||||
static CLI_CACHE: Mutex<Option<(bool, std::time::Instant)>> = Mutex::new(None);
|
||||
const CLI_CACHE_TTL: std::time::Duration = std::time::Duration::from_secs(60);
|
||||
const CREATE_NO_WINDOW: u32 = 0x08000000;
|
||||
|
||||
/// Windows 不需要 UID
|
||||
pub fn current_uid() -> Result<u32, String> {
|
||||
@@ -308,7 +645,6 @@ mod platform {
|
||||
let mut cmd = std::process::Command::new("cmd");
|
||||
cmd.args(["/c", "openclaw", "--version"]);
|
||||
cmd.env("PATH", crate::commands::enhanced_path());
|
||||
const CREATE_NO_WINDOW: u32 = 0x08000000;
|
||||
cmd.creation_flags(CREATE_NO_WINDOW);
|
||||
if let Ok(o) = cmd.output() {
|
||||
if o.status.success() {
|
||||
@@ -342,24 +678,124 @@ mod platform {
|
||||
18789
|
||||
}
|
||||
|
||||
/// 通过端口探测检测 Gateway 状态
|
||||
fn query_listening_pids(port: u16) -> Result<Vec<u32>, String> {
|
||||
let output = std::process::Command::new("netstat")
|
||||
.args(["-ano"])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.map_err(|e| format!("netstat 失败: {e}"))?;
|
||||
|
||||
Ok(super::parse_listening_pids_from_netstat(
|
||||
&String::from_utf8_lossy(&output.stdout),
|
||||
port,
|
||||
))
|
||||
}
|
||||
|
||||
fn query_process_command_line(pid: u32) -> Option<String> {
|
||||
let script = format!(
|
||||
r#"$p = Get-CimInstance Win32_Process -Filter "ProcessId = {pid}"; if ($p) {{ [Console]::Out.Write($p.CommandLine) }}"#,
|
||||
);
|
||||
|
||||
let output = std::process::Command::new("powershell.exe")
|
||||
.args(["-NoProfile", "-Command", &script])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
if !output.status.success() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
if text.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn inspect_port_owners(port: u16) -> Result<(Vec<u32>, Vec<u32>), String> {
|
||||
let listening_pids = query_listening_pids(port)?;
|
||||
let mut gateway_pids = Vec::new();
|
||||
let mut foreign_pids = Vec::new();
|
||||
|
||||
for pid in listening_pids {
|
||||
match query_process_command_line(pid) {
|
||||
Some(command_line) if super::looks_like_gateway_command_line(&command_line) => {
|
||||
gateway_pids.push(pid);
|
||||
}
|
||||
_ => foreign_pids.push(pid),
|
||||
}
|
||||
}
|
||||
|
||||
gateway_pids.sort_unstable();
|
||||
gateway_pids.dedup();
|
||||
foreign_pids.sort_unstable();
|
||||
foreign_pids.dedup();
|
||||
Ok((gateway_pids, foreign_pids))
|
||||
}
|
||||
|
||||
/// Renders PIDs as a comma-and-space separated list, e.g. `"12, 345"`.
/// An empty slice produces an empty string.
fn format_pid_list(pids: &[u32]) -> String {
    let mut joined = String::new();
    for (index, pid) in pids.iter().enumerate() {
        if index > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&pid.to_string());
    }
    joined
}
|
||||
|
||||
pub fn check_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
|
||||
let port = read_gateway_port();
|
||||
let addr = format!("127.0.0.1:{port}");
|
||||
match std::net::TcpStream::connect_timeout(
|
||||
&addr
|
||||
.parse()
|
||||
.unwrap_or_else(|_| "127.0.0.1:18789".parse().unwrap()),
|
||||
std::time::Duration::from_millis(150),
|
||||
) {
|
||||
Ok(_) => (true, None),
|
||||
match inspect_port_owners(port) {
|
||||
Ok((gateway_pids, _)) => {
|
||||
let pid = gateway_pids.first().copied();
|
||||
(pid.is_some(), pid)
|
||||
}
|
||||
Err(_) => (false, None),
|
||||
}
|
||||
}
|
||||
|
||||
fn cleanup_legacy_gateway_window() {
|
||||
let _ = std::process::Command::new("taskkill")
|
||||
.args([
|
||||
"/f",
|
||||
"/t",
|
||||
"/fi",
|
||||
&format!("WINDOWTITLE eq {GATEWAY_WINDOW_TITLE}"),
|
||||
])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output();
|
||||
}
|
||||
|
||||
fn create_gateway_log_files() -> Result<(std::fs::File, std::fs::File), String> {
|
||||
let log_dir = dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".openclaw")
|
||||
.join("logs");
|
||||
fs::create_dir_all(&log_dir).map_err(|e| format!("创建日志目录失败: {e}"))?;
|
||||
|
||||
let mut stdout_log = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(log_dir.join("gateway.log"))
|
||||
.map_err(|e| format!("创建日志文件失败: {e}"))?;
|
||||
|
||||
let stderr_log = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(log_dir.join("gateway.err.log"))
|
||||
.map_err(|e| format!("创建错误日志文件失败: {e}"))?;
|
||||
|
||||
let _ = writeln!(
|
||||
stdout_log,
|
||||
"\n[{}] [ClawPanel] Hidden-start Gateway on Windows",
|
||||
chrono::Local::now().to_rfc3339()
|
||||
);
|
||||
|
||||
Ok((stdout_log, stderr_log))
|
||||
}
|
||||
|
||||
/// Window title matched by `cleanup_legacy_gateway_window` when it kills
/// leftover Gateway terminal windows.
const GATEWAY_WINDOW_TITLE: &str = "OpenClaw Gateway";
|
||||
|
||||
/// 在可见终端窗口中启动 Gateway,用户可直接看到输出
|
||||
/// 在后台隐藏启动 Gateway,避免守护重试时不断弹出终端窗口
|
||||
pub async fn start_service_impl(_label: &str) -> Result<(), String> {
|
||||
if !is_cli_installed() {
|
||||
return Err(
|
||||
@@ -367,39 +803,65 @@ mod platform {
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
if check_service_status(0, "").0 {
|
||||
|
||||
let port = read_gateway_port();
|
||||
let (gateway_pids, foreign_pids) = inspect_port_owners(port)?;
|
||||
if !gateway_pids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
if !foreign_pids.is_empty() {
|
||||
return Err(format!(
|
||||
"端口 {port} 已被非 Gateway 进程占用 (PID: {}),已阻止启动以避免无限重启",
|
||||
format_pid_list(&foreign_pids)
|
||||
));
|
||||
}
|
||||
|
||||
let enhanced = crate::commands::enhanced_path();
|
||||
|
||||
// 用 cmd /c start 打开新的可见终端窗口运行 Gateway
|
||||
// 父 cmd 用 CREATE_NO_WINDOW 避免自身闪窗,子窗口由 start 创建
|
||||
const CREATE_NO_WINDOW: u32 = 0x08000000;
|
||||
let start_cmd = format!(
|
||||
"start \"{}\" cmd /k openclaw gateway",
|
||||
GATEWAY_WINDOW_TITLE
|
||||
);
|
||||
let (stdout_log, stderr_log) = create_gateway_log_files()?;
|
||||
|
||||
std::process::Command::new("cmd")
|
||||
.raw_arg(format!("/c {}", start_cmd))
|
||||
.args(["/c", "openclaw", "gateway"])
|
||||
.env("PATH", &enhanced)
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(stdout_log)
|
||||
.stderr(stderr_log)
|
||||
.spawn()
|
||||
.map_err(|e| format!("启动 Gateway 失败: {e}"))?;
|
||||
|
||||
for _ in 0..25 {
|
||||
for _ in 0..50 {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
|
||||
if check_service_status(0, "").0 {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err("Gateway 启动超时,请检查终端窗口中的错误信息".into())
|
||||
|
||||
let (_, foreign_pids_after) = inspect_port_owners(port)?;
|
||||
if !foreign_pids_after.is_empty() {
|
||||
return Err(format!(
|
||||
"Gateway 启动失败,端口 {port} 已被其他进程占用 (PID: {})",
|
||||
format_pid_list(&foreign_pids_after)
|
||||
));
|
||||
}
|
||||
|
||||
Err("Gateway 启动超时,请查看 gateway.err.log".into())
|
||||
}
|
||||
|
||||
/// 关闭 Gateway(兼容旧版隐藏进程和新版可见终端)
|
||||
/// 关闭 Gateway,只允许停止已确认的 Gateway 进程
|
||||
pub async fn stop_service_impl(_label: &str) -> Result<(), String> {
|
||||
const CREATE_NO_WINDOW: u32 = 0x08000000;
|
||||
let port = read_gateway_port();
|
||||
let (gateway_pids, foreign_pids) = inspect_port_owners(port)?;
|
||||
if gateway_pids.is_empty() {
|
||||
if !foreign_pids.is_empty() {
|
||||
return Err(format!(
|
||||
"端口 {port} 当前由非 Gateway 进程占用 (PID: {}),已拒绝停止以避免误杀",
|
||||
format_pid_list(&foreign_pids)
|
||||
));
|
||||
}
|
||||
cleanup_legacy_gateway_window();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// 先尝试优雅停止
|
||||
let _ = crate::utils::openclaw_command_async()
|
||||
.args(["gateway", "stop"])
|
||||
@@ -407,93 +869,38 @@ mod platform {
|
||||
.await;
|
||||
|
||||
// 等一下看是否停了
|
||||
for _ in 0..5 {
|
||||
for _ in 0..10 {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||
if !check_service_status(0, "").0 {
|
||||
// 关闭残留终端窗口
|
||||
let _ = TokioCommand::new("cmd")
|
||||
.args(["/c", "taskkill", "/f", "/t", "/fi", &format!("WINDOWTITLE eq {}", GATEWAY_WINDOW_TITLE)])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.await;
|
||||
cleanup_legacy_gateway_window();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// 优雅停止失败,按端口查找进程并强杀(最可靠)
|
||||
let port = read_gateway_port();
|
||||
let _ = kill_by_port(port).await;
|
||||
|
||||
// 等端口释放
|
||||
for _ in 0..5 {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||
if !check_service_status(0, "").0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 关闭残留终端窗口(仅做清理,不影响进程停止)
|
||||
let _ = TokioCommand::new("cmd")
|
||||
.args(["/c", "taskkill", "/f", "/t", "/fi", &format!("WINDOWTITLE eq {}", GATEWAY_WINDOW_TITLE)])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 通过 netstat 查找占用端口的 PID 并强制杀掉(在 Rust 侧解析,避免 cmd for/f 引号问题)
|
||||
async fn kill_by_port(port: u16) -> Result<(), String> {
|
||||
const CREATE_NO_WINDOW: u32 = 0x08000000;
|
||||
let output = TokioCommand::new("cmd")
|
||||
.args(["/c", "netstat", "-ano"])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| format!("netstat 失败: {e}"))?;
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let port_pattern = format!(":{port}");
|
||||
let mut pids = std::collections::HashSet::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let trimmed = line.trim();
|
||||
if !trimmed.contains("LISTENING") || !trimmed.contains(&port_pattern) {
|
||||
continue;
|
||||
}
|
||||
// 确认是本地地址端口精确匹配(避免 :1878 匹配 :18789)
|
||||
let parts: Vec<&str> = trimmed.split_whitespace().collect();
|
||||
if parts.len() >= 5 {
|
||||
if let Some(addr) = parts.get(1) {
|
||||
if addr.ends_with(&port_pattern) {
|
||||
if let Ok(pid) = parts[4].parse::<u32>() {
|
||||
if pid > 0 {
|
||||
pids.insert(pid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for pid in pids {
|
||||
let _ = TokioCommand::new("cmd")
|
||||
.args(["/c", "taskkill", "/f", "/t", "/pid", &pid.to_string()])
|
||||
// 优雅停止失败,只对已确认的 Gateway PID 做强制终止
|
||||
for pid in gateway_pids {
|
||||
let _ = TokioCommand::new("taskkill")
|
||||
.args(["/f", "/t", "/pid", &pid.to_string()])
|
||||
.creation_flags(CREATE_NO_WINDOW)
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
|
||||
for _ in 0..10 {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||
if !check_service_status(0, "").0 {
|
||||
cleanup_legacy_gateway_window();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(format!(
|
||||
"停止 Gateway 失败,端口 {port} 仍被 Gateway 进程占用"
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn restart_service_impl(_label: &str) -> Result<(), String> {
|
||||
let _ = stop_service_impl(_label).await;
|
||||
for _ in 0..10 {
|
||||
if !check_service_status(0, "").0 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||
}
|
||||
stop_service_impl(_label).await?;
|
||||
start_service_impl(_label).await
|
||||
}
|
||||
}
|
||||
@@ -643,24 +1050,51 @@ pub async fn get_services_status() -> Result<Vec<ServiceStatus>, String> {
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn start_service(label: String) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
return platform::start_service_impl(&label);
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
platform::start_service_impl(&label).await
|
||||
guardian_mark_manual_start();
|
||||
start_service_impl_internal(&label).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn stop_service(label: String) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
return platform::stop_service_impl(&label);
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
platform::stop_service_impl(&label).await
|
||||
guardian_mark_manual_stop();
|
||||
stop_service_impl_internal(&label).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn restart_service(label: String) -> Result<(), String> {
|
||||
#[cfg(target_os = "macos")]
|
||||
return platform::restart_service_impl(&label);
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
platform::restart_service_impl(&label).await
|
||||
guardian_pause("manual restart");
|
||||
guardian_mark_manual_start();
|
||||
let result = restart_service_impl_internal(&label).await;
|
||||
guardian_resume("manual restart");
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{looks_like_gateway_command_line, parse_listening_pids_from_netstat};

    /// Only command lines that mention both OpenClaw and the gateway
    /// sub-command are classified as Gateway processes.
    #[test]
    fn 只把_openclaw_gateway_命令行识别为_gateway_进程() {
        let gateway = r#""C:\Program Files\nodejs\node.exe" "C:\Users\me\AppData\Roaming\npm\node_modules\@qingchencloud\openclaw-zh\bin\openclaw.js" gateway"#;
        let other_node_app = r#""C:\Program Files\nodejs\node.exe" "C:\app\server.js""#;
        let other_binary = r#""C:\Program Files\SomeApp\someapp.exe" --port 18789"#;

        assert!(looks_like_gateway_command_line(gateway));
        assert!(!looks_like_gateway_command_line(other_node_app));
        assert!(!looks_like_gateway_command_line(other_binary));
    }

    /// Only listeners on the requested port are reported, IPv4 and IPv6 alike.
    #[test]
    fn 只解析目标端口的监听_pid() {
        let netstat = r#"
TCP 0.0.0.0:18789 0.0.0.0:0 LISTENING 1234
TCP 127.0.0.1:18790 0.0.0.0:0 LISTENING 2222
TCP [::]:18789 [::]:0 LISTENING 3333
"#;

        let expected: Vec<u32> = vec![1234, 3333];
        assert_eq!(parse_listening_pids_from_netstat(netstat, 18789), expected);
    }
}
|
||||
|
||||
@@ -55,6 +55,7 @@ pub fn run() {
|
||||
}
|
||||
})
|
||||
.setup(|app| {
|
||||
service::start_backend_guardian(app.handle().clone());
|
||||
tray::setup_tray(app.handle())?;
|
||||
Ok(())
|
||||
})
|
||||
@@ -101,6 +102,7 @@ pub fn run() {
|
||||
service::start_service,
|
||||
service::stop_service,
|
||||
service::restart_service,
|
||||
service::guardian_status,
|
||||
// 日志
|
||||
logs::read_log_tail,
|
||||
logs::search_log,
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
* 管理 openclaw 安装状态,供各组件查询
|
||||
*/
|
||||
import { api } from './tauri-api.js'
|
||||
import {
|
||||
evaluateAutoRestartAttempt,
|
||||
shouldResetAutoRestartCount,
|
||||
} from './gateway-guardian-policy.js'
|
||||
|
||||
const isTauri = !!window.__TAURI_INTERNALS__
|
||||
|
||||
let _openclawReady = false
|
||||
let _gatewayRunning = false
|
||||
@@ -17,8 +23,7 @@ let _isUpgrading = false // 升级/切换版本期间,阻止 setup 跳转
|
||||
let _userStopped = false // 用户主动停止,不自动拉起
|
||||
let _autoRestartCount = 0 // 自动重启次数
|
||||
let _lastRestartTime = 0 // 上次重启时间
|
||||
const MAX_AUTO_RESTART = 3 // 最大连续自动重启次数
|
||||
const RESTART_COOLDOWN = 60000 // 重启冷却期 60s
|
||||
let _gatewayRunningSince = 0 // Gateway 最近一次进入稳定运行状态的时间
|
||||
let _guardianListeners = [] // 守护放弃时的回调
|
||||
|
||||
/** openclaw 是否就绪(CLI 已安装 + 配置文件存在) */
|
||||
@@ -36,7 +41,12 @@ export function isUpgrading() { return _isUpgrading }
|
||||
export function setUserStopped(v) { _userStopped = !!v }
|
||||
|
||||
/** 重置自动重启计数(用户手动启动后重置) */
|
||||
export function resetAutoRestart() { _autoRestartCount = 0; _userStopped = false }
|
||||
export function resetAutoRestart() {
|
||||
_autoRestartCount = 0
|
||||
_lastRestartTime = 0
|
||||
_gatewayRunningSince = 0
|
||||
_userStopped = false
|
||||
}
|
||||
|
||||
/** 监听守护放弃事件(连续重启失败后触发,UI 可弹出恢复选项) */
|
||||
export function onGuardianGiveUp(fn) {
|
||||
@@ -134,11 +144,14 @@ function _setGatewayRunning(val) {
|
||||
_gatewayRunning = val
|
||||
if (changed) {
|
||||
if (val) {
|
||||
// Gateway 恢复运行,重置计数
|
||||
_autoRestartCount = 0
|
||||
} else if (wasRunning && !_userStopped && !_isUpgrading && _openclawReady) {
|
||||
// 仅记录恢复运行时间,避免短暂存活就把重启计数清零
|
||||
_gatewayRunningSince = Date.now()
|
||||
} else if (!isTauri && wasRunning && !_userStopped && !_isUpgrading && _openclawReady) {
|
||||
_gatewayRunningSince = 0
|
||||
// Gateway 意外停止,尝试自动重启
|
||||
_tryAutoRestart()
|
||||
} else if (!val) {
|
||||
_gatewayRunningSince = 0
|
||||
}
|
||||
_gwListeners.forEach(fn => { try { fn(val) } catch {} })
|
||||
}
|
||||
@@ -146,16 +159,23 @@ function _setGatewayRunning(val) {
|
||||
|
||||
async function _tryAutoRestart() {
|
||||
const now = Date.now()
|
||||
// 冷却期内不重复重启
|
||||
if (now - _lastRestartTime < RESTART_COOLDOWN) return
|
||||
if (_autoRestartCount >= MAX_AUTO_RESTART) {
|
||||
console.warn(`[guardian] Gateway 已连续自动重启 ${MAX_AUTO_RESTART} 次,停止守护,请手动检查`)
|
||||
const decision = evaluateAutoRestartAttempt({
|
||||
now,
|
||||
lastRestartTime: _lastRestartTime,
|
||||
autoRestartCount: _autoRestartCount,
|
||||
})
|
||||
|
||||
if (decision.action === 'cooldown') return
|
||||
|
||||
if (decision.action === 'give_up') {
|
||||
console.warn('[guardian] Gateway 已达到自动重启上限,停止守护,请手动检查')
|
||||
_guardianListeners.forEach(fn => { try { fn() } catch {} })
|
||||
return
|
||||
}
|
||||
_autoRestartCount++
|
||||
_lastRestartTime = now
|
||||
console.log(`[guardian] Gateway 意外停止,自动重启 (${_autoRestartCount}/${MAX_AUTO_RESTART})...`)
|
||||
|
||||
_autoRestartCount = decision.autoRestartCount
|
||||
_lastRestartTime = decision.lastRestartTime
|
||||
console.log(`[guardian] Gateway 意外停止,自动重启 (${_autoRestartCount}/3)...`)
|
||||
try {
|
||||
await api.startService('ai.openclaw.gateway')
|
||||
console.log('[guardian] Gateway 自动重启成功')
|
||||
@@ -173,7 +193,15 @@ export async function refreshGatewayStatus() {
|
||||
const nowRunning = services[0]?.running === true
|
||||
if (nowRunning) {
|
||||
_gwStopCount = 0
|
||||
_setGatewayRunning(true)
|
||||
if (!_gatewayRunning) {
|
||||
_setGatewayRunning(true)
|
||||
} else if (shouldResetAutoRestartCount({
|
||||
autoRestartCount: _autoRestartCount,
|
||||
runningSince: _gatewayRunningSince,
|
||||
now: Date.now(),
|
||||
})) {
|
||||
_autoRestartCount = 0
|
||||
}
|
||||
} else {
|
||||
_gwStopCount++
|
||||
if (_gwStopCount >= 2 || !_gatewayRunning) {
|
||||
|
||||
38
src/lib/gateway-guardian-policy.js
Normal file
38
src/lib/gateway-guardian-policy.js
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Gateway 守护策略
|
||||
* 纯函数,便于测试自动重启与计数重置规则
|
||||
*/
|
||||
|
||||
/** Number of automatic restarts after which the guardian gives up. */
export const MAX_AUTO_RESTART = 3
/** Minimum time in milliseconds between two automatic restart attempts. */
export const RESTART_COOLDOWN = 60000
/** Time in milliseconds the Gateway must keep running before the restart counter may be cleared. */
export const STABLE_RUNNING_MS = 120000

/**
 * Decides what to do when the Gateway is found stopped unexpectedly.
 *
 * The cooldown is checked before the attempt limit.
 *
 * @param {{ now: number, lastRestartTime: number, autoRestartCount: number }} input
 *   Current time and time of the last restart (both in milliseconds), and the
 *   number of restarts performed so far.
 * @returns {{ action: 'cooldown' } | { action: 'give_up' } |
 *   { action: 'restart', autoRestartCount: number, lastRestartTime: number }}
 *   For `restart`, the incremented counter and `now` as the new restart time.
 */
export function evaluateAutoRestartAttempt({ now, lastRestartTime, autoRestartCount }) {
  const coolingDown = now - lastRestartTime < RESTART_COOLDOWN
  if (coolingDown) {
    return { action: 'cooldown' }
  }

  const exhausted = autoRestartCount >= MAX_AUTO_RESTART
  if (exhausted) {
    return { action: 'give_up' }
  }

  const nextCount = autoRestartCount + 1
  return { action: 'restart', autoRestartCount: nextCount, lastRestartTime: now }
}

/**
 * Tells whether the automatic restart counter may be cleared.
 *
 * @param {{ autoRestartCount: number, runningSince: number, now: number }} input
 *   Restarts performed so far, the time the Gateway started running (a falsy
 *   value means "not running"), and the current time, in milliseconds.
 * @returns {boolean} `true` only when the counter is above zero and the
 *   Gateway has been running for at least `STABLE_RUNNING_MS`.
 */
export function shouldResetAutoRestartCount({ autoRestartCount, runningSince, now }) {
  const hasRestarts = !(autoRestartCount <= 0)
  const isRunning = Boolean(runningSince)
  return hasRestarts && isRunning && now - runningSince >= STABLE_RUNNING_MS
}
|
||||
69
tests/gateway-guardian-policy.test.js
Normal file
69
tests/gateway-guardian-policy.test.js
Normal file
@@ -0,0 +1,69 @@
|
||||
import test from 'node:test'
|
||||
import assert from 'node:assert/strict'
|
||||
|
||||
import {
|
||||
MAX_AUTO_RESTART,
|
||||
RESTART_COOLDOWN,
|
||||
STABLE_RUNNING_MS,
|
||||
evaluateAutoRestartAttempt,
|
||||
shouldResetAutoRestartCount,
|
||||
} from '../src/lib/gateway-guardian-policy.js'
|
||||
|
||||
// A short recovery must not clear the automatic restart counter.
test('短暂恢复运行不应立即清零自动重启计数', () => {
  const runningSince = 10_000
  const shouldReset = shouldResetAutoRestartCount({
    autoRestartCount: 2,
    runningSince,
    now: runningSince + STABLE_RUNNING_MS - 1,
  })
  assert.equal(shouldReset, false)
})

// The counter may be cleared only once the stable-running threshold is reached.
test('稳定运行超过阈值后才允许清零自动重启计数', () => {
  const runningSince = 10_000
  const shouldReset = shouldResetAutoRestartCount({
    autoRestartCount: 2,
    runningSince,
    now: runningSince + STABLE_RUNNING_MS,
  })
  assert.equal(shouldReset, true)
})

// The guardian must stop once the maximum number of restarts is reached.
test('达到最大自动重启次数后必须停止守护', () => {
  const decision = evaluateAutoRestartAttempt({
    now: 90_000,
    lastRestartTime: 0,
    autoRestartCount: MAX_AUTO_RESTART,
  })
  assert.deepEqual(decision, { action: 'give_up' })
})

// No second automatic restart while the cooldown is still running.
test('冷却时间内不应重复自动重启', () => {
  const decision = evaluateAutoRestartAttempt({
    now: RESTART_COOLDOWN - 1,
    lastRestartTime: 0,
    autoRestartCount: 1,
  })
  assert.deepEqual(decision, { action: 'cooldown' })
})

// A permitted restart increments the counter and records the restart time.
test('满足条件时应增加自动重启计数并记录重启时间', () => {
  const now = 120_000
  const decision = evaluateAutoRestartAttempt({
    now,
    lastRestartTime: 0,
    autoRestartCount: 1,
  })
  assert.deepEqual(decision, {
    action: 'restart',
    autoRestartCount: 2,
    lastRestartTime: now,
  })
})
|
||||
Reference in New Issue
Block a user