Files
clawpanel/src-tauri/src/commands/service.rs

2208 lines
81 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/// 服务管理命令
///
/// 检测策略(跨平台统一):
/// 1. TCP 连 127.0.0.1:{port},超时 1.5s
/// 2. 连通 → 认为 Gateway 在运行
///
/// 不依赖任何系统命令(无 netstat / PowerShell / launchctl / openclaw health
/// 无权限问题,逻辑一致。
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
use crate::models::types::ServiceStatus;
use serde::{Deserialize, Serialize};
use tauri::Emitter;
/// OpenClaw 官方服务的友好名称映射
fn description_map() -> HashMap<&'static str, &'static str> {
HashMap::from([
("ai.openclaw.gateway", "OpenClaw Gateway"),
("ai.openclaw.node", "OpenClaw Node Host"),
])
}
const GUARDIAN_INTERVAL: Duration = Duration::from_secs(15);
const GUARDIAN_RESTART_COOLDOWN: Duration = Duration::from_secs(60);
const GUARDIAN_STABLE_WINDOW: Duration = Duration::from_secs(120);
const GUARDIAN_MAX_AUTO_RESTART: u32 = 3;
const GATEWAY_CONFIG_AUTO_FIX_COOLDOWN: Duration = Duration::from_secs(120);
#[derive(Debug, Default)]
struct GuardianRuntimeState {
last_seen_running: Option<bool>,
running_since: Option<Instant>,
auto_restart_count: u32,
last_restart_time: Option<Instant>,
manual_hold: bool,
pause_reason: Option<String>,
give_up: bool,
}
#[derive(Debug, Default)]
struct GatewayConfigAutoFixState {
last_attempt: Option<Instant>,
in_progress: bool,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GuardianStatus {
pub backend_managed: bool,
pub paused: bool,
pub manual_hold: bool,
pub give_up: bool,
pub auto_restart_count: u32,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct GuardianEventPayload {
kind: String,
auto_restart_count: u32,
message: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GatewayOwnerRecord {
pid: Option<u32>,
port: u16,
cli_path: Option<String>,
openclaw_dir: String,
started_at: String,
started_by: String,
}
fn normalize_owned_path(path: impl AsRef<std::path::Path>) -> String {
let path_ref = path.as_ref();
path_ref
.canonicalize()
.unwrap_or_else(|_| path_ref.to_path_buf())
.to_string_lossy()
.to_string()
}
fn gateway_owner_path() -> std::path::PathBuf {
crate::commands::openclaw_dir().join("gateway-owner.json")
}
fn current_gateway_owner_signature() -> (u16, String, Option<String>) {
let openclaw_dir = normalize_owned_path(crate::commands::openclaw_dir());
let cli_path = crate::utils::resolve_openclaw_cli_path()
.map(|p| normalize_owned_path(std::path::PathBuf::from(p)));
(
crate::commands::gateway_listen_port(),
openclaw_dir,
cli_path,
)
}
fn matches_current_gateway_owner_signature(owner: &GatewayOwnerRecord) -> bool {
if owner.started_by != "clawpanel" {
return false;
}
let (port, openclaw_dir, cli_path) = current_gateway_owner_signature();
if owner.port != port {
return false;
}
if normalize_owned_path(&owner.openclaw_dir) != openclaw_dir {
return false;
}
let owner_cli_path = owner.cli_path.as_ref().map(normalize_owned_path);
// 仅当双方都有 cli_path 且不同才视为不匹配;任一侧缺失时放宽为兼容(向后兼容旧记录/未绑定 CLI
match (owner_cli_path.as_deref(), cli_path.as_deref()) {
(Some(a), Some(b)) => a == b,
_ => true,
}
}
fn gateway_owner_pid_needs_refresh(owner: &GatewayOwnerRecord, pid: Option<u32>) -> bool {
matches_current_gateway_owner_signature(owner)
&& matches!(pid, Some(current_pid) if owner.pid != Some(current_pid))
}
fn read_gateway_owner() -> Option<GatewayOwnerRecord> {
let content = std::fs::read_to_string(gateway_owner_path()).ok()?;
serde_json::from_str(&content).ok()
}
fn write_gateway_owner(pid: Option<u32>) -> Result<(), String> {
let owner_path = gateway_owner_path();
if let Some(parent) = owner_path.parent() {
std::fs::create_dir_all(parent).map_err(|e| format!("创建 Gateway owner 目录失败: {e}"))?;
}
let (port, openclaw_dir, cli_path) = current_gateway_owner_signature();
let record = GatewayOwnerRecord {
pid,
port,
cli_path,
openclaw_dir,
started_at: chrono::Local::now().to_rfc3339(),
started_by: "clawpanel".into(),
};
let content = serde_json::to_string_pretty(&record)
.map_err(|e| format!("序列化 Gateway owner 失败: {e}"))?;
std::fs::write(owner_path, content).map_err(|e| format!("写入 Gateway owner 失败: {e}"))
}
fn clear_gateway_owner() {
let _ = std::fs::remove_file(gateway_owner_path());
}
fn is_current_gateway_owner(owner: &GatewayOwnerRecord, _pid: Option<u32>) -> bool {
matches_current_gateway_owner_signature(owner)
}
/// 判断是否可以安全地自动认领 Gateway端口 + 数据目录匹配即可(忽略 started_by
fn should_auto_claim_gateway(owner: &Option<GatewayOwnerRecord>) -> bool {
let (port, openclaw_dir, _cli_path) = current_gateway_owner_signature();
match owner {
None => true, // 无 owner 文件 → 自动认领
Some(record) => {
// owner 文件存在但签名不完全匹配 → 仅按 port + openclaw_dir 判断
record.port == port && normalize_owned_path(&record.openclaw_dir) == openclaw_dir
}
}
}
fn foreign_gateway_error(pid: Option<u32>) -> String {
let pid_suffix = pid
.map(|value| format!(" (PID: {value})"))
.unwrap_or_default();
format!(
"检测到端口 {} 上已有其他 OpenClaw Gateway 正在运行{},且不属于当前面板实例。为避免误接管,请先关闭该实例,或将当前 CLI/目录绑定到它对应的安装。",
crate::commands::gateway_listen_port(),
pid_suffix
)
}
fn ensure_owned_gateway_or_err(pid: Option<u32>) -> Result<(), String> {
let owner = read_gateway_owner();
if let Some(ref record) = owner {
if is_current_gateway_owner(record, pid) {
if gateway_owner_pid_needs_refresh(record, pid) {
write_gateway_owner(pid)?;
}
return Ok(());
}
}
// 无有效 owner 或签名不匹配 → 尝试自动认领(端口 + 数据目录匹配即可)
if should_auto_claim_gateway(&owner) {
write_gateway_owner(pid)?;
return Ok(());
}
Err(foreign_gateway_error(pid))
}
async fn current_gateway_runtime(label: &str) -> (bool, Option<u32>) {
#[cfg(target_os = "windows")]
{
platform::check_service_status(0, label)
}
#[cfg(target_os = "macos")]
{
platform::check_service_status(0, label)
}
#[cfg(target_os = "linux")]
{
platform::check_service_status(0, label).await
}
}
async fn wait_for_gateway_running(label: &str, timeout: Duration) -> Result<(), String> {
let deadline = Instant::now() + timeout;
while Instant::now() < deadline {
let (running, pid) = current_gateway_runtime(label).await;
if running {
write_gateway_owner(pid)?;
return Ok(());
}
tokio::time::sleep(Duration::from_millis(300)).await;
}
Err(format!(
"Gateway 启动超时,请查看 {}",
crate::commands::openclaw_dir()
.join("logs")
.join("gateway.err.log")
.display()
))
}
async fn wait_for_gateway_stopped(label: &str, timeout: Duration) -> Result<(), String> {
let deadline = Instant::now() + timeout;
while Instant::now() < deadline {
let (running, _) = current_gateway_runtime(label).await;
if !running {
clear_gateway_owner();
return Ok(());
}
tokio::time::sleep(Duration::from_millis(300)).await;
}
Err("Gateway 停止超时,请手动检查进程".into())
}
fn gateway_err_log_path() -> std::path::PathBuf {
crate::commands::openclaw_dir()
.join("logs")
.join("gateway.err.log")
}
fn read_gateway_error_log_excerpt(max_bytes: usize) -> String {
let bytes = match std::fs::read(gateway_err_log_path()) {
Ok(content) => content,
Err(_) => return String::new(),
};
if bytes.is_empty() {
return String::new();
}
let tail = if bytes.len() > max_bytes {
&bytes[bytes.len() - max_bytes..]
} else {
&bytes[..]
};
String::from_utf8_lossy(tail).to_string()
}
fn looks_like_gateway_config_mismatch(reason: &str) -> bool {
let combined = format!("{}\n{}", reason, read_gateway_error_log_excerpt(8192)).to_lowercase();
let has_invalid = combined.contains("config invalid") || combined.contains("invalid config");
let has_newer_version = combined.contains("config was last written by a newer openclaw");
let has_schema_mismatch = combined.contains("must not have additional properties")
|| combined.contains("must not have additional property")
|| combined.contains("plugins.entries.memory-core.config")
|| combined.contains("additional properties");
let mentions_doctor_fix = combined.contains("doctor --fix");
(has_invalid && (has_schema_mismatch || mentions_doctor_fix))
|| (has_newer_version && mentions_doctor_fix)
}
/// 直接修复 openclaw.json 中 plugins.entries.*.config 的多余属性
/// 当 `openclaw doctor --fix` 无法修复时作为二级回退
fn try_direct_config_strip() -> Result<bool, String> {
let config_path = crate::commands::openclaw_dir().join("openclaw.json");
let raw =
std::fs::read_to_string(&config_path).map_err(|e| format!("读取配置文件失败: {e}"))?;
let mut doc: serde_json::Value =
serde_json::from_str(&raw).map_err(|e| format!("解析配置文件失败: {e}"))?;
// 从错误日志中提取哪些 plugin entry 有 additional properties
let err_log = read_gateway_error_log_excerpt(8192).to_lowercase();
let mut changed = false;
// 匹配形如 "plugins.entries.XXX.config: invalid config" 的模式
if let Some(entries) = doc
.pointer_mut("/plugins/entries")
.and_then(|v| v.as_object_mut())
{
let entry_names: Vec<String> = entries.keys().cloned().collect();
for name in &entry_names {
let pattern = format!("plugins.entries.{}.config", name).to_lowercase();
if err_log.contains(&pattern) {
if let Some(entry) = entries.get_mut(name) {
if let Some(obj) = entry.as_object_mut() {
if obj.contains_key("config") {
guardian_log(&format!(
"直接修复: 清空 plugins.entries.{name}.config含多余属性"
));
obj.remove("config");
changed = true;
}
}
}
}
}
}
// 通用回退:如果错误日志提到 additional properties 但没匹配到具体 entry
// 清空所有 plugin entry 的 config
if !changed
&& (err_log.contains("additional properties") || err_log.contains("additional property"))
{
if let Some(entries) = doc
.pointer_mut("/plugins/entries")
.and_then(|v| v.as_object_mut())
{
let entry_names: Vec<String> = entries.keys().cloned().collect();
for name in &entry_names {
if let Some(entry) = entries.get_mut(name) {
if let Some(obj) = entry.as_object_mut() {
if obj.contains_key("config") {
let config = obj.get("config").unwrap();
if config.is_object()
&& config.as_object().map(|m| !m.is_empty()).unwrap_or(false)
{
guardian_log(&format!(
"直接修复(通用回退): 清空 plugins.entries.{name}.config"
));
obj.remove("config");
changed = true;
}
}
}
}
}
}
}
if changed {
let formatted =
serde_json::to_string_pretty(&doc).map_err(|e| format!("序列化配置失败: {e}"))?;
std::fs::write(&config_path, formatted).map_err(|e| format!("写入配置文件失败: {e}"))?;
guardian_log("直接修复: 已写回 openclaw.json");
}
Ok(changed)
}
static GUARDIAN_STATE: OnceLock<Arc<Mutex<GuardianRuntimeState>>> = OnceLock::new();
static GUARDIAN_STARTED: AtomicBool = AtomicBool::new(false);
static GATEWAY_CONFIG_AUTO_FIX_STATE: OnceLock<Arc<Mutex<GatewayConfigAutoFixState>>> =
OnceLock::new();
fn gateway_config_auto_fix_state() -> &'static Arc<Mutex<GatewayConfigAutoFixState>> {
GATEWAY_CONFIG_AUTO_FIX_STATE
.get_or_init(|| Arc::new(Mutex::new(GatewayConfigAutoFixState::default())))
}
fn finish_gateway_config_auto_fix_attempt() {
let mut state = gateway_config_auto_fix_state().lock().unwrap();
state.in_progress = false;
}
async fn try_auto_fix_gateway_config(
reason: &str,
app: Option<&tauri::AppHandle>,
) -> Result<bool, String> {
if !looks_like_gateway_config_mismatch(reason) {
return Ok(false);
}
{
let mut state = gateway_config_auto_fix_state().lock().unwrap();
if state.in_progress {
return Ok(false);
}
if let Some(last_attempt) = state.last_attempt {
if last_attempt.elapsed() < GATEWAY_CONFIG_AUTO_FIX_COOLDOWN {
return Ok(false);
}
}
state.in_progress = true;
state.last_attempt = Some(Instant::now());
}
guardian_log("检测到 Gateway 启动疑似配置失配,尝试自动执行 openclaw doctor --fix");
emit_guardian_event(
app,
"auto_fix_start",
"检测到 Gateway 配置异常,正在自动执行 openclaw doctor --fix…",
);
let result = tokio::time::timeout(
Duration::from_secs(30),
crate::utils::openclaw_command_async()
.args(["doctor", "--fix"])
.output(),
)
.await;
finish_gateway_config_auto_fix_attempt();
match result {
Ok(Ok(output)) => {
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
if output.status.success() {
let summary = if !stderr.is_empty() { stderr } else { stdout };
if summary.is_empty() {
guardian_log("自动执行 openclaw doctor --fix 成功");
} else {
guardian_log(&format!("自动执行 openclaw doctor --fix 成功: {summary}"));
}
Ok(true)
} else {
let summary = if !stderr.is_empty() { stderr } else { stdout };
let detail = if summary.is_empty() {
"doctor --fix 返回失败".to_string()
} else {
summary
};
guardian_log(&format!("自动执行 openclaw doctor --fix 失败: {detail}"));
emit_guardian_event(
app,
"auto_fix_failure",
format!("已尝试自动执行 openclaw doctor --fix但修复失败{detail}"),
);
Err(format!(
"检测到 Gateway 配置异常,已尝试自动执行 openclaw doctor --fix但修复失败{detail}"
))
}
}
Ok(Err(err)) => {
guardian_log(&format!("自动执行 openclaw doctor --fix 失败: {err}"));
emit_guardian_event(
app,
"auto_fix_failure",
format!("已尝试自动执行 openclaw doctor --fix但命令执行失败{err}"),
);
Err(format!(
"检测到 Gateway 配置异常,已尝试自动执行 openclaw doctor --fix但命令执行失败{err}"
))
}
Err(_) => {
guardian_log("自动执行 openclaw doctor --fix 超时 (30s)");
emit_guardian_event(
app,
"auto_fix_failure",
"已尝试自动执行 openclaw doctor --fix但修复超时 (30s)",
);
Err(
"检测到 Gateway 配置异常,已尝试自动执行 openclaw doctor --fix但修复超时 (30s)"
.into(),
)
}
}
}
fn guardian_state() -> &'static Arc<Mutex<GuardianRuntimeState>> {
GUARDIAN_STATE.get_or_init(|| Arc::new(Mutex::new(GuardianRuntimeState::default())))
}
fn guardian_log(message: &str) {
let log_dir = crate::commands::openclaw_dir().join("logs");
let _ = std::fs::create_dir_all(&log_dir);
let path = log_dir.join("guardian.log");
let line = format!(
"[{}] {}\n",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S"),
message
);
let _ = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(path)
.and_then(|mut f| std::io::Write::write_all(&mut f, line.as_bytes()));
}
fn emit_guardian_event(app: Option<&tauri::AppHandle>, kind: &str, message: impl Into<String>) {
if let Some(app) = app {
let payload = GuardianEventPayload {
kind: kind.to_string(),
auto_restart_count: 0,
message: message.into(),
};
let _ = app.emit("guardian-event", payload);
}
}
fn guardian_snapshot() -> GuardianStatus {
let state = guardian_state().lock().unwrap();
GuardianStatus {
backend_managed: true,
paused: state.pause_reason.is_some(),
manual_hold: state.manual_hold,
give_up: state.give_up,
auto_restart_count: state.auto_restart_count,
}
}
pub(crate) fn guardian_mark_manual_stop() {
let mut state = guardian_state().lock().unwrap();
state.manual_hold = true;
state.give_up = false;
state.auto_restart_count = 0;
state.last_restart_time = None;
state.running_since = None;
guardian_log("用户主动停止 Gateway后端守护进入手动停机保持状态");
}
pub(crate) fn guardian_mark_manual_start() {
let mut state = guardian_state().lock().unwrap();
state.manual_hold = false;
state.give_up = false;
state.auto_restart_count = 0;
state.last_restart_time = None;
state.running_since = None;
guardian_log("用户主动启动/恢复 Gateway后端守护已重置自动重启状态");
}
pub(crate) fn guardian_pause(reason: &str) {
let mut state = guardian_state().lock().unwrap();
state.pause_reason = Some(reason.to_string());
state.give_up = false;
guardian_log(&format!("后端守护已暂停: {reason}"));
}
pub(crate) fn guardian_resume(reason: &str) {
let mut state = guardian_state().lock().unwrap();
state.pause_reason = None;
state.running_since = None;
guardian_log(&format!("后端守护已恢复: {reason}"));
}
fn gateway_config_exists() -> bool {
crate::commands::openclaw_dir()
.join("openclaw.json")
.exists()
}
async fn gateway_service_status() -> Result<Option<ServiceStatus>, String> {
let mut services = get_services_status().await?;
if let Some(index) = services
.iter()
.position(|svc| svc.label == "ai.openclaw.gateway")
{
return Ok(Some(services.remove(index)));
}
Ok(services.into_iter().next())
}
async fn guardian_tick(app: &tauri::AppHandle) {
let snapshot = match gateway_service_status().await {
Ok(Some(svc)) => svc,
Ok(None) => return,
Err(err) => {
guardian_log(&format!("读取 Gateway 状态失败: {err}"));
return;
}
};
let ready = snapshot.cli_installed && gateway_config_exists();
let running = snapshot.running;
let now = Instant::now();
let (restart_attempt, emit_give_up) = {
let mut state = guardian_state().lock().unwrap();
let mut restart_attempt = None::<u32>;
let mut emit_give_up = None::<String>;
if state.last_seen_running.is_none() {
state.last_seen_running = Some(running);
state.running_since = running.then_some(now);
return;
}
if !ready {
state.last_seen_running = Some(running);
state.running_since = running.then_some(now);
return;
}
if state.pause_reason.is_some() {
state.last_seen_running = Some(running);
state.running_since = if running {
state.running_since.or(Some(now))
} else {
None
};
return;
}
if running {
if state.last_seen_running != Some(true) {
if state.manual_hold || state.give_up {
state.manual_hold = false;
state.give_up = false;
state.auto_restart_count = 0;
state.last_restart_time = None;
guardian_log("检测到 Gateway 已重新运行,后端守护已退出手动停机/放弃状态");
}
state.running_since = Some(now);
}
if state.auto_restart_count > 0
&& state
.running_since
.map(|ts| now.duration_since(ts) >= GUARDIAN_STABLE_WINDOW)
.unwrap_or(false)
{
state.auto_restart_count = 0;
state.last_restart_time = None;
guardian_log("Gateway 已稳定运行,后端守护已清零自动重启计数");
}
state.last_seen_running = Some(true);
return;
}
let was_running = state.last_seen_running == Some(true);
state.last_seen_running = Some(false);
state.running_since = None;
if !was_running || state.manual_hold || state.give_up {
return;
}
if std::env::consts::OS == "windows" {
state.manual_hold = true;
state.auto_restart_count = 0;
state.last_restart_time = None;
guardian_log("检测到 Windows Gateway 终端窗口已关闭,按用户停机处理,不自动重启");
return;
}
if let Some(last) = state.last_restart_time {
if now.duration_since(last) < GUARDIAN_RESTART_COOLDOWN {
return;
}
}
if state.auto_restart_count >= GUARDIAN_MAX_AUTO_RESTART {
state.give_up = true;
let message = format!(
"Gateway 连续自动重启 {} 次后仍异常,后端守护已停止自动拉起",
GUARDIAN_MAX_AUTO_RESTART
);
guardian_log(&message);
emit_give_up = Some(message);
(restart_attempt, emit_give_up)
} else {
state.auto_restart_count += 1;
state.last_restart_time = Some(now);
restart_attempt = Some(state.auto_restart_count);
(restart_attempt, emit_give_up)
}
};
if let Some(attempt) = restart_attempt {
guardian_log(&format!(
"检测到 Gateway 异常退出,后端守护开始自动重启 ({attempt}/{GUARDIAN_MAX_AUTO_RESTART})"
));
if let Err(err) = start_service_impl_internal("ai.openclaw.gateway", Some(app)).await {
guardian_log(&format!("后端守护自动重启失败: {err}"));
}
}
if let Some(message) = emit_give_up {
let payload = GuardianEventPayload {
kind: "give_up".into(),
auto_restart_count: GUARDIAN_MAX_AUTO_RESTART,
message,
};
let _ = app.emit("guardian-event", payload);
}
}
async fn start_service_impl_internal(
label: &str,
app: Option<&tauri::AppHandle>,
) -> Result<(), String> {
match start_service_impl_internal_once(label).await {
Ok(()) => Ok(()),
Err(err) => match try_auto_fix_gateway_config(&err, app).await {
Ok(true) => {
guardian_log("自动修复完成,准备重试启动 Gateway");
emit_guardian_event(
app,
"auto_fix_retry",
"已自动修复配置,正在重试启动 Gateway…",
);
#[cfg(target_os = "windows")]
{
platform::cleanup_zombie_gateway_processes();
}
tokio::time::sleep(Duration::from_millis(500)).await;
match start_service_impl_internal_once(label).await {
Ok(()) => {
emit_guardian_event(
app,
"auto_fix_success",
"已自动修复配置并成功重试启动 Gateway。",
);
Ok(())
}
Err(retry_err) => {
// 二级回退doctor --fix 没解决问题,尝试直接修改 JSON
if looks_like_gateway_config_mismatch(&retry_err) {
guardian_log("doctor --fix 后仍失败,尝试直接修复 openclaw.json");
match try_direct_config_strip() {
Ok(true) => {
emit_guardian_event(
app,
"auto_fix_retry",
"已直接修复配置文件,正在再次重试启动 Gateway…",
);
#[cfg(target_os = "windows")]
{
platform::cleanup_zombie_gateway_processes();
}
tokio::time::sleep(Duration::from_millis(500)).await;
match start_service_impl_internal_once(label).await {
Ok(()) => {
emit_guardian_event(
app,
"auto_fix_success",
"已直接修复配置并成功启动 Gateway。",
);
return Ok(());
}
Err(e) => {
emit_guardian_event(
app,
"auto_fix_failure",
format!("直接修复后仍启动失败:{e}"),
);
}
}
}
Ok(false) => {
guardian_log("直接修复未找到可清理的配置项");
}
Err(e) => {
guardian_log(&format!("直接修复失败: {e}"));
}
}
}
emit_guardian_event(
app,
"auto_fix_failure",
format!(
"已自动执行 openclaw doctor --fix 并重试启动 Gateway但仍失败{retry_err}"
),
);
Err(format!(
"{retry_err}\n(已自动执行 openclaw doctor --fix + 直接修复并重试启动 Gateway"
))
}
}
}
Ok(false) => Err(err),
Err(fix_err) => Err(format!("{err}\n{fix_err}")),
},
}
}
async fn start_service_impl_internal_once(label: &str) -> Result<(), String> {
#[cfg(target_os = "macos")]
{
platform::start_service_impl(label)?;
}
#[cfg(not(target_os = "macos"))]
{
platform::start_service_impl(label).await?;
}
wait_for_gateway_running(label, Duration::from_secs(15)).await
}
async fn stop_service_impl_internal(label: &str) -> Result<(), String> {
#[cfg(target_os = "macos")]
{
platform::stop_service_impl(label)?;
}
#[cfg(not(target_os = "macos"))]
{
platform::stop_service_impl(label).await?;
}
wait_for_gateway_stopped(label, Duration::from_secs(10)).await
}
async fn restart_service_impl_internal(
label: &str,
app: Option<&tauri::AppHandle>,
) -> Result<(), String> {
stop_service_impl_internal(label).await?;
start_service_impl_internal(label, app).await
}
pub fn start_backend_guardian(app: tauri::AppHandle) {
if GUARDIAN_STARTED.swap(true, Ordering::SeqCst) {
return;
}
// Windows 重启后清理残留的僵尸 Gateway 进程(防止多进程堆积)
#[cfg(target_os = "windows")]
{
platform::cleanup_zombie_gateway_processes();
}
guardian_log("后端守护循环已启动");
tauri::async_runtime::spawn(async move {
loop {
guardian_tick(&app).await;
tokio::time::sleep(GUARDIAN_INTERVAL).await;
}
});
}
#[tauri::command]
pub fn guardian_status() -> Result<GuardianStatus, String> {
Ok(guardian_snapshot())
}
// ===== macOS 实现 =====
#[cfg(target_os = "macos")]
mod platform {
use std::fs;
use std::path::PathBuf;
use std::process::Command;
const OPENCLAW_PREFIXES: &[&str] = &["ai.openclaw."];
fn common_cli_candidates() -> Vec<PathBuf> {
let mut candidates = Vec::new();
// standalone 安装目录(集中管理,避免多处硬编码)
for sa_dir in crate::commands::config::all_standalone_dirs() {
candidates.push(sa_dir.join("openclaw"));
}
// Homebrew 路径(非 standalone保留
candidates.push(PathBuf::from("/opt/homebrew/bin/openclaw"));
candidates.push(PathBuf::from("/usr/local/bin/openclaw"));
candidates
}
/// macOS 上 CLI 是否安装(兼容手动安装 / standalone / Homebrew
pub fn is_cli_installed() -> bool {
crate::utils::resolve_openclaw_cli_path().is_some()
|| common_cli_candidates().into_iter().any(|p| p.exists())
}
pub fn current_uid() -> Result<u32, String> {
let output = Command::new("id")
.arg("-u")
.output()
.map_err(|e| format!("获取 UID 失败: {e}"))?;
let uid_str = String::from_utf8_lossy(&output.stdout).trim().to_string();
uid_str
.parse::<u32>()
.map_err(|e| format!("解析 UID 失败: {e}"))
}
/// 动态扫描 LaunchAgents 目录,只返回 OpenClaw 核心服务
pub fn scan_service_labels() -> Vec<String> {
let home = dirs::home_dir().unwrap_or_default();
let agents_dir = home.join("Library/LaunchAgents");
let mut labels = Vec::new();
if let Ok(entries) = fs::read_dir(&agents_dir) {
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if !name.ends_with(".plist") {
continue;
}
let label = name.trim_end_matches(".plist");
if OPENCLAW_PREFIXES.iter().any(|p| label.starts_with(p)) {
labels.push(label.to_string());
}
}
}
labels.sort();
if labels.is_empty() {
labels.push("ai.openclaw.gateway".to_string());
}
labels
}
fn plist_path(label: &str) -> String {
let home = dirs::home_dir().unwrap_or_default();
format!("{}/Library/LaunchAgents/{}.plist", home.display(), label)
}
/// 跨平台统一检测TCP 连端口 + lsof 获取 PID
pub fn check_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
let socket_addr = match addr.parse() {
Ok(a) => a,
Err(_) => return (false, None),
};
// 两次尝试:第一次 1 秒,失败后短暂等待再用 2 秒重试,避免瞬态超时误判
let connected =
std::net::TcpStream::connect_timeout(&socket_addr, std::time::Duration::from_secs(1))
.is_ok()
|| {
std::thread::sleep(std::time::Duration::from_millis(300));
std::net::TcpStream::connect_timeout(
&socket_addr,
std::time::Duration::from_secs(2),
)
.is_ok()
};
if connected {
let pid = get_pid_by_lsof(port);
(true, pid)
} else {
(false, None)
}
}
/// 通过 lsof 获取监听指定端口的进程 PID
fn get_pid_by_lsof(port: u16) -> Option<u32> {
let output = Command::new("lsof")
.args(["-i", &format!("TCP:{}", port), "-sTCP:LISTEN", "-t"])
.output()
.ok()?;
let text = String::from_utf8_lossy(&output.stdout);
text.lines().next()?.trim().parse::<u32>().ok()
}
/// launchctl 失败时的回退:直接通过 CLI spawn Gateway 进程
fn start_gateway_direct() -> Result<(), String> {
// 启动前再次检查端口(防止 launchctl→direct 回退链路中重复拉起)
let port = crate::commands::gateway_listen_port();
if let Ok(addr) = format!("127.0.0.1:{port}").parse::<std::net::SocketAddr>() {
if std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_millis(500))
.is_ok()
{
return Err(format!("端口 {} 已被占用,跳过 direct 启动", port));
}
}
let log_dir = crate::commands::openclaw_dir().join("logs");
fs::create_dir_all(&log_dir).ok();
let stdout_log = fs::OpenOptions::new()
.create(true)
.append(true)
.open(log_dir.join("gateway.log"))
.map_err(|e| format!("创建日志文件失败: {e}"))?;
let stderr_log = fs::OpenOptions::new()
.create(true)
.append(true)
.open(log_dir.join("gateway.err.log"))
.map_err(|e| format!("创建错误日志文件失败: {e}"))?;
let mut cmd = crate::utils::openclaw_command();
cmd.arg("gateway")
.stdin(std::process::Stdio::null())
.stdout(stdout_log)
.stderr(stderr_log);
cmd.spawn().map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
"OpenClaw CLI 未找到,请确认已安装并重启 ClawPanel。".to_string()
} else {
format!("启动 Gateway 失败: {e}")
}
})?;
// 等 Gateway 初始化(最多 10s轮询端口就绪
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
let addr = match addr.parse() {
Ok(a) => a,
Err(_) => {
return Err(format!("端口 {port} 解析失败"));
}
};
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
while std::time::Instant::now() < deadline {
std::thread::sleep(std::time::Duration::from_millis(500));
if std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_millis(200))
.is_ok()
{
return Ok(());
}
}
Err(format!(
"Gateway 启动超时,请查看 {}",
log_dir.join("gateway.err.log").display()
))
}
pub fn start_service_impl(label: &str) -> Result<(), String> {
// 启动前检查端口是否已被占用,防止重复拉起导致端口冲突和内存浪费
let port = crate::commands::gateway_listen_port();
let pre_check_addr: std::net::SocketAddr = format!("127.0.0.1:{port}")
.parse()
.map_err(|_| format!("端口 {port} 解析失败"))?;
if std::net::TcpStream::connect_timeout(
&pre_check_addr,
std::time::Duration::from_millis(500),
)
.is_ok()
{
return Err(format!(
"端口 {} 已被占用Gateway 可能已在运行中(或其他程序占用了该端口)",
port
));
}
let uid = current_uid()?;
let path = plist_path(label);
let domain_target = format!("gui/{}", uid);
let service_target = format!("gui/{}/{}", uid, label);
// 先尝试 plist 文件是否存在
if !std::path::Path::new(&path).exists() {
return start_gateway_direct();
}
// Issue #91: 先检查服务是否已注册,避免重复 bootstrap 触发 macOS "后台项已添加" 通知
let already_registered = Command::new("launchctl")
.args(["print", &service_target])
.output()
.map(|out| out.status.success())
.unwrap_or(false);
if !already_registered {
let bootstrap_out = Command::new("launchctl")
.args(["bootstrap", &domain_target, &path])
.output()
.map_err(|e| format!("bootstrap 失败: {e}"))?;
if !bootstrap_out.status.success() {
let stderr = String::from_utf8_lossy(&bootstrap_out.stderr);
if !stderr.contains("already bootstrapped") && !stderr.trim().is_empty() {
return start_gateway_direct();
}
}
}
let kickstart_out = Command::new("launchctl")
.args(["kickstart", &service_target])
.output()
.map_err(|e| format!("kickstart 失败: {e}"))?;
if !kickstart_out.status.success() {
let stderr = String::from_utf8_lossy(&kickstart_out.stderr);
if !stderr.trim().is_empty() {
// kickstart 也失败,回退到直接启动
return start_gateway_direct();
}
}
Ok(())
}
pub fn stop_service_impl(label: &str) -> Result<(), String> {
let uid = current_uid()?;
let service_target = format!("gui/{}/{}", uid, label);
let output = Command::new("launchctl")
.args(["bootout", &service_target])
.output()
.map_err(|e| format!("停止失败: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
if !stderr.contains("No such process")
&& !stderr.contains("Could not find specified service")
&& !stderr.trim().is_empty()
{
return Err(format!("停止 {label} 失败: {stderr}"));
}
}
Ok(())
}
#[allow(dead_code)]
pub fn restart_service_impl(label: &str) -> Result<(), String> {
let uid = current_uid()?;
let path = plist_path(label);
let domain_target = format!("gui/{}", uid);
let service_target = format!("gui/{}/{}", uid, label);
// 先停
let _ = Command::new("launchctl")
.args(["bootout", &service_target])
.output();
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
loop {
let (running, _) = check_service_status(uid, label);
if !running || std::time::Instant::now() >= deadline {
break;
}
std::thread::sleep(std::time::Duration::from_millis(200));
}
// plist 不存在,直接用 CLI 启动
if !std::path::Path::new(&path).exists() {
return start_gateway_direct();
}
let bootstrap_out = Command::new("launchctl")
.args(["bootstrap", &domain_target, &path])
.output()
.map_err(|e| format!("重启 bootstrap 失败: {e}"))?;
if !bootstrap_out.status.success() {
let stderr = String::from_utf8_lossy(&bootstrap_out.stderr);
if !stderr.contains("already bootstrapped") && !stderr.trim().is_empty() {
// launchctl 失败,回退到直接启动
return start_gateway_direct();
}
}
let kickstart_out = Command::new("launchctl")
.args(["kickstart", "-k", &service_target])
.output()
.map_err(|e| format!("重启 kickstart 失败: {e}"))?;
if !kickstart_out.status.success() {
let stderr = String::from_utf8_lossy(&kickstart_out.stderr);
if !stderr.trim().is_empty() {
// kickstart 也失败,回退到直接启动
return start_gateway_direct();
}
}
Ok(())
}
}
// ===== Windows 实现 =====
#[cfg(target_os = "windows")]
mod platform {
use std::env;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::os::windows::process::CommandExt;
use std::path::{Path, PathBuf};
use std::process::Command as StdCommand;
use std::sync::Mutex;
use std::time::{Duration, Instant};
/// 缓存 is_cli_installed 结果,避免每 15 秒 polling 都 spawn cmd.exe
static CLI_CACHE: Mutex<Option<(bool, std::time::Instant)>> = Mutex::new(None);
const CLI_CACHE_TTL: std::time::Duration = std::time::Duration::from_secs(60);
const CREATE_NO_WINDOW: u32 = 0x08000000;
/// 记录最后一次成功启动的 Gateway PID避免误判旧进程为新进程
static LAST_KNOWN_GATEWAY_PID: Mutex<Option<u32>> = Mutex::new(None);
/// 记录当前活跃的 Gateway 子进程(用于 stop 时精确 kill
static ACTIVE_GATEWAY_CHILD: Mutex<Option<u32>> = Mutex::new(None);
/// 检查 Gateway 端口是否有响应(阻塞式 HTTP /health3s 超时)
/// 单次探测;若需要对瞬态抖动更宽容,使用 `is_gateway_port_responsive_with_retry`
fn is_gateway_port_responsive(port: u16) -> bool {
use std::io::{Read, Write as IoWrite};
use std::net::TcpStream;
let addr = format!("127.0.0.1:{port}");
let mut stream =
match TcpStream::connect_timeout(&addr.parse().unwrap(), Duration::from_secs(3)) {
Ok(s) => s,
Err(_) => return false,
};
let _ = stream.set_read_timeout(Some(Duration::from_secs(3)));
let _ = stream.set_write_timeout(Some(Duration::from_secs(2)));
let req = format!("GET /health HTTP/1.0\r\nHost: 127.0.0.1:{port}\r\n\r\n");
if stream.write_all(req.as_bytes()).is_err() {
return false;
}
let mut buf = [0u8; 256];
match stream.read(&mut buf) {
Ok(n) if n > 0 => {
let resp = String::from_utf8_lossy(&buf[..n]);
resp.contains("200") || resp.contains("OK")
}
_ => false,
}
}
/// 带重试的 /health 健康检查issue #244 的关键修复
///
/// 原 cleanup 只做 1 次 /health 判断,若 Gateway 刚启动仍在做初始化(加载插件、
/// 连接数据库、等 network warm-up一次请求就可能超时被误判为僵尸并 kill —
/// 接着 start_service_impl 又会 Hidden-start 一个新实例,循环往复。
///
/// 改为 retries 次重试、每次间隔 interval 后才定性,给健康 Gateway 更宽容的启动窗口。
fn is_gateway_port_responsive_with_retry(port: u16, retries: u32, interval: Duration) -> bool {
for attempt in 0..retries {
if attempt > 0 {
std::thread::sleep(interval);
}
if is_gateway_port_responsive(port) {
return true;
}
}
false
}
/// 从 netstat 输出中提取监听指定端口的所有 PID
fn find_listening_pids(port: u16) -> Vec<u32> {
let output = match StdCommand::new("netstat")
.args(["-ano", "-p", "TCP"])
.creation_flags(CREATE_NO_WINDOW)
.output()
{
Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
Err(_) => return vec![],
};
let mut pids = vec![];
for line in output.lines() {
let line = line.trim();
if !line.contains(&format!(":{port}")) || !line.contains("LISTENING") {
continue;
}
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 5 {
continue;
}
if let Ok(pid) = parts.last().unwrap().parse::<u32>() {
if pid > 0 && !pids.contains(&pid) {
pids.push(pid);
}
}
}
pids
}
/// 清理残留的僵尸 Gateway 进程(启动时调用,防止 Windows 重启后多进程堆积)
///
/// issue #244 修复:原实现只做 1 次 /health 检测Gateway 刚 ready 仍在跑
/// startup hooks / channel connect 时,单次探测可能超时 → 被误杀 → 触发 Hidden-start
/// 又起一个新的,循环往复。改为 3 次重试(间隔 800ms才算"真僵尸"。
pub(crate) fn cleanup_zombie_gateway_processes() {
let port = crate::commands::gateway_listen_port();
let pids = find_listening_pids(port);
if pids.is_empty() {
return;
}
// 带重试的 /health 检测 —— 最多等 3 * 800ms = 2.4s 才判定僵尸
let responsive =
is_gateway_port_responsive_with_retry(port, 3, std::time::Duration::from_millis(800));
for pid in &pids {
let pid = *pid;
if let Some(cmdline) = read_process_command_line(pid) {
let cmdline_lower = cmdline.to_lowercase();
let is_gateway =
cmdline_lower.contains("openclaw") && cmdline_lower.contains("gateway");
let our_pid = *LAST_KNOWN_GATEWAY_PID.lock().unwrap();
if is_gateway {
if !responsive {
// 3 次 /health 全部失败 → 僵尸进程,强制终止
super::guardian_log(&format!(
"检测到僵尸 Gateway 进程 (PID {pid}):端口 {port} 占用但 /health 连续 3 次无响应,强制终止"
));
kill_process_tree(pid);
} else if Some(pid) != our_pid {
// /health 有响应但不是当前实例启动的 → 采纳为已知进程,不杀
super::guardian_log(&format!(
"检测到健康的 Gateway 进程 (PID {pid})/health 正常响应,已采纳"
));
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = Some(pid);
}
// is_gateway + responsive + 本就是我们的 PID → 无需任何操作
}
}
// 读不到命令行时,不做假设,避免误杀其他进程
}
}
fn read_process_command_line(pid: u32) -> Option<String> {
// 优先用 PowerShell Get-CimInstancewmic 在 Win11 已弃用)
// fallback 到 wmic 以兼容旧版 Windows
let ps_output = StdCommand::new("powershell")
.args([
"-NoProfile",
"-Command",
&format!(
"(Get-CimInstance Win32_Process -Filter 'ProcessId={}').CommandLine",
pid
),
])
.creation_flags(CREATE_NO_WINDOW)
.output();
if let Ok(o) = ps_output {
let text = String::from_utf8_lossy(&o.stdout).trim().to_string();
if !text.is_empty() {
return Some(text);
}
}
// fallback: wmic兼容 Win10 及更早版本)
let output = match StdCommand::new("wmic")
.args([
"process",
"where",
&format!("ProcessId={pid}"),
"get",
"CommandLine",
"/format:list",
])
.creation_flags(CREATE_NO_WINDOW)
.output()
{
Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
Err(_) => return None,
};
for line in output.lines() {
let line = line.trim();
if let Some(cmd) = line.strip_prefix("CommandLine=") {
return Some(cmd.to_string());
}
}
None
}
fn kill_process_tree(pid: u32) {
// 先尝试 /ti包含子进程
let _ = StdCommand::new("taskkill")
.args(["/f", "/t", "/pid", &pid.to_string()])
.creation_flags(CREATE_NO_WINDOW)
.output();
}
/// 获取 Gateway 端口对应的真实 PID仅返回 OpenClaw Gateway 的 PID
fn get_gateway_pid_by_port(port: u16) -> Option<u32> {
let output = match StdCommand::new("netstat")
.args(["-ano", "-p", "TCP"])
.creation_flags(CREATE_NO_WINDOW)
.output()
{
Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
Err(_) => return None,
};
for line in output.lines() {
let line = line.trim();
if !line.contains(&format!(":{port}")) || !line.contains("LISTENING") {
continue;
}
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 5 {
continue;
}
let pid = match parts.last().unwrap().parse::<u32>() {
Ok(p) => p,
Err(_) => continue,
};
// 验证命令行
if let Some(cmdline) = read_process_command_line(pid) {
let cmdline_lower = cmdline.to_lowercase();
if cmdline_lower.contains("openclaw") && cmdline_lower.contains("gateway") {
return Some(pid);
}
} else {
// 读不到命令行时,不做假设,避免误杀其他进程
continue;
}
}
None
}
/// 验证指定 PID 是否还活着
fn is_process_alive(pid: u32) -> bool {
let output = StdCommand::new("tasklist")
.args(["/fi", &format!("PID eq {pid}"), "/nh"])
.creation_flags(CREATE_NO_WINDOW)
.output();
match output {
Ok(o) => {
let stdout = String::from_utf8_lossy(&o.stdout);
// tasklist /nh 输出格式: "node.exe 1234 Console 1 50,000 K"
// 行首是进程名PID 在中间,需要检查行中是否包含该 PID
for line in stdout.lines() {
let trimmed = line.trim();
// 跳过空行和 "INFO: No tasks" 之类的提示
if trimmed.is_empty() || trimmed.starts_with("INFO:") {
continue;
}
// 检查行中是否包含该 PID作为独立的数字字段
let fields: Vec<&str> = trimmed.split_whitespace().collect();
if fields.len() >= 2 {
if let Ok(line_pid) = fields[1].parse::<u32>() {
if line_pid == pid {
return true;
}
}
}
}
false
}
Err(_) => false,
}
}
/// Windows 不需要 UID
pub fn current_uid() -> Result<u32, String> {
Ok(0)
}
/// 检测 openclaw CLI 是否已安装(带 60s 缓存,避免频繁 spawn 进程)
pub fn is_cli_installed() -> bool {
// 检查缓存
if let Ok(guard) = CLI_CACHE.lock() {
if let Some((val, ts)) = *guard {
if ts.elapsed() < CLI_CACHE_TTL {
return val;
}
}
}
let result = check_cli_installed_inner();
if let Ok(mut guard) = CLI_CACHE.lock() {
*guard = Some((result, std::time::Instant::now()));
}
result
}
pub fn invalidate_cli_cache() {
if let Ok(mut guard) = CLI_CACHE.lock() {
*guard = None;
}
}
fn candidate_cli_paths() -> Vec<PathBuf> {
let mut candidates = Vec::new();
// standalone 安装目录(集中管理,避免多处硬编码)
for sa_dir in crate::commands::config::all_standalone_dirs() {
candidates.push(sa_dir.join("openclaw.cmd"));
}
if let Ok(appdata) = env::var("APPDATA") {
candidates.push(Path::new(&appdata).join("npm").join("openclaw.cmd"));
}
if let Ok(localappdata) = env::var("LOCALAPPDATA") {
candidates.push(
Path::new(&localappdata)
.join("Programs")
.join("nodejs")
.join("node_modules")
.join("@qingchencloud")
.join("openclaw-zh")
.join("bin")
.join("openclaw.js"),
);
}
for segment in crate::commands::enhanced_path().split(';') {
let dir = segment.trim();
if dir.is_empty() {
continue;
}
let base = Path::new(dir);
candidates.push(base.join("openclaw.cmd"));
candidates.push(base.join("openclaw"));
candidates.push(
base.join("node_modules")
.join("@qingchencloud")
.join("openclaw-zh")
.join("bin")
.join("openclaw.js"),
);
}
candidates
}
fn check_cli_installed_inner() -> bool {
if let Some(path) = crate::utils::resolve_openclaw_cli_path() {
if Path::new(&path).exists() {
return true;
}
}
// 方式1: 检查常见文件路径(零进程,最快)
for path in candidate_cli_paths() {
if path.exists() {
return true;
}
}
// 方式2: 通过 where 查找(兼容 nvm、自定义 prefix 等)
// 过滤掉第三方 openclaw如 CherryStudio 的 .cherrystudio/bin/openclaw.exe
let mut where_cmd = std::process::Command::new("where");
where_cmd.arg("openclaw");
where_cmd.env("PATH", crate::commands::enhanced_path());
where_cmd.creation_flags(CREATE_NO_WINDOW);
if let Ok(o) = where_cmd.output() {
if o.status.success() {
let stdout = String::from_utf8_lossy(&o.stdout);
for line in stdout.lines() {
let p = line.trim().to_lowercase();
// 跳过已知第三方 openclaw 路径
if p.contains(".cherrystudio") || p.contains("cherry-studio") {
continue;
}
if !p.is_empty() {
return true;
}
}
}
}
false
}
/// Windows 上始终返回 Gateway 标签(不管 CLI 是否安装)
pub fn scan_service_labels() -> Vec<String> {
vec!["ai.openclaw.gateway".to_string()]
}
/// 检测 Gateway 是否在运行,并返回其 PID
/// 策略:先 TCP 端口检测连通性,再用 netstat+PowerShell 验证命令行是 OpenClaw Gateway
pub fn check_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
let socket_addr = match addr.parse() {
Ok(a) => a,
Err(_) => return (false, None),
};
// 两次尝试:第一次 1 秒,失败后短暂等待再用 2 秒重试,避免瞬态超时误判
let connected =
std::net::TcpStream::connect_timeout(&socket_addr, Duration::from_secs(1)).is_ok() || {
std::thread::sleep(Duration::from_millis(300));
std::net::TcpStream::connect_timeout(&socket_addr, Duration::from_secs(2)).is_ok()
};
if !connected {
// 端口不通,先清空已知的僵死 PID
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = None;
return (false, None);
}
// 端口通了PID 识别仅作为增强信息
if let Some(pid) = get_gateway_pid_by_port(port) {
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = Some(pid);
(true, Some(pid))
} else {
// 避免因命令行查询失败误判为“未运行”并触发重复拉起
(true, None)
}
}
fn cleanup_legacy_gateway_window() {
let _ = std::process::Command::new("taskkill")
.args([
"/f",
"/t",
"/fi",
&format!("WINDOWTITLE eq {GATEWAY_WINDOW_TITLE}"),
])
.creation_flags(CREATE_NO_WINDOW)
.output();
}
#[allow(dead_code)]
fn create_gateway_log_files() -> Result<(std::fs::File, std::fs::File), String> {
let log_dir = crate::commands::openclaw_dir().join("logs");
fs::create_dir_all(&log_dir).map_err(|e| format!("创建日志目录失败: {e}"))?;
let mut stdout_log = OpenOptions::new()
.create(true)
.append(true)
.open(log_dir.join("gateway.log"))
.map_err(|e| format!("创建日志文件失败: {e}"))?;
let stderr_log = OpenOptions::new()
.create(true)
.append(true)
.open(log_dir.join("gateway.err.log"))
.map_err(|e| format!("创建错误日志文件失败: {e}"))?;
let _ = writeln!(
stdout_log,
"\n[{}] [ClawPanel] Hidden-start Gateway on Windows",
chrono::Local::now().to_rfc3339()
);
Ok((stdout_log, stderr_log))
}
const GATEWAY_WINDOW_TITLE: &str = "OpenClaw Gateway";
fn quote_batch_path(value: &str) -> String {
format!("\"{}\"", value.replace('"', ""))
}
fn gateway_terminal_command(cli: &str) -> String {
if cli.eq_ignore_ascii_case("openclaw") {
return "openclaw gateway".into();
}
if cli.to_ascii_lowercase().ends_with(".js") {
return format!("node {} gateway", quote_batch_path(cli));
}
format!("{} gateway", quote_batch_path(cli))
}
fn write_gateway_terminal_runner(openclaw_dir: &Path, cli: &str) -> Result<PathBuf, String> {
fs::create_dir_all(openclaw_dir).map_err(|e| format!("创建 OpenClaw 目录失败: {e}"))?;
let runner_path = openclaw_dir.join("clawpanel-gateway.cmd");
let content = format!(
"@echo off\r\ntitle {GATEWAY_WINDOW_TITLE}\r\necho OpenClaw Gateway is running. Keep this window open.\r\necho Close this window to stop Gateway.\r\necho.\r\n{}\r\necho.\r\necho Gateway exited. You can close this window.\r\n",
gateway_terminal_command(cli)
);
fs::write(&runner_path, content).map_err(|e| format!("写入 Gateway 启动脚本失败: {e}"))?;
Ok(runner_path)
}
/// 在 Windows 上打开一个可见终端启动 Gateway
///
/// 关键:必须通过 `cmd.exe` 内置的 `start` 命令拉起新控制台。
/// 直接 `StdCommand::new("cmd").creation_flags(CREATE_NEW_CONSOLE)` 在
/// Rust 默认 `Stdio::inherit` + `STARTF_USESTDHANDLES` 影响下CREATE_NEW_CONSOLE
/// 会被吞掉(子进程能跑起来但 MainWindowHandle=0、无可见窗口
/// 通过外层 `cmd /c start "<title>" cmd /K runner.cmd` 让 `start` 用全新的
/// `CreateProcess` 拉起子进程stdio 不继承、控制台真正分离,稳定弹出可见窗口。
pub async fn start_service_impl(_label: &str) -> Result<(), String> {
if !is_cli_installed() {
return Err(
"openclaw CLI 未安装,请先通过 npm install -g @qingchencloud/openclaw-zh 安装"
.into(),
);
}
let (running, pid) = check_service_status(0, "");
if running {
if pid.is_some() {
return Ok(());
}
return Err(format!(
"端口 {} 被未知进程占用,请先关闭占用该端口的程序",
crate::commands::gateway_listen_port()
));
}
cleanup_zombie_gateway_processes();
let before_pid = *LAST_KNOWN_GATEWAY_PID.lock().unwrap();
let cli = crate::utils::resolve_openclaw_cli_path().unwrap_or_else(|| "openclaw".into());
let openclaw_dir = crate::commands::openclaw_dir();
let config_path = openclaw_dir.join("openclaw.json");
let runner_path = write_gateway_terminal_runner(&openclaw_dir, &cli)?;
let runner_path_str = runner_path.to_string_lossy().to_string();
let openclaw_dir_str = openclaw_dir.to_string_lossy().to_string();
// 外层 cmd /c 自身用 CREATE_NO_WINDOW 隐藏(短命桥接进程),
// 内部 `start` 会创建一个真正可见的新控制台窗口运行 runner.cmd。
let mut cmd = StdCommand::new("cmd");
cmd.args([
"/c",
"start",
GATEWAY_WINDOW_TITLE,
"/D",
openclaw_dir_str.as_str(),
"cmd",
"/D",
"/K",
runner_path_str.as_str(),
])
.creation_flags(CREATE_NO_WINDOW)
.env("PATH", crate::commands::enhanced_path())
.env("OPENCLAW_HOME", &openclaw_dir)
.env("OPENCLAW_STATE_DIR", &openclaw_dir)
.env("OPENCLAW_CONFIG_PATH", &config_path)
.current_dir(&openclaw_dir);
crate::commands::apply_proxy_env(&mut cmd);
let status = cmd
.status()
.map_err(|e| format!("启动 Gateway 失败: {e}"))?;
if !status.success() {
return Err(format!(
"启动 Gateway 失败cmd /c start 退出码 {:?}",
status.code()
));
}
// 轮询等待:端口就绪 AND PID 与之前不同(新 Gateway 进程已接管端口)
// 外层 cmd /c start 是 detached 桥接进程,无法用 spawn().id() 跟踪真正的 Gateway。
// 改为 polling netstat 拿到监听端口的 PID作为真实 Gateway PID 记录。
let deadline = Instant::now() + Duration::from_secs(20);
while Instant::now() < deadline {
tokio::time::sleep(Duration::from_millis(300)).await;
let (running2, pid2) = check_service_status(0, "");
if let (true, Some(current_pid)) = (running2, pid2) {
let is_new = Some(current_pid) != before_pid;
if is_new && is_process_alive(current_pid) {
// 记录真实 Gateway PID 供 stop 时精确 kill
let mut active = ACTIVE_GATEWAY_CHILD.lock().unwrap();
*active = Some(current_pid);
return Ok(());
}
}
}
Err("Gateway 启动超时,请查看弹出的终端窗口或 gateway.err.log".into())
}
/// 关闭 Gateway精确 kill Gateway 进程,不误杀其他 node.exe
pub async fn stop_service_impl(_label: &str) -> Result<(), String> {
let port = crate::commands::gateway_listen_port();
// 端口不通 → 已停止
if !check_service_status(0, "").0 {
cleanup_legacy_gateway_window();
// 清空已记录的 PID
{
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = None;
}
{
let mut active = ACTIVE_GATEWAY_CHILD.lock().unwrap();
*active = None;
}
return Ok(());
}
// 先尝试 openclaw gateway stop
let _ = crate::utils::openclaw_command_async()
.args(["gateway", "stop"])
.output()
.await;
for _ in 0..10 {
tokio::time::sleep(Duration::from_millis(300)).await;
if !check_service_status(0, "").0 {
cleanup_legacy_gateway_window();
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = None;
let mut active = ACTIVE_GATEWAY_CHILD.lock().unwrap();
*active = None;
return Ok(());
}
}
// 精确 kill只杀 Gateway 进程,不杀所有 node.exe
// 1. 用记录的活跃子进程 PID
let pids_to_kill: Vec<u32> = {
let active = ACTIVE_GATEWAY_CHILD.lock().unwrap();
let known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
[active.as_ref(), known.as_ref()]
.into_iter()
.flatten()
.copied()
.collect()
};
for &pid in &pids_to_kill {
if pid > 0 && is_process_alive(pid) {
kill_process_tree(pid);
}
}
// 2. 再用 netstat 找当前端口上的 Gateway PID兜底
if let Some(gw_pid) = get_gateway_pid_by_port(port) {
if !pids_to_kill.contains(&gw_pid) {
kill_process_tree(gw_pid);
}
}
tokio::time::sleep(Duration::from_millis(500)).await;
cleanup_legacy_gateway_window();
if !check_service_status(0, "").0 {
// 清空记录
let mut known = LAST_KNOWN_GATEWAY_PID.lock().unwrap();
*known = None;
let mut active = ACTIVE_GATEWAY_CHILD.lock().unwrap();
*active = None;
Ok(())
} else {
Err("停止 Gateway 失败,请手动检查进程".into())
}
}
#[allow(dead_code)]
pub async fn restart_service_impl(_label: &str) -> Result<(), String> {
stop_service_impl(_label).await?;
start_service_impl(_label).await
}
}
// ===== Linux 实现(与 Windows 类似,使用 openclaw CLI =====
#[cfg(target_os = "linux")]
mod platform {
use std::env;
use std::path::PathBuf;
use std::sync::Mutex;
use std::time::Duration;
static CLI_CACHE: Mutex<Option<(bool, std::time::Instant)>> = Mutex::new(None);
const CLI_CACHE_TTL: std::time::Duration = std::time::Duration::from_secs(60);
pub fn current_uid() -> Result<u32, String> {
let output = std::process::Command::new("id")
.arg("-u")
.output()
.map_err(|e| format!("获取 UID 失败: {e}"))?;
let uid_str = String::from_utf8_lossy(&output.stdout).trim().to_string();
uid_str
.parse::<u32>()
.map_err(|e| format!("解析 UID 失败: {e}"))
}
/// Linux 上检测 CLI 是否安装(带缓存)
pub fn is_cli_installed() -> bool {
if let Ok(guard) = CLI_CACHE.lock() {
if let Some((val, ts)) = *guard {
if ts.elapsed() < CLI_CACHE_TTL {
return val;
}
}
}
let result = candidate_cli_paths().into_iter().any(|p| p.exists())
|| std::process::Command::new("which")
.arg("openclaw")
.env("PATH", crate::commands::enhanced_path())
.output()
.map(|o| o.status.success())
.unwrap_or(false);
if let Ok(mut guard) = CLI_CACHE.lock() {
*guard = Some((result, std::time::Instant::now()));
}
result
}
fn candidate_cli_paths() -> Vec<PathBuf> {
let mut candidates = Vec::new();
if let Ok(home) = env::var("HOME") {
candidates.push(PathBuf::from(&home).join(".openclaw").join("openclaw"));
candidates.push(
PathBuf::from(&home)
.join(".npm-global")
.join("bin")
.join("openclaw"),
);
candidates.push(
PathBuf::from(&home)
.join("node_modules")
.join(".bin")
.join("openclaw"),
);
}
// standalone 安装目录(集中管理,避免多处硬编码)
for sa_dir in crate::commands::config::all_standalone_dirs() {
candidates.push(sa_dir.join("openclaw"));
}
candidates.push(PathBuf::from("/usr/local/bin/openclaw"));
candidates.push(PathBuf::from("/usr/bin/openclaw"));
for segment in crate::commands::enhanced_path().split(':') {
let dir = segment.trim();
if dir.is_empty() {
continue;
}
let base = PathBuf::from(dir);
candidates.push(base.join("openclaw"));
}
candidates
}
pub fn scan_service_labels() -> Vec<String> {
vec!["ai.openclaw.gateway".to_string()]
}
/// 跨平台统一检测TCP 连端口
#[allow(dead_code)]
pub async fn check_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
let socket_addr: std::net::SocketAddr = match addr.parse() {
Ok(a) => a,
Err(_) => return (false, None),
};
// 使用 spawn_blocking 避免阻塞 Tokio 运行时
let result = tokio::task::spawn_blocking(move || {
std::net::TcpStream::connect_timeout(&socket_addr, std::time::Duration::from_secs(1))
.is_ok()
})
.await
.unwrap_or(false);
if result {
(true, None)
} else {
(false, None)
}
}
/// 清理残留的 Gateway 进程Linux 版:通过 fuser 查端口占用进程并 kill
fn cleanup_zombie_gateway_processes() {
let port = crate::commands::gateway_listen_port();
// 尝试用 fuser 找到端口占用进程
if let Ok(output) = std::process::Command::new("fuser")
.args([&format!("{port}/tcp")])
.output()
{
let pids = String::from_utf8_lossy(&output.stdout);
for pid_str in pids.split_whitespace() {
if let Ok(pid) = pid_str.trim().parse::<u32>() {
let _ = std::process::Command::new("kill")
.args(["-9", &pid.to_string()])
.output();
eprintln!("[cleanup_zombie] killed PID {pid} on port {port}");
}
}
}
}
async fn gateway_command(action: &str) -> Result<(), String> {
if !is_cli_installed() {
return Err(
"openclaw CLI 未安装,请先通过 npm install -g @qingchencloud/openclaw-zh 安装"
.into(),
);
}
let action_owned = action.to_string();
let mut child = crate::utils::openclaw_command_async()
.args(["gateway", &action_owned])
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| format!("执行 openclaw gateway {action_owned} 失败: {e}"))?;
// 带超时等待命令完成(防止 restart 时旧进程卡死导致永远阻塞)
let timeout = if action_owned == "stop" || action_owned == "restart" {
Duration::from_secs(20)
} else {
Duration::from_secs(30)
};
match tokio::time::timeout(timeout, child.wait()).await {
Ok(Ok(status)) => {
if !status.success() {
let stderr = if let Some(mut err) = child.stderr.take() {
let mut buf = String::new();
use tokio::io::AsyncReadExt;
let _ = err.read_to_string(&mut buf).await;
buf
} else {
String::new()
};
if action_owned == "restart" {
eprintln!("[gateway_command] restart 失败,尝试强制清理后重启");
cleanup_zombie_gateway_processes();
tokio::time::sleep(Duration::from_millis(500)).await;
return start_service_impl("ai.openclaw.gateway").await;
}
return Err(format!("openclaw gateway {action_owned} 失败: {stderr}"));
}
Ok(())
}
Ok(Err(e)) => Err(format!("openclaw gateway {action_owned} 进程异常: {e}")),
Err(_) => {
let _ = child.kill().await;
eprintln!(
"[gateway_command] openclaw gateway {} 超时 ({}s),强制终止",
action_owned,
timeout.as_secs()
);
if action_owned == "restart" || action_owned == "stop" {
cleanup_zombie_gateway_processes();
tokio::time::sleep(Duration::from_millis(500)).await;
if action_owned == "restart" {
return start_service_impl("ai.openclaw.gateway").await;
}
return Ok(());
}
Err(format!("openclaw gateway {action_owned} 超时"))
}
}
}
pub async fn start_service_impl(_label: &str) -> Result<(), String> {
if !is_cli_installed() {
return Err(
"openclaw CLI 未安装,请先通过 npm install -g @qingchencloud/openclaw-zh 安装"
.into(),
);
}
// 启动前检查端口是否已被占用,防止重复拉起导致端口冲突和内存浪费
let port = crate::commands::gateway_listen_port();
let pre_check_addr: std::net::SocketAddr = format!("127.0.0.1:{port}")
.parse()
.map_err(|_| format!("端口 {port} 解析失败"))?;
let already_occupied = tokio::task::spawn_blocking(move || {
std::net::TcpStream::connect_timeout(
&pre_check_addr,
std::time::Duration::from_millis(500),
)
.is_ok()
})
.await
.unwrap_or(false);
if already_occupied {
return Err(format!(
"端口 {} 已被占用Gateway 可能已在运行中(或其他程序占用了该端口)",
port
));
}
let output = crate::utils::openclaw_command_async()
.args(["gateway", "start"])
.output()
.await
.map_err(|e| format!("执行 openclaw gateway start 失败: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("openclaw gateway start 失败: {stderr}"));
}
// 等端口就绪(最多 15s
let port = crate::commands::gateway_listen_port();
let addr: std::net::SocketAddr = match format!("127.0.0.1:{port}").parse() {
Ok(a) => a,
Err(_) => return Err(format!("端口 {port} 解析失败")),
};
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(15);
while std::time::Instant::now() < deadline {
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
let addr_clone = addr;
let connected = tokio::task::spawn_blocking(move || {
std::net::TcpStream::connect_timeout(
&addr_clone,
std::time::Duration::from_millis(200),
)
.is_ok()
})
.await
.unwrap_or(false);
if connected {
return Ok(());
}
}
Err(format!(
"Gateway 启动超时,请查看 {}",
crate::commands::openclaw_dir()
.join("logs")
.join("gateway.err.log")
.display()
))
}
pub async fn stop_service_impl(_label: &str) -> Result<(), String> {
gateway_command("stop").await
}
#[allow(dead_code)]
pub async fn restart_service_impl(_label: &str) -> Result<(), String> {
gateway_command("restart").await
}
}
#[cfg(target_os = "windows")]
pub fn invalidate_cli_detection_cache() {
platform::invalidate_cli_cache();
}
#[cfg(not(target_os = "windows"))]
pub fn invalidate_cli_detection_cache() {}
// ===== 跨平台公共接口 =====
/// 跨平台统一的服务状态检测:纯 TCP 端口连通性macOS/Linux 使用)
#[cfg(not(target_os = "windows"))]
#[allow(dead_code)]
fn check_tcp_service_status(_uid: u32, _label: &str) -> (bool, Option<u32>) {
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
let socket_addr = match addr.parse() {
Ok(a) => a,
Err(_) => return (false, None),
};
match std::net::TcpStream::connect_timeout(&socket_addr, Duration::from_secs(1)) {
Ok(_) => (true, None),
Err(_) => (false, None),
}
}
#[tauri::command]
pub async fn get_services_status() -> Result<Vec<ServiceStatus>, String> {
let _uid = platform::current_uid()?;
let labels = platform::scan_service_labels();
let desc_map = description_map();
let cli_installed = platform::is_cli_installed();
let mut results = Vec::new();
for label in labels.iter().map(String::as_str) {
let (running, pid) = current_gateway_runtime(label).await;
let owner = read_gateway_owner();
let mut owned_by_current_instance = running
&& owner
.as_ref()
.map(|record| is_current_gateway_owner(record, pid))
.unwrap_or(false);
if owned_by_current_instance {
if let Some(record) = owner.as_ref() {
if gateway_owner_pid_needs_refresh(record, pid) {
let _ = write_gateway_owner(pid);
}
}
}
// 自动认领Gateway 在运行但无有效 owner且端口 + 数据目录匹配 → 自动写入 owner
if running && !owned_by_current_instance && should_auto_claim_gateway(&owner) {
let _ = write_gateway_owner(pid);
owned_by_current_instance = true;
}
let ownership = if !running {
Some("stopped".to_string())
} else if owned_by_current_instance {
Some("owned".to_string())
} else {
Some("foreign".to_string())
};
results.push(ServiceStatus {
label: label.to_string(),
pid,
running,
description: desc_map.get(label).unwrap_or(&"").to_string(),
cli_installed,
ownership,
owned_by_current_instance: Some(owned_by_current_instance),
});
}
Ok(results)
}
#[tauri::command]
pub async fn start_service(app: tauri::AppHandle, label: String) -> Result<(), String> {
let (running, pid) = current_gateway_runtime(&label).await;
if running {
ensure_owned_gateway_or_err(pid)?;
write_gateway_owner(pid)?;
guardian_mark_manual_start();
return Ok(());
}
guardian_mark_manual_start();
start_service_impl_internal(&label, Some(&app)).await
}
#[tauri::command]
pub async fn stop_service(label: String) -> Result<(), String> {
let (running, pid) = current_gateway_runtime(&label).await;
if running {
ensure_owned_gateway_or_err(pid)?;
}
guardian_mark_manual_stop();
stop_service_impl_internal(&label).await
}
#[tauri::command]
pub async fn restart_service(app: tauri::AppHandle, label: String) -> Result<(), String> {
let (running, pid) = current_gateway_runtime(&label).await;
if running {
ensure_owned_gateway_or_err(pid)?;
}
guardian_pause("manual restart");
guardian_mark_manual_start();
let result = restart_service_impl_internal(&label, Some(&app)).await;
guardian_resume("manual restart");
result
}
/// 认领外部 Gateway将 gateway-owner.json 强制覆写为当前面板实例签名
#[tauri::command]
pub async fn claim_gateway() -> Result<(), String> {
let (running, pid) = current_gateway_runtime("ai.openclaw.gateway").await;
if !running {
return Err("Gateway 未运行,无需认领".into());
}
write_gateway_owner(pid)?;
Ok(())
}
/// 轻量 TCP 端口探测:检测 Gateway 端口是否可连通(用于 WS 连接前的就绪等待)
#[tauri::command]
pub async fn probe_gateway_port() -> bool {
let port = crate::commands::gateway_listen_port();
let addr = format!("127.0.0.1:{port}");
tokio::net::TcpStream::connect(&addr).await.is_ok()
}