Compare commits

..

6 Commits

Author SHA1 Message Date
Awuqing
1e386e1205 fix: route sms otp through webhook notifier 2026-04-25 22:09:26 +08:00
Awuqing
6e7a884c64 fix: annotate validated sms webhook request 2026-04-25 21:57:52 +08:00
Awuqing
0b2263086f fix: harden sms webhook target validation 2026-04-25 21:50:20 +08:00
Awuqing
2f494818cf fix: store trusted device token in httponly cookie 2026-04-25 21:36:08 +08:00
Awuqing
7dfd12254b feat: add complete MFA support 2026-04-25 21:14:39 +08:00
Awuqing
2997e971a6 docs: add community and sponsors pages with dynamic GitHub contributor integration 2026-04-25 18:50:54 +08:00
71 changed files with 487 additions and 4083 deletions

View File

@@ -116,15 +116,12 @@ jobs:
fi
cp deploy/nginx.conf "${ARCHIVE_NAME}/nginx.conf" 2>/dev/null || true
tar czf "${ARCHIVE_NAME}.tar.gz" "${ARCHIVE_NAME}"
cp "${ARCHIVE_NAME}.tar.gz" "backupx-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz"
- name: Upload to GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ env.VERSION }}
files: |
backupx-${{ env.VERSION }}-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
backupx-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
files: backupx-${{ env.VERSION }}-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
generate_release_notes: true
# ─── Job 3: Docker 多架构 → Docker Hub ───

View File

@@ -62,8 +62,6 @@ curl -LO https://github.com/Awuqing/BackupX/releases/latest/download/backupx-lin
tar xzf backupx-*.tar.gz && cd backupx-* && sudo ./install.sh
```
For ARM64 hosts, use `backupx-linux-arm64.tar.gz`. The archive contains `backupx`, `web/`, `config.example.yaml`, and `install.sh`; run `install.sh` from the extracted directory.
Open `http://your-server:8340`, create the admin account, then follow the [5-minute Quick Start](https://awuqing.github.io/BackupX/docs/getting-started/quick-start).
## Documentation

View File

@@ -62,8 +62,6 @@ curl -LO https://github.com/Awuqing/BackupX/releases/latest/download/backupx-lin
tar xzf backupx-*.tar.gz && cd backupx-* && sudo ./install.sh
```
ARM64 主机请下载 `backupx-linux-arm64.tar.gz`。预编译包内包含 `backupx`、`web/`、`config.example.yaml` 和 `install.sh`,请在解压后的目录内执行 `install.sh`。
打开 `http://your-server:8340`,创建管理员账户,按 [5 分钟快速开始](https://awuqing.github.io/BackupX/zh-Hans/docs/getting-started/quick-start) 完成首次备份。
## 文档

View File

@@ -1,10 +1,6 @@
#!/bin/sh
set -e
if [ "${1:-}" = "agent" ]; then
exec /app/bin/backupx "$@"
fi
# Backend listens on internal port 8341, Nginx exposes 8340
export BACKUPX_SERVER_PORT="${BACKUPX_SERVER_PORT_INTERNAL:-8341}"

View File

@@ -1,25 +1,17 @@
#!/bin/sh
set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
PROJECT_ROOT=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)
PROJECT_ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
PREFIX="${PREFIX:-/opt/backupx}"
ETC_DIR="${ETC_DIR:-/etc/backupx}"
SERVICE_NAME="backupx"
APP_USER="backupx"
APP_GROUP="backupx"
if [ -f "$SCRIPT_DIR/backupx" ] && [ -d "$SCRIPT_DIR/web" ]; then
BIN_SOURCE="${BIN_SOURCE:-$SCRIPT_DIR/backupx}"
WEB_SOURCE="${WEB_SOURCE:-$SCRIPT_DIR/web}"
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$SCRIPT_DIR/config.example.yaml}"
NGINX_SOURCE="${NGINX_SOURCE:-$SCRIPT_DIR/nginx.conf}"
else
BIN_SOURCE="${BIN_SOURCE:-$PROJECT_ROOT/server/backupx}"
WEB_SOURCE="${WEB_SOURCE:-$PROJECT_ROOT/web/dist}"
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$PROJECT_ROOT/server/config.example.yaml}"
NGINX_SOURCE="${NGINX_SOURCE:-$PROJECT_ROOT/deploy/nginx.conf}"
fi
BIN_SOURCE="${BIN_SOURCE:-$PROJECT_ROOT/server/backupx}"
WEB_SOURCE="${WEB_SOURCE:-$PROJECT_ROOT/web/dist}"
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$PROJECT_ROOT/server/config.example.yaml}"
SERVICE_SOURCE="${SERVICE_SOURCE:-$PROJECT_ROOT/deploy/backupx.service}"
NGINX_SOURCE="${NGINX_SOURCE:-$PROJECT_ROOT/deploy/nginx.conf}"
if [ "$(id -u)" -ne 0 ]; then
echo "请使用 root 或 sudo 执行安装脚本。" >&2
@@ -28,20 +20,13 @@ fi
if [ ! -f "$BIN_SOURCE" ]; then
echo "未找到后端二进制:$BIN_SOURCE" >&2
echo "源码树安装请先执行cd \"$PROJECT_ROOT/server\" && go build -o backupx ./cmd/backupx" >&2
echo "发布包安装请确认当前目录包含 ./backupx、./web 和 ./install.sh。" >&2
echo "请先执行cd \"$PROJECT_ROOT/server\" && go build -o backupx ./cmd/backupx" >&2
exit 1
fi
if [ ! -d "$WEB_SOURCE" ]; then
echo "未找到前端构建产物:$WEB_SOURCE" >&2
echo "源码树安装请先执行cd \"$PROJECT_ROOT/web\" && npm run build" >&2
echo "发布包安装请确认当前目录包含 ./web。" >&2
exit 1
fi
if [ ! -f "$CONFIG_TEMPLATE" ]; then
echo "未找到配置模板:$CONFIG_TEMPLATE" >&2
echo "请先执行cd \"$PROJECT_ROOT/web\" && npm run build" >&2
exit 1
fi
@@ -62,34 +47,11 @@ if [ ! -f "$ETC_DIR/config.yaml" ]; then
install -m 0640 "$CONFIG_TEMPLATE" "$ETC_DIR/config.yaml"
fi
if [ -f "$SERVICE_SOURCE" ]; then
install -m 0644 "$SERVICE_SOURCE" "/etc/systemd/system/$SERVICE_NAME.service"
else
cat > "/etc/systemd/system/$SERVICE_NAME.service" <<UNIT
[Unit]
Description=BackupX API Service
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=$APP_USER
Group=$APP_GROUP
WorkingDirectory=$PREFIX
ExecStart=$PREFIX/bin/backupx -config $ETC_DIR/config.yaml
Restart=on-failure
RestartSec=5
NoNewPrivileges=true
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
UNIT
fi
install -m 0644 "$SERVICE_SOURCE" "/etc/systemd/system/$SERVICE_NAME.service"
systemctl daemon-reload
systemctl enable --now "$SERVICE_NAME"
if [ -d "/etc/nginx/conf.d" ] && [ -f "$NGINX_SOURCE" ]; then
if [ -d "/etc/nginx/conf.d" ]; then
install -m 0644 "$NGINX_SOURCE" "/etc/nginx/conf.d/$SERVICE_NAME.conf"
if command -v nginx >/dev/null 2>&1; then
nginx -t

View File

@@ -22,8 +22,6 @@ services:
# - /home/user/data:/mnt/data:ro
environment:
- TZ=Asia/Shanghai
# 远程 Agent 需要通过公网或可路由地址连接 Master 时,取消注释并改成真实 URL
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
# 通过 BACKUPX_ 前缀环境变量覆盖配置:
# - BACKUPX_LOG_LEVEL=debug
# - BACKUPX_BACKUP_MAX_CONCURRENT=4

View File

@@ -25,19 +25,6 @@ The installer performs these steps automatically:
4. Installs `backupx.service` (systemd), enabled at boot
5. (Optional) installs an Nginx site file — see [Nginx Reverse Proxy](./nginx)
For multi-node clusters, edit `/etc/backupx/config.yaml` after installation and set the Master URL that remote Agents can reach:
```yaml
server:
external_url: "https://backup.example.com"
```
Restart BackupX after changing it:
```bash
sudo systemctl restart backupx
```
## From source
```bash

View File

@@ -15,14 +15,13 @@ server:
host: "0.0.0.0" # BACKUPX_SERVER_HOST
port: 8340 # BACKUPX_SERVER_PORT
mode: "release" # release | debug
external_url: "" # BACKUPX_SERVER_EXTERNAL_URL — public Master URL for Agent install scripts
database:
path: "./data/backupx.db" # BACKUPX_DATABASE_PATH — embedded SQLite
security:
jwt_secret: "" # BACKUPX_SECURITY_JWT_SECRET — auto-generated if empty
jwt_expire: "24h" # BACKUPX_SECURITY_JWT_EXPIRE
jwt_expires_in: "24h"
encryption_key: "" # AES-256-GCM key for storage config encryption
backup:
@@ -47,20 +46,7 @@ The environment wins when both file and env are set. All dot-paths become unders
| Config key | Env variable |
|------------|--------------|
| `server.port` | `BACKUPX_SERVER_PORT` |
| `server.external_url` | `BACKUPX_SERVER_EXTERNAL_URL` |
| `security.jwt_expire` | `BACKUPX_SECURITY_JWT_EXPIRE` |
| `log.level` | `BACKUPX_LOG_LEVEL` |
| `backup.max_concurrent` | `BACKUPX_BACKUP_MAX_CONCURRENT` |
| `backup.temp_dir` | `BACKUPX_BACKUP_TEMP_DIR` |
| `backup.bandwidth_limit` | `BACKUPX_BACKUP_BANDWIDTH_LIMIT` |
## Master external URL
Set `server.external_url` when BackupX is behind Docker, Nginx, a load balancer, or any reverse proxy whose internal Host is not reachable by remote Agents:
```yaml
server:
external_url: "https://backup.example.com"
```
This value is used when BackupX renders one-click Agent install scripts and docker-compose snippets. It must be reachable from every Agent host. Leave it empty only when `X-Forwarded-Proto` / `X-Forwarded-Host` are reliable and point to the same URL that Agents can access.

View File

@@ -25,8 +25,6 @@ services:
- /etc/nginx:/mnt/nginx-conf:ro
environment:
- TZ=Asia/Shanghai
# Required when remote Agents must connect through a public or routed URL:
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
- BACKUPX_LOG_LEVEL=info
- BACKUPX_BACKUP_MAX_CONCURRENT=2
@@ -44,17 +42,6 @@ docker compose up -d
To back up files from the host, mount them into the container. When creating a file-type task in the web UI, point the source path at the mount location (e.g. `/mnt/www`). Make sure the directory is visible inside the container.
## Multi-node clusters
When deploying Agents on other machines, set `BACKUPX_SERVER_EXTERNAL_URL` on the Master container to the URL that those Agents can reach:
```yaml
environment:
- BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
```
Use an HTTPS URL if Agents cross untrusted networks. The generated one-click install scripts and docker-compose snippets use this value as `BACKUPX_AGENT_MASTER`.
## Environment variables
All configuration keys can be overridden with the `BACKUPX_` prefix:

View File

@@ -8,8 +8,6 @@ description: File, MySQL, PostgreSQL, SQLite and SAP HANA — what they back up
BackupX supports five built-in backup types. Type determines which runner executes the job.
When a task is routed to a remote Agent, the source tools and paths are resolved on that Agent host. Multi-target uploads are still tracked per storage target; if at least one target succeeds, the backup record is marked successful and the per-target result table shows partial failures.
## File / Directory
Tars (and optionally gzips) one or more filesystem paths.

View File

@@ -28,19 +28,6 @@ BackupX supports Master-Agent mode: backup tasks can be routed to specific nodes
## Walkthrough
### 0. Set the Master URL for production clusters
Before generating Agent install commands, make sure the Master URL shown to Agents is stable and reachable from every target host.
If BackupX runs behind Docker, Nginx, a load balancer, or an outer reverse proxy, configure `server.external_url` or `BACKUPX_SERVER_EXTERNAL_URL` on the Master:
```yaml title="config.yaml"
server:
external_url: "https://backup.example.com"
```
This URL is baked into systemd units, foreground commands, and docker-compose snippets. If it is wrong, Agents will install successfully but stay offline because they keep polling an internal or browser-only address.
### 1. Open the install wizard
In the Web Console → **Node Management** → **Add Node**. You'll see a three-step wizard.
@@ -62,8 +49,6 @@ The script runs automatically and:
5. Runs `systemctl enable --now backupx-agent`
6. Polls `/api/v1/agent/self` until the master confirms `status: online` (up to 30 s)
Docker mode uses the same `BACKUPX_AGENT_MASTER`, `BACKUPX_AGENT_TOKEN`, and `BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp` environment contract. After starting the container, the installer also probes `/api/v1/agent/self`; if the node does not come online, it prints `docker ps` and `docker logs --tail=100 backupx-agent` diagnostics before exiting non-zero.
If you choose the URL-based fallback command and `curl` prints HTML or the shell reports `Syntax error: newline unexpected`, the install URL is being served by the web console instead of the backend. Ensure either `/api/install/` or `/install/` is forwarded to the BackupX backend, or use the embedded command generated by the console.
Reruns are idempotent — to upgrade or re-provision, simply generate a new install command and run it again. The one-time install link expires after its TTL or after first consumption, whichever is sooner.
@@ -83,15 +68,9 @@ In the **Backup Tasks** page, pick the target node when creating the task. When
- Local (`nodeId=0`) → Master executes in-process
- Remote node → Master enqueues the command → Agent claims → Agent runs locally → uploads → reports back
The node table shows the Agent health and command queue state: pending/dispatched depth, running long commands, timeouts, oldest active command age, and the latest Agent-side error. The same queue depth, running-command, and timeout snapshots are exported as Prometheus metrics:
- `backupx_agent_command_queue_depth`
- `backupx_agent_command_running`
- `backupx_agent_command_timeout_total`
## Known limitations
- **Encrypted backups are Master-only** — the Agent doesn't hold Master's AES-256 key. Creating or updating a task with `encrypt: true` and a remote node or node pool is rejected up front
- **Encrypted backups don't work via Agent** — the Agent doesn't hold Master's AES-256 key. Tasks with `encrypt: true` will fail if routed to an Agent
- **Directory browser timeout** — remote dir listing is a synchronous RPC through the queue (15s default)
- **Dispatched command timeout** — claimed-but-unfinished commands are marked `timeout` after 10 minutes

View File

@@ -42,8 +42,6 @@ Go to **Backup Tasks → New**. Three steps:
2. **Source** — paths for file backup (multi-source supported), or connection info for databases
3. **Storage & policy** — pick target(s), compression, retention days, encryption on/off
For Agent-routed tasks, encryption must stay off because the Agent never receives the Master's encryption key. BackupX rejects remote-node or node-pool tasks with encryption enabled during create/update.
Save, then click **Run Now** to trigger a test. Live logs stream on the **Backup Records** page.
:::note

View File

@@ -25,19 +25,6 @@ sudo ./install.sh
4. 安装并启用 `backupx.service` systemd 单元
5. (可选)生成 Nginx 站点配置 — 参见 [Nginx 反向代理](./nginx)
如果要部署多节点集群,安装后请编辑 `/etc/backupx/config.yaml`,设置远程 Agent 可访问到的 Master URL:
```yaml
server:
external_url: "https://backup.example.com"
```
修改后重启 BackupX:
```bash
sudo systemctl restart backupx
```
## 从源码构建
```bash

View File

@@ -15,14 +15,13 @@ server:
host: "0.0.0.0" # BACKUPX_SERVER_HOST
port: 8340 # BACKUPX_SERVER_PORT
mode: "release" # release | debug
external_url: "" # BACKUPX_SERVER_EXTERNAL_URL — Agent 安装脚本使用的 Master 对外 URL
database:
path: "./data/backupx.db" # BACKUPX_DATABASE_PATH — 内嵌 SQLite
security:
jwt_secret: "" # BACKUPX_SECURITY_JWT_SECRET — 留空自动生成
jwt_expire: "24h" # BACKUPX_SECURITY_JWT_EXPIRE
jwt_expires_in: "24h"
encryption_key: "" # 用于加密存储配置的 AES-256-GCM 密钥
backup:
@@ -47,20 +46,7 @@ log:
| 配置项 | 环境变量 |
|--------|----------|
| `server.port` | `BACKUPX_SERVER_PORT` |
| `server.external_url` | `BACKUPX_SERVER_EXTERNAL_URL` |
| `security.jwt_expire` | `BACKUPX_SECURITY_JWT_EXPIRE` |
| `log.level` | `BACKUPX_LOG_LEVEL` |
| `backup.max_concurrent` | `BACKUPX_BACKUP_MAX_CONCURRENT` |
| `backup.temp_dir` | `BACKUPX_BACKUP_TEMP_DIR` |
| `backup.bandwidth_limit` | `BACKUPX_BACKUP_BANDWIDTH_LIMIT` |
## Master 对外 URL
当 BackupX 部署在 Docker、Nginx、负载均衡或多层反向代理后面,且后端收到的内部 Host 不是远程 Agent 可访问地址时,请配置 `server.external_url`:
```yaml
server:
external_url: "https://backup.example.com"
```
BackupX 会用这个地址渲染一键 Agent 安装脚本和 docker-compose 片段。该地址必须能被所有 Agent 主机访问。只有在 `X-Forwarded-Proto` / `X-Forwarded-Host` 可靠且正好指向 Agent 可访问地址时,才建议留空。

View File

@@ -25,8 +25,6 @@ services:
- /etc/nginx:/mnt/nginx-conf:ro
environment:
- TZ=Asia/Shanghai
# 远程 Agent 需要通过公网或可路由地址连接 Master 时必须配置:
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
- BACKUPX_LOG_LEVEL=info
- BACKUPX_BACKUP_MAX_CONCURRENT=2
@@ -44,17 +42,6 @@ docker compose up -d
想备份宿主机上的文件,需要将对应路径挂载进容器。在 Web UI 创建文件类型任务时,把源路径指向挂载后的容器内路径(如 `/mnt/www`)。
## 多节点集群
如果要在其他机器部署 Agent,请在 Master 容器上设置 `BACKUPX_SERVER_EXTERNAL_URL`,值为所有 Agent 都能访问到的 URL:
```yaml
environment:
- BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
```
Agent 跨不可信网络访问时建议使用 HTTPS。控制台生成的一键安装脚本和 docker-compose 片段会把这个值写成 `BACKUPX_AGENT_MASTER`。
## 环境变量
所有配置项都可以通过 `BACKUPX_` 前缀环境变量覆盖:

View File

@@ -8,8 +8,6 @@ description: 文件、MySQL、PostgreSQL、SQLite 和 SAP HANA — 各自的能
BackupX 支持五种内置备份类型,类型决定了用哪个 runner 执行。
当任务路由到远程 Agent 时,源路径和外部工具都会在该 Agent 主机上解析。多存储目标上传仍会逐目标记录结果;只要至少一个目标上传成功,备份记录即为成功,详情中的目标结果表会展示部分失败。
## 文件 / 目录
打包(可选 gzip)一个或多个文件系统路径。

View File

@@ -28,19 +28,6 @@ BackupX 支持 Master-Agent 模式:备份任务可以指定在哪个节点执
## 一键部署步骤
### 0. 为生产集群设置 Master 对外 URL
生成 Agent 安装命令前,请先确认 Master URL 对所有目标主机稳定可达。
如果 BackupX 部署在 Docker、Nginx、负载均衡或外层反向代理后面,请在 Master 配置 `server.external_url` 或环境变量 `BACKUPX_SERVER_EXTERNAL_URL`:
```yaml title="config.yaml"
server:
external_url: "https://backup.example.com"
```
该 URL 会写入 systemd 单元、前台运行命令和 docker-compose 片段。如果地址不正确,Agent 可能安装成功但始终离线,因为它会持续轮询一个内网地址或仅浏览器可访问的地址。
### 1. 打开安装向导
Web 控制台 → **节点管理** → **添加节点**,打开三步向导:
@@ -62,8 +49,6 @@ Web 控制台 → **节点管理** → **添加节点**,打开三步向导:
5. 执行 `systemctl enable --now backupx-agent`
6. 轮询 `/api/v1/agent/self`,直到 Master 确认 `status: online`(最多 30 秒)
Docker 模式使用同一组环境变量约定:`BACKUPX_AGENT_MASTER`、`BACKUPX_AGENT_TOKEN` 和 `BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp`。容器启动后,安装脚本同样会探测 `/api/v1/agent/self`;如果节点没有上线,会输出 `docker ps` 与 `docker logs --tail=100 backupx-agent` 排查命令,并以非零状态退出。
如果使用 URL 备用命令时 `curl` 输出 HTML,或 shell 报 `Syntax error: newline unexpected`,说明安装 URL 被 Web 控制台接管而不是转发到后端。需要确保 `/api/install/` 或 `/install/` 至少一个路径能转发到 BackupX 后端,或改用控制台生成的嵌入式命令。
脚本是幂等的:升级或重装只需重新生成一条安装命令再跑一次。一次性安装链接在 TTL 到期或被首次消费后立即作废。
@@ -83,15 +68,9 @@ Docker 模式使用同一组环境变量约定:`BACKUPX_AGENT_MASTER`、`BACKU
- 本机 / 未指定(`nodeId=0`)→ Master 进程内直接执行
- 远程节点 → Master 写入命令队列 → Agent 拉取 → Agent 本地执行 → 上传 → 回报
节点列表会展示 Agent 健康与命令队列状态:pending/dispatched 深度、运行中的长任务、超时数、最旧活跃命令年龄和最近 Agent 错误。同样的队列深度、运行中命令数和超时快照会导出为 Prometheus 指标:
- `backupx_agent_command_queue_depth`
- `backupx_agent_command_running`
- `backupx_agent_command_timeout_total`
## 已知限制
- **加密备份仅支持 Master 本机执行**:Agent 不持有 Master 的 AES-256 密钥。创建或更新任务时,如果 `encrypt: true` 且选择了远程节点或节点池,会在入口直接拒绝
- **Agent 不支持加密备份**:Agent 不持有 Master 的 AES-256 密钥。`encrypt: true` 的任务路由到 Agent 时会直接上报失败
- **目录浏览超时**:远程目录浏览通过命令队列做同步 RPC,默认 15s 超时
- **派发命令超时**:Agent 领取但未完成的命令超过 10 分钟会被置 `timeout`

View File

@@ -42,8 +42,6 @@ description: 部署 BackupX、添加存储目标、创建第一个备份任务
2. **源配置** — 文件备份选择源路径(支持多个),数据库备份填写连接信息
3. **存储与策略** — 选择存储目标(支持多个)、压缩策略、保留天数、是否加密
对于路由到 Agent 的任务,加密必须关闭,因为 Agent 不会拿到 Master 的加密密钥。BackupX 会在创建/更新阶段拒绝开启加密的远程节点或节点池任务。
保存后可点击 **立即执行** 测试,**备份记录** 页面实时查看执行日志。
:::note

View File

@@ -3,7 +3,6 @@ server:
host: "0.0.0.0"
port: 8340
mode: "release" # debug | release
external_url: "" # 可选Master 对 Agent 可达的 URL例如 https://backup.example.com
database:
path: "./data/backupx.db" # SQLite 数据库路径

View File

@@ -143,24 +143,13 @@ func (c *MasterClient) GetTaskSpec(ctx context.Context, taskID uint) (*TaskSpec,
// RecordUpdate 与 service.AgentRecordUpdate 对齐
type RecordUpdate struct {
Status string `json:"status,omitempty"`
FileName string `json:"fileName,omitempty"`
FileSize int64 `json:"fileSize,omitempty"`
Checksum string `json:"checksum,omitempty"`
StoragePath string `json:"storagePath,omitempty"`
StorageTargetID uint `json:"storageTargetId,omitempty"`
StorageUploadResults []StorageResultItem `json:"storageUploadResults,omitempty"`
ErrorMessage string `json:"errorMessage,omitempty"`
LogAppend string `json:"logAppend,omitempty"`
}
type StorageResultItem struct {
StorageTargetID uint `json:"storageTargetId"`
StorageTargetName string `json:"storageTargetName"`
Status string `json:"status"`
StoragePath string `json:"storagePath,omitempty"`
FileSize int64 `json:"fileSize,omitempty"`
Error string `json:"error,omitempty"`
Status string `json:"status,omitempty"`
FileName string `json:"fileName,omitempty"`
FileSize int64 `json:"fileSize,omitempty"`
Checksum string `json:"checksum,omitempty"`
StoragePath string `json:"storagePath,omitempty"`
ErrorMessage string `json:"errorMessage,omitempty"`
LogAppend string `json:"logAppend,omitempty"`
}
// UpdateRecord 上报备份记录的状态/日志

View File

@@ -26,7 +26,7 @@ type Config struct {
HeartbeatInterval string `yaml:"heartbeatInterval"`
// PollInterval 命令轮询间隔,默认 5s
PollInterval string `yaml:"pollInterval"`
// TempDir 备份临时目录,默认 /var/lib/backupx-agent/tmp
// TempDir 备份临时目录,默认 /tmp/backupx-agent
TempDir string `yaml:"tempDir"`
// InsecureSkipTLSVerify 测试环境允许跳过 TLS 证书校验
InsecureSkipTLSVerify bool `yaml:"insecureSkipTlsVerify"`
@@ -98,7 +98,7 @@ func applyConfigDefaults(cfg *Config) (*Config, error) {
cfg.PollInterval = "5s"
}
if cfg.TempDir == "" {
cfg.TempDir = "/var/lib/backupx-agent/tmp"
cfg.TempDir = "/tmp/backupx-agent"
}
cfg.Master = strings.TrimRight(strings.TrimSpace(cfg.Master), "/")
return cfg, nil

View File

@@ -50,7 +50,7 @@ func TestLoadConfigDefaults(t *testing.T) {
if cfg.HeartbeatInterval != "15s" || cfg.PollInterval != "5s" {
t.Errorf("default intervals not applied: %+v", cfg)
}
if cfg.TempDir != "/var/lib/backupx-agent/tmp" {
if cfg.TempDir != "/tmp/backupx-agent" {
t.Errorf("default tempdir: %q", cfg.TempDir)
}
}

View File

@@ -4,7 +4,6 @@ import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"os"
@@ -20,10 +19,10 @@ import (
// Executor 负责在 Agent 本地执行命令。
type Executor struct {
client *MasterClient
tempDir string
backupRegistry *backup.Registry
storageRegistry *storage.Registry
client *MasterClient
tempDir string
backupRegistry *backup.Registry
storageRegistry *storage.Registry
}
// NewExecutor 构造执行器。预先初始化 backup runner 与 storage registry。
@@ -60,11 +59,6 @@ func NewExecutor(client *MasterClient, tempDir string) *Executor {
// 注意Agent 当前不支持 Encrypt=true加密密钥不下发到 Agent避免密钥扩散
// 遇到启用加密的任务会向 Master 上报失败并返回错误。
func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) error {
if err := e.ensureTempDir(); err != nil {
e.reportRecordFailure(ctx, recordID, err.Error())
return err
}
// 1) 拉取任务规格
spec, err := e.client.GetTaskSpec(ctx, taskID)
if err != nil {
@@ -80,6 +74,10 @@ func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) er
// 2) 构造 backup.TaskSpec 并找对应 runner
startedAt := time.Now().UTC()
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
e.reportRecordFailure(ctx, recordID, fmt.Sprintf("创建临时目录失败: %v", err))
return err
}
backupSpec := buildBackupTaskSpec(spec, startedAt, e.tempDir)
runner, err := e.backupRegistry.Runner(backupSpec.Type)
if err != nil {
@@ -126,52 +124,22 @@ func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) er
e.reportRecordFailure(ctx, recordID, "没有关联的存储目标")
return fmt.Errorf("no storage targets")
}
uploadResults := make([]StorageResultItem, 0, len(spec.StorageTargets))
selectedStorageTargetID := uint(0)
var uploadErrors []string
for _, target := range spec.StorageTargets {
if err := e.uploadToTarget(ctx, recordID, target, finalPath, storagePath, fileSize, spec.TaskID); err != nil {
uploadResults = append(uploadResults, StorageResultItem{
StorageTargetID: target.ID,
StorageTargetName: target.Name,
Status: "failed",
Error: err.Error(),
})
uploadErrors = append(uploadErrors, fmt.Sprintf("%s: %v", target.Name, err))
e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 上传到存储目标 %s 失败: %v\n", target.Name, err))
continue
e.reportRecordFailure(ctx, recordID, fmt.Sprintf("上传到 %s 失败: %v", target.Name, err))
return err
}
if selectedStorageTargetID == 0 {
selectedStorageTargetID = target.ID
}
uploadResults = append(uploadResults, StorageResultItem{
StorageTargetID: target.ID,
StorageTargetName: target.Name,
Status: "success",
StoragePath: storagePath,
FileSize: fileSize,
})
e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 已上传到存储目标 %s\n", target.Name))
}
if selectedStorageTargetID == 0 {
msg := strings.Join(uploadErrors, "; ")
if msg == "" {
msg = "所有存储目标上传均失败"
}
e.reportRecordFailureWithUploadResults(ctx, recordID, msg, uploadResults)
return fmt.Errorf("%s", msg)
}
// 6) 上报最终成功
return e.client.UpdateRecord(ctx, recordID, RecordUpdate{
Status: "success",
FileName: fileName,
FileSize: fileSize,
Checksum: checksum,
StoragePath: storagePath,
StorageTargetID: selectedStorageTargetID,
StorageUploadResults: uploadResults,
LogAppend: fmt.Sprintf("[agent] 任务完成,总计 %d 字节\n", fileSize),
Status: "success",
FileName: fileName,
FileSize: fileSize,
Checksum: checksum,
StoragePath: storagePath,
LogAppend: fmt.Sprintf("[agent] 任务完成,总计 %d 字节\n", fileSize),
})
}
@@ -207,22 +175,31 @@ func (e *Executor) appendLog(ctx context.Context, recordID uint, line string) {
// reportRecordFailure 上报失败状态
func (e *Executor) reportRecordFailure(ctx context.Context, recordID uint, msg string) {
e.reportRecordFailureWithUploadResults(ctx, recordID, msg, nil)
}
func (e *Executor) reportRecordFailureWithUploadResults(ctx context.Context, recordID uint, msg string, uploadResults []StorageResultItem) {
_ = e.client.UpdateRecord(ctx, recordID, RecordUpdate{
Status: "failed",
ErrorMessage: msg,
StorageUploadResults: uploadResults,
LogAppend: fmt.Sprintf("[agent] 错误: %s\n", msg),
Status: "failed",
ErrorMessage: msg,
LogAppend: fmt.Sprintf("[agent] 错误: %s\n", msg),
})
}
// buildBackupTaskSpec 把 AgentTaskSpec 转换为 backup.TaskSpec。
func buildBackupTaskSpec(spec *TaskSpec, startedAt time.Time, tempDir string) backup.TaskSpec {
sourcePaths := parseStringListField(spec.SourcePaths)
excludes := parseStringListField(spec.ExcludePatterns)
var sourcePaths []string
if strings.TrimSpace(spec.SourcePaths) != "" {
for _, p := range strings.Split(spec.SourcePaths, "\n") {
if p = strings.TrimSpace(p); p != "" {
sourcePaths = append(sourcePaths, p)
}
}
}
var excludes []string
if strings.TrimSpace(spec.ExcludePatterns) != "" {
for _, p := range strings.Split(spec.ExcludePatterns, "\n") {
if p = strings.TrimSpace(p); p != "" {
excludes = append(excludes, p)
}
}
}
return backup.TaskSpec{
ID: spec.TaskID,
Name: spec.Name,
@@ -245,37 +222,6 @@ func buildBackupTaskSpec(spec *TaskSpec, startedAt time.Time, tempDir string) ba
}
}
func (e *Executor) ensureTempDir() error {
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
return fmt.Errorf("create agent temp dir: %w", err)
}
return nil
}
func parseStringListField(value string) []string {
trimmed := strings.TrimSpace(value)
if trimmed == "" || trimmed == "[]" {
return nil
}
var jsonItems []string
if err := json.Unmarshal([]byte(trimmed), &jsonItems); err == nil {
return compactStringList(jsonItems)
}
return compactStringList(strings.FieldsFunc(trimmed, func(r rune) bool {
return r == '\n' || r == '\r'
}))
}
func compactStringList(items []string) []string {
result := make([]string, 0, len(items))
for _, item := range items {
if trimmed := strings.TrimSpace(item); trimmed != "" {
result = append(result, trimmed)
}
}
return result
}
// recordLogger 把 runner 日志回传到 Master 记录。
// 实现 backup.LogWriter每条日志追加到 record.log_content。
type recordLogger struct {
@@ -294,8 +240,8 @@ func (l *recordLogger) WriteLine(message string) {
// restoreLogger 把 runner 日志回传到 Master 恢复记录。
type restoreLogger struct {
ctx context.Context
client *MasterClient
ctx context.Context
client *MasterClient
restoreID uint
}
@@ -324,11 +270,6 @@ func (e *Executor) DeleteStorageObject(ctx context.Context, targetType string, t
// - 执行backup.Registry.Runner(spec.Type).Restore
// - 上报:通过 UpdateRestorestatus/logAppend
func (e *Executor) ExecuteRestore(ctx context.Context, restoreRecordID uint) error {
if err := e.ensureTempDir(); err != nil {
e.reportRestoreFailure(ctx, restoreRecordID, err.Error())
return err
}
spec, err := e.client.GetRestoreSpec(ctx, restoreRecordID)
if err != nil {
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("拉取恢复规格失败: %v", err))
@@ -341,6 +282,10 @@ func (e *Executor) ExecuteRestore(ctx context.Context, restoreRecordID uint) err
}
e.appendRestoreLog(ctx, restoreRecordID, fmt.Sprintf("[agent] 开始恢复 %s (type=%s)\n", spec.TaskName, spec.Type))
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建临时目录失败: %v", err))
return err
}
tmpDir, err := os.MkdirTemp(e.tempDir, "restore-*")
if err != nil {
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建恢复临时目录失败: %v", err))

View File

@@ -1,233 +0,0 @@
package agent
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
"backupx/server/internal/storage"
)
// TestBuildBackupTaskSpecParsesJSONSourcePaths verifies that buildBackupTaskSpec
// expands JSON-array encoded SourcePaths and ExcludePatterns strings into the
// corresponding string slices on the resulting backup spec.
func TestBuildBackupTaskSpecParsesJSONSourcePaths(t *testing.T) {
	input := &TaskSpec{
		TaskID:          7,
		Name:            "root-files",
		Type:            "file",
		SourcePaths:     `["/root","/etc"]`,
		ExcludePatterns: `["*.log","tmp"]`,
	}
	wantSources := []string{"/root", "/etc"}
	wantExcludes := []string{"*.log", "tmp"}

	converted := buildBackupTaskSpec(input, time.Unix(0, 0), "/var/lib/backupx-agent/tmp")

	// Source paths are checked first; a mismatch aborts before excludes are inspected.
	if sources := converted.SourcePaths; !reflect.DeepEqual(sources, wantSources) {
		t.Fatalf("source paths = %#v", sources)
	}
	if excludes := converted.ExcludePatterns; !reflect.DeepEqual(excludes, wantExcludes) {
		t.Fatalf("exclude patterns = %#v", excludes)
	}
}
// TestParseStringListFieldKeepsLegacyLineFormat verifies that parseStringListField
// still accepts the newline-separated format: entries are split per line,
// surrounding whitespace is trimmed, and empty lines are dropped.
func TestParseStringListFieldKeepsLegacyLineFormat(t *testing.T) {
	const legacyInput = "/root\n /etc \n"
	expected := []string{"/root", "/etc"}

	if actual := parseStringListField(legacyInput); !reflect.DeepEqual(actual, expected) {
		t.Fatalf("paths = %#v, want %#v", actual, expected)
	}
}
// TestExecuteRunTaskRecordsPerTargetUploadResults runs ExecuteRunTask against a
// fake Master served by httptest. The task has two storage targets: "broken"
// (provider configured with failUpload) and "ok". The test expects the final
// record update to report "success", to select target 12 (the first one that
// uploaded), and to carry one upload result per target in declaration order.
func TestExecuteRunTaskRecordsPerTargetUploadResults(t *testing.T) {
	sourceDir := t.TempDir()
	if err := os.WriteFile(filepath.Join(sourceDir, "index.html"), []byte("hello"), 0o644); err != nil {
		t.Fatalf("WriteFile returned error: %v", err)
	}

	// finalUpdate keeps the last update that carried a non-empty Status;
	// updates keeps every record update the agent posted.
	var finalUpdate RecordUpdate
	var updates []RecordUpdate
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/api/agent/tasks/1":
			writeAgentEnvelope(t, w, TaskSpec{
				TaskID:      1,
				Name:        "site",
				Type:        "file",
				SourcePath:  sourceDir,
				Compression: "gzip",
				StorageTargets: []StorageTargetConfig{
					{ID: 11, Name: "broken", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken"}`)},
					{ID: 12, Name: "ok", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"ok"}`)},
				},
			})
		case r.Method == http.MethodPost && r.URL.Path == "/api/agent/records/99":
			var update RecordUpdate
			if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
				// This handler runs on the HTTP server's goroutine. t.Fatalf
				// (FailNow) may only be called from the goroutine running the
				// test, so record the failure with t.Errorf and reject the
				// request instead of aborting the handler goroutine.
				t.Errorf("Decode update returned error: %v", err)
				http.Error(w, "invalid record update", http.StatusBadRequest)
				return
			}
			updates = append(updates, update)
			if update.Status != "" {
				finalUpdate = update
			}
			writeAgentEnvelope(t, w, map[string]string{"status": "ok"})
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	executor := NewExecutor(NewMasterClient(server.URL, "token", false), filepath.Join(t.TempDir(), "tmp"))
	// Swap in a registry backed by in-memory test providers keyed by config "name".
	executor.storageRegistry = storage.NewRegistry(&agentTestStorageFactory{
		providers: map[string]*agentTestStorageProvider{
			"broken": {name: "broken", failUpload: true},
			"ok":     {name: "ok", objects: map[string][]byte{}},
		},
	})

	if err := executor.ExecuteRunTask(context.Background(), 1, 99); err != nil {
		t.Fatalf("ExecuteRunTask returned error: %v", err)
	}
	if len(updates) == 0 || finalUpdate.Status != "success" {
		t.Fatalf("expected final success update, got updates=%#v final=%#v", updates, finalUpdate)
	}
	if finalUpdate.StorageTargetID != 12 {
		t.Fatalf("expected first successful target 12, got %d", finalUpdate.StorageTargetID)
	}
	if len(finalUpdate.StorageUploadResults) != 2 {
		t.Fatalf("expected two upload results, got %#v", finalUpdate.StorageUploadResults)
	}
	if finalUpdate.StorageUploadResults[0].Status != "failed" || finalUpdate.StorageUploadResults[1].Status != "success" {
		t.Fatalf("unexpected upload results: %#v", finalUpdate.StorageUploadResults)
	}
	if finalUpdate.StoragePath == "" || finalUpdate.FileSize <= 0 || finalUpdate.Checksum == "" {
		t.Fatalf("expected artifact metadata in final update, got %#v", finalUpdate)
	}
}
// TestExecuteRunTaskReportsPerTargetUploadResultsWhenAllTargetsFail runs
// ExecuteRunTask against a fake Master served by httptest where both storage
// targets use providers configured with failUpload. The test expects
// ExecuteRunTask to return an error, the final record update to be "failed",
// and that update to still carry one failed result (with a non-empty Error)
// per target.
func TestExecuteRunTaskReportsPerTargetUploadResultsWhenAllTargetsFail(t *testing.T) {
	sourceDir := t.TempDir()
	if err := os.WriteFile(filepath.Join(sourceDir, "index.html"), []byte("hello"), 0o644); err != nil {
		t.Fatalf("WriteFile returned error: %v", err)
	}

	// finalUpdate keeps the last update that carried a non-empty Status.
	var finalUpdate RecordUpdate
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodGet && r.URL.Path == "/api/agent/tasks/1":
			writeAgentEnvelope(t, w, TaskSpec{
				TaskID:      1,
				Name:        "site",
				Type:        "file",
				SourcePath:  sourceDir,
				Compression: "gzip",
				StorageTargets: []StorageTargetConfig{
					{ID: 11, Name: "broken-a", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken-a"}`)},
					{ID: 12, Name: "broken-b", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken-b"}`)},
				},
			})
		case r.Method == http.MethodPost && r.URL.Path == "/api/agent/records/99":
			var update RecordUpdate
			if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
				// This handler runs on the HTTP server's goroutine. t.Fatalf
				// (FailNow) may only be called from the goroutine running the
				// test, so record the failure with t.Errorf and reject the
				// request instead of aborting the handler goroutine.
				t.Errorf("Decode update returned error: %v", err)
				http.Error(w, "invalid record update", http.StatusBadRequest)
				return
			}
			if update.Status != "" {
				finalUpdate = update
			}
			writeAgentEnvelope(t, w, map[string]string{"status": "ok"})
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	executor := NewExecutor(NewMasterClient(server.URL, "token", false), filepath.Join(t.TempDir(), "tmp"))
	// Swap in a registry backed by in-memory test providers keyed by config "name".
	executor.storageRegistry = storage.NewRegistry(&agentTestStorageFactory{
		providers: map[string]*agentTestStorageProvider{
			"broken-a": {name: "broken-a", failUpload: true},
			"broken-b": {name: "broken-b", failUpload: true},
		},
	})

	if err := executor.ExecuteRunTask(context.Background(), 1, 99); err == nil {
		t.Fatal("expected ExecuteRunTask to return upload failure")
	}
	if finalUpdate.Status != "failed" {
		t.Fatalf("expected final failed update, got %#v", finalUpdate)
	}
	if len(finalUpdate.StorageUploadResults) != 2 {
		t.Fatalf("expected failed update to keep per-target results, got %#v", finalUpdate.StorageUploadResults)
	}
	for _, item := range finalUpdate.StorageUploadResults {
		if item.Status != "failed" || item.Error == "" {
			t.Fatalf("unexpected upload result: %#v", item)
		}
	}
}
// agentTestStorageFactory is a test-only storage factory. Instead of building
// providers it hands out pre-made ones, looked up by the "name" entry of the
// target configuration.
type agentTestStorageFactory struct {
	providers map[string]*agentTestStorageProvider
}

// Type reports the provider type identifier this factory answers for.
func (f *agentTestStorageFactory) Type() storage.ProviderType {
	const providerType = "agent_test_storage"
	return providerType
}

// New returns the registered provider whose key equals config["name"].
// A missing/non-string name, an unknown name, or a nil entry yields an error.
func (f *agentTestStorageFactory) New(_ context.Context, config map[string]any) (storage.StorageProvider, error) {
	name, _ := config["name"].(string)
	if registered, found := f.providers[name]; found && registered != nil {
		return registered, nil
	}
	return nil, fmt.Errorf("unknown provider %q", name)
}
// agentTestStorageProvider is an in-memory storage provider for tests.
// With failUpload set, every Upload call fails; otherwise uploaded bytes are
// kept in objects, keyed by object key.
type agentTestStorageProvider struct {
	name       string
	failUpload bool
	objects    map[string][]byte
}

// Type reports the provider type identifier.
func (p *agentTestStorageProvider) Type() storage.ProviderType {
	const providerType = "agent_test_storage"
	return providerType
}

// TestConnection always succeeds.
func (p *agentTestStorageProvider) TestConnection(context.Context) error { return nil }

// Upload reads the whole reader and stores its content under objectKey,
// unless the provider is configured to fail, in which case nothing is read.
func (p *agentTestStorageProvider) Upload(_ context.Context, objectKey string, reader io.Reader, _ int64, _ map[string]string) error {
	if !p.failUpload {
		payload, readErr := io.ReadAll(reader)
		if readErr != nil {
			return readErr
		}
		// The map is created lazily, and only after a successful read.
		if p.objects == nil {
			p.objects = make(map[string][]byte)
		}
		p.objects[objectKey] = payload
		return nil
	}
	return fmt.Errorf("upload failed for %s", p.name)
}

// Download returns a reader over the stored bytes for objectKey, or an error
// when no object was stored under that key.
func (p *agentTestStorageProvider) Download(_ context.Context, objectKey string) (io.ReadCloser, error) {
	if stored, found := p.objects[objectKey]; found {
		return io.NopCloser(strings.NewReader(string(stored))), nil
	}
	return nil, fmt.Errorf("object %s not found", objectKey)
}

// Delete removes objectKey from the in-memory store; it never fails.
func (p *agentTestStorageProvider) Delete(_ context.Context, objectKey string) error {
	delete(p.objects, objectKey)
	return nil
}

// List is not needed by the tests and returns no entries.
func (p *agentTestStorageProvider) List(context.Context, string) ([]storage.ObjectInfo, error) {
	return nil, nil
}
func writeAgentEnvelope(t *testing.T, w http.ResponseWriter, data any) {
t.Helper()
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(map[string]any{"code": "OK", "data": data}); err != nil {
t.Fatalf("Encode response returned error: %v", err)
}
}

View File

@@ -5,7 +5,6 @@ import (
"os"
"path/filepath"
"sort"
"strings"
)
// DirEntry Agent 返回给 Master 的目录项。
@@ -18,8 +17,8 @@ type DirEntry struct {
// listLocalDir 列出 Agent 所在机器的指定路径。
func listLocalDir(path string) ([]DirEntry, error) {
cleaned := filepath.Clean(strings.TrimSpace(path))
if strings.TrimSpace(path) == "" || cleaned == "." {
cleaned := filepath.Clean(path)
if cleaned == "" {
cleaned = "/"
}
entries, err := os.ReadDir(cleaned)

View File

@@ -36,21 +36,6 @@ func TestListLocalDir(t *testing.T) {
}
}
// TestListLocalDirEmptyPathUsesRoot checks that listing an empty path succeeds,
// yields at least one entry, and that every returned entry path is absolute.
func TestListLocalDirEmptyPathUsesRoot(t *testing.T) {
	rootEntries, err := listLocalDir("")
	switch {
	case err != nil:
		t.Fatalf("list root: %v", err)
	case len(rootEntries) == 0:
		t.Fatalf("expected root entries")
	}
	for idx := range rootEntries {
		if filepath.IsAbs(rootEntries[idx].Path) {
			continue
		}
		t.Fatalf("entry path should be absolute: %+v", rootEntries[idx])
	}
}
func TestSplitCommaOrNewline(t *testing.T) {
cases := []struct {
in string

View File

@@ -131,7 +131,6 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
// Agent 协议服务:命令队列 + 任务下发 + 记录上报
agentCmdRepo := repository.NewAgentCommandRepository(db)
nodeService.SetAgentCommandRepository(agentCmdRepo)
agentService := service.NewAgentService(nodeRepo, backupTaskRepo, backupRecordRepo, storageTargetRepo, agentCmdRepo, configCipher)
agentService.SetRestoreRepository(restoreRecordRepo)
agentService.StartCommandTimeoutMonitor(ctx, 30*time.Second, 10*time.Minute)
@@ -241,7 +240,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
replicationService.SetMetrics(appMetrics)
metricsCollector := metrics.NewCollector(
appMetrics,
metrics.NewRepoSource(storageTargetRepo, backupRecordRepo, nodeRepo, backupTaskRepo, agentCmdRepo),
metrics.NewRepoSource(storageTargetRepo, backupRecordRepo, nodeRepo, backupTaskRepo),
30*time.Second,
)
metricsCollector.Start(ctx)
@@ -277,7 +276,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
UserRepository: userRepo,
SystemConfigRepo: systemConfigRepo,
InstallTokenService: installTokenService,
MasterExternalURL: cfg.Server.ExternalURL,
MasterExternalURL: "", // 如需覆盖 URL可扩展 cfg.Server 增字段;目前留空依赖 X-Forwarded-* / Request.Host
DB: db,
Metrics: appMetrics,
})

View File

@@ -24,9 +24,6 @@ func (r *fakeRecordRepository) List(context.Context, repository.BackupRecordList
// FindByID is a stub: it ignores its arguments and returns no record and no error.
func (r *fakeRecordRepository) FindByID(context.Context, uint) (*model.BackupRecord, error) {
return nil, nil
}
// FindRunningByTaskAndNode is a stub: it ignores its arguments and returns no record and no error.
func (r *fakeRecordRepository) FindRunningByTaskAndNode(context.Context, uint, uint) (*model.BackupRecord, error) {
return nil, nil
}
// Create is a no-op stub that always reports success.
func (r *fakeRecordRepository) Create(context.Context, *model.BackupRecord) error { return nil }
// Update is a no-op stub that always reports success.
func (r *fakeRecordRepository) Update(context.Context, *model.BackupRecord) error { return nil }
func (r *fakeRecordRepository) Delete(_ context.Context, id uint) error {

View File

@@ -17,10 +17,9 @@ type Config struct {
}
type ServerConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
Mode string `mapstructure:"mode"`
ExternalURL string `mapstructure:"external_url"`
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
Mode string `mapstructure:"mode"`
}
type DatabaseConfig struct {
@@ -137,7 +136,6 @@ func applyDefaults(v *viper.Viper) {
v.SetDefault("server.host", "0.0.0.0")
v.SetDefault("server.port", 8340)
v.SetDefault("server.mode", "release")
v.SetDefault("server.external_url", "")
v.SetDefault("database.path", "./data/backupx.db")
v.SetDefault("security.jwt_expire", "24h")
v.SetDefault("backup.temp_dir", "/tmp/backupx")

View File

@@ -1,10 +1,6 @@
package config
import (
"os"
"path/filepath"
"testing"
)
import "testing"
func TestLoadUsesDefaultsWithoutConfigFile(t *testing.T) {
cfg, err := Load("")
@@ -22,33 +18,3 @@ func TestLoadUsesDefaultsWithoutConfigFile(t *testing.T) {
t.Fatalf("expected default database path, got %s", cfg.Database.Path)
}
}
// TestLoadReadsServerExternalURLFromFile writes a config file that sets
// server.external_url and expects Load to surface it in cfg.Server.ExternalURL.
func TestLoadReadsServerExternalURLFromFile(t *testing.T) {
	const wantURL = "https://backup.example.com"

	configPath := filepath.Join(t.TempDir(), "config.yaml")
	writeErr := os.WriteFile(configPath, []byte("server:\n external_url: \"https://backup.example.com\"\n"), 0o600)
	if writeErr != nil {
		t.Fatalf("write config: %v", writeErr)
	}

	cfg, loadErr := Load(configPath)
	if loadErr != nil {
		t.Fatalf("Load returned error: %v", loadErr)
	}
	if got := cfg.Server.ExternalURL; got != wantURL {
		t.Fatalf("expected external URL from config, got %q", got)
	}
}
// TestLoadReadsServerExternalURLFromEnv sets BACKUPX_SERVER_EXTERNAL_URL and
// expects Load (without a config file) to expose it in cfg.Server.ExternalURL.
func TestLoadReadsServerExternalURLFromEnv(t *testing.T) {
	const wantURL = "https://env-backup.example.com"
	t.Setenv("BACKUPX_SERVER_EXTERNAL_URL", wantURL)

	cfg, loadErr := Load("")
	if loadErr != nil {
		t.Fatalf("Load returned error: %v", loadErr)
	}
	if got := cfg.Server.ExternalURL; got != wantURL {
		t.Fatalf("expected external URL from env, got %q", got)
	}
}

View File

@@ -25,14 +25,10 @@ import (
// setupInstallFlowRouter 构造一个 Node + Agent + InstallToken 全量依赖的 router
// 并返回已登录管理员 JWT。
func setupInstallFlowRouter(t *testing.T) (http.Handler, string) {
return setupInstallFlowRouterWithExternalURL(t, "")
}
func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (http.Handler, string) {
t.Helper()
tempDir := t.TempDir()
cfg := config.Config{
Server: config.ServerConfig{Host: "127.0.0.1", Port: 8340, Mode: "test", ExternalURL: externalURL},
Server: config.ServerConfig{Host: "127.0.0.1", Port: 8340, Mode: "test"},
Database: config.DatabaseConfig{Path: filepath.Join(tempDir, "backupx.db")},
Security: config.SecurityConfig{JWTExpire: "24h"},
Log: config.LogConfig{Level: "error"},
@@ -72,6 +68,9 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
installTokenRepo := repository.NewAgentInstallTokenRepository(db)
installTokenSvc := service.NewInstallTokenService(installTokenRepo, nodeRepo)
auditLogRepo := repository.NewAuditLogRepository(db)
auditSvc := service.NewAuditService(auditLogRepo)
// 用 cancelable ctx测试结束时停掉 handler 启动的后台 GC 协程,
// 避免 goroutine 持有 map 导致 tempdir 清理失败。
ctx, cancel := context.WithCancel(context.Background())
@@ -86,7 +85,7 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
SystemService: systemSvc,
NodeService: nodeSvc,
InstallTokenService: installTokenSvc,
MasterExternalURL: cfg.Server.ExternalURL,
AuditService: auditSvc,
JWTManager: jwtMgr,
UserRepository: userRepo,
SystemConfigRepo: systemConfigRepo,
@@ -115,73 +114,6 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
return router, setupResp.Data.Token
}
// TestInstallTokenUsesConfiguredExternalURL builds the router with a configured
// external URL and checks that the install-token response uses that URL for
// "url", "fallbackUrl" and the MASTER_URL line of the base64-encoded script.
func TestInstallTokenUsesConfiguredExternalURL(t *testing.T) {
const externalURL = "https://public.example.com/base"
router, jwt := setupInstallFlowRouterWithExternalURL(t, externalURL)
// Step 1: create one node through the batch endpoint, authenticated with the JWT.
batchBody, _ := json.Marshal(map[string][]string{"names": {"external-url-node"}})
batchReq := httptest.NewRequest(http.MethodPost, "/api/nodes/batch", bytes.NewBuffer(batchBody))
batchReq.Header.Set("Content-Type", "application/json")
batchReq.Header.Set("Authorization", "Bearer "+jwt)
batchRec := httptest.NewRecorder()
router.ServeHTTP(batchRec, batchReq)
if batchRec.Code != 200 {
t.Fatalf("batch create failed: %d %s", batchRec.Code, batchRec.Body.String())
}
// Only the node IDs are needed from the batch response.
var batchResp struct {
Data []struct {
ID uint `json:"id"`
} `json:"data"`
}
if err := json.Unmarshal(batchRec.Body.Bytes(), &batchResp); err != nil {
t.Fatalf("unmarshal batch: %v", err)
}
if len(batchResp.Data) != 1 {
t.Fatalf("expected 1 node, got %d", len(batchResp.Data))
}
// Step 2: request a systemd-mode install token for the node just created.
genBody, _ := json.Marshal(map[string]any{
"mode": "systemd",
"arch": "auto",
"agentVersion": "v1.7.0",
"downloadSrc": "github",
"ttlSeconds": 900,
})
genReq := httptest.NewRequest(http.MethodPost,
"/api/nodes/"+formatUint(batchResp.Data[0].ID)+"/install-tokens", bytes.NewBuffer(genBody))
genReq.Header.Set("Content-Type", "application/json")
genReq.Header.Set("Authorization", "Bearer "+jwt)
genRec := httptest.NewRecorder()
router.ServeHTTP(genRec, genReq)
if genRec.Code != 200 {
t.Fatalf("install-tokens failed: %d %s", genRec.Code, genRec.Body.String())
}
var genResp struct {
Data struct {
InstallToken string `json:"installToken"`
URL string `json:"url"`
FallbackURL string `json:"fallbackUrl"`
ScriptBase64 string `json:"scriptBase64"`
} `json:"data"`
}
if err := json.Unmarshal(genRec.Body.Bytes(), &genResp); err != nil {
t.Fatalf("unmarshal gen: %v", err)
}
// Step 3: both install URLs must be prefixed with the configured external URL.
if genResp.Data.URL != externalURL+"/api/install/"+genResp.Data.InstallToken {
t.Fatalf("url should use external URL, got %q", genResp.Data.URL)
}
if genResp.Data.FallbackURL != externalURL+"/install/"+genResp.Data.InstallToken {
t.Fatalf("fallbackUrl should use external URL, got %q", genResp.Data.FallbackURL)
}
// Step 4: the embedded script must decode and assign MASTER_URL to the external URL.
decodedScript, err := base64.StdEncoding.DecodeString(genResp.Data.ScriptBase64)
if err != nil {
t.Fatalf("scriptBase64 should be valid base64: %v", err)
}
if !strings.Contains(string(decodedScript), `MASTER_URL="`+externalURL+`"`) {
t.Fatalf("script should use external MASTER_URL:\n%s", string(decodedScript))
}
}
func TestOneClickInstallFlow(t *testing.T) {
router, jwt := setupInstallFlowRouter(t)
@@ -496,76 +428,6 @@ func TestInstallFlowComposeModeMismatch(t *testing.T) {
}
}
// TestInstallFlowComposeSuccessConsumesToken checks that fetching compose.yml with
// a docker-mode install token succeeds and that a later script fetch with the same
// token is answered with 410 Gone.
func TestInstallFlowComposeSuccessConsumesToken(t *testing.T) {
router, jwt := setupInstallFlowRouter(t)
// Step 1: create one node through the batch endpoint.
batchBody, _ := json.Marshal(map[string][]string{"names": {"compose-ok"}})
batchReq := httptest.NewRequest(http.MethodPost, "/api/nodes/batch", bytes.NewBuffer(batchBody))
batchReq.Header.Set("Content-Type", "application/json")
batchReq.Header.Set("Authorization", "Bearer "+jwt)
batchRec := httptest.NewRecorder()
router.ServeHTTP(batchRec, batchReq)
if batchRec.Code != 200 {
t.Fatalf("batch create failed: %d %s", batchRec.Code, batchRec.Body.String())
}
var batchResp struct {
Data []struct {
ID uint `json:"id"`
} `json:"data"`
}
if err := json.Unmarshal(batchRec.Body.Bytes(), &batchResp); err != nil {
t.Fatalf("unmarshal batch: %v", err)
}
if len(batchResp.Data) != 1 {
t.Fatalf("expected 1 node, got %d", len(batchResp.Data))
}
// Step 2: request a docker-mode install token for that node.
genBody, _ := json.Marshal(map[string]any{
"mode": "docker",
"arch": "auto",
"agentVersion": "v1.7.0",
"downloadSrc": "github",
"ttlSeconds": 900,
})
genReq := httptest.NewRequest(http.MethodPost,
"/api/nodes/"+formatUint(batchResp.Data[0].ID)+"/install-tokens", bytes.NewBuffer(genBody))
genReq.Header.Set("Content-Type", "application/json")
genReq.Header.Set("Authorization", "Bearer "+jwt)
genRec := httptest.NewRecorder()
router.ServeHTTP(genRec, genReq)
if genRec.Code != 200 {
t.Fatalf("install-tokens failed: %d %s", genRec.Code, genRec.Body.String())
}
var genResp struct {
Data struct {
InstallToken string `json:"installToken"`
} `json:"data"`
}
if err := json.Unmarshal(genRec.Body.Bytes(), &genResp); err != nil {
t.Fatalf("unmarshal gen: %v", err)
}
if genResp.Data.InstallToken == "" {
t.Fatalf("missing installToken")
}
// Step 3: the unauthenticated compose.yml fetch must succeed and carry the agent token env.
composeReq := httptest.NewRequest(http.MethodGet, "/api/install/"+genResp.Data.InstallToken+"/compose.yml", nil)
composeRec := httptest.NewRecorder()
router.ServeHTTP(composeRec, composeReq)
if composeRec.Code != 200 {
t.Fatalf("compose fetch failed: %d %s", composeRec.Code, composeRec.Body.String())
}
if !strings.Contains(composeRec.Body.String(), "BACKUPX_AGENT_TOKEN") {
t.Fatalf("compose missing token env:\n%s", composeRec.Body.String())
}
// Step 4: requesting the install script with the same token afterwards must return 410.
scriptReq := httptest.NewRequest(http.MethodGet, "/api/install/"+genResp.Data.InstallToken, nil)
scriptRec := httptest.NewRecorder()
router.ServeHTTP(scriptRec, scriptReq)
if scriptRec.Code != http.StatusGone {
t.Fatalf("script after compose should be 410, got %d: %s", scriptRec.Code, scriptRec.Body.String())
}
}
// formatUint 小工具uint → 十进制字符串(无需引入 strconv
func formatUint(u uint) string {
if u == 0 {

View File

@@ -1,6 +1,7 @@
package http
import (
"encoding/base64"
"fmt"
stdhttp "net/http"
"strconv"
@@ -244,17 +245,14 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
input.TTLSeconds = 900
}
out, err := h.installTokenSvc.CreateCommand(c.Request.Context(), service.InstallCommandInput{
InstallTokenInput: service.InstallTokenInput{
NodeID: uint(id),
Mode: input.Mode,
Arch: input.Arch,
AgentVersion: input.AgentVersion,
DownloadSrc: input.DownloadSrc,
TTLSeconds: input.TTLSeconds,
CreatedByID: h.resolveCurrentUserID(c),
},
MasterURL: resolveMasterURL(c, h.externalURL),
out, err := h.installTokenSvc.Create(c.Request.Context(), service.InstallTokenInput{
NodeID: uint(id),
Mode: input.Mode,
Arch: input.Arch,
AgentVersion: input.AgentVersion,
DownloadSrc: input.DownloadSrc,
TTLSeconds: input.TTLSeconds,
CreatedByID: h.resolveCurrentUserID(c),
})
if err != nil {
response.Error(c, err)
@@ -264,6 +262,12 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
fmt.Sprintf("%d", id), out.Node.Name,
fmt.Sprintf("生成 %s/%s install token TTL=%ds", input.Mode, input.Arch, input.TTLSeconds))
masterURL := resolveMasterURL(c, h.externalURL)
script, err := renderInstallScript(masterURL, out.Node, out.Record)
if err != nil {
response.Error(c, err)
return
}
// 使用 /api/install/... 而非 /install/... —— 让反向代理的 /api/ 转发规则
// 自动接管,避免 SPA fallback 把请求当成前端路由返回 index.htmlissue #46
// 同时返回 /install/... 备用地址,兼容会剥离 /api 前缀的外层反向代理。
@@ -272,11 +276,15 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
body := gin.H{
"installToken": out.Token,
"expiresAt": out.ExpiresAt,
"url": out.URL,
"fallbackUrl": out.FallbackURL,
"scriptBase64": out.ScriptBase64,
"composeUrl": out.ComposeURL,
"fallbackComposeUrl": out.FallbackComposeURL,
"url": masterURL + "/api/install/" + out.Token,
"fallbackUrl": masterURL + "/install/" + out.Token,
"scriptBase64": base64.StdEncoding.EncodeToString([]byte(script)),
"composeUrl": "",
"fallbackComposeUrl": "",
}
if input.Mode == "docker" {
body["composeUrl"] = masterURL + "/api/install/" + out.Token + "/compose.yml"
body["fallbackComposeUrl"] = masterURL + "/install/" + out.Token + "/compose.yml"
}
response.Success(c, body)
}

View File

@@ -1,41 +0,0 @@
package installscript
import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
// TestDeployInstallScriptSyntax runs `sh -n` on deploy/install.sh and fails,
// printing the combined output, when the command returns an error.
func TestDeployInstallScriptSyntax(t *testing.T) {
	scriptPath := filepath.Join("..", "..", "..", "deploy", "install.sh")
	if output, err := exec.Command("sh", "-n", scriptPath).CombinedOutput(); err != nil {
		t.Fatalf("install.sh syntax invalid: %v\n%s", err, output)
	}
}
// TestDeployInstallScriptSupportsReleasePackageLayout reads deploy/install.sh and
// requires a fixed set of snippets to be present verbatim; the first missing
// snippet fails the test.
func TestDeployInstallScriptSupportsReleasePackageLayout(t *testing.T) {
	raw, err := os.ReadFile(filepath.Join("..", "..", "..", "deploy", "install.sh"))
	if err != nil {
		t.Fatal(err)
	}

	requiredSnippets := []string{
		`SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)`,
		`if [ -f "$SCRIPT_DIR/backupx" ] && [ -d "$SCRIPT_DIR/web" ]; then`,
		`BIN_SOURCE="${BIN_SOURCE:-$SCRIPT_DIR/backupx}"`,
		`WEB_SOURCE="${WEB_SOURCE:-$SCRIPT_DIR/web}"`,
		`CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$SCRIPT_DIR/config.example.yaml}"`,
		`发布包安装请确认当前目录包含 ./backupx、./web 和 ./install.sh。`,
		`cat > "/etc/systemd/system/$SERVICE_NAME.service" <<UNIT`,
		`if [ -d "/etc/nginx/conf.d" ] && [ -f "$NGINX_SOURCE" ]; then`,
	}

	script := string(raw)
	for _, want := range requiredSnippets {
		if strings.Contains(script, want) {
			continue
		}
		t.Fatalf("install.sh missing %q", want)
	}
}

View File

@@ -37,22 +37,19 @@ func TestRenderScriptBashBootstrap(t *testing.T) {
}
}
func TestRenderScriptUsesRootForBareMetalBackups(t *testing.T) {
func TestRenderScriptCreatesBackupXUserAndGroup(t *testing.T) {
got, err := RenderScript(testCtx)
if err != nil {
t.Fatalf("render err: %v", err)
}
for _, want := range []string{
"/var/lib/backupx-agent/tmp",
"install -d -m 0700 /var/lib/backupx-agent /var/lib/backupx-agent/tmp",
"getent group backupx",
"groupadd --system backupx",
"useradd --system --gid backupx",
"Group=backupx",
} {
if !strings.Contains(got, want) {
t.Errorf("script missing %q:\n%s", want, got)
}
}
for _, forbidden := range []string{"User=backupx", "Group=backupx", "NoNewPrivileges=true"} {
if strings.Contains(got, forbidden) {
t.Errorf("script should not contain %q for bare-metal backups:\n%s", forbidden, got)
}
}
}

View File

@@ -1,8 +1,6 @@
package installscript
import (
"os"
"path/filepath"
"strings"
"testing"
@@ -29,10 +27,8 @@ func TestRenderScriptSystemd(t *testing.T) {
mustContain := []string{
"BACKUPX_AGENT_MASTER=${MASTER_URL}",
`Environment="BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}"`,
"/var/lib/backupx-agent/tmp",
"systemctl daemon-reload",
"systemctl enable --now backupx-agent",
"systemctl status backupx-agent",
"X-Agent-Token: ${AGENT_TOKEN}",
"MASTER_URL=\"https://master.example.com\"",
"AGENT_TOKEN=\"deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef\"",
@@ -60,9 +56,6 @@ func TestRenderScriptForeground(t *testing.T) {
if !strings.Contains(got, `exec "${INSTALL_PREFIX}/backupx" agent`) {
t.Errorf("foreground script missing exec line:\n%s", got)
}
if !strings.Contains(got, "/var/lib/backupx-agent/tmp") {
t.Errorf("foreground script missing dedicated temp dir:\n%s", got)
}
if strings.Contains(got, "systemctl daemon-reload") {
t.Errorf("foreground script should not reference systemctl:\n%s", got)
}
@@ -81,44 +74,14 @@ func TestRenderScriptDocker(t *testing.T) {
if !strings.Contains(got, "docker run") {
t.Errorf("docker script missing `docker run`:\n%s", got)
}
if !strings.Contains(got, "/var/lib/backupx-agent:/var/lib/backupx-agent") {
t.Errorf("docker script missing agent data volume:\n%s", got)
}
if !strings.Contains(got, "awuqing/backupx:${AGENT_VERSION}") {
t.Errorf("docker script missing image tag reference:\n%s", got)
}
if !strings.Contains(got, `"awuqing/backupx:${AGENT_VERSION}" agent`) {
t.Errorf("docker script must start image in agent mode:\n%s", got)
}
if !strings.Contains(got, `-e "BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp"`) {
t.Errorf("docker script missing temp dir env:\n%s", got)
}
if !strings.Contains(got, `docker logs --tail=100 backupx-agent`) {
t.Errorf("docker script missing diagnostic log command:\n%s", got)
}
if !strings.Contains(got, `grep -q '"status":"online"'`) {
t.Errorf("docker script missing online probe:\n%s", got)
}
if strings.Contains(got, "systemctl daemon-reload") {
t.Errorf("docker script should not reference systemctl:\n%s", got)
}
}
// TestDockerEntrypointForwardsAgentSubcommand reads deploy/docker/entrypoint.sh and
// requires it to contain both the check for an "agent" first argument and the
// exec line that forwards all arguments to the backupx binary.
func TestDockerEntrypointForwardsAgentSubcommand(t *testing.T) {
	raw, err := os.ReadFile(filepath.Join("..", "..", "..", "deploy", "docker", "entrypoint.sh"))
	if err != nil {
		t.Fatalf("read docker entrypoint: %v", err)
	}

	script := string(raw)
	if detectsAgent := strings.Contains(script, `"${1:-}" = "agent"`); !detectsAgent {
		t.Fatalf("entrypoint must detect the agent subcommand before starting server:\n%s", script)
	}
	if forwardsArgs := strings.Contains(script, `exec /app/bin/backupx "$@"`); !forwardsArgs {
		t.Fatalf("entrypoint must exec backupx with forwarded args:\n%s", script)
	}
}
func TestRenderComposeYaml(t *testing.T) {
ctx := testCtx
ctx.Mode = model.InstallModeDocker
@@ -129,26 +92,17 @@ func TestRenderComposeYaml(t *testing.T) {
if !strings.Contains(got, "image: awuqing/backupx:v1.7.0") {
t.Errorf("compose missing image:\n%s", got)
}
if !strings.Contains(got, `command: ["agent"]`) {
t.Errorf("compose must start image in agent mode:\n%s", got)
}
if !strings.Contains(got, `BACKUPX_AGENT_TOKEN: "deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef"`) {
t.Errorf("compose missing token env:\n%s", got)
}
if !strings.Contains(got, `BACKUPX_AGENT_TEMP_DIR: "/var/lib/backupx-agent/tmp"`) {
t.Errorf("compose missing temp dir env:\n%s", got)
}
if !strings.Contains(got, "/var/lib/backupx-agent:/var/lib/backupx-agent") {
t.Errorf("compose missing agent data volume:\n%s", got)
}
}
func TestRenderScriptRejectsInjectedMasterURL(t *testing.T) {
bad := []string{
"https://example.com\" other: inject", // 含引号和空格
"javascript:alert(1)", // scheme 非法
"https://example.com\n- privileged", // 含换行YAML 注入经典 payload
"", // 空
"javascript:alert(1)", // scheme 非法
"https://example.com\n- privileged", // 含换行YAML 注入经典 payload
"", // 空
}
for _, u := range bad {
ctx := testCtx
@@ -207,8 +161,8 @@ func TestDownloadBaseMapping(t *testing.T) {
func TestRenderScriptDefaultsApplied(t *testing.T) {
ctx := testCtx
ctx.InstallPrefix = "" // 应被默认为 /opt/backupx-agent
ctx.DownloadBase = "" // 应被默认为 github
ctx.InstallPrefix = "" // 应被默认为 /opt/backupx-agent
ctx.DownloadBase = "" // 应被默认为 github
got, err := RenderScript(ctx)
if err != nil {
t.Fatalf("render err: %v", err)

View File

@@ -9,6 +9,5 @@ services:
environment:
BACKUPX_AGENT_MASTER: "{{.MasterURL}}"
BACKUPX_AGENT_TOKEN: "{{.AgentToken}}"
BACKUPX_AGENT_TEMP_DIR: "/var/lib/backupx-agent/tmp"
volumes:
- /var/lib/backupx-agent:/var/lib/backupx-agent
- /var/lib/backupx-agent:/tmp/backupx-agent

View File

@@ -47,10 +47,30 @@ else
fi
tar xzf "$TMPDIR/pkg.tar.gz" -C "$TMPDIR"
# 4. 安装二进制 + 数据目录
# 4. 安装二进制 + 用户
echo "[2/4] 安装到 ${INSTALL_PREFIX}"
install -d -m 0755 "$INSTALL_PREFIX"
install -d -m 0700 /var/lib/backupx-agent /var/lib/backupx-agent/tmp
if ! getent group backupx >/dev/null 2>&1; then
if command -v groupadd >/dev/null 2>&1; then
groupadd --system backupx
elif command -v addgroup >/dev/null 2>&1; then
addgroup --system backupx
else
echo "需要 groupadd 或 addgroup 来创建 backupx 组" >&2
exit 1
fi
fi
if ! id backupx >/dev/null 2>&1; then
if command -v useradd >/dev/null 2>&1; then
useradd --system --gid backupx --home-dir "$INSTALL_PREFIX" --shell /usr/sbin/nologin backupx
elif command -v adduser >/dev/null 2>&1; then
adduser --system --ingroup backupx --home "$INSTALL_PREFIX" --shell /usr/sbin/nologin backupx
else
echo "需要 useradd 或 adduser 来创建 backupx 用户" >&2
exit 1
fi
fi
id backupx >/dev/null 2>&1 || { echo "backupx 用户创建失败" >&2; exit 1; }
install -d -o backupx -g backupx "$INSTALL_PREFIX" /var/lib/backupx-agent
install -m 0755 "$TMPDIR/backupx-${AGENT_VERSION}-linux-${ARCH}/backupx" "$INSTALL_PREFIX/backupx"
{{end}}
@@ -65,11 +85,14 @@ Wants=network-online.target
[Service]
Type=simple
User=backupx
Group=backupx
Environment="BACKUPX_AGENT_MASTER=${MASTER_URL}"
Environment="BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}"
ExecStart=${INSTALL_PREFIX}/backupx agent --temp-dir /var/lib/backupx-agent/tmp
ExecStart=${INSTALL_PREFIX}/backupx agent --temp-dir /var/lib/backupx-agent
Restart=on-failure
RestartSec=10s
NoNewPrivileges=true
[Install]
WantedBy=multi-user.target
@@ -88,7 +111,6 @@ for i in $(seq 1 15); do
fi
done
echo "⚠ 30s 内未收到上线心跳,请检查防火墙或 journalctl -u backupx-agent"
echo "提示systemd 服务名是 backupx-agent可执行 systemctl status backupx-agent 查看状态。"
exit 2
{{end}}
@@ -97,7 +119,7 @@ exit 2
echo "[3/3] 前台启动 agentCtrl+C 退出)"
export BACKUPX_AGENT_MASTER="${MASTER_URL}"
export BACKUPX_AGENT_TOKEN="${AGENT_TOKEN}"
exec "${INSTALL_PREFIX}/backupx" agent --temp-dir /var/lib/backupx-agent/tmp
exec "${INSTALL_PREFIX}/backupx" agent --temp-dir /var/lib/backupx-agent
{{end}}
{{if eq .Mode "docker"}}
@@ -109,20 +131,7 @@ docker rm -f backupx-agent >/dev/null 2>&1 || true
docker run -d --name backupx-agent --restart=unless-stopped \
-e "BACKUPX_AGENT_MASTER=${MASTER_URL}" \
-e "BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}" \
-e "BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp" \
-v /var/lib/backupx-agent:/var/lib/backupx-agent \
-v /var/lib/backupx-agent:/tmp/backupx-agent \
"awuqing/backupx:${AGENT_VERSION}" agent
echo "✓ 容器已启动,等待节点上线"
for i in $(seq 1 15); do
sleep 2
if curl -fsSL -H "X-Agent-Token: ${AGENT_TOKEN}" "${MASTER_URL}/api/v1/agent/self" 2>/dev/null \
| grep -q '"status":"online"'; then
echo "✓ 节点已上线"
exit 0
fi
done
echo "⚠ 30s 内未收到上线心跳,请检查容器状态、网络与 Master URL。"
echo "排查命令docker ps -a --filter name=backupx-agent"
echo "排查命令docker logs --tail=100 backupx-agent"
exit 2
echo "✓ 容器已启动"
{{end}}

View File

@@ -13,18 +13,16 @@ type SampleSource interface {
ListStorageTargets(ctx context.Context) ([]model.StorageTarget, error)
StorageUsage(ctx context.Context) ([]repository.BackupStorageUsageItem, error)
ListNodes(ctx context.Context) ([]model.Node, error)
AgentQueueSummaries(ctx context.Context) (map[uint]repository.AgentCommandQueueSummary, error)
CountSLABreach(ctx context.Context) (int, error)
}
// repoSource 把 repository 适配到 SampleSource。
type repoSource struct {
targets repository.StorageTargetRepository
records repository.BackupRecordRepository
nodes repository.NodeRepository
tasks repository.BackupTaskRepository
commands repository.AgentCommandRepository
now func() time.Time
targets repository.StorageTargetRepository
records repository.BackupRecordRepository
nodes repository.NodeRepository
tasks repository.BackupTaskRepository
now func() time.Time
}
// NewRepoSource 用仓储实例构造 SampleSource。
@@ -33,15 +31,13 @@ func NewRepoSource(
records repository.BackupRecordRepository,
nodes repository.NodeRepository,
tasks repository.BackupTaskRepository,
commands repository.AgentCommandRepository,
) SampleSource {
return &repoSource{
targets: targets,
records: records,
nodes: nodes,
tasks: tasks,
commands: commands,
now: func() time.Time { return time.Now().UTC() },
targets: targets,
records: records,
nodes: nodes,
tasks: tasks,
now: func() time.Time { return time.Now().UTC() },
}
}
@@ -57,13 +53,6 @@ func (s *repoSource) ListNodes(ctx context.Context) ([]model.Node, error) {
return s.nodes.List(ctx)
}
// AgentQueueSummaries returns the per-node command queue summaries from the
// command repository. Without a command repository it returns (nil, nil).
func (s *repoSource) AgentQueueSummaries(ctx context.Context) (map[uint]repository.AgentCommandQueueSummary, error) {
	if s.commands != nil {
		return s.commands.NodeQueueSummaries(ctx)
	}
	return nil, nil
}
// CountSLABreach 统计当前违反 RPO 的任务:
// - 任务启用且配置了 SLAHoursRPO > 0
// - 最近一次成功备份距今超出 SLA 时间窗,或从未成功过
@@ -147,9 +136,7 @@ func (c *Collector) collect(ctx context.Context) {
}
// 节点在线状态role 约定为 master / agent
if nodes, err := c.source.ListNodes(ctx); err == nil {
queueByNode, _ := c.source.AgentQueueSummaries(ctx)
c.metrics.ResetNodeOnline()
c.metrics.ResetAgentQueue()
for i := range nodes {
n := &nodes[i]
role := "agent"
@@ -157,8 +144,6 @@ func (c *Collector) collect(ctx context.Context) {
role = "master"
}
c.metrics.SetNodeOnline(n.Name, role, n.Status == model.NodeStatusOnline)
queue := queueByNode[n.ID]
c.metrics.SetAgentQueue(n.Name, role, queue.Depth, queue.Running, queue.Timeouts)
}
}
if breach, err := c.source.CountSLABreach(ctx); err == nil {

View File

@@ -31,12 +31,6 @@ type Metrics struct {
StorageUsedBytes *prometheus.GaugeVec
// 节点在线状态labels: node_name, rolevalue: 0/1
NodeOnline *prometheus.GaugeVec
// Agent 命令队列深度labels: node_name, role
AgentCommandQueueDepth *prometheus.GaugeVec
// Agent 正在执行的长命令数labels: node_name, role
AgentCommandRunning *prometheus.GaugeVec
// Agent 命令超时累计数快照labels: node_name, role
AgentCommandTimeoutTotal *prometheus.GaugeVec
// 验证演练结果labels: status
VerifyRunTotal *prometheus.CounterVec
// 恢复操作结果labels: status
@@ -84,18 +78,6 @@ func New(version string) *Metrics {
Name: "backupx_node_online",
Help: "集群节点在线状态1 在线 / 0 离线)",
}, []string{"node_name", "role"}),
AgentCommandQueueDepth: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "backupx_agent_command_queue_depth",
Help: "Agent 当前 pending/dispatched 命令总数",
}, []string{"node_name", "role"}),
AgentCommandRunning: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "backupx_agent_command_running",
Help: "Agent 当前正在执行的长命令数",
}, []string{"node_name", "role"}),
AgentCommandTimeoutTotal: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "backupx_agent_command_timeout_total",
Help: "Agent 已超时命令数快照",
}, []string{"node_name", "role"}),
VerifyRunTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "backupx_verify_run_total",
Help: "备份验证演练执行总数",
@@ -124,9 +106,6 @@ func New(version string) *Metrics {
m.TaskRunningGauge,
m.StorageUsedBytes,
m.NodeOnline,
m.AgentCommandQueueDepth,
m.AgentCommandRunning,
m.AgentCommandTimeoutTotal,
m.VerifyRunTotal,
m.RestoreRunTotal,
m.ReplicationRunTotal,
@@ -229,24 +208,6 @@ func (m *Metrics) ResetNodeOnline() {
m.NodeOnline.Reset()
}
// SetAgentQueue publishes the queue depth, running count and timeout count of
// one node (labelled by name and role) to the three agent command gauges.
// It is a no-op on a nil receiver.
func (m *Metrics) SetAgentQueue(name, role string, depth, running, timeoutCount int) {
	if m == nil {
		return
	}
	samples := []struct {
		gauge *prometheus.GaugeVec
		value int
	}{
		{m.AgentCommandQueueDepth, depth},
		{m.AgentCommandRunning, running},
		{m.AgentCommandTimeoutTotal, timeoutCount},
	}
	for _, sample := range samples {
		sample.gauge.WithLabelValues(name, role).Set(float64(sample.value))
	}
}
// ResetAgentQueue clears all label series of the three agent command gauges.
// It is a no-op on a nil receiver.
func (m *Metrics) ResetAgentQueue() {
	if m == nil {
		return
	}
	for _, gauge := range []*prometheus.GaugeVec{
		m.AgentCommandQueueDepth,
		m.AgentCommandRunning,
		m.AgentCommandTimeoutTotal,
	} {
		gauge.Reset()
	}
}
// ResetStorageUsed 清空存储目标 gauge。
func (m *Metrics) ResetStorageUsed() {
if m == nil {

View File

@@ -41,11 +41,9 @@ func TestObserveTaskRun_NilReceiverIsSafe(t *testing.T) {
m.DecTaskRunning()
m.SetStorageUsed("a", "s3", 1)
m.SetNodeOnline("n1", "master", true)
m.SetAgentQueue("n1", "agent", 2, 1, 3)
m.SetSLABreach(3)
m.ResetNodeOnline()
m.ResetStorageUsed()
m.ResetAgentQueue()
// no panic -> pass
}
@@ -53,7 +51,6 @@ func TestHandler_ExposesBackupxMetrics(t *testing.T) {
m := New("0.0.0-test")
m.ObserveTaskRun("file", "success", 1.0, 2048)
m.SetNodeOnline("n1", "master", true)
m.SetAgentQueue("edge-a", "agent", 3, 1, 2)
m.SetSLABreach(1)
recorder := httptest.NewRecorder()
@@ -69,9 +66,6 @@ func TestHandler_ExposesBackupxMetrics(t *testing.T) {
"backupx_task_run_total",
"backupx_task_run_duration_seconds",
"backupx_node_online",
"backupx_agent_command_queue_depth",
"backupx_agent_command_running",
"backupx_agent_command_timeout_total",
"backupx_sla_breach_tasks",
"backupx_app_info",
} {

View File

@@ -17,46 +17,15 @@ type AgentCommandRepository interface {
// 并返回领取到的命令。无命令时返回 (nil, nil)。
ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error)
Update(ctx context.Context, cmd *model.AgentCommand) error
// CompleteDispatched 只在命令仍处于 dispatched 时写入终态。
// 返回 false 表示命令已被超时监控或其它流程终结,调用方不应覆盖。
CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error)
// MarkStaleTimeout 把 dispatched 状态但超时未完成的命令标记为 timeout。
// 返回被标记的行数。不返回具体命令(供背景监控简单调用)。
MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error)
// TimeoutActive 只在命令仍处于 pending/dispatched 时写入 timeout。
// 返回 false 表示命令已被 Agent 回写为终态,调用方不应覆盖。
TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error)
// ListStaleDispatched 列出 dispatched 但已超时、尚未被标记的命令。
// 调用方需要把它们逐一标记 timeout 并联动关联记录状态。
ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
// ListStaleActive 列出 pending/dispatched 但已超时、尚未完成的命令。
// pending 使用 created_at 判定dispatched 使用 dispatched_at 判定。
ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
// ListPendingByNode 列出某节点下的所有 pending/dispatched 命令。
// 用于删除节点或节点离线时的清理。
ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error)
NodeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error)
}
// AgentCommandQueueSummary aggregates the agent command queue of one node,
// as produced by NodeQueueSummaries.
type AgentCommandQueueSummary struct {
// NodeID is the node the summary belongs to.
NodeID uint `json:"nodeId"`
// Pending counts commands whose status is pending.
Pending int `json:"pending"`
// Dispatched counts commands whose status is dispatched.
Dispatched int `json:"dispatched"`
// Running counts dispatched commands of a long-running type
// (see isLongRunningAgentCommand).
Running int `json:"running"`
// Depth is the number of active commands (pending + dispatched).
Depth int `json:"depth"`
// Timeouts counts commands whose status is timeout.
Timeouts int `json:"timeouts"`
// LastError is the error message of the most recent failed or timed-out
// command that carries a non-empty message.
LastError string `json:"lastError,omitempty"`
// OldestActiveAt is the earliest timestamp among active commands:
// CreatedAt for pending ones, DispatchedAt for dispatched ones.
OldestActiveAt *time.Time `json:"oldestActiveAt,omitempty"`
}
// agentCommandTimeoutCount is the scan target for the per-node
// "node_id, COUNT(*) AS count" query over timed-out commands.
type agentCommandTimeoutCount struct {
NodeID uint
Count int
}
// agentCommandLastError is the scan target for the query that selects
// node_id and error_message of a node's latest failed or timed-out command.
type agentCommandLastError struct {
NodeID uint
ErrorMessage string
}
type GormAgentCommandRepository struct {
@@ -125,21 +94,6 @@ func (r *GormAgentCommandRepository) Update(ctx context.Context, cmd *model.Agen
return r.db.WithContext(ctx).Save(cmd).Error
}
// CompleteDispatched writes the terminal state carried by cmd (status, error
// message, result and completion time) only when the stored row matches
// cmd.ID and cmd.NodeID and is still in the dispatched status.
//
// It returns true when a row was updated. It returns false when no row
// matched, e.g. because the command has already left the dispatched state;
// the stored row is then left untouched.
func (r *GormAgentCommandRepository) CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
	terminalFields := map[string]any{
		"status":        cmd.Status,
		"error_message": cmd.ErrorMessage,
		"result":        cmd.Result,
		"completed_at":  cmd.CompletedAt,
	}
	tx := r.db.WithContext(ctx).
		Model(&model.AgentCommand{}).
		Where("id = ? AND node_id = ? AND status = ?", cmd.ID, cmd.NodeID, model.AgentCommandStatusDispatched).
		Updates(terminalFields)
	if err := tx.Error; err != nil {
		return false, err
	}
	return tx.RowsAffected > 0, nil
}
func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error) {
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
@@ -153,20 +107,6 @@ func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, thres
return result.RowsAffected, nil
}
// TimeoutActive marks the command identified by cmd.ID as timed out, copying
// cmd.ErrorMessage and cmd.CompletedAt, but only while the stored row is
// still pending or dispatched.
//
// It returns true when a row was updated. It returns false when no row
// matched, e.g. because the command already reached another state; the
// stored row is then left untouched.
func (r *GormAgentCommandRepository) TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
	activeStatuses := []string{model.AgentCommandStatusPending, model.AgentCommandStatusDispatched}
	timeoutFields := map[string]any{
		"status":        model.AgentCommandStatusTimeout,
		"error_message": cmd.ErrorMessage,
		"completed_at":  cmd.CompletedAt,
	}
	tx := r.db.WithContext(ctx).
		Model(&model.AgentCommand{}).
		Where("id = ? AND status IN ?", cmd.ID, activeStatuses).
		Updates(timeoutFields)
	if err := tx.Error; err != nil {
		return false, err
	}
	return tx.RowsAffected > 0, nil
}
// ListStaleDispatched 列出 dispatched 但 dispatched_at 早于 threshold 的命令。
func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
var items []model.AgentCommand
@@ -179,21 +119,6 @@ func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, th
return items, nil
}
// ListStaleActive returns, ordered by ascending id, every command that is
// still active but older than threshold: pending commands are compared by
// created_at, dispatched commands by dispatched_at.
func (r *GormAgentCommandRepository) ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
	var stale []model.AgentCommand
	err := r.db.WithContext(ctx).
		Where(
			"(status = ? AND created_at < ?) OR (status = ? AND dispatched_at < ?)",
			model.AgentCommandStatusPending, threshold,
			model.AgentCommandStatusDispatched, threshold,
		).
		Order("id asc").
		Find(&stale).Error
	if err != nil {
		return nil, err
	}
	return stale, nil
}
// ListPendingByNode 列出某节点下所有待执行pending 或 dispatched命令。
func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error) {
var items []model.AgentCommand
@@ -208,114 +133,3 @@ func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, node
}
return items, nil
}
// NodeQueueSummaries builds the per-node queue summaries in two passes: the
// active (pending/dispatched) counters first, then the terminal statistics
// (timeout count and last error) layered on top. Any error from either pass
// is returned with a nil map.
func (r *GormAgentCommandRepository) NodeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error) {
	byNode, err := r.activeQueueSummaries(ctx)
	if err == nil {
		err = r.applyTerminalQueueStats(ctx, byNode)
	}
	if err != nil {
		return nil, err
	}
	return byNode, nil
}
// activeQueueSummaries loads every pending or dispatched command (ordered by
// node id, then id) and folds them into one summary per node.
//
// Pending commands increment Pending and Depth and contribute CreatedAt to
// OldestActiveAt. Dispatched commands increment Dispatched and Depth,
// increment Running when their type is long-running, and contribute
// DispatchedAt to OldestActiveAt.
func (r *GormAgentCommandRepository) activeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error) {
	activeStatuses := []string{
		model.AgentCommandStatusPending,
		model.AgentCommandStatusDispatched,
	}
	var active []model.AgentCommand
	err := r.db.WithContext(ctx).
		Where("status IN ?", activeStatuses).
		Order("node_id asc, id asc").
		Find(&active).Error
	if err != nil {
		return nil, err
	}

	byNode := make(map[uint]AgentCommandQueueSummary)
	for idx := range active {
		command := &active[idx]
		// Map values are not addressable: copy out, mutate, store back.
		entry := byNode[command.NodeID]
		entry.NodeID = command.NodeID
		if command.Status == model.AgentCommandStatusPending {
			entry.Pending++
			entry.Depth++
			entry.OldestActiveAt = oldestTime(entry.OldestActiveAt, &command.CreatedAt)
		} else if command.Status == model.AgentCommandStatusDispatched {
			entry.Dispatched++
			entry.Depth++
			if isLongRunningAgentCommand(command.Type) {
				entry.Running++
			}
			entry.OldestActiveAt = oldestTime(entry.OldestActiveAt, command.DispatchedAt)
		}
		byNode[command.NodeID] = entry
	}
	return byNode, nil
}
// applyTerminalQueueStats enriches summaries in place with statistics taken
// from terminal commands: the number of timed-out commands per node and the
// error message of each node's latest failed or timed-out command.
// Nodes that have no entry in summaries yet get one created.
func (r *GormAgentCommandRepository) applyTerminalQueueStats(ctx context.Context, summaries map[uint]AgentCommandQueueSummary) error {
// Step 1: count commands in the timeout status, grouped by node.
var timeoutCounts []agentCommandTimeoutCount
if err := r.db.WithContext(ctx).
Model(&model.AgentCommand{}).
Select("node_id, COUNT(*) AS count").
Where("status = ?", model.AgentCommandStatusTimeout).
Group("node_id").
Scan(&timeoutCounts).Error; err != nil {
return err
}
for _, item := range timeoutCounts {
summary := summaries[item.NodeID]
summary.NodeID = item.NodeID
summary.Timeouts = item.Count
summaries[item.NodeID] = summary
}
// Step 2: find the latest error per node among failed/timeout commands.
terminalStatuses := []string{
model.AgentCommandStatusFailed,
model.AgentCommandStatusTimeout,
}
// Subquery: per node, the newest effective timestamp, taken as the first
// non-NULL of completed_at, updated_at and created_at, over terminal
// commands that have a non-empty error message.
latestByNode := r.db.WithContext(ctx).
Model(&model.AgentCommand{}).
Select("node_id, MAX(COALESCE(completed_at, updated_at, created_at)) AS last_error_at").
Where("status IN ? AND error_message <> ''", terminalStatuses).
Group("node_id")
// Join back to fetch the error message of the row(s) carrying that timestamp.
var lastErrors []agentCommandLastError
if err := r.db.WithContext(ctx).
Table("agent_commands AS cmd").
Select("cmd.node_id, cmd.error_message").
Joins("JOIN (?) latest ON latest.node_id = cmd.node_id AND latest.last_error_at = COALESCE(cmd.completed_at, cmd.updated_at, cmd.created_at)", latestByNode).
Where("cmd.status IN ? AND cmd.error_message <> ''", terminalStatuses).
Order("cmd.node_id asc, cmd.id desc").
Scan(&lastErrors).Error; err != nil {
return err
}
// Several rows may share the same timestamp; rows arrive ordered by id
// descending within a node, so keeping only the first row per node picks
// the one with the highest id.
seenLastError := make(map[uint]struct{}, len(lastErrors))
for _, item := range lastErrors {
if _, ok := seenLastError[item.NodeID]; ok {
continue
}
summary := summaries[item.NodeID]
summary.NodeID = item.NodeID
summary.LastError = item.ErrorMessage
summaries[item.NodeID] = summary
seenLastError[item.NodeID] = struct{}{}
}
return nil
}
func oldestTime(current *time.Time, candidate *time.Time) *time.Time {
if candidate == nil {
return current
}
if current == nil || candidate.Before(*current) {
value := *candidate
return &value
}
return current
}
// isLongRunningAgentCommand reports whether commandType is one of the command
// types treated as long-running: run-task or restore-record.
func isLongRunningAgentCommand(commandType string) bool {
	switch commandType {
	case model.AgentCommandTypeRunTask, model.AgentCommandTypeRestoreRecord:
		return true
	default:
		return false
	}
}

View File

@@ -90,78 +90,6 @@ func TestAgentCommandRepository_Update(t *testing.T) {
}
}
// TestAgentCommandRepository_CompleteDispatchedOnlyUpdatesDispatchedCommand
// verifies that CompleteDispatched updates a command that is still dispatched
// and refuses to overwrite one that is already in the timeout status.
func TestAgentCommandRepository_CompleteDispatchedOnlyUpdatesDispatchedCommand(t *testing.T) {
db := newTestDB(t)
repo := NewAgentCommandRepository(db)
ctx := context.Background()
// Fixtures: one dispatched command and one already timed-out command.
dispatched := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusDispatched}
timeout := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusTimeout, ErrorMessage: "timeout"}
if err := repo.Create(ctx, dispatched); err != nil {
t.Fatalf("Create dispatched returned error: %v", err)
}
if err := repo.Create(ctx, timeout); err != nil {
t.Fatalf("Create timeout returned error: %v", err)
}
// Completing the dispatched command must succeed.
now := time.Now().UTC()
dispatched.Status = model.AgentCommandStatusSucceeded
dispatched.Result = `{"ok":true}`
dispatched.CompletedAt = &now
updated, err := repo.CompleteDispatched(ctx, dispatched)
if err != nil {
t.Fatalf("CompleteDispatched returned error: %v", err)
}
if !updated {
t.Fatal("expected dispatched command to be updated")
}
// A late result for the timed-out command must be rejected.
timeout.Status = model.AgentCommandStatusSucceeded
timeout.Result = `{"late":true}`
timeout.CompletedAt = &now
updated, err = repo.CompleteDispatched(ctx, timeout)
if err != nil {
t.Fatalf("CompleteDispatched terminal returned error: %v", err)
}
if updated {
t.Fatal("expected terminal command not to be updated")
}
// The stored row must still be in timeout with an empty result.
gotTimeout, err := repo.FindByID(ctx, timeout.ID)
if err != nil {
t.Fatalf("FindByID timeout returned error: %v", err)
}
if gotTimeout.Status != model.AgentCommandStatusTimeout || gotTimeout.Result != "" {
t.Fatalf("expected timeout command unchanged, got %#v", gotTimeout)
}
}
// TestAgentCommandRepository_TimeoutActiveDoesNotOverwriteTerminalCommand
// verifies that TimeoutActive leaves an already succeeded command untouched
// and reports that nothing was updated.
func TestAgentCommandRepository_TimeoutActiveDoesNotOverwriteTerminalCommand(t *testing.T) {
db := newTestDB(t)
repo := NewAgentCommandRepository(db)
ctx := context.Background()
succeeded := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusSucceeded, Result: `{"ok":true}`}
if err := repo.Create(ctx, succeeded); err != nil {
t.Fatalf("Create succeeded returned error: %v", err)
}
// Try to time out the command even though it has already succeeded.
now := time.Now().UTC()
succeeded.ErrorMessage = "timeout"
succeeded.CompletedAt = &now
updated, err := repo.TimeoutActive(ctx, succeeded)
if err != nil {
t.Fatalf("TimeoutActive returned error: %v", err)
}
if updated {
t.Fatal("expected terminal command not to be timed out")
}
// Status, error message and result in the database must be unchanged.
got, err := repo.FindByID(ctx, succeeded.ID)
if err != nil {
t.Fatalf("FindByID returned error: %v", err)
}
if got.Status != model.AgentCommandStatusSucceeded || got.ErrorMessage != "" || got.Result != `{"ok":true}` {
t.Fatalf("expected succeeded command unchanged, got %#v", got)
}
}
func TestAgentCommandRepository_MarkStaleTimeout(t *testing.T) {
db := newTestDB(t)
repo := NewAgentCommandRepository(db)
@@ -190,72 +118,3 @@ func TestAgentCommandRepository_MarkStaleTimeout(t *testing.T) {
t.Errorf("new should stay dispatched: %+v", newGot)
}
}
// TestAgentCommandRepository_ListStaleActiveIncludesPendingAndDispatched
// verifies that ListStaleActive returns both the old pending and the old
// dispatched command, in id order, while skipping a recent pending command
// and a succeeded one.
func TestAgentCommandRepository_ListStaleActiveIncludesPendingAndDispatched(t *testing.T) {
db := newTestDB(t)
repo := NewAgentCommandRepository(db)
ctx := context.Background()
old := time.Now().Add(-time.Hour)
recent := time.Now()
// Two stale active commands, one fresh pending command, one terminal command.
oldPending := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusPending, CreatedAt: old}
oldDispatched := &model.AgentCommand{NodeID: 1, Type: "restore_record", Status: model.AgentCommandStatusDispatched, DispatchedAt: &old}
recentPending := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusPending, CreatedAt: recent}
succeeded := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusSucceeded, CreatedAt: old}
for _, cmd := range []*model.AgentCommand{oldPending, oldDispatched, recentPending, succeeded} {
if err := repo.Create(ctx, cmd); err != nil {
t.Fatalf("Create returned error: %v", err)
}
}
// Threshold of 30 minutes ago: only the one-hour-old active commands qualify.
items, err := repo.ListStaleActive(ctx, time.Now().Add(-30*time.Minute))
if err != nil {
t.Fatalf("ListStaleActive returned error: %v", err)
}
if len(items) != 2 {
t.Fatalf("expected 2 stale active commands, got %#v", items)
}
if items[0].ID != oldPending.ID || items[1].ID != oldDispatched.ID {
t.Fatalf("unexpected stale active order/items: %#v", items)
}
}
// TestAgentCommandRepository_NodeQueueSummaries verifies the per-node
// aggregation returned by NodeQueueSummaries: active counters, timeout count,
// latest error message and oldest active timestamp.
func TestAgentCommandRepository_NodeQueueSummaries(t *testing.T) {
db := newTestDB(t)
repo := NewAgentCommandRepository(db)
ctx := context.Background()
old := time.Now().UTC().Add(-20 * time.Minute)
recent := time.Now().UTC().Add(-2 * time.Minute)
dispatchedAt := time.Now().UTC().Add(-5 * time.Minute)
completedAt := time.Now().UTC().Add(-1 * time.Minute)
// Node 1: two pending, one dispatched, one failed ("boom", completed 1 minute
// ago) and one timed out ("late", completed 2 minutes ago).
// Node 2: a single pending command.
commands := []*model.AgentCommand{
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old},
{NodeID: 1, Type: model.AgentCommandTypeRestoreRecord, Status: model.AgentCommandStatusPending, CreatedAt: recent},
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusDispatched, DispatchedAt: &dispatchedAt},
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusFailed, ErrorMessage: "boom", CompletedAt: &completedAt},
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusTimeout, ErrorMessage: "late", CompletedAt: &recent},
{NodeID: 2, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old},
}
for _, cmd := range commands {
if err := repo.Create(ctx, cmd); err != nil {
t.Fatalf("Create returned error: %v", err)
}
}
summaries, err := repo.NodeQueueSummaries(ctx)
if err != nil {
t.Fatalf("NodeQueueSummaries returned error: %v", err)
}
nodeOne := summaries[1]
if nodeOne.Pending != 2 || nodeOne.Dispatched != 1 || nodeOne.Running != 1 || nodeOne.Depth != 3 {
t.Fatalf("unexpected node 1 summary: %#v", nodeOne)
}
// "boom" completed more recently than "late", so it is the last error.
if nodeOne.Timeouts != 1 || nodeOne.LastError != "boom" {
t.Fatalf("expected terminal timeout and latest error in summary, got %#v", nodeOne)
}
if nodeOne.OldestActiveAt == nil || !nodeOne.OldestActiveAt.Equal(old) {
t.Fatalf("expected oldest active at %s, got %#v", old, nodeOne.OldestActiveAt)
}
if nodeTwo := summaries[2]; nodeTwo.Pending != 1 || nodeTwo.Depth != 1 || nodeTwo.Timeouts != 0 || nodeTwo.LastError != "" {
t.Fatalf("unexpected node 2 summary: %#v", nodeTwo)
}
}

View File

@@ -3,7 +3,6 @@ package repository
import (
"context"
"path/filepath"
"sync"
"testing"
"time"
@@ -84,59 +83,6 @@ func TestInstallTokenConsumeExpired(t *testing.T) {
}
}
// TestInstallTokenConsumeConcurrentOnlyOneWins verifies that when several
// goroutines call ConsumeByToken for the same token at once, none of them
// errors and exactly one receives a non-nil token.
func TestInstallTokenConsumeConcurrentOnlyOneWins(t *testing.T) {
db := openTestInstallTokenDB(t)
repo := NewAgentInstallTokenRepository(db)
ctx := context.Background()
tok := &model.AgentInstallToken{
Token: "concurrent", NodeID: 1, Mode: model.InstallModeSystemd,
Arch: model.InstallArchAuto, AgentVer: "v1.7.0",
DownloadSrc: model.InstallSourceGitHub,
ExpiresAt: time.Now().UTC().Add(15 * time.Minute),
CreatedByID: 1,
}
if err := repo.Create(ctx, tok); err != nil {
t.Fatalf("create: %v", err)
}
const workers = 8
var wg sync.WaitGroup
// start is closed once all workers are spawned so they are released together.
start := make(chan struct{})
// Both channels are buffered to the worker count so no worker blocks on send.
results := make(chan *model.AgentInstallToken, workers)
errs := make(chan error, workers)
for i := 0; i < workers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
<-start
got, err := repo.ConsumeByToken(ctx, "concurrent")
if err != nil {
errs <- err
return
}
results <- got
}()
}
close(start)
wg.Wait()
// All workers are done; closing lets the range loops below terminate.
close(results)
close(errs)
for err := range errs {
t.Fatalf("consume err: %v", err)
}
// Count the workers that got a non-nil token back.
success := 0
for got := range results {
if got != nil {
success++
}
}
if success != 1 {
t.Fatalf("expected exactly one successful consume, got %d", success)
}
}
func TestInstallTokenGC(t *testing.T) {
db := openTestInstallTokenDB(t)
repo := NewAgentInstallTokenRepository(db)

View File

@@ -33,7 +33,6 @@ type BackupStorageUsageItem struct {
type BackupRecordRepository interface {
List(context.Context, BackupRecordListOptions) ([]model.BackupRecord, error)
FindByID(context.Context, uint) (*model.BackupRecord, error)
FindRunningByTaskAndNode(context.Context, uint, uint) (*model.BackupRecord, error)
Create(context.Context, *model.BackupRecord) error
Update(context.Context, *model.BackupRecord) error
Delete(context.Context, uint) error
@@ -94,20 +93,6 @@ func (r *GormBackupRecordRepository) FindByID(ctx context.Context, id uint) (*mo
return &item, nil
}
// FindRunningByTaskAndNode returns the backup record with the highest id that
// belongs to the given task and node and is in the running status.
// It returns (nil, nil) when no such record exists.
func (r *GormBackupRecordRepository) FindRunningByTaskAndNode(ctx context.Context, taskID uint, nodeID uint) (*model.BackupRecord, error) {
	var running model.BackupRecord
	err := r.db.WithContext(ctx).
		Where("task_id = ? AND node_id = ? AND status = ?", taskID, nodeID, model.BackupRecordStatusRunning).
		Order("id desc").
		First(&running).Error
	switch {
	case err == nil:
		return &running, nil
	case errors.Is(err, gorm.ErrRecordNotFound):
		// Absence is an expected outcome, not an error.
		return nil, nil
	default:
		return nil, err
	}
}
// Create inserts item through GORM using the supplied context and returns
// the resulting error, if any.
func (r *GormBackupRecordRepository) Create(ctx context.Context, item *model.BackupRecord) error {
return r.db.WithContext(ctx).Create(item).Error
}

View File

@@ -226,7 +226,7 @@ func (r *GormBackupTaskRepository) Create(ctx context.Context, item *model.Backu
}
func (r *GormBackupTaskRepository) Update(ctx context.Context, item *model.BackupTask) error {
if err := r.db.WithContext(ctx).Omit("StorageTarget", "StorageTargets", "Node").Save(item).Error; err != nil {
if err := r.db.WithContext(ctx).Save(item).Error; err != nil {
return err
}
if len(item.StorageTargets) > 0 {

View File

@@ -92,49 +92,3 @@ func TestBackupTaskRepositoryCRUD(t *testing.T) {
t.Fatalf("expected task deleted, got %#v", deleted)
}
}
// TestBackupTaskRepositoryUpdateCanClearNodeIDAfterPreload verifies that a
// task loaded via FindByID (with its Node association populated) can have
// NodeID reset to 0 through Update and that the cleared value is persisted.
func TestBackupTaskRepositoryUpdateCanClearNodeIDAfterPreload(t *testing.T) {
ctx := context.Background()
repo := newBackupTaskTestRepository(t)
remoteNode := &model.Node{Name: "edge-1", Token: "edge-token", Status: model.NodeStatusOnline, IsLocal: false}
if err := repo.db.WithContext(ctx).Create(remoteNode).Error; err != nil {
t.Fatalf("create node: %v", err)
}
// The task starts out bound to the remote node.
task := &model.BackupTask{
Name: "pooled-source",
Type: "file",
Enabled: true,
SourcePath: "/srv/www/site",
StorageTargetID: 1,
NodeID: remoteNode.ID,
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
LastStatus: "idle",
}
if err := repo.Create(ctx, task); err != nil {
t.Fatalf("Create returned error: %v", err)
}
loaded, err := repo.FindByID(ctx, task.ID)
if err != nil {
t.Fatalf("FindByID returned error: %v", err)
}
if loaded == nil || loaded.Node.ID != remoteNode.ID {
t.Fatalf("expected preloaded node %d, got %#v", remoteNode.ID, loaded)
}
// Clear the node binding and set a pool tag while loaded.Node still holds
// the preloaded node.
loaded.NodeID = 0
loaded.NodePoolTag = "db"
if err := repo.Update(ctx, loaded); err != nil {
t.Fatalf("Update returned error: %v", err)
}
stored, err := repo.FindByID(ctx, task.ID)
if err != nil {
t.Fatalf("FindByID after update returned error: %v", err)
}
if stored.NodeID != 0 {
t.Fatalf("expected NodeID to be cleared, got %d", stored.NodeID)
}
if stored.NodePoolTag != "db" {
t.Fatalf("expected NodePoolTag db, got %q", stored.NodePoolTag)
}
}

View File

@@ -45,7 +45,7 @@ type Service struct {
func NewService(tasks repository.BackupTaskRepository, runner TaskRunner, logger *zap.Logger) *Service {
parser := cron.NewParser(cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)
return &Service{
cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.Local)),
cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.UTC)),
tasks: tasks,
runner: runner,
logger: logger,

View File

@@ -68,37 +68,3 @@ func TestServiceSyncTaskAndTrigger(t *testing.T) {
t.Fatalf("expected scheduled runner to be triggered")
}
}
// TestServiceSchedulesTasksInLocalTimezone verifies that the scheduler's cron
// instance uses time.Local as its location, so that "0 5 * * *" fires at
// 05:00 in the local zone.
func TestServiceSchedulesTasksInLocalTimezone(t *testing.T) {
location, err := time.LoadLocation("Asia/Shanghai")
if err != nil {
t.Fatalf("LoadLocation returned error: %v", err)
}
// Temporarily override the process-wide time.Local; restored on cleanup.
originalLocal := time.Local
time.Local = location
t.Cleanup(func() {
time.Local = originalLocal
})
service := NewService(&fakeTaskRepository{}, &fakeRunner{}, nil)
if got := service.cron.Location(); got != location {
t.Fatalf("cron location = %v, want %v", got, location)
}
task := &model.BackupTask{ID: 1, Enabled: true, CronExpr: "0 5 * * *"}
if err := service.SyncTask(context.Background(), task); err != nil {
t.Fatalf("SyncTask returned error: %v", err)
}
entryID, ok := service.entries[task.ID]
if !ok {
t.Fatalf("expected cron entry for task %d", task.ID)
}
entry := service.cron.Entry(entryID)
// From 04:00 Shanghai time the next run must be 05:00 Shanghai time the same day.
now := time.Date(2026, 4, 30, 4, 0, 0, 0, location)
got := entry.Schedule.Next(now)
want := time.Date(2026, 4, 30, 5, 0, 0, 0, location)
if !got.Equal(want) {
t.Fatalf("next run = %s, want %s", got, want)
}
}

View File

@@ -118,8 +118,7 @@ func (s *AgentService) SubmitCommandResult(ctx context.Context, node *model.Node
cmd.Result = string(result.Result)
}
cmd.CompletedAt = &now
_, err = s.cmdRepo.CompleteDispatched(ctx, cmd)
return err
return s.cmdRepo.Update(ctx, cmd)
}
// AgentTaskSpec 给 Agent 返回的任务规格,包含解密后的存储配置,供 Agent 直接执行。
@@ -160,8 +159,8 @@ func (s *AgentService) GetTaskSpec(ctx context.Context, node *model.Node, taskID
if task == nil {
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "任务不存在", nil)
}
if err := s.ensureTaskSpecAccess(ctx, node, task); err != nil {
return nil, err
if task.NodeID != node.ID {
return nil, apperror.Unauthorized("BACKUP_TASK_FORBIDDEN", "任务不属于当前节点", nil)
}
// 解密数据库密码(若有)
dbPassword := ""
@@ -214,31 +213,15 @@ func (s *AgentService) GetTaskSpec(ctx context.Context, node *model.Node, taskID
}, nil
}
// ensureTaskSpecAccess decides whether node may read the spec of task.
// Access is granted when the task is bound to the node directly, or when a
// running backup record exists for this task on this node. Otherwise an
// Unauthorized BACKUP_TASK_FORBIDDEN error is returned; repository errors are
// passed through unchanged.
func (s *AgentService) ensureTaskSpecAccess(ctx context.Context, node *model.Node, task *model.BackupTask) error {
	if task.NodeID != node.ID {
		running, err := s.recordRepo.FindRunningByTaskAndNode(ctx, task.ID, node.ID)
		switch {
		case err != nil:
			return err
		case running == nil:
			return apperror.Unauthorized("BACKUP_TASK_FORBIDDEN", "任务不属于当前节点", nil)
		}
	}
	return nil
}
// AgentRecordUpdate Agent 上报备份记录的最终状态。
type AgentRecordUpdate struct {
Status string `json:"status"` // running | success | failed
FileName string `json:"fileName,omitempty"`
FileSize int64 `json:"fileSize,omitempty"`
Checksum string `json:"checksum,omitempty"`
StoragePath string `json:"storagePath,omitempty"`
StorageTargetID uint `json:"storageTargetId,omitempty"`
StorageUploadResults []StorageUploadResultItem `json:"storageUploadResults,omitempty"`
ErrorMessage string `json:"errorMessage,omitempty"`
LogAppend string `json:"logAppend,omitempty"` // 增量日志,追加到 record.log_content
Status string `json:"status"` // running | success | failed
FileName string `json:"fileName,omitempty"`
FileSize int64 `json:"fileSize,omitempty"`
Checksum string `json:"checksum,omitempty"`
StoragePath string `json:"storagePath,omitempty"`
ErrorMessage string `json:"errorMessage,omitempty"`
LogAppend string `json:"logAppend,omitempty"` // 增量日志,追加到 record.log_content
}
// UpdateRecord 更新备份记录的状态/日志。Agent 在执行过程中可多次调用。
@@ -250,16 +233,14 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
if record == nil {
return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "记录不存在", nil)
}
// 通过 task.NodeID 判断是否属于当前 agent
task, err := s.taskRepo.FindByID(ctx, record.TaskID)
if err != nil {
return err
}
if task == nil || !recordBelongsToNode(record, task, node.ID) {
if task == nil || task.NodeID != node.ID {
return apperror.Unauthorized("BACKUP_RECORD_FORBIDDEN", "记录不属于当前节点", nil)
}
if isBackupRecordTerminal(record.Status) {
return nil
}
if update.Status != "" {
record.Status = update.Status
}
@@ -275,14 +256,6 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
if update.StoragePath != "" {
record.StoragePath = update.StoragePath
}
if update.StorageTargetID > 0 {
record.StorageTargetID = update.StorageTargetID
}
if len(update.StorageUploadResults) > 0 {
if resultsJSON, marshalErr := json.Marshal(update.StorageUploadResults); marshalErr == nil {
record.StorageUploadResults = string(resultsJSON)
}
}
if update.ErrorMessage != "" {
record.ErrorMessage = update.ErrorMessage
}
@@ -304,25 +277,11 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
// 同步更新任务的 last_status
if update.Status == model.BackupRecordStatusSuccess || update.Status == model.BackupRecordStatusFailed {
task.LastStatus = update.Status
task.LastRunAt = &record.StartedAt
if err := s.taskRepo.Update(ctx, task); err != nil {
return fmt.Errorf("update backup task summary: %w", err)
}
_ = s.taskRepo.Update(ctx, task)
}
return nil
}
// recordBelongsToNode reports whether the backup record is owned by nodeID.
// A non-zero record.NodeID is authoritative; when it is zero the decision
// falls back to the NodeID of the record's task.
func recordBelongsToNode(record *model.BackupRecord, task *model.BackupTask, nodeID uint) bool {
	ownerID := record.NodeID
	if ownerID == 0 {
		ownerID = task.NodeID
	}
	return ownerID == nodeID
}
// isBackupRecordTerminal reports whether status is one of the final backup
// record statuses: success or failed.
func isBackupRecordTerminal(status string) bool {
	switch status {
	case model.BackupRecordStatusSuccess, model.BackupRecordStatusFailed:
		return true
	default:
		return false
	}
}
// EnqueueCommand Master 端调用:给指定节点插入一条待执行命令。
// 返回命令 ID。
func (s *AgentService) EnqueueCommand(ctx context.Context, nodeID uint, cmdType string, payload any) (uint, error) {
@@ -397,84 +356,25 @@ func (s *AgentService) StartCommandTimeoutMonitor(ctx context.Context, interval
}()
}
// processStaleCommands 扫描已超时的 pending/dispatched 命令并联动关联记录。
// 流程:先取超时候选 → 条件式把命令置为 timeout → 对抢到的命令联动 backup/restore 记录。
// processStaleCommands 扫描已超时的 dispatched 命令并联动关联记录。
// 流程:先取超时候选 → 对每条联动 backup/restore 记录 → 把命令置为 timeout
// 单条失败不影响后续处理。
func (s *AgentService) processStaleCommands(ctx context.Context, threshold time.Time) {
commands, err := s.cmdRepo.ListStaleActive(ctx, threshold)
commands, err := s.cmdRepo.ListStaleDispatched(ctx, threshold)
if err != nil || len(commands) == 0 {
return
}
for i := range commands {
cmd := commands[i]
if s.commandStillActive(ctx, &cmd, threshold) {
continue
}
s.failLinkedRecord(ctx, &cmd)
now := time.Now().UTC()
cmd.Status = model.AgentCommandStatusTimeout
cmd.ErrorMessage = "agent did not report result before timeout"
cmd.CompletedAt = &now
timedOut, err := s.cmdRepo.TimeoutActive(ctx, &cmd)
if err != nil || !timedOut {
continue
}
s.failLinkedRecord(ctx, &cmd)
_ = s.cmdRepo.Update(ctx, &cmd)
}
}
// commandStillActive reports whether a stale-looking command should be kept
// alive. It uses the linked record's status, the record's update time and the
// node heartbeat as lease-renewal signals for long-running work.
//
// Only dispatched run_task / restore_record commands can be renewed, so short
// RPC-style commands are not retained indefinitely by an online node. The
// linked record must exist and still be running; the command then counts as
// active when the node was seen recently or the record was updated after
// threshold. Any payload decoding or lookup failure yields false.
func (s *AgentService) commandStillActive(ctx context.Context, cmd *model.AgentCommand, threshold time.Time) bool {
	if cmd.Status != model.AgentCommandStatusDispatched {
		return false
	}

	if cmd.Type == model.AgentCommandTypeRunTask {
		var body struct {
			RecordID uint `json:"recordId"`
		}
		if json.Unmarshal([]byte(cmd.Payload), &body) != nil || body.RecordID == 0 {
			return false
		}
		backup, err := s.recordRepo.FindByID(ctx, body.RecordID)
		if err != nil || backup == nil || backup.Status != model.BackupRecordStatusRunning {
			return false
		}
		// Heartbeat first; fall back to the record's own update time.
		return s.nodeRecentlySeen(ctx, cmd.NodeID, threshold) || backup.UpdatedAt.After(threshold)
	}

	if cmd.Type == model.AgentCommandTypeRestoreRecord {
		if s.restoreRepo == nil {
			return false
		}
		var body struct {
			RestoreRecordID uint `json:"restoreRecordId"`
		}
		if json.Unmarshal([]byte(cmd.Payload), &body) != nil || body.RestoreRecordID == 0 {
			return false
		}
		restore, err := s.restoreRepo.FindByID(ctx, body.RestoreRecordID)
		if err != nil || restore == nil || restore.Status != model.RestoreRecordStatusRunning {
			return false
		}
		// Heartbeat first; fall back to the record's own update time.
		return s.nodeRecentlySeen(ctx, cmd.NodeID, threshold) || restore.UpdatedAt.After(threshold)
	}

	return false
}
// nodeRecentlySeen reports whether the node is marked online and its LastSeen
// timestamp is after threshold. A lookup error or a missing node yields false.
func (s *AgentService) nodeRecentlySeen(ctx context.Context, nodeID uint, threshold time.Time) bool {
	found, err := s.nodeRepo.FindByID(ctx, nodeID)
	if err == nil && found != nil {
		return found.Status == model.NodeStatusOnline && found.LastSeen.After(threshold)
	}
	return false
}
// failLinkedRecord 根据命令类型把关联记录标记为 failed。
// 只对仍然处于 running 状态的记录生效,避免覆盖已完成的结果。
func (s *AgentService) failLinkedRecord(ctx context.Context, cmd *model.AgentCommand) {

View File

@@ -1,654 +0,0 @@
package service
import (
"context"
"errors"
"path/filepath"
"strings"
"testing"
"time"
"backupx/server/internal/config"
"backupx/server/internal/database"
"backupx/server/internal/logger"
"backupx/server/internal/model"
"backupx/server/internal/repository"
"backupx/server/internal/storage/codec"
"gorm.io/gorm"
)
// newAgentServicePoolTestHarness builds an AgentService backed by a fresh
// database file in a temporary directory and seeds it with two online remote
// nodes ("edge-owner", "edge-other"), one local_disk storage target, one
// pooled task (NodeID 0, pool tag "db") and one running backup record owned
// by the owner node.
// It returns the service, the database handle, the record and command
// repositories, and the owner and other nodes, in that order.
func newAgentServicePoolTestHarness(t *testing.T) (*AgentService, *gorm.DB, repository.BackupRecordRepository, repository.AgentCommandRepository, *model.Node, *model.Node) {
t.Helper()
log, err := logger.New(config.LogConfig{Level: "error"})
if err != nil {
t.Fatalf("logger.New returned error: %v", err)
}
db, err := database.Open(config.DatabaseConfig{Path: filepath.Join(t.TempDir(), "backupx.db")}, log)
if err != nil {
t.Fatalf("database.Open returned error: %v", err)
}
cipher := codec.NewConfigCipher("agent-service-secret")
nodeRepo := repository.NewNodeRepository(db)
taskRepo := repository.NewBackupTaskRepository(db)
recordRepo := repository.NewBackupRecordRepository(db)
storageRepo := repository.NewStorageTargetRepository(db)
cmdRepo := repository.NewAgentCommandRepository(db)
// Two online remote nodes: the record owner and an unrelated node.
owner := &model.Node{Name: "edge-owner", Token: "owner-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
other := &model.Node{Name: "edge-other", Token: "other-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
if err := nodeRepo.Create(context.Background(), owner); err != nil {
t.Fatalf("create owner node: %v", err)
}
if err := nodeRepo.Create(context.Background(), other); err != nil {
t.Fatalf("create other node: %v", err)
}
// Storage target whose encrypted config points at a temporary base path.
targetConfig, err := cipher.EncryptJSON(map[string]any{"basePath": t.TempDir()})
if err != nil {
t.Fatalf("EncryptJSON returned error: %v", err)
}
target := &model.StorageTarget{Name: "local", Type: "local_disk", Enabled: true, ConfigCiphertext: targetConfig, ConfigVersion: 1, LastTestStatus: "unknown"}
if err := storageRepo.Create(context.Background(), target); err != nil {
t.Fatalf("create storage target: %v", err)
}
// Pooled task: not bound to a node (NodeID 0), selected via NodePoolTag.
task := &model.BackupTask{
Name: "pooled-task",
Type: "file",
Enabled: true,
SourcePath: "/srv/data",
StorageTargetID: target.ID,
NodeID: 0,
NodePoolTag: "db",
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
LastStatus: "running",
}
if err := taskRepo.Create(context.Background(), task); err != nil {
t.Fatalf("create task: %v", err)
}
// Running record for the pooled task, assigned to the owner node.
record := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: target.ID,
NodeID: owner.ID,
Status: model.BackupRecordStatusRunning,
StartedAt: time.Now().UTC(),
}
if err := recordRepo.Create(context.Background(), record); err != nil {
t.Fatalf("create record: %v", err)
}
return NewAgentService(nodeRepo, taskRepo, recordRepo, storageRepo, cmdRepo, cipher), db, recordRepo, cmdRepo, owner, other
}
// TestAgentServicePooledTaskUsesRecordNodeForSpecAndRecordUpdates verifies
// that for a pooled task the node owning the running record may fetch the
// task spec and update the record, while another node is rejected for both.
// It also checks that the storage target id and upload results sent by the
// owner are persisted.
func TestAgentServicePooledTaskUsesRecordNodeForSpecAndRecordUpdates(t *testing.T) {
svc, _, records, _, owner, other := newAgentServicePoolTestHarness(t)
ctx := context.Background()
// The owner of the running record can read the task spec.
spec, err := svc.GetTaskSpec(ctx, owner, 1)
if err != nil {
t.Fatalf("owner GetTaskSpec returned error: %v", err)
}
if spec.TaskID != 1 || len(spec.StorageTargets) != 1 {
t.Fatalf("unexpected spec: %#v", spec)
}
if _, err := svc.GetTaskSpec(ctx, other, 1); err == nil {
t.Fatal("expected non-owner node to be forbidden from pooled task spec")
}
// The owner reports success with per-target upload results.
if err := svc.UpdateRecord(ctx, owner, 1, AgentRecordUpdate{
Status: model.BackupRecordStatusSuccess,
FileName: "backup.tar.gz",
FileSize: 123,
StoragePath: "tasks/1/backup.tar.gz",
StorageTargetID: 2,
StorageUploadResults: []StorageUploadResultItem{
{StorageTargetID: 1, StorageTargetName: "first", Status: "failed", Error: "boom"},
{StorageTargetID: 2, StorageTargetName: "second", Status: "success", StoragePath: "tasks/1/backup.tar.gz", FileSize: 123},
},
}); err != nil {
t.Fatalf("owner UpdateRecord returned error: %v", err)
}
updated, err := records.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID returned error: %v", err)
}
if updated.Status != model.BackupRecordStatusSuccess || updated.NodeID != owner.ID {
t.Fatalf("unexpected updated record: %#v", updated)
}
if updated.StorageTargetID != 2 {
t.Fatalf("expected successful storage target id 2, got %d", updated.StorageTargetID)
}
if !strings.Contains(updated.StorageUploadResults, `"storageTargetName":"second"`) {
t.Fatalf("expected upload results to be persisted, got %q", updated.StorageUploadResults)
}
// A node that does not own the record must not be able to update it.
if err := svc.UpdateRecord(ctx, other, 1, AgentRecordUpdate{LogAppend: "bad"}); err == nil {
t.Fatal("expected non-owner node to be forbidden from record update")
}
}
// TestAgentServiceUpdateRecordRefreshesTaskSummaryOnTerminalStatus runs one
// subtest per terminal status (success, failed) and checks that UpdateRecord
// copies the status and the record's StartedAt onto the owning task's
// LastStatus / LastRunAt.
func TestAgentServiceUpdateRecordRefreshesTaskSummaryOnTerminalStatus(t *testing.T) {
	terminalStatuses := []string{model.BackupRecordStatusSuccess, model.BackupRecordStatusFailed}
	for _, terminal := range terminalStatuses {
		t.Run(terminal, func(t *testing.T) {
			// Each subtest gets its own harness so the statuses do not interfere.
			svc, _, records, _, owner, _ := newAgentServicePoolTestHarness(t)
			ctx := context.Background()

			record, recordErr := records.FindByID(ctx, 1)
			if recordErr != nil {
				t.Fatalf("FindByID record returned error: %v", recordErr)
			}

			update := AgentRecordUpdate{Status: terminal}
			if updateErr := svc.UpdateRecord(ctx, owner, record.ID, update); updateErr != nil {
				t.Fatalf("UpdateRecord returned error: %v", updateErr)
			}

			task, taskErr := svc.taskRepo.FindByID(ctx, record.TaskID)
			if taskErr != nil {
				t.Fatalf("FindByID task returned error: %v", taskErr)
			}
			if task.LastStatus != terminal {
				t.Fatalf("expected task LastStatus %q, got %q", terminal, task.LastStatus)
			}
			if task.LastRunAt == nil || !task.LastRunAt.Equal(record.StartedAt) {
				t.Fatalf("expected task LastRunAt to match record startedAt %s, got %#v", record.StartedAt, task.LastRunAt)
			}
		})
	}
}
// TestAgentServiceUpdateRecordReturnsTaskSummaryUpdateError swaps the task
// repository for one whose Update always fails and checks that UpdateRecord
// surfaces that error (matched with errors.Is).
func TestAgentServiceUpdateRecordReturnsTaskSummaryUpdateError(t *testing.T) {
	svc, _, _, _, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	expectedErr := errors.New("task update failed")
	// Wrap the real repository so every method except Update keeps working.
	failingRepo := &failingUpdateTaskRepo{
		BackupTaskRepository: svc.taskRepo,
		err:                  expectedErr,
	}
	svc.taskRepo = failingRepo

	got := svc.UpdateRecord(ctx, owner, 1, AgentRecordUpdate{Status: model.BackupRecordStatusSuccess})
	if !errors.Is(got, expectedErr) {
		t.Fatalf("expected task update error %v, got %v", expectedErr, got)
	}
}
// TestAgentServiceProcessStaleCommandsFailsPendingRunTaskRecord creates a
// pending run-task command dated one hour ago, runs processStaleCommands with a
// 30-minute cutoff, and expects the command to be timed out and record 1 to be
// failed with CompletedAt set.
func TestAgentServiceProcessStaleCommandsFailsPendingRunTaskRecord(t *testing.T) {
	svc, _, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	createdAt := time.Now().UTC().Add(-time.Hour)
	staleCmd := &model.AgentCommand{
		NodeID:    owner.ID,
		Type:      model.AgentCommandTypeRunTask,
		Status:    model.AgentCommandStatusPending,
		Payload:   `{"recordId":1}`,
		CreatedAt: createdAt,
	}
	if createErr := commands.Create(ctx, staleCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	// The command itself must be timed out.
	gotCmd, cmdErr := commands.FindByID(ctx, staleCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected command timeout, got %#v", gotCmd)
	}

	// The linked backup record must be closed out as failed.
	gotRecord, recErr := records.FindByID(ctx, 1)
	if recErr != nil {
		t.Fatalf("FindByID record returned error: %v", recErr)
	}
	if gotRecord.Status != model.BackupRecordStatusFailed {
		t.Fatalf("expected record failed, got %#v", gotRecord)
	}
	if gotRecord.CompletedAt == nil {
		t.Fatal("expected failed record completedAt to be set")
	}
}
// TestAgentServiceProcessStaleCommandsFailsPendingRestoreRecord creates a
// running restore record plus a pending restore command dated one hour ago,
// runs processStaleCommands with a 30-minute cutoff, and expects the command
// to be timed out and the restore record to be failed with CompletedAt set.
func TestAgentServiceProcessStaleCommandsFailsPendingRestoreRecord(t *testing.T) {
	svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	restoreRepo := repository.NewRestoreRecordRepository(db)
	// Use the shared fixture helper so this test builds the restore record the
	// same way as the other restore tests in this file.
	restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
	svc.SetRestoreRepository(restoreRepo)

	oldCommand := &model.AgentCommand{
		NodeID:    owner.ID,
		Type:      model.AgentCommandTypeRestoreRecord,
		Status:    model.AgentCommandStatusPending,
		Payload:   `{"restoreRecordId":1}`,
		CreatedAt: time.Now().UTC().Add(-time.Hour),
	}
	if err := commands.Create(ctx, oldCommand); err != nil {
		t.Fatalf("Create command returned error: %v", err)
	}

	svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))

	// The command itself must be timed out.
	updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
	if err != nil {
		t.Fatalf("FindByID command returned error: %v", err)
	}
	if updatedCommand.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected command timeout, got %#v", updatedCommand)
	}

	// The linked restore record must be closed out as failed.
	updatedRestore, err := restoreRepo.FindByID(ctx, restore.ID)
	if err != nil {
		t.Fatalf("FindByID restore returned error: %v", err)
	}
	if updatedRestore.Status != model.RestoreRecordStatusFailed {
		t.Fatalf("expected restore failed, got %#v", updatedRestore)
	}
	if updatedRestore.CompletedAt == nil {
		t.Fatal("expected failed restore completedAt to be set")
	}
}
// TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRunTaskRecord
// creates a run-task command dispatched one hour ago for record 1 (left as the
// harness created it) and expects processStaleCommands to leave the command
// dispatched and the record running.
func TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRunTaskRecord(t *testing.T) {
	svc, _, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	dispatchedAt := time.Now().UTC().Add(-time.Hour)
	activeCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRunTask,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"recordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, activeCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, activeCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusDispatched {
		t.Fatalf("expected active command to remain dispatched, got %#v", gotCmd)
	}

	gotRecord, recErr := records.FindByID(ctx, 1)
	if recErr != nil {
		t.Fatalf("FindByID record returned error: %v", recErr)
	}
	if gotRecord.Status != model.BackupRecordStatusRunning {
		t.Fatalf("expected active record to remain running, got %#v", gotRecord)
	}
}
// TestAgentServiceProcessStaleCommandsKeepsDispatchedRunTaskWhenNodeHeartbeatIsFresh
// back-dates record 1's updated_at by one hour but sets the owner's last_seen
// to now, then expects processStaleCommands to keep the dispatched run-task
// command and its record untouched.
func TestAgentServiceProcessStaleCommandsKeepsDispatchedRunTaskWhenNodeHeartbeatIsFresh(t *testing.T) {
	svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	dispatchedAt := time.Now().UTC().Add(-time.Hour)

	// Stale record timestamp, fresh node heartbeat.
	if tsErr := setBackupRecordUpdatedAt(db, 1, dispatchedAt); tsErr != nil {
		t.Fatalf("set backup record updated_at: %v", tsErr)
	}
	heartbeatErr := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error
	if heartbeatErr != nil {
		t.Fatalf("set owner last_seen: %v", heartbeatErr)
	}

	dispatchedCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRunTask,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"recordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, dispatchedCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, dispatchedCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusDispatched {
		t.Fatalf("expected command to remain dispatched while node heartbeat is fresh, got %#v", gotCmd)
	}

	gotRecord, recErr := records.FindByID(ctx, 1)
	if recErr != nil {
		t.Fatalf("FindByID record returned error: %v", recErr)
	}
	if gotRecord.Status != model.BackupRecordStatusRunning {
		t.Fatalf("expected record to remain running while node heartbeat is fresh, got %#v", gotRecord)
	}
}
// TestAgentServiceProcessStaleCommandsTimesOutShortCommandEvenWhenNodeHeartbeatIsFresh
// sets the owner's last_seen to now and creates a list-dir command dispatched
// one hour ago; processStaleCommands is expected to time it out regardless of
// the fresh heartbeat.
func TestAgentServiceProcessStaleCommandsTimesOutShortCommandEvenWhenNodeHeartbeatIsFresh(t *testing.T) {
	svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	dispatchedAt := time.Now().UTC().Add(-time.Hour)

	heartbeatErr := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error
	if heartbeatErr != nil {
		t.Fatalf("set owner last_seen: %v", heartbeatErr)
	}

	listDirCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeListDir,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"path":"/srv"}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, listDirCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, listDirCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected stale short command timeout, got %#v", gotCmd)
	}
}
// TestAgentServiceProcessStaleCommandsTimesOutDispatchedRunTaskWhenRecordIsTerminalEvenWithFreshHeartbeat
// marks record 1 as failed/completed, keeps the owner's heartbeat fresh, and
// expects a run-task command dispatched one hour ago to be timed out anyway.
func TestAgentServiceProcessStaleCommandsTimesOutDispatchedRunTaskWhenRecordIsTerminalEvenWithFreshHeartbeat(t *testing.T) {
	svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	dispatchedAt := time.Now().UTC().Add(-time.Hour)

	heartbeatErr := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error
	if heartbeatErr != nil {
		t.Fatalf("set owner last_seen: %v", heartbeatErr)
	}

	// Put the linked record into a terminal state before the sweep runs.
	terminal, findErr := records.FindByID(ctx, 1)
	if findErr != nil {
		t.Fatalf("FindByID record returned error: %v", findErr)
	}
	completedAt := time.Now().UTC().Add(-time.Minute)
	terminal.Status = model.BackupRecordStatusFailed
	terminal.CompletedAt = &completedAt
	if updateErr := records.Update(ctx, terminal); updateErr != nil {
		t.Fatalf("Update terminal record returned error: %v", updateErr)
	}

	dispatchedCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRunTask,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"recordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, dispatchedCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, dispatchedCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected command timeout when linked record is terminal, got %#v", gotCmd)
	}
}
// TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRunTaskRecord
// back-dates both record 1's updated_at and the owner's last_seen by one hour,
// then expects the dispatched run-task command to be timed out and the record
// to be failed.
func TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRunTaskRecord(t *testing.T) {
	svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	dispatchedAt := time.Now().UTC().Add(-time.Hour)

	// Neither the record nor the node shows activity after dispatch.
	if tsErr := setBackupRecordUpdatedAt(db, 1, dispatchedAt); tsErr != nil {
		t.Fatalf("set backup record updated_at: %v", tsErr)
	}
	heartbeatErr := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", dispatchedAt).Error
	if heartbeatErr != nil {
		t.Fatalf("set owner last_seen: %v", heartbeatErr)
	}

	inactiveCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRunTask,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"recordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, inactiveCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, inactiveCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected inactive command timeout, got %#v", gotCmd)
	}

	gotRecord, recErr := records.FindByID(ctx, 1)
	if recErr != nil {
		t.Fatalf("FindByID record returned error: %v", recErr)
	}
	if gotRecord.Status != model.BackupRecordStatusFailed {
		t.Fatalf("expected inactive record failed, got %#v", gotRecord)
	}
}
// TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRestoreRecord
// creates a running restore record (left as freshly created) and a restore
// command dispatched one hour ago, then expects processStaleCommands to leave
// the command dispatched and the restore running.
func TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRestoreRecord(t *testing.T) {
	svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	restoreRepo := repository.NewRestoreRecordRepository(db)
	restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
	svc.SetRestoreRepository(restoreRepo)

	dispatchedAt := time.Now().UTC().Add(-time.Hour)
	activeCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRestoreRecord,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"restoreRecordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, activeCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, activeCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusDispatched {
		t.Fatalf("expected active restore command to remain dispatched, got %#v", gotCmd)
	}

	gotRestore, restoreErr := restoreRepo.FindByID(ctx, restore.ID)
	if restoreErr != nil {
		t.Fatalf("FindByID restore returned error: %v", restoreErr)
	}
	if gotRestore.Status != model.RestoreRecordStatusRunning {
		t.Fatalf("expected active restore to remain running, got %#v", gotRestore)
	}
}
// TestAgentServiceProcessStaleCommandsKeepsDispatchedRestoreWhenNodeHeartbeatIsFresh
// back-dates the restore record's updated_at by one hour but sets the owner's
// last_seen to now, then expects processStaleCommands to keep the dispatched
// restore command and leave the restore record running.
func TestAgentServiceProcessStaleCommandsKeepsDispatchedRestoreWhenNodeHeartbeatIsFresh(t *testing.T) {
	svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()
	restoreRepo := repository.NewRestoreRecordRepository(db)
	restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
	svc.SetRestoreRepository(restoreRepo)

	// Stale restore timestamp, fresh node heartbeat.
	dispatchedAt := time.Now().UTC().Add(-time.Hour)
	if err := setRestoreRecordUpdatedAt(db, restore.ID, dispatchedAt); err != nil {
		t.Fatalf("set restore record updated_at: %v", err)
	}
	if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error; err != nil {
		t.Fatalf("set owner last_seen: %v", err)
	}

	oldCommand := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRestoreRecord,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"restoreRecordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if err := commands.Create(ctx, oldCommand); err != nil {
		t.Fatalf("Create command returned error: %v", err)
	}

	svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))

	updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
	if err != nil {
		t.Fatalf("FindByID command returned error: %v", err)
	}
	if updatedCommand.Status != model.AgentCommandStatusDispatched {
		t.Fatalf("expected restore command to remain dispatched while node heartbeat is fresh, got %#v", updatedCommand)
	}

	// Mirror the run-task heartbeat test: the linked record must not be failed
	// while its command is still considered live.
	updatedRestore, err := restoreRepo.FindByID(ctx, restore.ID)
	if err != nil {
		t.Fatalf("FindByID restore returned error: %v", err)
	}
	if updatedRestore.Status != model.RestoreRecordStatusRunning {
		t.Fatalf("expected restore to remain running while node heartbeat is fresh, got %#v", updatedRestore)
	}
}
// TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRestoreRecord
// back-dates both the restore record's updated_at and the owner's last_seen by
// one hour, then expects the dispatched restore command to be timed out and
// the restore record to be failed.
func TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRestoreRecord(t *testing.T) {
	svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	restoreRepo := repository.NewRestoreRecordRepository(db)
	restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
	svc.SetRestoreRepository(restoreRepo)

	// Neither the restore record nor the node shows activity after dispatch.
	dispatchedAt := time.Now().UTC().Add(-time.Hour)
	if tsErr := setRestoreRecordUpdatedAt(db, restore.ID, dispatchedAt); tsErr != nil {
		t.Fatalf("set restore record updated_at: %v", tsErr)
	}
	heartbeatErr := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", dispatchedAt).Error
	if heartbeatErr != nil {
		t.Fatalf("set owner last_seen: %v", heartbeatErr)
	}

	inactiveCmd := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRestoreRecord,
		Status:       model.AgentCommandStatusDispatched,
		Payload:      `{"restoreRecordId":1}`,
		CreatedAt:    dispatchedAt,
		DispatchedAt: &dispatchedAt,
	}
	if createErr := commands.Create(ctx, inactiveCmd); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	cutoff := time.Now().UTC().Add(-30 * time.Minute)
	svc.processStaleCommands(ctx, cutoff)

	gotCmd, cmdErr := commands.FindByID(ctx, inactiveCmd.ID)
	if cmdErr != nil {
		t.Fatalf("FindByID command returned error: %v", cmdErr)
	}
	if gotCmd.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected inactive restore command timeout, got %#v", gotCmd)
	}

	gotRestore, restoreErr := restoreRepo.FindByID(ctx, restore.ID)
	if restoreErr != nil {
		t.Fatalf("FindByID restore returned error: %v", restoreErr)
	}
	if gotRestore.Status != model.RestoreRecordStatusFailed {
		t.Fatalf("expected inactive restore failed, got %#v", gotRestore)
	}
}
// TestAgentServiceSubmitCommandResultDoesNotOverwriteTerminalCommand stores a
// command that is already in the timeout state and checks that a late
// successful result is accepted without error but leaves the stored status and
// (empty) result unchanged.
func TestAgentServiceSubmitCommandResultDoesNotOverwriteTerminalCommand(t *testing.T) {
	svc, _, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	completedAt := time.Now().UTC().Add(-time.Minute)
	timedOut := &model.AgentCommand{
		NodeID:       owner.ID,
		Type:         model.AgentCommandTypeRunTask,
		Status:       model.AgentCommandStatusTimeout,
		Payload:      `{"recordId":1}`,
		ErrorMessage: "timeout",
		CompletedAt:  &completedAt,
	}
	if createErr := commands.Create(ctx, timedOut); createErr != nil {
		t.Fatalf("Create command returned error: %v", createErr)
	}

	// A result arriving after the command was closed out.
	lateResult := AgentCommandResult{Success: true, Result: []byte(`{"ok":true}`)}
	if submitErr := svc.SubmitCommandResult(ctx, owner, timedOut.ID, lateResult); submitErr != nil {
		t.Fatalf("SubmitCommandResult returned error: %v", submitErr)
	}

	stored, findErr := commands.FindByID(ctx, timedOut.ID)
	if findErr != nil {
		t.Fatalf("FindByID command returned error: %v", findErr)
	}
	if stored.Status != model.AgentCommandStatusTimeout {
		t.Fatalf("expected terminal command status to remain timeout, got %#v", stored)
	}
	if stored.Result != "" {
		t.Fatalf("expected terminal command result to remain empty, got %q", stored.Result)
	}
}
// TestAgentServiceUpdateRecordDoesNotOverwriteTerminalRecord marks record 1 as
// failed with error "timeout", then sends a late success update and checks
// that the call returns no error while status, file name, storage path and
// error message stay as they were.
func TestAgentServiceUpdateRecordDoesNotOverwriteTerminalRecord(t *testing.T) {
	svc, _, records, _, owner, _ := newAgentServicePoolTestHarness(t)
	ctx := context.Background()

	// Close the record out as failed before the agent reports anything.
	record, findErr := records.FindByID(ctx, 1)
	if findErr != nil {
		t.Fatalf("FindByID record returned error: %v", findErr)
	}
	completedAt := time.Now().UTC().Add(-time.Minute)
	record.Status = model.BackupRecordStatusFailed
	record.ErrorMessage = "timeout"
	record.CompletedAt = &completedAt
	if saveErr := records.Update(ctx, record); saveErr != nil {
		t.Fatalf("Update record returned error: %v", saveErr)
	}

	lateUpdate := AgentRecordUpdate{
		Status:       model.BackupRecordStatusSuccess,
		FileName:     "late.tar.gz",
		FileSize:     42,
		Checksum:     "late",
		StoragePath:  "late/path",
		ErrorMessage: "late success",
		LogAppend:    "late log\n",
	}
	if updateErr := svc.UpdateRecord(ctx, owner, record.ID, lateUpdate); updateErr != nil {
		t.Fatalf("UpdateRecord returned error: %v", updateErr)
	}

	stored, reloadErr := records.FindByID(ctx, record.ID)
	if reloadErr != nil {
		t.Fatalf("FindByID updated record returned error: %v", reloadErr)
	}
	if stored.Status != model.BackupRecordStatusFailed {
		t.Fatalf("expected terminal record status to remain failed, got %#v", stored)
	}
	if stored.FileName != "" || stored.StoragePath != "" || stored.ErrorMessage != "timeout" {
		t.Fatalf("expected terminal record fields to remain unchanged, got %#v", stored)
	}
}
// createAgentServiceRestoreRecord persists a running restore record for backup
// record 1 / task 1 on the given node, started one hour ago, and returns it.
// The test is failed immediately if the repository rejects the insert.
func createAgentServiceRestoreRecord(t *testing.T, repo repository.RestoreRecordRepository, nodeID uint) *model.RestoreRecord {
	t.Helper()

	startedAt := time.Now().UTC().Add(-time.Hour)
	fixture := &model.RestoreRecord{
		BackupRecordID: 1,
		TaskID:         1,
		NodeID:         nodeID,
		Status:         model.RestoreRecordStatusRunning,
		StartedAt:      startedAt,
	}
	createErr := repo.Create(context.Background(), fixture)
	if createErr != nil {
		t.Fatalf("Create restore returned error: %v", createErr)
	}
	return fixture
}
// setBackupRecordUpdatedAt writes updatedAt into the updated_at column of the
// backup record with the given id via UpdateColumn and returns the resulting
// error, if any.
func setBackupRecordUpdatedAt(db *gorm.DB, id uint, updatedAt time.Time) error {
	scoped := db.Model(&model.BackupRecord{}).Where("id = ?", id)
	result := scoped.UpdateColumn("updated_at", updatedAt)
	return result.Error
}
// setRestoreRecordUpdatedAt writes updatedAt into the updated_at column of the
// restore record with the given id via UpdateColumn and returns the resulting
// error, if any.
func setRestoreRecordUpdatedAt(db *gorm.DB, id uint, updatedAt time.Time) error {
	scoped := db.Model(&model.RestoreRecord{}).Where("id = ?", id)
	result := scoped.UpdateColumn("updated_at", updatedAt)
	return result.Error
}
// failingUpdateTaskRepo is a test double that embeds a real
// BackupTaskRepository and overrides Update to always return err.
type failingUpdateTaskRepo struct {
// Embedded repository; every method other than Update is delegated to it.
repository.BackupTaskRepository
// err is the error returned by Update.
err error
}
// Update ignores its arguments and returns the configured error, shadowing the
// embedded repository's Update.
func (repo *failingUpdateTaskRepo) Update(context.Context, *model.BackupTask) error {
	injected := repo.err
	return injected
}

View File

@@ -52,11 +52,6 @@ type StorageUploadResultItem struct {
Error string `json:"error,omitempty"`
}
const (
uploadMaxAttempts = 3
uploadRetryBackoff = 10 * time.Second
)
type DownloadedArtifact struct {
FileName string
Reader io.ReadCloser
@@ -78,30 +73,29 @@ func collectTargetIDs(task *model.BackupTask) []uint {
}
type BackupExecutionService struct {
tasks repository.BackupTaskRepository
records repository.BackupRecordRepository
targets repository.StorageTargetRepository
nodeRepo repository.NodeRepository
storageRegistry *storage.Registry
runnerRegistry *backup.Registry
logHub *backup.LogHub
retention *backupretention.Service
cipher *codec.ConfigCipher
tasks repository.BackupTaskRepository
records repository.BackupRecordRepository
targets repository.StorageTargetRepository
nodeRepo repository.NodeRepository
storageRegistry *storage.Registry
runnerRegistry *backup.Registry
logHub *backup.LogHub
retention *backupretention.Service
cipher *codec.ConfigCipher
notifier BackupResultNotifier
agentDispatcher AgentDispatcher
replicationHook ReplicationTrigger
dependentsResolver DependentsResolver
async func(func())
now func() time.Time
tempDir string
semaphore chan struct{}
async func(func())
now func() time.Time
tempDir string
semaphore chan struct{}
// nodeSemaphores 节点级并发限制(按 NodeID 映射)。
// 没命中的 NodeID 走全局 semaphore节点配置 MaxConcurrent>0 时按该节点独立排队。
nodeSemaphores sync.Map
retries int // rclone 底层重试次数
bandwidthLimit string // rclone 带宽限制(全局默认,节点配置可覆盖)
retries int // rclone 底层重试次数
bandwidthLimit string // rclone 带宽限制(全局默认,节点配置可覆盖)
metrics *metrics.Metrics
taskLocks sync.Map
}
// SetMetrics 注入 Prometheus 采集器。nil 时所有埋点退化为 no-op。
@@ -276,9 +270,11 @@ func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint
if record == nil {
return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
}
if remote, err := s.deleteRemoteLocalDiskObject(ctx, record); err != nil {
// 集群场景保护:跨节点 local_disk 文件 Master 无法远程删除,拒绝操作以避免存储泄漏的错觉
if err := s.validateClusterAccessible(ctx, record); err != nil {
return err
} else if !remote && strings.TrimSpace(record.StoragePath) != "" {
}
if strings.TrimSpace(record.StoragePath) != "" {
provider, err := s.resolveProvider(ctx, record.StorageTargetID)
if err != nil {
return err
@@ -293,40 +289,6 @@ func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint
return nil
}
// deleteRemoteLocalDiskObject handles deletion of a backup object that lives
// on a non-local node's local_disk storage target by enqueueing a
// delete-storage-object command for that node.
//
// The boolean result is false when the record does not fall into that case
// (empty storage path, no node repository, node lookup failed / node missing /
// node is local, or the target is missing or not local_disk). It is true once
// the record has been identified as a remote local_disk object, whether or not
// the command could be enqueued; the error reports any failure.
func (s *BackupExecutionService) deleteRemoteLocalDiskObject(ctx context.Context, record *model.BackupRecord) (bool, error) {
	if strings.TrimSpace(record.StoragePath) == "" || s.nodeRepo == nil {
		return false, nil
	}

	// A node lookup error is treated the same as "not a remote node".
	ownerNode, lookupErr := s.nodeRepo.FindByID(ctx, record.NodeID)
	if lookupErr != nil || ownerNode == nil || ownerNode.IsLocal {
		return false, nil
	}

	storageTarget, targetErr := s.targets.FindByID(ctx, record.StorageTargetID)
	if targetErr != nil {
		return false, apperror.Internal("BACKUP_STORAGE_TARGET_GET_FAILED", "无法获取存储目标详情", targetErr)
	}
	if storageTarget == nil || !strings.EqualFold(storageTarget.Type, "local_disk") {
		return false, nil
	}

	// From here on the object is known to sit on a remote node's disk.
	if s.agentDispatcher == nil {
		message := fmt.Sprintf("该备份位于节点 %s 的本地磁盘local_diskMaster 无法跨节点删除。请确保 Agent 在线后再操作。", ownerNode.Name)
		return true, apperror.BadRequest("BACKUP_RECORD_CROSS_NODE_LOCAL_DISK", message, nil)
	}

	decryptedConfig := map[string]any{}
	if decryptErr := s.cipher.DecryptJSON(storageTarget.ConfigCiphertext, &decryptedConfig); decryptErr != nil {
		return true, apperror.Internal("BACKUP_STORAGE_TARGET_DECRYPT_FAILED", "无法解密存储目标配置", decryptErr)
	}

	payload := map[string]any{
		"targetType":   storageTarget.Type,
		"targetConfig": decryptedConfig,
		"storagePath":  record.StoragePath,
	}
	_, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, record.NodeID, model.AgentCommandTypeDeleteStorageObject, payload)
	if enqueueErr != nil {
		return true, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发远程备份文件删除命令", enqueueErr)
	}
	return true, nil
}
// validateClusterAccessible 在跨节点 + local_disk 场景下拒绝 Master 端直接访问。
// 场景说明:远程 Agent 把备份写到其本机磁盘local_disk basePathMaster 的
// provider 指向的是 Master 本机的同名路径,访问会静默取错文件或 404。明确拒绝
@@ -364,11 +326,6 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
if task == nil {
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "备份任务不存在", fmt.Errorf("backup task %d not found", id))
}
unlock := s.acquireTaskStartLock(task.ID)
defer unlock()
if err := s.ensureTaskNotRunning(ctx, task); err != nil {
return nil, err
}
// 维护窗口校验:手动执行同样尊重窗口,避免业务高峰期误触发。
if strings.TrimSpace(task.MaintenanceWindows) != "" {
windows := backup.ParseMaintenanceWindows(task.MaintenanceWindows)
@@ -399,8 +356,8 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
if err := s.records.Create(ctx, record); err != nil {
return nil, apperror.Internal("BACKUP_RECORD_CREATE_FAILED", "无法创建备份记录", err)
}
runTask := *task
runTask.NodeID = resolvedNodeID
// 用池选出的节点 ID 复写 task 副本,使后续路由/执行沿用
task.NodeID = resolvedNodeID
task.LastRunAt = &startedAt
task.LastStatus = "running"
if err := s.tasks.Update(ctx, task); err != nil {
@@ -408,27 +365,27 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
}
// 多节点路由task.NodeID 指向远程节点时,把执行任务入队给 Agent
// NodeID=0 或本机节点时由 Master 直接执行。
if remoteNode := s.resolveRemoteNode(ctx, resolvedNodeID); remoteNode != nil {
if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
// 节点离线 → 立即把刚创建的 running 记录标记 failed返回明确错误
if remoteNode.Status != model.NodeStatusOnline {
offlineMsg := fmt.Sprintf("节点 %s 当前离线,无法执行备份任务", remoteNode.Name)
_ = s.finalizeRecord(ctx, &runTask, record.ID, startedAt, model.BackupRecordStatusFailed,
offlineMsg, "", "", 0, "", "", primaryTargetID)
_ = s.finalizeRecord(ctx, task, record.ID, startedAt, model.BackupRecordStatusFailed,
offlineMsg, "", "", 0, "", "")
return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
}
if _, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, resolvedNodeID, model.AgentCommandTypeRunTask, map[string]any{
if _, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRunTask, map[string]any{
"taskId": task.ID,
"recordId": record.ID,
}); enqueueErr != nil {
// 入队失败 → 在记录中标记失败,继续返回详情
_ = s.finalizeRecord(ctx, &runTask, record.ID, startedAt, model.BackupRecordStatusFailed,
"无法下发任务到远程节点: "+enqueueErr.Error(), "", "", 0, "", "", primaryTargetID)
_ = s.finalizeRecord(ctx, task, record.ID, startedAt, model.BackupRecordStatusFailed,
"无法下发任务到远程节点: "+enqueueErr.Error(), "", "", 0, "", "")
return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发任务到远程节点", enqueueErr)
}
return s.getRecordDetail(ctx, record.ID)
}
run := func() {
s.executeTask(context.Background(), &runTask, record.ID, startedAt)
s.executeTask(context.Background(), task, record.ID, startedAt)
}
if async {
s.async(run)
@@ -438,27 +395,6 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
return s.getRecordDetail(ctx, record.ID)
}
// acquireTaskStartLock locks the mutex stored in s.taskLocks under taskID,
// creating it on first use, and returns the function that unlocks it.
func (s *BackupExecutionService) acquireTaskStartLock(taskID uint) func() {
	// LoadOrStore returns the already-stored mutex if one exists for taskID.
	entry, _ := s.taskLocks.LoadOrStore(taskID, &sync.Mutex{})
	taskMutex := entry.(*sync.Mutex)
	taskMutex.Lock()
	return taskMutex.Unlock
}
// ensureTaskNotRunning lists the task's backup records in the running state and
// returns a BACKUP_TASK_ALREADY_RUNNING bad-request error naming the first one
// found. It returns nil when there are none, and an internal error when the
// listing itself fails.
func (s *BackupExecutionService) ensureTaskNotRunning(ctx context.Context, task *model.BackupTask) error {
	taskID := task.ID
	filter := repository.BackupRecordListOptions{TaskID: &taskID, Status: model.BackupRecordStatusRunning}

	running, listErr := s.records.List(ctx, filter)
	if listErr != nil {
		return apperror.Internal("BACKUP_RECORD_LIST_FAILED", "无法检查任务运行状态", listErr)
	}
	if len(running) > 0 {
		message := fmt.Sprintf("任务「%s」正在运行记录 #%d请等待完成后再触发。", task.Name, running[0].ID)
		return apperror.BadRequest("BACKUP_TASK_ALREADY_RUNNING", message, nil)
	}
	return nil
}
// shouldNotify 按任务的告警策略决定是否发送本次通知。
// 成功结果:始终发送(方便用户确认备份状态)。
// 失败结果:仅当"最近 N 条记录(含本次)均为 failed"时发送N = AlertOnConsecutiveFails。
@@ -625,10 +561,9 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
var fileSize int64
var checksum string
var storagePath string
selectedStorageTargetID := task.StorageTargetID
var uploadResults []StorageUploadResultItem
completeRecord := func() {
if finalizeErr := s.finalizeRecord(ctx, task, recordID, startedAt, status, errMessage, logger.String(), fileName, fileSize, checksum, storagePath, selectedStorageTargetID); finalizeErr != nil {
if finalizeErr := s.finalizeRecord(ctx, task, recordID, startedAt, status, errMessage, logger.String(), fileName, fileSize, checksum, storagePath); finalizeErr != nil {
logger.Errorf("写回备份记录失败:%v", finalizeErr)
}
// 采集任务执行结果到 Prometheus耗时 + 产出字节 + 状态计数)
@@ -710,11 +645,6 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
logger.Errorf("没有关联的存储目标")
return
}
storageUsage, err := s.storageUsageSnapshot(ctx)
if err != nil {
logger.Warnf("读取存储目标用量失败,跳过本次软配额校验:%v", err)
storageUsage = map[uint]int64{}
}
// 并行上传到所有目标
uploadResults = make([]StorageUploadResultItem, len(targetIDs))
@@ -738,7 +668,15 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
}
// 软限额校验QuotaBytes > 0 时,已累计 + 本次 > 配额 → 拒绝上传
if target != nil && target.QuotaBytes > 0 {
currentUsed := storageUsage[targetID]
currentUsed := int64(0)
if items, err := s.records.StorageUsage(ctx); err == nil {
for _, it := range items {
if it.StorageTargetID == targetID {
currentUsed = it.TotalSize
break
}
}
}
if currentUsed+fileSize > target.QuotaBytes {
quotaMsg := fmt.Sprintf("超出存储目标 %s 的配额(%d + %d > %d", targetName, currentUsed, fileSize, target.QuotaBytes)
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: quotaMsg}
@@ -747,18 +685,15 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
}
}
logger.Infof("开始上传备份到存储目标:%s", targetName)
// 上传级重试:最多 3 次,等待时间随 context 取消及时退出。
// Upload-level retry: at most 3 attempts with quadratic backoff (attempt² × 10s: 40s before the 2nd attempt, 90s before the 3rd).
maxAttempts := 3
var lastUploadErr error
var hr *hashingReader
for attempt := 1; attempt <= uploadMaxAttempts; attempt++ {
for attempt := 1; attempt <= maxAttempts; attempt++ {
if attempt > 1 {
backoff := time.Duration(attempt-1) * uploadRetryBackoff
backoff := time.Duration(attempt*attempt) * 10 * time.Second
logger.Warnf("存储目标 %s 第 %d 次重试(等待 %v%v", targetName, attempt, backoff, lastUploadErr)
if waitErr := waitForUploadRetry(ctx, backoff); waitErr != nil {
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: waitErr.Error()}
logger.Warnf("存储目标 %s 上传重试已取消:%v", targetName, waitErr)
return
}
time.Sleep(backoff)
}
artifact, openErr := os.Open(finalPath)
if openErr != nil {
@@ -788,7 +723,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
}
if lastUploadErr != nil {
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: lastUploadErr.Error()}
logger.Warnf("存储目标 %s 上传失败(已重试 %d 次):%v", targetName, uploadMaxAttempts, lastUploadErr)
logger.Warnf("存储目标 %s 上传失败(已重试 %d 次):%v", targetName, maxAttempts, lastUploadErr)
return
}
// 完整性校验:对比实际传输字节数
@@ -824,9 +759,6 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
for _, r := range uploadResults {
if r.Status == "success" {
anySuccess = true
if selectedStorageTargetID == task.StorageTargetID {
selectedStorageTargetID = r.StorageTargetID
}
} else if r.Error != "" {
failedMessages = append(failedMessages, fmt.Sprintf("%s: %s", r.StorageTargetName, r.Error))
}
@@ -859,7 +791,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
record := &model.BackupRecord{
ID: recordID,
TaskID: task.ID,
StorageTargetID: selectedStorageTargetID,
StorageTargetID: task.StorageTargetID,
NodeID: task.NodeID,
Status: "success",
FileName: fileName,
@@ -884,7 +816,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
}
}
func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model.BackupTask, recordID uint, startedAt time.Time, status string, errorMessage string, logContent string, fileName string, fileSize int64, checksum string, storagePath string, storageTargetID uint) error {
func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model.BackupTask, recordID uint, startedAt time.Time, status string, errorMessage string, logContent string, fileName string, fileSize int64, checksum string, storagePath string) error {
record, err := s.records.FindByID(ctx, recordID)
if err != nil {
return err
@@ -894,9 +826,6 @@ func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model
}
completedAt := s.now()
record.Status = status
if storageTargetID > 0 {
record.StorageTargetID = storageTargetID
}
record.FileName = fileName
record.FileSize = fileSize
record.Checksum = checksum
@@ -913,32 +842,6 @@ func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model
return s.tasks.Update(ctx, task)
}
// storageUsageSnapshot queries the record repository's StorageUsage once and
// returns the result as a map from storage target id to total size. A query
// failure is wrapped with the "storage usage snapshot" prefix.
func (s *BackupExecutionService) storageUsageSnapshot(ctx context.Context) (map[uint]int64, error) {
	rows, queryErr := s.records.StorageUsage(ctx)
	if queryErr != nil {
		return nil, fmt.Errorf("storage usage snapshot: %w", queryErr)
	}

	totals := make(map[uint]int64, len(rows))
	for _, row := range rows {
		targetID, total := row.StorageTargetID, row.TotalSize
		totals[targetID] = total
	}
	return totals, nil
}
func waitForUploadRetry(ctx context.Context, delay time.Duration) error {
if delay <= 0 {
return nil
}
timer := time.NewTimer(delay)
defer timer.Stop()
select {
case <-ctx.Done():
return ctx.Err()
case <-timer.C:
return nil
}
}
// resolveProvider returns the storage provider for targetID by delegating to
// resolveProviderForNode with a node ID of 0.
func (s *BackupExecutionService) resolveProvider(ctx context.Context, targetID uint) (storage.StorageProvider, error) {
return s.resolveProviderForNode(ctx, targetID, 0)
}
@@ -1054,9 +957,6 @@ func (s *BackupExecutionService) loadRecordProvider(ctx context.Context, recordI
if record == nil {
return nil, nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
}
if err := s.validateClusterAccessible(ctx, record); err != nil {
return nil, nil, err
}
provider, err := s.resolveProvider(ctx, record.StorageTargetID)
if err != nil {
return nil, nil, err

View File

@@ -2,15 +2,9 @@ package service
import (
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"time"
"backupx/server/internal/backup"
backupretention "backupx/server/internal/backup/retention"
@@ -24,70 +18,6 @@ import (
storageRclone "backupx/server/internal/storage/rclone"
)
// testStorageFactory is a storage factory for tests that hands out
// pre-built providers looked up by the "name" entry of the target config.
type testStorageFactory struct {
	// providers maps a config name to the provider instance returned for it.
	providers map[string]*testStorageProvider
}

// Type reports the provider type this factory is registered under.
func (f *testStorageFactory) Type() storage.ProviderType {
	return "test_storage"
}

// New returns the provider registered under config["name"]. A missing or
// non-string name is treated as the empty string; an unregistered name
// yields an "unknown provider" error.
func (f *testStorageFactory) New(_ context.Context, config map[string]any) (storage.StorageProvider, error) {
	key, _ := config["name"].(string)
	if registered := f.providers[key]; registered != nil {
		return registered, nil
	}
	return nil, fmt.Errorf("unknown provider %q", key)
}
// testStorageProvider is an in-memory storage provider used by the execution
// tests. Its behaviour is steered through the fields below.
type testStorageProvider struct {
	// name is only used in the upload failure message.
	name string
	// failUpload makes every Upload call return an error.
	failUpload bool
	// blockUpload, when non-nil, is received from before Upload proceeds.
	blockUpload <-chan struct{}
	// onUpload, when non-nil, is invoked on every Upload call.
	onUpload func()
	// objects holds the uploaded payloads keyed by object key.
	objects map[string][]byte
}

// Type reports the provider type of this test double.
func (p *testStorageProvider) Type() storage.ProviderType { return "test_storage" }

// TestConnection always succeeds.
func (p *testStorageProvider) TestConnection(context.Context) error {
	return nil
}

// Upload waits on blockUpload (if set), runs the onUpload hook (if set), and
// then either fails because failUpload is set or stores the whole reader
// content under objectKey.
func (p *testStorageProvider) Upload(_ context.Context, objectKey string, reader io.Reader, _ int64, _ map[string]string) error {
	if gate := p.blockUpload; gate != nil {
		<-gate
	}
	if hook := p.onUpload; hook != nil {
		hook()
	}
	if p.failUpload {
		return fmt.Errorf("upload failed for %s", p.name)
	}

	payload, readErr := io.ReadAll(reader)
	if readErr != nil {
		return readErr
	}
	// Allocate the backing map lazily so a zero-value provider is usable.
	if p.objects == nil {
		p.objects = make(map[string][]byte)
	}
	p.objects[objectKey] = payload
	return nil
}

// Download returns a reader over the stored payload, or a "not found" error
// when nothing was uploaded under objectKey.
func (p *testStorageProvider) Download(_ context.Context, objectKey string) (io.ReadCloser, error) {
	if content, found := p.objects[objectKey]; found {
		return io.NopCloser(strings.NewReader(string(content))), nil
	}
	return nil, fmt.Errorf("object %s not found", objectKey)
}

// Delete removes objectKey from the in-memory store; a missing key is not an
// error.
func (p *testStorageProvider) Delete(_ context.Context, objectKey string) error {
	delete(p.objects, objectKey)
	return nil
}

// List is not needed by the tests and reports no objects.
func (p *testStorageProvider) List(context.Context, string) ([]storage.ObjectInfo, error) {
	return nil, nil
}
func newExecutionTestServices(t *testing.T) (*BackupExecutionService, *BackupRecordService, repository.BackupTaskRepository, repository.StorageTargetRepository, repository.BackupRecordRepository, string, string) {
t.Helper()
baseDir := t.TempDir()
@@ -155,377 +85,6 @@ func TestBackupExecutionServiceRunTaskByIDSync(t *testing.T) {
}
}
// TestBackupExecutionServiceNodePoolSelectionDoesNotPersistTaskNodeID runs a
// task that has NodeID 0 and NodePoolTag "db" against two online nodes and
// checks that the stored task keeps NodeID 0 and its tag, while the backup
// record and the single dispatched RunTask command both reference node 10.
func TestBackupExecutionServiceNodePoolSelectionDoesNotPersistTaskNodeID(t *testing.T) {
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
nodeRepo := &nodeRepoStub{nodes: []model.Node{
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline, Labels: "prod,db"},
{ID: 11, Name: "edge-b", Token: "edge-b-token", Status: model.NodeStatusOnline, Labels: "prod,db"},
}}
dispatcher := &fakeDispatcher{}
executionService.SetClusterDependencies(nodeRepo, dispatcher)
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID returned error: %v", err)
}
// Turn the seeded task into a pooled task: no fixed node, only a pool tag.
task.NodeID = 0
task.NodePoolTag = "db"
if err := tasks.Update(ctx, task); err != nil {
t.Fatalf("Update task returned error: %v", err)
}
detail, err := executionService.RunTaskByID(ctx, 1)
if err != nil {
t.Fatalf("RunTaskByID returned error: %v", err)
}
// The task row itself must be unchanged by the run.
storedTask, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID after run returned error: %v", err)
}
if storedTask.NodeID != 0 {
t.Fatalf("expected pooled task NodeID to remain 0, got %d", storedTask.NodeID)
}
if storedTask.NodePoolTag != "db" {
t.Fatalf("expected pooled task tag to remain db, got %q", storedTask.NodePoolTag)
}
// The record, not the task, carries the node that was chosen for this run.
storedRecord, err := records.FindByID(ctx, detail.ID)
if err != nil {
t.Fatalf("FindByID record returned error: %v", err)
}
if storedRecord == nil || storedRecord.NodeID != 10 {
t.Fatalf("expected record to keep selected node 10, got %#v", storedRecord)
}
calls := dispatcher.snapshot()
if len(calls) != 1 || calls[0].NodeID != 10 || calls[0].CmdType != model.AgentCommandTypeRunTask {
t.Fatalf("unexpected dispatcher calls: %#v", calls)
}
}
// TestBackupExecutionServiceRejectsDuplicateRunningTask seeds a record in the
// running state for task 1 and checks that RunTaskByIDSync refuses to start
// the same task again and leaves the original record as the only running one.
func TestBackupExecutionServiceRejectsDuplicateRunningTask(t *testing.T) {
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task returned error: %v", err)
}
startedAt := time.Now().UTC()
running := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: task.StorageTargetID,
NodeID: 0,
Status: model.BackupRecordStatusRunning,
StartedAt: startedAt,
}
if err := records.Create(ctx, running); err != nil {
t.Fatalf("Create running record returned error: %v", err)
}
// The rejection is identified by a fragment of its user-facing message.
_, err = executionService.RunTaskByIDSync(ctx, task.ID)
if err == nil || !strings.Contains(err.Error(), "正在运行") {
t.Fatalf("expected duplicate running task to be rejected, got %v", err)
}
// No second running record may have been created by the rejected call.
items, err := records.List(ctx, repository.BackupRecordListOptions{Status: model.BackupRecordStatusRunning})
if err != nil {
t.Fatalf("List running records returned error: %v", err)
}
if len(items) != 1 || items[0].ID != running.ID {
t.Fatalf("expected only the original running record, got %#v", items)
}
}
// TestBackupExecutionServiceDeleteRecordDispatchesRemoteLocalDiskCleanup
// creates a successful record owned by node 10 and checks that DeleteRecord
// removes the record and dispatches exactly one DeleteStorageObject command to
// that node, carrying the storage path, the local_disk target type and a
// target config map.
func TestBackupExecutionServiceDeleteRecordDispatchesRemoteLocalDiskCleanup(t *testing.T) {
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
nodeRepo := &nodeRepoStub{nodes: []model.Node{
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline},
}}
dispatcher := &fakeDispatcher{}
executionService.SetClusterDependencies(nodeRepo, dispatcher)
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task returned error: %v", err)
}
completedAt := time.Now().UTC()
record := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: task.StorageTargetID,
NodeID: 10,
Status: model.BackupRecordStatusSuccess,
FileName: "remote.tar.gz",
StoragePath: "file/2026/05/09/remote.tar.gz",
StartedAt: completedAt.Add(-time.Second),
CompletedAt: &completedAt,
}
if err := records.Create(ctx, record); err != nil {
t.Fatalf("Create record returned error: %v", err)
}
if err := executionService.DeleteRecord(ctx, record.ID); err != nil {
t.Fatalf("DeleteRecord returned error: %v", err)
}
// The repository reports a deleted record as (nil, nil).
deleted, err := records.FindByID(ctx, record.ID)
if err != nil {
t.Fatalf("FindByID record returned error: %v", err)
}
if deleted != nil {
t.Fatalf("expected record deleted, got %#v", deleted)
}
calls := dispatcher.snapshot()
if len(calls) != 1 {
t.Fatalf("expected one dispatcher call, got %#v", calls)
}
if calls[0].NodeID != 10 || calls[0].CmdType != model.AgentCommandTypeDeleteStorageObject {
t.Fatalf("unexpected dispatcher call: %#v", calls[0])
}
// The payload must carry everything needed to locate the object.
if calls[0].Payload["storagePath"] != record.StoragePath {
t.Fatalf("expected storagePath %q, got %#v", record.StoragePath, calls[0].Payload)
}
if calls[0].Payload["targetType"] != string(storage.ProviderTypeLocalDisk) {
t.Fatalf("expected local_disk targetType, got %#v", calls[0].Payload)
}
if _, ok := calls[0].Payload["targetConfig"].(map[string]any); !ok {
t.Fatalf("expected targetConfig map, got %#v", calls[0].Payload["targetConfig"])
}
}
// TestBackupExecutionServiceRestoreRecordRejectsRemoteLocalDisk creates a
// successful record owned by node 10 and checks that RestoreRecord fails with
// the cross-node access error message.
func TestBackupExecutionServiceRestoreRecordRejectsRemoteLocalDisk(t *testing.T) {
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
executionService.SetClusterDependencies(&nodeRepoStub{nodes: []model.Node{
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline},
}}, &fakeDispatcher{})
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task returned error: %v", err)
}
completedAt := time.Now().UTC()
record := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: task.StorageTargetID,
NodeID: 10,
Status: model.BackupRecordStatusSuccess,
FileName: "remote.tar.gz",
StoragePath: "file/2026/05/09/remote.tar.gz",
StartedAt: completedAt.Add(-time.Second),
CompletedAt: &completedAt,
}
if err := records.Create(ctx, record); err != nil {
t.Fatalf("Create record returned error: %v", err)
}
err = executionService.RestoreRecord(ctx, record.ID)
if err == nil {
t.Fatal("expected remote local_disk restore to be rejected")
}
// The rejection is identified by a fragment of its user-facing message.
if !strings.Contains(err.Error(), "Master 无法跨节点访问") {
t.Fatalf("expected cross-node local_disk error, got %v", err)
}
}
// TestBackupExecutionServiceRecordsFirstSuccessfulStorageTarget configures
// task 1 with two test_storage targets (IDs 2 and 3), where only the "second"
// provider is registered with the factory, and checks that the resulting
// record points at target 3 and that the object exists in that provider.
func TestBackupExecutionServiceRecordsFirstSuccessfulStorageTarget(t *testing.T) {
executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
second := &testStorageProvider{name: "second", objects: map[string][]byte{}}
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
"second": second,
}})
cipher := codec.NewConfigCipher("execution-secret")
// "missing" is not registered with the factory, so the first target cannot
// be resolved to a provider.
firstConfig, err := cipher.EncryptJSON(map[string]any{"name": "missing"})
if err != nil {
t.Fatalf("EncryptJSON first returned error: %v", err)
}
secondConfig, err := cipher.EncryptJSON(map[string]any{"name": "second"})
if err != nil {
t.Fatalf("EncryptJSON second returned error: %v", err)
}
if err := targets.Create(ctx, &model.StorageTarget{Name: "first", Type: "test_storage", Enabled: true, ConfigCiphertext: firstConfig, ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
t.Fatalf("Create first target returned error: %v", err)
}
if err := targets.Create(ctx, &model.StorageTarget{Name: "second", Type: "test_storage", Enabled: true, ConfigCiphertext: secondConfig, ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
t.Fatalf("Create second target returned error: %v", err)
}
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task returned error: %v", err)
}
// NOTE(review): IDs 2 and 3 presume the helper already seeded one target
// with ID 1 — confirm against newExecutionTestServices.
task.StorageTargetID = 2
task.StorageTargets = []model.StorageTarget{{ID: 2}, {ID: 3}}
if err := tasks.Update(ctx, task); err != nil {
t.Fatalf("Update task returned error: %v", err)
}
detail, err := executionService.RunTaskByIDSync(ctx, 1)
if err != nil {
t.Fatalf("RunTaskByIDSync returned error: %v", err)
}
if detail.Status != model.BackupRecordStatusSuccess {
t.Fatalf("expected success, got %#v", detail)
}
storedRecord, err := records.FindByID(ctx, detail.ID)
if err != nil {
t.Fatalf("FindByID record returned error: %v", err)
}
if storedRecord.StorageTargetID != 3 {
t.Fatalf("expected record StorageTargetID to point at successful target 3, got %d", storedRecord.StorageTargetID)
}
if _, ok := second.objects[storedRecord.StoragePath]; !ok {
t.Fatalf("expected object in successful provider at %q", storedRecord.StoragePath)
}
}
// TestBackupExecutionServiceUploadRetryStopsWhenContextCancelled swaps target 1
// for a provider whose upload always fails and cancels the context from the
// first upload attempt. It then checks that executeTask returns within two
// seconds instead of sitting out the retry back-off.
func TestBackupExecutionServiceUploadRetryStopsWhenContextCancelled(t *testing.T) {
	executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
	ctx, cancel := context.WithCancel(context.Background())
	// Release the context even when the upload hook is never reached;
	// calling a CancelFunc more than once is harmless.
	defer cancel()
	var cancelOnce sync.Once
	failing := &testStorageProvider{
		name:       "failing",
		failUpload: true,
		onUpload: func() {
			// Cancel from inside the first upload attempt.
			cancelOnce.Do(cancel)
		},
	}
	executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
		"failing": failing,
	}})
	cipher := codec.NewConfigCipher("execution-secret")
	failingConfig, err := cipher.EncryptJSON(map[string]any{"name": "failing"})
	if err != nil {
		t.Fatalf("EncryptJSON returned error: %v", err)
	}
	if err := targets.Update(ctx, &model.StorageTarget{
		ID:               1,
		Name:             "local",
		Type:             "test_storage",
		Enabled:          true,
		ConfigCiphertext: failingConfig,
		ConfigVersion:    1,
		LastTestStatus:   "unknown",
	}); err != nil {
		t.Fatalf("Update target returned error: %v", err)
	}
	task, err := tasks.FindByID(ctx, 1)
	if err != nil {
		t.Fatalf("FindByID task returned error: %v", err)
	}
	startedAt := time.Now().UTC()
	record := &model.BackupRecord{
		TaskID:          task.ID,
		StorageTargetID: task.StorageTargetID,
		Status:          model.BackupRecordStatusRunning,
		StartedAt:       startedAt,
	}
	if err := records.Create(ctx, record); err != nil {
		t.Fatalf("Create record returned error: %v", err)
	}
	// Run executeTask in the background so the test can bound its duration.
	done := make(chan struct{})
	go func() {
		executionService.executeTask(ctx, task, record.ID, startedAt)
		close(done)
	}()
	select {
	case <-done:
	case <-time.After(2 * time.Second):
		t.Fatal("expected cancelled upload retry to stop without waiting for backoff sleep")
	}
}
// TestBackupExecutionServiceReadsStorageUsageOnceForMultiTargetQuotaChecks
// runs a task with two quota-limited targets through a record repository that
// counts StorageUsage calls, and checks that the run succeeds, StorageUsage is
// called exactly once, and each provider received one upload.
func TestBackupExecutionServiceReadsStorageUsageOnceForMultiTargetQuotaChecks(t *testing.T) {
executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
first := &testStorageProvider{name: "first", objects: map[string][]byte{}}
second := &testStorageProvider{name: "second", objects: map[string][]byte{}}
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
"first": first,
"second": second,
}})
cipher := codec.NewConfigCipher("execution-secret")
firstConfig, err := cipher.EncryptJSON(map[string]any{"name": "first"})
if err != nil {
t.Fatalf("EncryptJSON first returned error: %v", err)
}
secondConfig, err := cipher.EncryptJSON(map[string]any{"name": "second"})
if err != nil {
t.Fatalf("EncryptJSON second returned error: %v", err)
}
// Both targets get a 1 GiB quota (1 << 30 bytes).
if err := targets.Update(ctx, &model.StorageTarget{ID: 1, Name: "local", Type: "test_storage", Enabled: true, ConfigCiphertext: firstConfig, ConfigVersion: 1, LastTestStatus: "unknown", QuotaBytes: 1 << 30}); err != nil {
t.Fatalf("Update first target returned error: %v", err)
}
if err := targets.Create(ctx, &model.StorageTarget{Name: "second", Type: "test_storage", Enabled: true, ConfigCiphertext: secondConfig, ConfigVersion: 1, LastTestStatus: "unknown", QuotaBytes: 1 << 30}); err != nil {
t.Fatalf("Create second target returned error: %v", err)
}
task, err := tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task returned error: %v", err)
}
task.StorageTargets = []model.StorageTarget{{ID: 1}, {ID: 2}}
if err := tasks.Update(ctx, task); err != nil {
t.Fatalf("Update task returned error: %v", err)
}
// Wrap the repository only now, so calls made during setup are not counted.
executionService.records = &storageUsageCountingRecordRepo{BackupRecordRepository: records}
detail, err := executionService.RunTaskByIDSync(ctx, task.ID)
if err != nil {
t.Fatalf("RunTaskByIDSync returned error: %v", err)
}
if detail.Status != model.BackupRecordStatusSuccess {
t.Fatalf("expected success, got %#v", detail)
}
countingRepo := executionService.records.(*storageUsageCountingRecordRepo)
if countingRepo.usageCalls != 1 {
t.Fatalf("expected StorageUsage to be called once for quota snapshot, got %d", countingRepo.usageCalls)
}
if len(first.objects) != 1 || len(second.objects) != 1 {
t.Fatalf("expected both targets to receive upload, got first=%d second=%d", len(first.objects), len(second.objects))
}
}
// TestBackupExecutionServiceContinuesWhenStorageUsageSnapshotFails runs task 1
// against a quota-limited target while the record repository's StorageUsage
// always fails, and checks that the backup still succeeds and one object is
// uploaded.
func TestBackupExecutionServiceContinuesWhenStorageUsageSnapshotFails(t *testing.T) {
executionService, _, _, targets, records, _, _ := newExecutionTestServices(t)
ctx := context.Background()
provider := &testStorageProvider{name: "primary", objects: map[string][]byte{}}
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
"primary": provider,
}})
cipher := codec.NewConfigCipher("execution-secret")
configCiphertext, err := cipher.EncryptJSON(map[string]any{"name": "primary"})
if err != nil {
t.Fatalf("EncryptJSON returned error: %v", err)
}
if err := targets.Update(ctx, &model.StorageTarget{
ID: 1,
Name: "local",
Type: "test_storage",
Enabled: true,
ConfigCiphertext: configCiphertext,
ConfigVersion: 1,
LastTestStatus: "unknown",
QuotaBytes: 1 << 30,
}); err != nil {
t.Fatalf("Update target returned error: %v", err)
}
// Every StorageUsage call from here on returns errStorageUsageFailed.
executionService.records = &storageUsageFailingRecordRepo{
BackupRecordRepository: records,
err: errStorageUsageFailed,
}
detail, err := executionService.RunTaskByIDSync(ctx, 1)
if err != nil {
t.Fatalf("RunTaskByIDSync returned error: %v", err)
}
if detail.Status != model.BackupRecordStatusSuccess {
t.Fatalf("expected success despite soft quota usage snapshot error, got %#v", detail)
}
if len(provider.objects) != 1 {
t.Fatalf("expected upload to proceed, got %d uploaded objects", len(provider.objects))
}
}
func TestBackupRecordServiceRestore(t *testing.T) {
executionService, recordService, _, _, _, sourceDir, _ := newExecutionTestServices(t)
detail, err := executionService.RunTaskByIDSync(context.Background(), 1)
@@ -546,27 +105,3 @@ func TestBackupRecordServiceRestore(t *testing.T) {
t.Fatalf("unexpected restored content: %s", string(content))
}
}
// storageUsageCountingRecordRepo wraps a BackupRecordRepository and counts how
// often StorageUsage is called; all other methods come from the embedded
// repository.
type storageUsageCountingRecordRepo struct {
repository.BackupRecordRepository
// mu guards usageCalls.
mu sync.Mutex
// usageCalls is the number of StorageUsage calls seen so far.
usageCalls int
}
// StorageUsage increments the call counter under the mutex and then delegates
// to the wrapped repository. The lock is released before delegating.
func (r *storageUsageCountingRecordRepo) StorageUsage(ctx context.Context) ([]repository.BackupStorageUsageItem, error) {
r.mu.Lock()
r.usageCalls++
r.mu.Unlock()
return r.BackupRecordRepository.StorageUsage(ctx)
}
// storageUsageFailingRecordRepo wraps a BackupRecordRepository and makes
// StorageUsage fail with a fixed error; all other methods come from the
// embedded repository.
type storageUsageFailingRecordRepo struct {
repository.BackupRecordRepository
// err is returned by every StorageUsage call.
err error
}
// StorageUsage never reaches the wrapped repository; it returns no items and
// the configured error.
func (r *storageUsageFailingRecordRepo) StorageUsage(context.Context) ([]repository.BackupStorageUsageItem, error) {
return nil, r.err
}
// errStorageUsageFailed is the sentinel error injected through
// storageUsageFailingRecordRepo.
var errStorageUsageFailed = errors.New("storage usage failed")

View File

@@ -33,16 +33,16 @@ type BackupTaskUpsertInput struct {
DBPassword string `json:"dbPassword" binding:"max=255"`
DBName string `json:"dbName" binding:"max=255"`
DBPath string `json:"dbPath" binding:"max=500"`
StorageTargetID uint `json:"storageTargetId"` // deprecated: 向后兼容
StorageTargetIDs []uint `json:"storageTargetIds"` // 新增:多存储目标
NodeID uint `json:"nodeId"` // 执行节点0 = 本机 Master 或节点池)
StorageTargetID uint `json:"storageTargetId"` // deprecated: 向后兼容
StorageTargetIDs []uint `json:"storageTargetIds"` // 新增:多存储目标
NodeID uint `json:"nodeId"` // 执行节点0 = 本机 Master 或节点池)
// NodePoolTag 节点池标签。NodeID=0 且本字段非空时,调度器动态从 Labels 命中的在线节点中选负载最低者。
NodePoolTag string `json:"nodePoolTag" binding:"max=64"`
Tags string `json:"tags" binding:"max=500"` // 逗号分隔标签
RetentionDays int `json:"retentionDays"`
Compression string `json:"compression" binding:"omitempty,oneof=gzip none"`
Encrypt bool `json:"encrypt"`
MaxBackups int `json:"maxBackups"`
NodePoolTag string `json:"nodePoolTag" binding:"max=64"`
Tags string `json:"tags" binding:"max=500"` // 逗号分隔标签
RetentionDays int `json:"retentionDays"`
Compression string `json:"compression" binding:"omitempty,oneof=gzip none"`
Encrypt bool `json:"encrypt"`
MaxBackups int `json:"maxBackups"`
// ExtraConfig 类型特有扩展配置(如 SAP HANA 的 backupLevel/backupChannels
ExtraConfig map[string]any `json:"extraConfig"`
// 验证(恢复演练)配置
@@ -70,8 +70,8 @@ type BackupTaskSummary struct {
Type string `json:"type"`
Enabled bool `json:"enabled"`
CronExpr string `json:"cronExpr"`
StorageTargetID uint `json:"storageTargetId"` // deprecated: 取第一个
StorageTargetName string `json:"storageTargetName"` // deprecated: 取第一个
StorageTargetID uint `json:"storageTargetId"` // deprecated: 取第一个
StorageTargetName string `json:"storageTargetName"` // deprecated: 取第一个
StorageTargetIDs []uint `json:"storageTargetIds"`
StorageTargetNames []string `json:"storageTargetNames"`
NodeID uint `json:"nodeId"`
@@ -91,10 +91,10 @@ type BackupTaskSummary struct {
SLAHoursRPO int `json:"slaHoursRpo"`
AlertOnConsecutiveFails int `json:"alertOnConsecutiveFails"`
// 备份复制目标3-2-1
ReplicationTargetIDs []uint `json:"replicationTargetIds"`
MaintenanceWindows string `json:"maintenanceWindows"`
DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
UpdatedAt time.Time `json:"updatedAt"`
ReplicationTargetIDs []uint `json:"replicationTargetIds"`
MaintenanceWindows string `json:"maintenanceWindows"`
DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
UpdatedAt time.Time `json:"updatedAt"`
}
type BackupTaskDetail struct {
@@ -488,7 +488,6 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
return apperror.BadRequest("BACKUP_STORAGE_TARGET_INVALID", fmt.Sprintf("关联的存储目标 %d 不存在", tid), nil)
}
}
var fixedNode *model.Node
if input.NodeID > 0 && s.nodes != nil {
node, err := s.nodes.FindByID(ctx, input.NodeID)
if err != nil {
@@ -497,17 +496,12 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
if node == nil {
return apperror.BadRequest("BACKUP_TASK_INVALID", "所选执行节点不存在", nil)
}
fixedNode = node
}
// 节点池与固定节点互斥:固定节点已确定执行位置,不再动态调度
if input.NodeID > 0 && strings.TrimSpace(input.NodePoolTag) != "" {
return apperror.BadRequest("BACKUP_TASK_INVALID",
"固定执行节点与节点池标签只能选其一", nil)
}
if input.Encrypt && (strings.TrimSpace(input.NodePoolTag) != "" || (fixedNode != nil && !fixedNode.IsLocal)) {
return apperror.BadRequest("BACKUP_TASK_REMOTE_ENCRYPT_UNSUPPORTED",
"远程节点暂不支持加密备份。请关闭加密,或将任务固定在 Master 本机执行。", nil)
}
if input.RetentionDays < 0 {
return apperror.BadRequest("BACKUP_TASK_INVALID", "保留天数不能小于 0", nil)
}
@@ -645,38 +639,38 @@ func (s *BackupTaskService) buildTask(existing *model.BackupTask, input BackupTa
return nil, apperror.BadRequest("BACKUP_TASK_INVALID", "扩展配置格式不合法", err)
}
item := &model.BackupTask{
Name: strings.TrimSpace(input.Name),
Type: normalizeBackupTaskType(input.Type),
Enabled: input.Enabled,
CronExpr: strings.TrimSpace(input.CronExpr),
SourcePath: primarySourcePath,
SourcePaths: sourcePathsJSON,
ExcludePatterns: excludePatterns,
DBHost: strings.TrimSpace(input.DBHost),
DBPort: input.DBPort,
DBUser: strings.TrimSpace(input.DBUser),
DBPasswordCiphertext: passwordCiphertext,
DBName: strings.TrimSpace(input.DBName),
DBPath: strings.TrimSpace(input.DBPath),
ExtraConfig: extraConfigJSON,
StorageTargetID: primaryTargetID,
StorageTargets: storageTargets,
NodeID: input.NodeID,
NodePoolTag: strings.TrimSpace(input.NodePoolTag),
Tags: strings.TrimSpace(input.Tags),
RetentionDays: input.RetentionDays,
Compression: compression,
Encrypt: input.Encrypt,
MaxBackups: maxBackups,
LastStatus: "idle",
VerifyEnabled: input.VerifyEnabled,
VerifyCronExpr: strings.TrimSpace(input.VerifyCronExpr),
VerifyMode: normalizeVerifyMode(input.VerifyMode),
SLAHoursRPO: maxInt(0, input.SLAHoursRPO),
Name: strings.TrimSpace(input.Name),
Type: normalizeBackupTaskType(input.Type),
Enabled: input.Enabled,
CronExpr: strings.TrimSpace(input.CronExpr),
SourcePath: primarySourcePath,
SourcePaths: sourcePathsJSON,
ExcludePatterns: excludePatterns,
DBHost: strings.TrimSpace(input.DBHost),
DBPort: input.DBPort,
DBUser: strings.TrimSpace(input.DBUser),
DBPasswordCiphertext: passwordCiphertext,
DBName: strings.TrimSpace(input.DBName),
DBPath: strings.TrimSpace(input.DBPath),
ExtraConfig: extraConfigJSON,
StorageTargetID: primaryTargetID,
StorageTargets: storageTargets,
NodeID: input.NodeID,
NodePoolTag: strings.TrimSpace(input.NodePoolTag),
Tags: strings.TrimSpace(input.Tags),
RetentionDays: input.RetentionDays,
Compression: compression,
Encrypt: input.Encrypt,
MaxBackups: maxBackups,
LastStatus: "idle",
VerifyEnabled: input.VerifyEnabled,
VerifyCronExpr: strings.TrimSpace(input.VerifyCronExpr),
VerifyMode: normalizeVerifyMode(input.VerifyMode),
SLAHoursRPO: maxInt(0, input.SLAHoursRPO),
AlertOnConsecutiveFails: alertThreshold(input.AlertOnConsecutiveFails),
ReplicationTargetIDs: encodeUintCSV(input.ReplicationTargetIDs),
MaintenanceWindows: strings.TrimSpace(input.MaintenanceWindows),
DependsOnTaskIDs: encodeUintCSV(input.DependsOnTaskIDs),
ReplicationTargetIDs: encodeUintCSV(input.ReplicationTargetIDs),
MaintenanceWindows: strings.TrimSpace(input.MaintenanceWindows),
DependsOnTaskIDs: encodeUintCSV(input.DependsOnTaskIDs),
}
if existing != nil {
item.LastRunAt = existing.LastRunAt
@@ -742,25 +736,25 @@ func toBackupTaskSummary(item *model.BackupTask) BackupTaskSummary {
primaryName = targetNames[0]
}
return BackupTaskSummary{
ID: item.ID,
Name: item.Name,
Type: normalizeBackupTaskType(item.Type),
Enabled: item.Enabled,
CronExpr: item.CronExpr,
StorageTargetID: primaryID,
StorageTargetName: primaryName,
StorageTargetIDs: targetIDs,
StorageTargetNames: targetNames,
NodeID: item.NodeID,
NodeName: item.Node.Name,
NodePoolTag: item.NodePoolTag,
Tags: item.Tags,
RetentionDays: item.RetentionDays,
Compression: item.Compression,
Encrypt: item.Encrypt,
MaxBackups: item.MaxBackups,
LastRunAt: item.LastRunAt,
LastStatus: item.LastStatus,
ID: item.ID,
Name: item.Name,
Type: normalizeBackupTaskType(item.Type),
Enabled: item.Enabled,
CronExpr: item.CronExpr,
StorageTargetID: primaryID,
StorageTargetName: primaryName,
StorageTargetIDs: targetIDs,
StorageTargetNames: targetNames,
NodeID: item.NodeID,
NodeName: item.Node.Name,
NodePoolTag: item.NodePoolTag,
Tags: item.Tags,
RetentionDays: item.RetentionDays,
Compression: item.Compression,
Encrypt: item.Encrypt,
MaxBackups: item.MaxBackups,
LastRunAt: item.LastRunAt,
LastStatus: item.LastStatus,
VerifyEnabled: item.VerifyEnabled,
VerifyCronExpr: item.VerifyCronExpr,
VerifyMode: item.VerifyMode,
@@ -769,7 +763,7 @@ func toBackupTaskSummary(item *model.BackupTask) BackupTaskSummary {
ReplicationTargetIDs: parseUintCSV(item.ReplicationTargetIDs),
MaintenanceWindows: item.MaintenanceWindows,
DependsOnTaskIDs: parseUintCSV(item.DependsOnTaskIDs),
UpdatedAt: item.UpdatedAt,
UpdatedAt: item.UpdatedAt,
}
}

View File

@@ -3,7 +3,6 @@ package service
import (
"context"
"path/filepath"
"strings"
"testing"
"backupx/server/internal/config"
@@ -30,82 +29,6 @@ func newBackupTaskServiceForTest(t *testing.T) (*BackupTaskService, repository.S
return service, targets, tasks
}
// TestBackupTaskServiceRejectsEncryptedRemoteTasks covers the combination of
// Encrypt with node placement, using one local node (41) and one non-local
// node (42):
//   - creating an encrypted task with a node pool tag is rejected;
//   - creating an encrypted task with no node or pinned to node 41 succeeds;
//   - updating an encrypted task to run on node 42 is rejected.
func TestBackupTaskServiceRejectsEncryptedRemoteTasks(t *testing.T) {
ctx := context.Background()
service, targets, _ := newBackupTaskServiceForTest(t)
service.SetNodeRepository(&nodeRepoStub{nodes: []model.Node{
{ID: 41, Name: "master", Token: "master-token", Status: model.NodeStatusOnline, IsLocal: true},
{ID: 42, Name: "edge", Token: "edge-token", Status: model.NodeStatusOnline, IsLocal: false},
}})
if err := targets.Create(ctx, &model.StorageTarget{Name: "local", Type: "local_disk", Enabled: true, ConfigCiphertext: "ciphertext", ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
t.Fatalf("seed storage target error: %v", err)
}
// Case 1: encrypted + node pool tag must be rejected.
_, err := service.Create(ctx, BackupTaskUpsertInput{
Name: "encrypted-node-pool",
Type: "file",
Enabled: true,
SourcePath: "/srv/site",
StorageTargetID: 1,
NodePoolTag: "db",
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
Encrypt: true,
})
if err == nil || !strings.Contains(err.Error(), "远程节点暂不支持加密备份") {
t.Fatalf("expected encrypted node-pool task to be rejected, got %v", err)
}
// Case 2: encrypted with neither node nor pool tag is accepted.
created, err := service.Create(ctx, BackupTaskUpsertInput{
Name: "local-encrypted",
Type: "file",
Enabled: true,
SourcePath: "/srv/site",
StorageTargetID: 1,
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
Encrypt: true,
})
if err != nil {
t.Fatalf("Create local encrypted task returned error: %v", err)
}
// Case 3: encrypted and pinned to the local node is accepted.
localNodeTask, err := service.Create(ctx, BackupTaskUpsertInput{
Name: "local-node-encrypted",
Type: "file",
Enabled: true,
SourcePath: "/srv/site",
StorageTargetID: 1,
NodeID: 41,
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
Encrypt: true,
})
if err != nil {
t.Fatalf("Create encrypted task pinned to local node returned error: %v", err)
}
if localNodeTask.NodeID != 41 || !localNodeTask.Encrypt {
t.Fatalf("expected encrypted task to keep local node, got %#v", localNodeTask)
}
// Case 4: moving an encrypted task to the non-local node must be rejected.
_, err = service.Update(ctx, created.ID, BackupTaskUpsertInput{
Name: created.Name,
Type: created.Type,
Enabled: true,
SourcePath: "/srv/site",
StorageTargetID: 1,
NodeID: 42,
RetentionDays: 30,
Compression: "gzip",
MaxBackups: 10,
Encrypt: true,
})
if err == nil || !strings.Contains(err.Error(), "远程节点暂不支持加密备份") {
t.Fatalf("expected encrypted fixed-node update to be rejected, got %v", err)
}
}
func TestBackupTaskServiceCreateAndGet(t *testing.T) {
ctx := context.Background()
service, targets, _ := newBackupTaskServiceForTest(t)

View File

@@ -3,14 +3,12 @@ package service
import (
"context"
"crypto/rand"
"encoding/base64"
"encoding/hex"
"fmt"
"strings"
"time"
"backupx/server/internal/apperror"
"backupx/server/internal/installscript"
"backupx/server/internal/model"
"backupx/server/internal/repository"
)
@@ -44,25 +42,6 @@ type InstallTokenOutput struct {
Record *model.AgentInstallToken
}
// InstallCommandInput is the complete business input needed to generate a
// displayable install command.
type InstallCommandInput struct {
// InstallTokenInput carries the token parameters passed on to validate and Create.
InstallTokenInput
// MasterURL is the base URL used to build the install URLs. CreateCommand
// trims surrounding whitespace and trailing slashes and rejects an empty value.
MasterURL string
}
// InstallCommandOutput is the complete business output the UI needs to
// generate an install command.
type InstallCommandOutput struct {
// Token, ExpiresAt, Node and Record are copied from the InstallTokenOutput
// returned by Create.
Token string
ExpiresAt time.Time
Node *model.Node
Record *model.AgentInstallToken
// URL is "<master>/api/install/<token>".
URL string
// FallbackURL is "<master>/install/<token>".
FallbackURL string
// ComposeURL and FallbackComposeURL append "/compose.yml" to the URLs above;
// CreateCommand fills them only when the record mode is InstallModeDocker.
ComposeURL string
FallbackComposeURL string
// ScriptBase64 is the rendered install script, standard-base64 encoded.
ScriptBase64 string
}
// ConsumedInstallToken 消费成功后返回给 handler 的组合体。
type ConsumedInstallToken struct {
Record *model.AgentInstallToken
@@ -127,67 +106,6 @@ func (s *InstallTokenService) Create(ctx context.Context, in InstallTokenInput)
return &InstallTokenOutput{Token: token, ExpiresAt: expiresAt, Node: node, Record: record}, nil
}
// CreateCommand issues an install token and returns the URLs and the embedded
// script the UI needs to display an install command.
//
// The master URL is trimmed of whitespace and trailing slashes and must not be
// empty. The input is validated and the script is rendered once as a dry run
// before the token is created, so a request that cannot produce a script does
// not leave a token behind. Compose URLs are filled in for Docker mode only.
func (s *InstallTokenService) CreateCommand(ctx context.Context, in InstallCommandInput) (*InstallCommandOutput, error) {
	base := strings.TrimRight(strings.TrimSpace(in.MasterURL), "/")
	if base == "" {
		return nil, apperror.BadRequest("INSTALL_TOKEN_INVALID", "masterURL 必填", nil)
	}
	if err := s.validate(in.InstallTokenInput); err != nil {
		return nil, err
	}

	node, err := s.nodeRepo.FindByID(ctx, in.NodeID)
	if err != nil {
		return nil, err
	}
	if node == nil {
		return nil, apperror.New(404, "NODE_NOT_FOUND", "节点不存在", nil)
	}

	// Dry run: render against an unsaved token built from the request.
	preview := &model.AgentInstallToken{
		Mode:        in.Mode,
		Arch:        in.Arch,
		AgentVer:    in.AgentVersion,
		DownloadSrc: in.DownloadSrc,
	}
	if _, err := renderInstallCommandScript(base, node, preview); err != nil {
		return nil, err
	}

	created, err := s.Create(ctx, in.InstallTokenInput)
	if err != nil {
		return nil, err
	}
	// Render again from the persisted record so the script matches what was stored.
	script, err := renderInstallCommandScript(base, created.Node, created.Record)
	if err != nil {
		return nil, err
	}

	apiURL := base + "/api/install/" + created.Token
	fallbackURL := base + "/install/" + created.Token
	command := &InstallCommandOutput{
		Token:        created.Token,
		ExpiresAt:    created.ExpiresAt,
		Node:         created.Node,
		Record:       created.Record,
		URL:          apiURL,
		FallbackURL:  fallbackURL,
		ScriptBase64: base64.StdEncoding.EncodeToString([]byte(script)),
	}
	if created.Record.Mode == model.InstallModeDocker {
		command.ComposeURL = apiURL + "/compose.yml"
		command.FallbackComposeURL = fallbackURL + "/compose.yml"
	}
	return command, nil
}
// renderInstallCommandScript builds the installscript context from the master
// URL, the node (token and ID) and the install token record (version, mode,
// arch, download source), and renders the script with a fixed install prefix.
func renderInstallCommandScript(masterURL string, node *model.Node, record *model.AgentInstallToken) (string, error) {
	scriptCtx := installscript.Context{
		MasterURL:     masterURL,
		AgentToken:    node.Token,
		AgentVersion:  record.AgentVer,
		Mode:          record.Mode,
		Arch:          record.Arch,
		DownloadBase:  installscript.DownloadBaseFor(record.DownloadSrc),
		InstallPrefix: "/opt/backupx-agent",
		NodeID:        node.ID,
	}
	return installscript.RenderScript(scriptCtx)
}
// Consume 原子消费令牌。未命中/已过期/已消费均返回 (nil, nil)。
func (s *InstallTokenService) Consume(ctx context.Context, token string) (*ConsumedInstallToken, error) {
if strings.TrimSpace(token) == "" {
@@ -252,8 +170,8 @@ func (s *InstallTokenService) validate(in InstallTokenInput) error {
if !validInstallSources[in.DownloadSrc] {
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "downloadSrc 非法", nil)
}
if err := validateInstallAgentVersion(in.AgentVersion); err != nil {
return err
if strings.TrimSpace(in.AgentVersion) == "" {
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 必填", nil)
}
if in.TTLSeconds < InstallTokenMinTTL || in.TTLSeconds > InstallTokenMaxTTL {
return apperror.BadRequest("INSTALL_TOKEN_INVALID",
@@ -262,27 +180,6 @@ func (s *InstallTokenService) validate(in InstallTokenInput) error {
return nil
}
// validateInstallAgentVersion checks the agent version string after trimming
// surrounding whitespace. It must be non-empty, at most 64 bytes long, and
// consist only of ASCII letters, ASCII digits and the characters ". - _ +".
// Any violation is reported as an INSTALL_TOKEN_INVALID bad-request error.
func validateInstallAgentVersion(v string) error {
	const allowedPunctuation = ".-_+"

	trimmed := strings.TrimSpace(v)
	if trimmed == "" {
		return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 必填", nil)
	}
	if len(trimmed) > 64 {
		return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 不能超过 64 字符", nil)
	}
	for _, r := range trimmed {
		isDigit := r >= '0' && r <= '9'
		isLower := r >= 'a' && r <= 'z'
		isUpper := r >= 'A' && r <= 'Z'
		if isDigit || isLower || isUpper || strings.ContainsRune(allowedPunctuation, r) {
			continue
		}
		// Anything outside the whitelist (spaces, shell operators, non-ASCII) is rejected.
		return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 包含非法字符", nil)
	}
	return nil
}
func generateInstallToken() (string, error) {
b := make([]byte, 32)
if _, err := rand.Read(b); err != nil {

View File

@@ -131,79 +131,6 @@ func TestInstallTokenServiceValidatesInput(t *testing.T) {
}
}
// TestInstallTokenServiceRejectsInvalidAgentVersionBeforeCreate verifies that
// Create returns an error for an agent version containing spaces and shell
// operators, and that no install-token record is stored for the node.
func TestInstallTokenServiceRejectsInvalidAgentVersionBeforeCreate(t *testing.T) {
db := openInstallTokenTestDB(t)
nodeRepo := repository.NewNodeRepository(db)
node := &model.Node{Name: "invalid-version", Token: "feedface"}
if err := nodeRepo.Create(context.Background(), node); err != nil {
t.Fatalf("create node: %v", err)
}
tokenRepo := repository.NewAgentInstallTokenRepository(db)
svc := NewInstallTokenService(tokenRepo, nodeRepo)
// Every field except AgentVersion is a value used by the passing tests in this file.
_, err := svc.Create(context.Background(), InstallTokenInput{
NodeID: node.ID,
Mode: model.InstallModeSystemd,
Arch: model.InstallArchAuto,
AgentVersion: "v1 && rm -rf /",
DownloadSrc: model.InstallSourceGitHub,
TTLSeconds: 900,
CreatedByID: 1,
})
if err == nil {
t.Fatalf("expected invalid version error")
}
// Count records created for this node within the last hour; the rejected request must have added none.
count, err := tokenRepo.CountCreatedSince(context.Background(), node.ID, time.Now().UTC().Add(-time.Hour))
if err != nil {
t.Fatalf("count: %v", err)
}
if count != 0 {
t.Fatalf("invalid request created %d token records", count)
}
}
// TestInstallTokenServiceCreateCommandBuildsURLsAndScript verifies that
// CreateCommand, given a MasterURL with a path prefix, returns a token, a
// non-empty base64 script, and install/fallback/compose URLs built on that
// MasterURL. The script content and FallbackComposeURL are not inspected.
func TestInstallTokenServiceCreateCommandBuildsURLsAndScript(t *testing.T) {
db := openInstallTokenTestDB(t)
nodeRepo := repository.NewNodeRepository(db)
node := &model.Node{
Name: "command-node",
Token: "deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef",
}
if err := nodeRepo.Create(context.Background(), node); err != nil {
t.Fatalf("create node: %v", err)
}
tokenRepo := repository.NewAgentInstallTokenRepository(db)
svc := NewInstallTokenService(tokenRepo, nodeRepo)
// Docker mode is used so that ComposeURL is expected to be populated.
out, err := svc.CreateCommand(context.Background(), InstallCommandInput{
InstallTokenInput: InstallTokenInput{
NodeID: node.ID,
Mode: model.InstallModeDocker,
Arch: model.InstallArchAuto,
AgentVersion: "v1.7.0",
DownloadSrc: model.InstallSourceGitHub,
TTLSeconds: 900,
CreatedByID: 1,
},
MasterURL: "https://public.example.com/base",
})
if err != nil {
t.Fatalf("create command: %v", err)
}
if out.Token == "" || out.ScriptBase64 == "" {
t.Fatalf("missing token or script: %+v", out)
}
// The "/base" prefix of MasterURL must be preserved in every generated URL.
if out.URL != "https://public.example.com/base/api/install/"+out.Token {
t.Fatalf("bad url: %s", out.URL)
}
if out.FallbackURL != "https://public.example.com/base/install/"+out.Token {
t.Fatalf("bad fallback url: %s", out.FallbackURL)
}
if out.ComposeURL != "https://public.example.com/base/api/install/"+out.Token+"/compose.yml" {
t.Fatalf("bad compose url: %s", out.ComposeURL)
}
}
func TestInstallTokenServiceRateLimit(t *testing.T) {
db := openInstallTokenTestDB(t)
nodeRepo := repository.NewNodeRepository(db)

View File

@@ -36,19 +36,6 @@ type NodeSummary struct {
BandwidthLimit string `json:"bandwidthLimit"`
Labels string `json:"labels"`
CreatedAt time.Time `json:"createdAt"`
Queue NodeQueue `json:"queue"`
RunningTasks int `json:"runningTasks"`
LastError string `json:"lastError,omitempty"`
Health string `json:"health"`
}
// NodeQueue is the per-node agent command queue section of a NodeSummary.
// NOTE(review): the exact meaning of each counter is defined by the
// repository's queue summary query, which is not visible here — confirm there.
type NodeQueue struct {
// Pending is the pending-command count reported by the queue summary.
Pending int `json:"pending"`
// Dispatched is the dispatched-command count reported by the queue summary.
Dispatched int `json:"dispatched"`
// Depth is the queue depth reported by the queue summary (presumably pending + dispatched — TODO confirm).
Depth int `json:"depth"`
// Timeouts is the timed-out command count reported by the queue summary.
Timeouts int `json:"timeouts"`
// OldestActiveAt is the timestamp of the oldest active command; omitted from JSON when nil.
OldestActiveAt *time.Time `json:"oldestActiveAt,omitempty"`
// OldestActiveAgeS is the age of OldestActiveAt in whole seconds; it stays 0 when OldestActiveAt is nil.
OldestActiveAgeS int `json:"oldestActiveAgeSeconds"`
}
// NodeCreateInput is the input for creating a new remote node.
@@ -67,11 +54,10 @@ type NodeUpdateInput struct {
// NodeService manages the cluster nodes.
type NodeService struct {
repo repository.NodeRepository
taskRepo repository.BackupTaskRepository
agentRPC NodeAgentRPC
cmdRepo repository.AgentCommandRepository
version string
repo repository.NodeRepository
taskRepo repository.BackupTaskRepository
agentRPC NodeAgentRPC
version string
}
// NodeAgentRPC 抽象 Agent 远程调用能力(避免 service 内循环依赖)。
@@ -95,10 +81,6 @@ func (s *NodeService) SetAgentRPC(rpc NodeAgentRPC) {
s.agentRPC = rpc
}
// SetAgentCommandRepository injects the agent command repository that the
// service reads queue summaries from. While it is unset (nil), node summaries
// are built without queue data.
func (s *NodeService) SetAgentCommandRepository(cmdRepo repository.AgentCommandRepository) {
s.cmdRepo = cmdRepo
}
// EnsureLocalNode creates the default "local" node if it does not exist.
func (s *NodeService) EnsureLocalNode(ctx context.Context) error {
existing, err := s.repo.FindLocal(ctx)
@@ -138,10 +120,24 @@ func (s *NodeService) List(ctx context.Context) ([]NodeSummary, error) {
if err != nil {
return nil, err
}
queueByNode := s.loadQueueSummaries(ctx)
result := make([]NodeSummary, len(nodes))
for i, n := range nodes {
result[i] = s.toNodeSummary(&n, queueByNode[n.ID])
result[i] = NodeSummary{
ID: n.ID,
Name: n.Name,
Hostname: n.Hostname,
IPAddress: n.IPAddress,
Status: n.Status,
IsLocal: n.IsLocal,
OS: n.OS,
Arch: n.Arch,
AgentVersion: n.AgentVer,
LastSeen: n.LastSeen,
MaxConcurrent: n.MaxConcurrent,
BandwidthLimit: n.BandwidthLimit,
Labels: n.Labels,
CreatedAt: n.CreatedAt,
}
}
return result, nil
}
@@ -154,24 +150,7 @@ func (s *NodeService) Get(ctx context.Context, id uint) (*NodeSummary, error) {
if node == nil {
return nil, apperror.New(http.StatusNotFound, "NODE_NOT_FOUND", "节点不存在", nil)
}
queueByNode := s.loadQueueSummaries(ctx)
summary := s.toNodeSummary(node, queueByNode[node.ID])
return &summary, nil
}
// loadQueueSummaries returns the per-node command queue summaries keyed by
// node ID. It is best-effort: when no command repository is configured or the
// lookup fails, it returns nil so callers fall back to zero-valued summaries.
func (s *NodeService) loadQueueSummaries(ctx context.Context) map[uint]repository.AgentCommandQueueSummary {
	if s.cmdRepo != nil {
		if byNode, err := s.cmdRepo.NodeQueueSummaries(ctx); err == nil {
			return byNode
		}
	}
	return nil
}
func (s *NodeService) toNodeSummary(node *model.Node, queue repository.AgentCommandQueueSummary) NodeSummary {
summary := NodeSummary{
return &NodeSummary{
ID: node.ID,
Name: node.Name,
Hostname: node.Hostname,
@@ -186,31 +165,7 @@ func (s *NodeService) toNodeSummary(node *model.Node, queue repository.AgentComm
BandwidthLimit: node.BandwidthLimit,
Labels: node.Labels,
CreatedAt: node.CreatedAt,
Queue: NodeQueue{
Pending: queue.Pending,
Dispatched: queue.Dispatched,
Depth: queue.Depth,
Timeouts: queue.Timeouts,
OldestActiveAt: queue.OldestActiveAt,
},
RunningTasks: queue.Running,
LastError: queue.LastError,
Health: nodeHealth(node, queue),
}
if queue.OldestActiveAt != nil {
summary.Queue.OldestActiveAgeS = int(time.Since(*queue.OldestActiveAt).Seconds())
}
return summary
}
// nodeHealth derives a coarse health label for a node:
//   - "offline" when the node status is not model.NodeStatusOnline;
//   - "degraded" when the queue summary reports any timeouts or a non-blank last error;
//   - "healthy" otherwise.
func nodeHealth(node *model.Node, queue repository.AgentCommandQueueSummary) string {
if node.Status != model.NodeStatusOnline {
return "offline"
}
if queue.Timeouts > 0 || strings.TrimSpace(queue.LastError) != "" {
return "degraded"
}
return "healthy"
}, nil
}
// Create registers a new remote node and returns its authentication token.

View File

@@ -23,9 +23,6 @@ func openNodeServiceDB(t *testing.T) *gorm.DB {
if err := db.AutoMigrate(&model.Node{}); err != nil {
t.Fatalf("migrate: %v", err)
}
if err := db.AutoMigrate(&model.AgentCommand{}); err != nil {
t.Fatalf("migrate agent commands: %v", err)
}
return db
}
@@ -160,48 +157,3 @@ func TestRotateTokenNotFound(t *testing.T) {
t.Fatalf("expected not found error")
}
}
// TestNodeServiceListIncludesQueueHealthSummary verifies that List enriches a
// node with queue counters, the last command error, a "degraded" health label
// and oldest-active metadata once a command repository is attached.
func TestNodeServiceListIncludesQueueHealthSummary(t *testing.T) {
db := openNodeServiceDB(t)
nodeRepo := repository.NewNodeRepository(db)
cmdRepo := repository.NewAgentCommandRepository(db)
svc := NewNodeService(nodeRepo, "test")
svc.SetAgentCommandRepository(cmdRepo)
ctx := context.Background()
node := &model.Node{
Name: "edge-a",
Token: "edge-token",
Status: model.NodeStatusOnline,
IsLocal: false,
LastSeen: time.Now().UTC(),
}
if err := nodeRepo.Create(ctx, node); err != nil {
t.Fatalf("Create node returned error: %v", err)
}
// One pending command created a minute ago, so the oldest-active age is positive.
old := time.Now().UTC().Add(-time.Minute)
if err := cmdRepo.Create(ctx, &model.AgentCommand{NodeID: node.ID, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old}); err != nil {
t.Fatalf("Create pending command returned error: %v", err)
}
// One already-timed-out command carrying an error message.
completedAt := time.Now().UTC()
if err := cmdRepo.Create(ctx, &model.AgentCommand{NodeID: node.ID, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusTimeout, ErrorMessage: "agent timeout", CompletedAt: &completedAt}); err != nil {
t.Fatalf("Create timeout command returned error: %v", err)
}
items, err := svc.List(ctx)
if err != nil {
t.Fatalf("List returned error: %v", err)
}
if len(items) != 1 {
t.Fatalf("expected one node, got %#v", items)
}
got := items[0]
// The timed-out command counts toward Timeouts but not toward Pending or Depth.
if got.Queue.Pending != 1 || got.Queue.Depth != 1 || got.Queue.Timeouts != 1 {
t.Fatalf("unexpected queue summary: %#v", got.Queue)
}
if got.Health != "degraded" || got.LastError != "agent timeout" {
t.Fatalf("expected terminal command errors to degrade healthy node, got %#v", got)
}
if got.Queue.OldestActiveAt == nil || got.Queue.OldestActiveAgeS <= 0 {
t.Fatalf("expected oldest active metadata, got %#v", got.Queue)
}
}

View File

@@ -141,11 +141,10 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
}
startedAt := s.now()
restoreNodeID := s.resolveRestoreNodeID(record, task)
restore := &model.RestoreRecord{
BackupRecordID: backupRecordID,
TaskID: record.TaskID,
NodeID: restoreNodeID,
NodeID: task.NodeID,
Status: model.RestoreRecordStatusRunning,
StartedAt: startedAt,
TriggeredBy: strings.TrimSpace(triggeredBy),
@@ -155,7 +154,7 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
}
// 远程节点路由
if remoteNode := s.resolveRemoteNode(ctx, restoreNodeID); remoteNode != nil {
if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
if s.dispatcher == nil {
return nil, apperror.Internal("RESTORE_DISPATCH_UNAVAILABLE", "Agent 下发通道未就绪", nil)
}
@@ -167,14 +166,14 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
s.logHub.Complete(restore.ID, model.RestoreRecordStatusFailed)
return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
}
if _, dispatchErr := s.dispatcher.EnqueueCommand(ctx, restoreNodeID, model.AgentCommandTypeRestoreRecord, map[string]any{
if _, dispatchErr := s.dispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRestoreRecord, map[string]any{
"restoreRecordId": restore.ID,
}); dispatchErr != nil {
_ = s.finalize(ctx, restore.ID, model.RestoreRecordStatusFailed,
"下发恢复任务到远程节点失败: "+dispatchErr.Error())
return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发恢复任务到远程节点", dispatchErr)
}
s.logHub.Append(restore.ID, "info", fmt.Sprintf("已下发恢复任务到节点 %s#%d等待 Agent 执行", remoteNode.Name, restoreNodeID))
s.logHub.Append(restore.ID, "info", fmt.Sprintf("已下发恢复任务到节点 %s#%d等待 Agent 执行", remoteNode.Name, task.NodeID))
return s.getDetail(ctx, restore.ID)
}
@@ -186,16 +185,6 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
return s.getDetail(ctx, restore.ID)
}
// resolveRestoreNodeID picks the node a restore should run on. The node
// recorded on the backup record wins when it is set (non-zero); otherwise the
// task's node is used. It returns 0 when neither source is available.
func (s *RestoreService) resolveRestoreNodeID(record *model.BackupRecord, task *model.BackupTask) uint {
	switch {
	case record != nil && record.NodeID != 0:
		return record.NodeID
	case task != nil:
		return task.NodeID
	default:
		return 0
	}
}
// isRemoteNode 判断 NodeID 是否指向有效的远程节点。
func (s *RestoreService) isRemoteNode(ctx context.Context, nodeID uint) bool {
return s.resolveRemoteNode(ctx, nodeID) != nil
@@ -640,9 +629,6 @@ func (s *RestoreService) UpdateAgentRestore(ctx context.Context, node *model.Nod
if restore.NodeID != node.ID {
return apperror.Unauthorized("RESTORE_RECORD_FORBIDDEN", "恢复记录不属于当前节点", nil)
}
if isRestoreRecordTerminal(restore.Status) {
return nil
}
// 追加日志到 LogHub + DB
if strings.TrimSpace(update.LogAppend) != "" {
for _, line := range strings.Split(update.LogAppend, "\n") {
@@ -681,10 +667,6 @@ func (s *RestoreService) UpdateAgentRestore(ctx context.Context, node *model.Nod
return nil
}
// isRestoreRecordTerminal reports whether status is one of the final restore
// states (success or failed).
func isRestoreRecordTerminal(status string) bool {
	switch status {
	case model.RestoreRecordStatusSuccess, model.RestoreRecordStatusFailed:
		return true
	}
	return false
}
// --- 内部辅助 ---
func (s *RestoreService) getDetail(ctx context.Context, restoreID uint) (*RestoreRecordDetail, error) {

View File

@@ -51,15 +51,15 @@ func (f *fakeDispatcher) snapshot() []dispatcherCall {
}
type restoreTestHarness struct {
service *RestoreService
execution *BackupExecutionService
records repository.BackupRecordRepository
restores repository.RestoreRecordRepository
tasks repository.BackupTaskRepository
nodes repository.NodeRepository
dispatcher *fakeDispatcher
sourceDir string
storageDir string
service *RestoreService
execution *BackupExecutionService
records repository.BackupRecordRepository
restores repository.RestoreRecordRepository
tasks repository.BackupTaskRepository
nodes repository.NodeRepository
dispatcher *fakeDispatcher
sourceDir string
storageDir string
}
func newRestoreTestHarness(t *testing.T, remoteNode bool) *restoreTestHarness {
@@ -228,179 +228,6 @@ func TestRestoreServiceStart_RemoteNodeEnqueuesCommand(t *testing.T) {
}
}
// TestRestoreServiceStart_UsesBackupRecordNodeForPooledTask verifies that when
// a task has no fixed node (NodeID 0, pool tag set), Start routes the restore
// to the node stored on the backup record and dispatches exactly one command
// to that node.
func TestRestoreServiceStart_UsesBackupRecordNodeForPooledTask(t *testing.T) {
h := newRestoreTestHarness(t, true)
ctx := context.Background()
task, err := h.tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task: %v", err)
}
// Remember the harness's remote node, then turn the task into a pooled task.
remoteNodeID := task.NodeID
task.NodeID = 0
task.NodePoolTag = "db"
if err := h.tasks.Update(ctx, task); err != nil {
t.Fatalf("Update task: %v", err)
}
// Guard: make sure the zero NodeID was actually persisted.
storedTask, err := h.tasks.FindByID(ctx, task.ID)
if err != nil {
t.Fatalf("FindByID stored task: %v", err)
}
if storedTask.NodeID != 0 {
t.Fatalf("expected stored task NodeID to be reset to 0, got %d", storedTask.NodeID)
}
startedAt := time.Now().UTC()
completedAt := startedAt.Add(time.Second)
// The backup record, not the task, carries the node that produced the backup.
backupRecord := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: task.StorageTargetID,
NodeID: remoteNodeID,
Status: model.BackupRecordStatusSuccess,
FileName: "pooled.tar.gz",
StoragePath: "file/2026/05/09/pooled.tar.gz",
StartedAt: startedAt,
CompletedAt: &completedAt,
}
if err := h.records.Create(ctx, backupRecord); err != nil {
t.Fatalf("Create backup record: %v", err)
}
detail, err := h.service.Start(ctx, backupRecord.ID, "tester-pool")
if err != nil {
t.Fatalf("Start: %v", err)
}
if detail.NodeID != remoteNodeID {
t.Fatalf("expected restore node %d, got %d", remoteNodeID, detail.NodeID)
}
calls := h.dispatcher.snapshot()
if len(calls) != 1 {
t.Fatalf("expected exactly 1 dispatcher call, got %d", len(calls))
}
if calls[0].NodeID != remoteNodeID {
t.Fatalf("expected dispatch to node %d, got %d", remoteNodeID, calls[0].NodeID)
}
}
// TestRestoreServiceAgentRestoreAccessUsesRestoreRecordNode verifies that only
// the node named on a restore record may fetch its restore spec and post
// updates; a different node gets an error from both calls.
func TestRestoreServiceAgentRestoreAccessUsesRestoreRecordNode(t *testing.T) {
h := newRestoreTestHarness(t, true)
ctx := context.Background()
task, err := h.tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task: %v", err)
}
owner, err := h.nodes.FindByID(ctx, task.NodeID)
if err != nil {
t.Fatalf("FindByID owner node: %v", err)
}
// A second online remote node that does not own the restore record.
other := &model.Node{Name: "edge-2", Token: "other-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
if err := h.nodes.Create(ctx, other); err != nil {
t.Fatalf("Create other node: %v", err)
}
startedAt := time.Now().UTC()
completedAt := startedAt.Add(time.Second)
backupRecord := &model.BackupRecord{
TaskID: task.ID,
StorageTargetID: task.StorageTargetID,
NodeID: owner.ID,
Status: model.BackupRecordStatusSuccess,
FileName: "remote.tar.gz",
StoragePath: "file/2026/05/09/remote.tar.gz",
StartedAt: startedAt,
CompletedAt: &completedAt,
}
if err := h.records.Create(ctx, backupRecord); err != nil {
t.Fatalf("Create backup record: %v", err)
}
// The restore record is created directly in the running state, bypassing Start.
restore := &model.RestoreRecord{
BackupRecordID: backupRecord.ID,
TaskID: task.ID,
NodeID: owner.ID,
Status: model.RestoreRecordStatusRunning,
StartedAt: startedAt,
TriggeredBy: "agent-test",
}
if err := h.restores.Create(ctx, restore); err != nil {
t.Fatalf("Create restore record: %v", err)
}
spec, err := h.service.GetAgentRestoreSpec(ctx, owner, restore.ID)
if err != nil {
t.Fatalf("owner GetAgentRestoreSpec returned error: %v", err)
}
if spec.RestoreRecordID != restore.ID || spec.StoragePath != backupRecord.StoragePath {
t.Fatalf("unexpected restore spec: %#v", spec)
}
if _, err := h.service.GetAgentRestoreSpec(ctx, other, restore.ID); err == nil {
t.Fatal("expected non-owner node to be forbidden from restore spec")
}
// The owner reports success; the record must reflect it and keep its node.
if err := h.service.UpdateAgentRestore(ctx, owner, restore.ID, AgentRestoreUpdate{
Status: model.RestoreRecordStatusSuccess,
LogAppend: "done\n",
}); err != nil {
t.Fatalf("owner UpdateAgentRestore returned error: %v", err)
}
updated, err := h.restores.FindByID(ctx, restore.ID)
if err != nil {
t.Fatalf("FindByID restore returned error: %v", err)
}
if updated.Status != model.RestoreRecordStatusSuccess || updated.NodeID != owner.ID {
t.Fatalf("unexpected updated restore record: %#v", updated)
}
if err := h.service.UpdateAgentRestore(ctx, other, restore.ID, AgentRestoreUpdate{LogAppend: "bad\n"}); err == nil {
t.Fatal("expected non-owner node to be forbidden from restore update")
}
}
// TestRestoreServiceUpdateAgentRestoreDoesNotOverwriteTerminalRecord verifies
// that a late agent update against an already-failed restore record returns
// no error and leaves the stored status and error message unchanged.
func TestRestoreServiceUpdateAgentRestoreDoesNotOverwriteTerminalRecord(t *testing.T) {
h := newRestoreTestHarness(t, true)
ctx := context.Background()
task, err := h.tasks.FindByID(ctx, 1)
if err != nil {
t.Fatalf("FindByID task: %v", err)
}
owner, err := h.nodes.FindByID(ctx, task.NodeID)
if err != nil {
t.Fatalf("FindByID owner node: %v", err)
}
startedAt := time.Now().UTC().Add(-time.Hour)
completedAt := time.Now().UTC().Add(-time.Minute)
// A restore that already finished as failed with a "timeout" error.
restore := &model.RestoreRecord{
BackupRecordID: 1,
TaskID: task.ID,
NodeID: owner.ID,
Status: model.RestoreRecordStatusFailed,
ErrorMessage: "timeout",
StartedAt: startedAt,
CompletedAt: &completedAt,
TriggeredBy: "agent-test",
}
if err := h.restores.Create(ctx, restore); err != nil {
t.Fatalf("Create restore record: %v", err)
}
// The owning agent reports success after the record is already terminal.
if err := h.service.UpdateAgentRestore(ctx, owner, restore.ID, AgentRestoreUpdate{
Status: model.RestoreRecordStatusSuccess,
ErrorMessage: "late success",
LogAppend: "late log\n",
}); err != nil {
t.Fatalf("UpdateAgentRestore returned error: %v", err)
}
updated, err := h.restores.FindByID(ctx, restore.ID)
if err != nil {
t.Fatalf("FindByID restore returned error: %v", err)
}
if updated.Status != model.RestoreRecordStatusFailed {
t.Fatalf("expected terminal restore status to remain failed, got %#v", updated)
}
if updated.ErrorMessage != "timeout" {
t.Fatalf("expected terminal restore error to remain unchanged, got %q", updated.ErrorMessage)
}
}
func TestRestoreServiceStart_FailsOnNonSuccessBackup(t *testing.T) {
h := newRestoreTestHarness(t, false)
ctx := context.Background()

View File

@@ -1,11 +1,12 @@
import React, { useEffect, useRef, useState } from 'react'
import { Modal, Steps, Button, Space, Message, Spin } from '@arco-design/web-react'
import { Modal, Steps, Button, Space, Message, Spin, Progress } from '@arco-design/web-react'
import { Step1NodeName, type Mode } from './wizard/Step1NodeName'
import { Step2DeployOptions, type DeployOptions } from './wizard/Step2DeployOptions'
import { Step3CommandPreview } from './wizard/Step3CommandPreview'
import { BatchCommandTable, type BatchCommandRow } from './BatchCommandTable'
import { batchCreateNodes, createInstallToken } from '../../services/nodes'
import type { InstallTokenResult } from '../../types/nodes'
import { useAgentDeployFlow, type AgentDeployRow } from './useAgentDeployFlow'
import { buildAgentInstallCommand } from './installCommands'
const Step = Steps.Step
@@ -24,7 +25,9 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
const [mode, setMode] = useState<Mode>('single')
const [singleName, setSingleName] = useState('')
const [batchText, setBatchText] = useState('')
const deployFlow = useAgentDeployFlow()
// 批量进度(已生成 / 总数)
const [batchProgress, setBatchProgress] = useState<{ done: number; total: number } | null>(null)
const [deploy, setDeploy] = useState<DeployOptions>({
mode: 'systemd',
@@ -63,6 +66,7 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
setSingleToken(null)
setSingleNodeInfo(null)
setBatchRows([])
setBatchProgress(null)
}
const handleClose = () => {
@@ -98,21 +102,71 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
Message.warning('请填写 Agent 版本号(形如 v1.7.0')
return
}
// 步骤 1 的批次内去重在前端先提示一次,再由后端最终校验
if (mode === 'batch' && !fixedNode) {
const names = parseBatchNames()
const seen = new Set<string>()
const dups: string[] = []
for (const n of names) {
if (seen.has(n)) dups.push(n)
seen.add(n)
}
if (dups.length > 0) {
Message.warning(`批次内有重复节点名:${Array.from(new Set(dups)).join(', ')}`)
return
}
}
setSubmitting(true)
try {
if (fixedNode) {
const result = await deployFlow.submitExistingNode(fixedNode, deploy)
applySingleOrTableResult(result.rows, fixedNode)
const tok = await createInstallToken(fixedNode.id, {
mode: deploy.mode,
arch: deploy.arch,
agentVersion: deploy.agentVersion,
downloadSrc: deploy.downloadSrc,
ttlSeconds: deploy.ttlSeconds,
})
setSingleNodeInfo(fixedNode)
setSingleToken(tok)
} else if (mode === 'single') {
const result = await deployFlow.submitNewNodes([singleName.trim()], deploy)
applySingleOrTableResult(result.rows)
const created = await batchCreateNodes([singleName.trim()])
const one = created[0]
const tok = await createInstallToken(one.id, {
mode: deploy.mode,
arch: deploy.arch,
agentVersion: deploy.agentVersion,
downloadSrc: deploy.downloadSrc,
ttlSeconds: deploy.ttlSeconds,
})
setSingleNodeInfo({ id: one.id, name: one.name })
setSingleToken(tok)
} else {
const names = parseBatchNames()
const result = await deployFlow.submitNewNodes(names, deploy)
if (mountedRef.current) setBatchRows(toBatchRows(result.rows))
if (result.status === 'partialFailed') {
Message.warning('部分节点安装命令生成失败,可在结果表中查看')
}
const created = await batchCreateNodes(names)
setBatchProgress({ done: 0, total: created.length })
// 并发生成 install tokenPromise.all每完成一个递增 done 计数
let done = 0
const tokens = await Promise.all(
created.map(async (c) => {
const tok = await createInstallToken(c.id, {
mode: deploy.mode,
arch: deploy.arch,
agentVersion: deploy.agentVersion,
downloadSrc: deploy.downloadSrc,
ttlSeconds: deploy.ttlSeconds,
})
done += 1
if (mountedRef.current) setBatchProgress({ done, total: created.length })
return { c, tok }
}),
)
const rows: BatchCommandRow[] = tokens.map(({ c, tok }) => ({
nodeId: c.id,
nodeName: c.name,
command: buildAgentInstallCommand(tok.url, tok.fallbackUrl, tok.scriptBase64),
expiresAt: tok.expiresAt,
}))
if (mountedRef.current) setBatchRows(rows)
}
setStep(2)
onSuccess()
@@ -127,12 +181,14 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
if (!singleNodeInfo) return
setSubmitting(true)
try {
const row = await deployFlow.regenerateNode(singleNodeInfo, deploy)
if (row.status === 'ready' && row.installToken) {
setSingleToken(row.installToken)
} else {
Message.error(row.errorMessage || '重新生成失败')
}
const tok = await createInstallToken(singleNodeInfo.id, {
mode: deploy.mode,
arch: deploy.arch,
agentVersion: deploy.agentVersion,
downloadSrc: deploy.downloadSrc,
ttlSeconds: deploy.ttlSeconds,
})
setSingleToken(tok)
} catch (e: any) {
Message.error(e?.message || '重新生成失败')
} finally {
@@ -140,25 +196,6 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
}
}
// Regenerates the install command for a single row of the batch table.
// The row with the same nodeId is replaced by the regenerated result, whether
// it is ready or failed, and a success or error toast is shown accordingly.
// `submitting` is set for the duration of the request and always cleared.
const retryBatchNode = async (row: BatchCommandRow) => {
setSubmitting(true)
try {
const next = await deployFlow.regenerateNode({ id: row.nodeId, name: row.nodeName }, deploy)
// Functional update so the replacement is applied to the latest rows state.
setBatchRows((rows) => rows.map((item) => (
item.nodeId === row.nodeId ? toBatchRows([next])[0] : item
)))
if (next.status === 'ready') {
Message.success(`节点「${row.nodeName}」安装命令已重新生成`)
} else {
Message.error(next.errorMessage || '重试失败')
}
} catch (e: any) {
Message.error(e?.message || '重试失败')
} finally {
setSubmitting(false)
}
}
const previewParams = {
mode: deploy.mode,
arch: deploy.arch,
@@ -188,6 +225,17 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
{submitting && (
<div style={{ textAlign: 'center', padding: 32 }}>
<Spin />
{batchProgress && (
<div style={{ marginTop: 16, maxWidth: 360, marginLeft: 'auto', marginRight: 'auto' }}>
<div style={{ fontSize: 13, marginBottom: 6 }}>
{batchProgress.done} / {batchProgress.total}
</div>
<Progress
percent={Math.round((batchProgress.done / batchProgress.total) * 100)}
showText
/>
</div>
)}
</div>
)}
@@ -241,7 +289,7 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
onRegenerate={regenerateSingle}
/>
)}
{batchRows.length > 0 && <BatchCommandTable rows={batchRows} onRetryNode={retryBatchNode} />}
{batchRows.length > 0 && <BatchCommandTable rows={batchRows} />}
<div style={{ marginTop: 24, textAlign: 'right' }}>
<Button type="primary" onClick={handleClose}>
@@ -251,31 +299,4 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
)}
</Modal>
)
// Applies the result of a single-node submission to component state.
// Only the first row is considered. A ready row with an install token becomes
// the single-node preview (falling back to fallbackNode for a missing id or
// name). Any other row clears the preview, is shown in the batch table
// instead, and raises an error toast. An empty result is ignored.
function applySingleOrTableResult(rows: AgentDeployRow[], fallbackNode?: { id: number; name: string }) {
const row = rows[0]
if (!row) return
if (row.status === 'ready' && row.installToken) {
setSingleNodeInfo({ id: row.nodeId || fallbackNode?.id || 0, name: row.nodeName || fallbackNode?.name || '' })
setSingleToken(row.installToken)
setBatchRows([])
return
}
setSingleNodeInfo(null)
setSingleToken(null)
setBatchRows(toBatchRows(rows))
Message.error(row.errorMessage || '安装命令生成失败')
}
}
/**
 * Projects deploy-flow rows onto the shape rendered by BatchCommandTable,
 * keeping only the fields the table uses and dropping everything else.
 */
function toBatchRows(rows: AgentDeployRow[]): BatchCommandRow[] {
  return rows.map(
    ({ nodeId, nodeName, status, command, expiresAt, errorMessage, embeddedCommand }) => ({
      nodeId,
      nodeName,
      status,
      command,
      expiresAt,
      errorMessage,
      embeddedCommand,
    }),
  )
}

View File

@@ -1,30 +0,0 @@
import { describe, expect, it, vi } from 'vitest'
import type { BatchCommandRow } from './BatchCommandTable'
import { getExportableBatchRows } from './BatchCommandTable'
// Test fixture: builds a ready, far-future BatchCommandRow and applies the
// given field overrides on top of it.
function row(patch: Partial<BatchCommandRow>): BatchCommandRow {
return {
nodeId: 1,
nodeName: 'prod-a',
status: 'ready',
command: 'curl install',
expiresAt: '2099-01-01T00:00:00Z',
...patch,
}
}
// With the clock frozen at 2026-05-09T00:00:00Z, only the row that is ready
// and expires in the future should be kept for export.
describe('getExportableBatchRows', () => {
it('excludes failed and expired commands from batch export', () => {
vi.useFakeTimers()
vi.setSystemTime(new Date('2026-05-09T00:00:00Z'))
const rows = [
// expires five minutes after the frozen time
row({ nodeId: 1, nodeName: 'ready', expiresAt: '2026-05-09T00:05:00Z' }),
row({ nodeId: 2, nodeName: 'failed', status: 'failed', errorMessage: 'failed' }),
// expired one second before the frozen time
row({ nodeId: 3, nodeName: 'expired', expiresAt: '2026-05-08T23:59:59Z' }),
]
expect(getExportableBatchRows(rows).map((item) => item.nodeName)).toEqual(['ready'])
vi.useRealTimers()
})
})

View File

@@ -1,32 +1,29 @@
import React, { useEffect, useState } from 'react'
import { Table, Button, Space, Message, Typography, Tag } from '@arco-design/web-react'
import { IconCopy, IconDownload, IconRefresh } from '@arco-design/web-react/icon'
import { Table, Button, Space, Message, Typography } from '@arco-design/web-react'
import { IconCopy, IconDownload } from '@arco-design/web-react/icon'
const { Text } = Typography
/** One row of the batch install-command table. */
export interface BatchCommandRow {
/** Node identifier; also used as the key for the per-row countdown. */
nodeId: number
/** Node name shown in the table and in the exported script's section headers. */
nodeName: string
/** 'ready' when an install command was generated, 'failed' otherwise. */
status: 'ready' | 'failed'
/** Install command text that is displayed, copied and exported. */
command: string
/** Expiry timestamp, parsed with `new Date(...)` to compute the remaining seconds. */
expiresAt: string
/** Error text shown in place of the command for failed rows. */
errorMessage?: string
/** NOTE(review): not read anywhere in the visible table code — confirm its intended use. */
embeddedCommand?: string
}
/** Props of BatchCommandTable. */
interface Props {
/** Rows to render, one per node. */
rows: BatchCommandRow[]
/** Optional callback for the retry button on failed rows; the button is hidden when omitted. */
onRetryNode?: (row: BatchCommandRow) => void
}
export function BatchCommandTable({ rows, onRetryNode }: Props) {
export function BatchCommandTable({ rows }: Props) {
const [remaining, setRemaining] = useState<Record<number, number>>({})
useEffect(() => {
const tick = () => {
const next: Record<number, number> = {}
rows.forEach((r) => {
next[r.nodeId] = secondsLeft(r.expiresAt)
const exp = new Date(r.expiresAt).getTime()
next[r.nodeId] = Math.max(0, Math.floor((exp - Date.now()) / 1000))
})
setRemaining(next)
}
@@ -41,13 +38,12 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
}
const exportAll = () => {
const exportRows = getExportableBatchRows(rows)
const content = [
'#!/bin/sh',
'# BackupX Agent 批量部署脚本',
'# 使用方法:在目标机逐个执行下面对应节点命令',
'',
...exportRows.map((r) => `# --- ${r.nodeName} ---\n${r.command}`),
...rows.map((r) => `# --- ${r.nodeName} ---\n${r.command}`),
].join('\n\n')
const blob = new Blob([content], { type: 'text/x-shellscript' })
const url = URL.createObjectURL(blob)
@@ -65,20 +61,11 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
pagination={false}
columns={[
{ title: '节点', dataIndex: 'nodeName', width: 140 },
{
title: '状态', dataIndex: 'status', width: 90,
render: (status: BatchCommandRow['status']) => (
status === 'ready' ? <Tag color="green"></Tag> : <Tag color="red"></Tag>
),
},
{
title: '安装命令',
dataIndex: 'command',
render: (cmd: unknown, row: BatchCommandRow) => {
const left = remaining[row.nodeId] ?? 0
if (row.status === 'failed') {
return <Text type="error" style={{ fontSize: 12 }}>{row.errorMessage || '生成安装命令失败'}</Text>
}
return (
<Text style={{
fontFamily: 'monospace', fontSize: 12, wordBreak: 'break-all',
@@ -93,9 +80,6 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
title: '剩余', dataIndex: 'expiresAt', width: 90,
render: (_v: unknown, row: BatchCommandRow) => {
const left = remaining[row.nodeId] ?? 0
if (row.status === 'failed') {
return <Text type="secondary" style={{ fontSize: 12 }}>-</Text>
}
return (
<Text type={left === 0 ? 'secondary' : 'primary'} style={{ fontSize: 12 }}>
{left === 0 ? '已过期' : `${Math.floor(left / 60)}:${String(left % 60).padStart(2, '0')}`}
@@ -104,17 +88,10 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
},
},
{
title: '操作', width: 110,
title: '操作', width: 80,
render: (_v: unknown, row: BatchCommandRow) => (
<Space>
{row.status === 'ready' && (
<Button size="small" icon={<IconCopy />} onClick={() => copy(row.command)}
disabled={(remaining[row.nodeId] ?? 0) === 0}></Button>
)}
{row.status === 'failed' && onRetryNode && (
<Button size="small" icon={<IconRefresh />} onClick={() => onRetryNode(row)}></Button>
)}
</Space>
<Button size="small" icon={<IconCopy />} onClick={() => copy(row.command)}
disabled={(remaining[row.nodeId] ?? 0) === 0}></Button>
),
},
]}
@@ -123,22 +100,9 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
/>
<div style={{ marginTop: 12, textAlign: 'right' }}>
<Space>
<Button icon={<IconDownload />} onClick={exportAll}
disabled={getExportableBatchRows(rows).length === 0}> .sh</Button>
<Button icon={<IconDownload />} onClick={exportAll}> .sh</Button>
</Space>
</div>
</div>
)
}
/**
 * Returns the whole seconds remaining until `expiresAt`, never negative.
 *
 * An empty or unparsable timestamp is treated as already expired (0).
 * Without the NaN guard, `new Date('garbage').getTime()` yields NaN and
 * `Math.max(0, NaN)` is NaN, which would leak into the countdown rendering
 * ("NaN:NaN") and into comparisons such as `left === 0`.
 */
function secondsLeft(expiresAt: string) {
  if (!expiresAt) {
    return 0
  }
  const expiryMs = new Date(expiresAt).getTime()
  if (Number.isNaN(expiryMs)) {
    return 0
  }
  return Math.max(0, Math.floor((expiryMs - Date.now()) / 1000))
}
/**
 * Returns the rows that may go into the exported script: those whose command
 * was generated successfully ('ready') and has not yet expired.
 */
export function getExportableBatchRows(rows: BatchCommandRow[]) {
  const isExportable = (row: BatchCommandRow) => {
    if (row.status !== 'ready') {
      return false
    }
    return secondsLeft(row.expiresAt) > 0
  }
  return rows.filter(isExportable)
}

View File

@@ -1,77 +0,0 @@
import { describe, expect, it } from 'vitest'
import type { UserInfo } from '../../services/auth'
import { canManageNodes, formatQueueAge, getNodeHealthView } from './NodesPage'
import type { NodeSummary } from '../../types/nodes'
// Test fixture: builds a UserInfo whose username and display name equal the role.
function user(role: string): UserInfo {
return {
id: 1,
username: role,
displayName: role,
role,
}
}
// Only the admin role may manage nodes; other roles and a missing user may not.
describe('canManageNodes', () => {
it('allows only admins to manage deployment operations', () => {
expect(canManageNodes(user('admin'))).toBe(true)
expect(canManageNodes(user('operator'))).toBe(false)
expect(canManageNodes(user('viewer'))).toBe(false)
expect(canManageNodes(null)).toBe(false)
})
})
// Covers formatQueueAge and getNodeHealthView for a degraded online node and
// for an offline node that carries no queue or health fields at all.
describe('node diagnostics helpers', () => {
it('formats queue age and health status from backend summaries', () => {
const node: NodeSummary = {
id: 1,
name: 'edge-a',
hostname: '',
ipAddress: '',
status: 'online',
isLocal: false,
os: 'linux',
arch: 'amd64',
agentVersion: 'v1',
lastSeen: '2026-05-12T00:00:00Z',
createdAt: '2026-05-12T00:00:00Z',
health: 'degraded',
lastError: 'agent timeout',
runningTasks: 1,
queue: {
pending: 2,
dispatched: 1,
depth: 3,
timeouts: 1,
oldestActiveAgeSeconds: 125,
},
}
// 125 seconds is shown as whole minutes, rounded down.
expect(formatQueueAge(node.queue?.oldestActiveAgeSeconds)).toBe('2m')
// The tooltip carries the node's lastError when one is present.
expect(getNodeHealthView(node)).toEqual({
text: '异常',
badgeStatus: 'warning',
tagColor: 'orangered',
tooltip: 'agent timeout',
})
})
it('treats offline nodes as offline even without queue errors', () => {
const node = {
id: 2,
name: 'edge-b',
hostname: '',
ipAddress: '',
status: 'offline',
isLocal: false,
os: '',
arch: '',
agentVersion: '',
lastSeen: '',
createdAt: '',
} satisfies NodeSummary
expect(formatQueueAge(0)).toBe('-')
expect(getNodeHealthView(node).text).toBe('离线')
})
})

View File

@@ -10,43 +10,12 @@ import type { NodeSummary } from '../../types/nodes'
import { listNodes, deleteNode, updateNode, rotateNodeToken } from '../../services/nodes'
import { fetchSystemInfo } from '../../services/system'
import { AgentInstallWizard } from './AgentInstallWizard'
import { useAuthStore } from '../../stores/auth'
import { isAdmin } from '../../utils/permissions'
import type { UserInfo } from '../../services/auth'
const { Text } = Typography
/**
 * Tells whether `user` may perform node management operations.
 * The decision is delegated entirely to `isAdmin`.
 *
 * @param user - The current user, or null/undefined when none is available.
 * @returns Whatever `isAdmin(user)` reports.
 */
export function canManageNodes(user: UserInfo | null | undefined): boolean {
  const allowed = isAdmin(user)
  return allowed
}
/**
 * Formats a queue age given in seconds as a short label.
 *
 * @param seconds - Age in seconds; may be omitted.
 * @returns '-' when the age is missing or not positive, otherwise the age
 *   as `<n>s` (under a minute), `<n>m` (under an hour, floored) or
 *   `<n>h` (floored).
 */
export function formatQueueAge(seconds?: number): string {
  if (seconds === undefined || !(seconds > 0)) {
    return '-'
  }
  if (seconds >= 3600) {
    return `${Math.floor(seconds / 3600)}h`
  }
  if (seconds >= 60) {
    return `${Math.floor(seconds / 60)}m`
  }
  return `${seconds}s`
}
/**
 * Maps a node summary to the display properties of its health cell.
 *
 * Precedence:
 *  1. offline  - status is not 'online', or health is 'offline';
 *  2. degraded - health is 'degraded', the queue reports timeouts, or a
 *     lastError is present (the tooltip prefers lastError);
 *  3. healthy  - everything else.
 *
 * @param node - Node summary to inspect.
 * @returns An object with `text`, `badgeStatus`, `tagColor` and `tooltip`.
 */
export function getNodeHealthView(node: NodeSummary) {
  const isOffline = node.status !== 'online' || node.health === 'offline'
  if (isOffline) {
    return { text: '离线', badgeStatus: 'default' as const, tagColor: 'gray', tooltip: '节点未在线' }
  }

  const hasProblem = Boolean(node.health === 'degraded' || node.queue?.timeouts || node.lastError)
  if (!hasProblem) {
    return { text: '健康', badgeStatus: 'success' as const, tagColor: 'green', tooltip: 'Agent 心跳与队列状态正常' }
  }

  const tooltip = node.lastError || '存在超时或失败的 Agent 命令'
  return {
    text: '异常',
    badgeStatus: 'warning' as const,
    tagColor: 'orangered',
    tooltip,
  }
}
export default function NodesPage() {
const [nodes, setNodes] = useState<NodeSummary[]>([])
const [loading, setLoading] = useState(false)
const currentUser = useAuthStore((state) => state.user)
const manageable = canManageNodes(currentUser)
const [wizardVisible, setWizardVisible] = useState(false)
const [wizardFixedNode, setWizardFixedNode] = useState<{ id: number; name: string } | undefined>()
@@ -144,18 +113,10 @@ export default function NodesPage() {
),
},
{
title: '健康', dataIndex: 'health', width: 150,
render: (_: string, record: NodeSummary) => {
const health = getNodeHealthView(record)
return (
<Tooltip content={health.tooltip}>
<Space size={6}>
<Badge status={health.badgeStatus} />
<Tag color={health.tagColor}>{health.text}</Tag>
</Space>
</Tooltip>
)
},
title: '状态', dataIndex: 'status', width: 100,
render: (status: string) => status === 'online'
? <Badge status="success" text="在线" />
: <Badge status="default" text="离线" />,
},
{ title: '主机名', dataIndex: 'hostname', render: (v: string) => v || '-' },
{ title: 'IP 地址', dataIndex: 'ipAddress', render: (v: string) => v || '-' },
@@ -168,27 +129,6 @@ export default function NodesPage() {
title: 'Agent 版本', dataIndex: 'agentVersion', width: 140,
render: (v: string) => renderAgentVersion(v, masterVersion),
},
{
title: '队列', dataIndex: 'queue', width: 160,
render: (_: unknown, record: NodeSummary) => {
const queue = record.queue
if (!queue || queue.depth === 0) {
return <Text type="secondary"></Text>
}
return (
<Tooltip content={`pending ${queue.pending} / dispatched ${queue.dispatched} / oldest ${formatQueueAge(queue.oldestActiveAgeSeconds)}`}>
<Space size={4}>
<Tag color="arcoblue"> {queue.depth}</Tag>
{queue.timeouts > 0 && <Tag color="orangered"> {queue.timeouts}</Tag>}
</Space>
</Tooltip>
)
},
},
{
title: '运行中', dataIndex: 'runningTasks', width: 90,
render: (v: number | undefined) => v && v > 0 ? <Tag color="green">{v}</Tag> : <Text type="secondary">0</Text>,
},
{
title: '标签 / 节点池', dataIndex: 'labels', width: 180,
render: (v: string) => {
@@ -203,43 +143,38 @@ export default function NodesPage() {
},
{
title: '操作', width: 180,
render: (_: unknown, record: NodeSummary) => {
if (!manageable) {
return <Text type="secondary">-</Text>
}
return (
<Space>
<Button type="text" icon={<IconEdit />} size="small"
onClick={() => {
setEditNode(record); setEditName(record.name)
setEditLabels(record.labels || '')
setEditMaxConcurrent(record.maxConcurrent || 0)
setEditBandwidthLimit(record.bandwidthLimit || '')
setEditVisible(true)
}} />
{!record.isLocal && (
<>
<Dropdown trigger="click" droplist={(
<Menu>
<Menu.Item key="install"
onClick={() => { setWizardFixedNode({ id: record.id, name: record.name }); setWizardVisible(true) }}>
</Menu.Item>
<Menu.Item key="rotate" onClick={() => handleRotate(record)}>
Token
</Menu.Item>
</Menu>
)}>
<Button type="text" icon={<IconMore />} size="small" />
</Dropdown>
<Popconfirm title="确定删除该节点?" onOk={() => handleDelete(record.id)}>
<Button type="text" status="danger" icon={<IconDelete />} size="small" />
</Popconfirm>
</>
)}
</Space>
)
},
render: (_: unknown, record: NodeSummary) => (
<Space>
<Button type="text" icon={<IconEdit />} size="small"
onClick={() => {
setEditNode(record); setEditName(record.name)
setEditLabels(record.labels || '')
setEditMaxConcurrent(record.maxConcurrent || 0)
setEditBandwidthLimit(record.bandwidthLimit || '')
setEditVisible(true)
}} />
{!record.isLocal && (
<>
<Dropdown trigger="click" droplist={(
<Menu>
<Menu.Item key="install"
onClick={() => { setWizardFixedNode({ id: record.id, name: record.name }); setWizardVisible(true) }}>
</Menu.Item>
<Menu.Item key="rotate" onClick={() => handleRotate(record)}>
Token
</Menu.Item>
</Menu>
)}>
<Button type="text" icon={<IconMore />} size="small" />
</Dropdown>
<Popconfirm title="确定删除该节点?" onOk={() => handleDelete(record.id)}>
<Button type="text" status="danger" icon={<IconDelete />} size="small" />
</Popconfirm>
</>
)}
</Space>
),
},
]
@@ -248,12 +183,12 @@ export default function NodesPage() {
<PageHeader
title="节点管理"
subTitle="管理集群中的服务器节点"
extra={manageable ? (
extra={
<Button type="primary" icon={<IconPlus />}
onClick={() => { setWizardFixedNode(undefined); setWizardVisible(true) }}>
</Button>
) : undefined}
}
/>
<Card style={{ marginTop: 16 }}>

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { buildAgentDownloadCommand, buildAgentInstallCommand, buildEmbeddedAgentInstallCommand } from './installCommands'
import { buildAgentDownloadCommand, buildAgentInstallCommand } from './installCommands'
describe('install command builders', () => {
it('adds script marker validation and fallback install path', () => {
@@ -22,24 +22,16 @@ describe('install command builders', () => {
expect(cmd).toContain('non-script content')
})
it('keeps URL install command as primary even when embedded script is available', () => {
it('prefers embedded script content when available', () => {
const cmd = buildAgentInstallCommand(
'https://master.example.com/api/install/abc',
'https://master.example.com/install/abc',
'IyEvYmluL3NoCg==',
)
expect(cmd).toContain('https://master.example.com/api/install/abc')
expect(cmd).toContain('https://master.example.com/install/abc')
expect(cmd).not.toContain('IyEvYmluL3NoCg==')
})
it('builds embedded fallback command explicitly', () => {
const cmd = buildEmbeddedAgentInstallCommand('IyEvYmluL3NoCg==')
expect(cmd).toContain('base64 -d')
expect(cmd).toContain('base64 -D')
expect(cmd).toContain('BACKUPX_AGENT_INSTALL_V1')
expect(cmd).toContain("'IyEvYmluL3NoCg=='")
expect(cmd).not.toContain('https://master.example.com/api/install/abc')
})
})

View File

@@ -12,7 +12,19 @@ function runScriptCommand(path: string) {
return `if [ "$(id -u)" -eq 0 ]; then sh ${path}; else sudo sh ${path}; fi`
}
export function buildAgentInstallCommand(url: string, fallbackUrl?: string, _scriptBase64?: string) {
export function buildAgentInstallCommand(url: string, fallbackUrl?: string, scriptBase64?: string) {
if (scriptBase64?.trim()) {
const marker = shellQuote(INSTALL_MAGIC_MARKER)
return [
'enc=$(mktemp)',
'tmp=$(mktemp)',
`printf %s ${shellQuote(scriptBase64.trim())} > "$enc"`,
'(base64 -d < "$enc" > "$tmp" 2>/dev/null || base64 -D < "$enc" > "$tmp")',
`{ grep -q ${marker} "$tmp" || { echo 'BackupX embedded installer is invalid.' >&2; head -5 "$tmp" >&2; false; }; }`,
runScriptCommand('"$tmp"'),
].join(' && ') + '; rc=$?; rm -f "$enc" "$tmp"; test $rc -eq 0'
}
const primary = url.trim()
const fallback = (fallbackUrl || legacyInstallUrl(primary)).trim()
const urls = fallback && fallback !== primary ? [primary, fallback] : [primary]
@@ -29,7 +41,17 @@ export function buildAgentInstallCommand(url: string, fallbackUrl?: string, _scr
].join(' && ') + '; rc=$?; rm -f "$tmp"; test $rc -eq 0'
}
export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, _scriptBase64?: string) {
export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, scriptBase64?: string) {
if (scriptBase64?.trim()) {
const marker = shellQuote(INSTALL_MAGIC_MARKER)
return [
`printf %s ${shellQuote(scriptBase64.trim())} > /tmp/bx-agent-install.b64`,
'(base64 -d < /tmp/bx-agent-install.b64 > /tmp/bx-agent-install.sh 2>/dev/null || base64 -D < /tmp/bx-agent-install.b64 > /tmp/bx-agent-install.sh)',
`{ grep -q ${marker} /tmp/bx-agent-install.sh || { echo 'BackupX embedded installer is invalid.' >&2; head -5 /tmp/bx-agent-install.sh >&2; false; }; }`,
runScriptCommand('/tmp/bx-agent-install.sh'),
].join(' && ')
}
const primary = url.trim()
const fallback = (fallbackUrl || legacyInstallUrl(primary)).trim()
const marker = shellQuote(INSTALL_MAGIC_MARKER)
@@ -43,15 +65,3 @@ export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, _sc
runScriptCommand('/tmp/bx-agent-install.sh'),
].join(' && ')
}
/**
 * Builds a one-line shell command that installs the agent from an embedded,
 * base64-encoded installer script instead of downloading it.
 *
 * The generated command writes the payload to a temp file, decodes it
 * (trying `base64 -d` first, then `base64 -D`), checks that the decoded
 * script contains the install marker, runs it, and finally removes both
 * temp files while preserving the exit status of the chain.
 *
 * @param scriptBase64 - Base64 text of the installer script; surrounding
 *   whitespace is trimmed before it is shell-quoted.
 * @returns The shell command as a single string.
 */
export function buildEmbeddedAgentInstallCommand(scriptBase64: string) {
  const quotedMarker = shellQuote(INSTALL_MAGIC_MARKER)
  const quotedPayload = shellQuote(scriptBase64.trim())

  const steps = [
    'enc=$(mktemp)',
    'tmp=$(mktemp)',
    `printf %s ${quotedPayload} > "$enc"`,
    '(base64 -d < "$enc" > "$tmp" 2>/dev/null || base64 -D < "$enc" > "$tmp")',
    `{ grep -q ${quotedMarker} "$tmp" || { echo 'BackupX embedded installer is invalid.' >&2; head -5 "$tmp" >&2; false; }; }`,
    runScriptCommand('"$tmp"'),
  ]

  // Cleanup runs unconditionally; `test $rc -eq 0` re-exposes the chain's exit status.
  const cleanup = '; rc=$?; rm -f "$enc" "$tmp"; test $rc -eq 0'
  return `${steps.join(' && ')}${cleanup}`
}

View File

@@ -1,90 +0,0 @@
import { describe, expect, it } from 'vitest'
import type { InstallTokenInput, InstallTokenResult } from '../../types/nodes'
import { createAgentDeployFlow } from './useAgentDeployFlow'
/** Returns a fresh InstallTokenInput fixture shared by the deploy-flow tests. */
function deployOptions(): InstallTokenInput {
  const options: InstallTokenInput = {
    mode: 'systemd',
    arch: 'auto',
    agentVersion: 'v2.3.1',
    downloadSrc: 'github',
    ttlSeconds: 900,
  }
  return options
}
/**
 * Builds an InstallTokenResult fixture.
 *
 * @param overrides - Fields that replace the defaults; later keys win.
 * @returns A new object combining the defaults with `overrides`.
 */
function tokenResult(overrides: Partial<InstallTokenResult> = {}): InstallTokenResult {
  const defaults: InstallTokenResult = {
    installToken: 'install-token',
    expiresAt: '2099-01-01T00:00:00Z',
    url: 'https://master.example.com/api/install/install-token',
    fallbackUrl: 'https://master.example.com/install/install-token',
    scriptBase64: 'IyEvYmluL3NoCg==',
    composeUrl: '',
    fallbackComposeUrl: '',
  }
  return { ...defaults, ...overrides }
}
// Exercises createAgentDeployFlow with stubbed batchCreateNodes/createInstallToken deps.
describe('createAgentDeployFlow', () => {
// Happy path: node creation happens before token issuance, and the row carries both commands.
it('creates one node then issues one install token', async () => {
const calls: string[] = []
const flow = createAgentDeployFlow({
batchCreateNodes: async (names) => {
calls.push(`batch:${names.join(',')}`)
return [{ id: 7, name: names[0] }]
},
createInstallToken: async (nodeId) => {
calls.push(`token:${nodeId}`)
return tokenResult()
},
})
const result = await flow.submitNewNodes(['prod-a'], deployOptions())
expect(calls).toEqual(['batch:prod-a', 'token:7'])
expect(result.status).toBe('ready')
expect(result.rows).toHaveLength(1)
expect(result.rows[0]).toMatchObject({
nodeId: 7,
nodeName: 'prod-a',
status: 'ready',
})
// The primary command is URL based; the embedded command carries the base64 script.
expect(result.rows[0].command).toContain('/api/install/install-token')
expect(result.rows[0].embeddedCommand).toContain('IyEvYmluL3NoCg==')
})
// A failing token request marks only its own row as failed; row order follows node order.
it('returns partialFailed when one batch token request fails', async () => {
const flow = createAgentDeployFlow({
batchCreateNodes: async (names) => names.map((name, index) => ({ id: index + 1, name })),
createInstallToken: async (nodeId) => {
if (nodeId === 2) {
throw new Error('token service unavailable')
}
return tokenResult({ installToken: `tok-${nodeId}`, url: `https://master.example.com/api/install/tok-${nodeId}` })
},
})
const result = await flow.submitNewNodes(['prod-a', 'prod-b', 'prod-c'], deployOptions())
expect(result.status).toBe('partialFailed')
expect(result.rows.map((row) => row.status)).toEqual(['ready', 'failed', 'ready'])
expect(result.rows[1]).toMatchObject({
nodeId: 2,
nodeName: 'prod-b',
status: 'failed',
errorMessage: 'token service unavailable',
})
})
// Names are compared after trimming, so 'prod-a' and ' prod-a ' collide before any node is created.
it('rejects duplicate names before creating nodes', async () => {
const flow = createAgentDeployFlow({
batchCreateNodes: async () => {
throw new Error('should not call batchCreateNodes')
},
createInstallToken: async () => tokenResult(),
})
await expect(flow.submitNewNodes(['prod-a', ' prod-a '], deployOptions()))
.rejects.toThrow('批次内重复节点名')
})
})

View File

@@ -1,146 +0,0 @@
import { useMemo } from 'react'
import type { BatchCreateResult, InstallTokenInput, InstallTokenResult } from '../../types/nodes'
import { batchCreateNodes, createInstallToken } from '../../services/nodes'
import {
buildAgentInstallCommand,
buildEmbeddedAgentInstallCommand,
} from './installCommands'
/** Outcome of issuing an install token for a single node. */
export type DeployRowStatus = 'ready' | 'failed'
/** Outcome of a whole deploy request: 'partialFailed' as soon as any row failed, otherwise 'ready'. */
export type DeployResultStatus = 'ready' | 'partialFailed'
/** Minimal node identity the deploy flow needs: the id used to request a token and the display name. */
export interface AgentDeployNode {
id: number
name: string
}
/** Per-node result of the deploy flow: either a ready install command or a failure description. */
export interface AgentDeployRow {
nodeId: number
nodeName: string
status: DeployRowStatus
// Install command built from the token URLs; '' on failed rows.
command: string
// Expiry copied from the install token; '' on failed rows.
expiresAt: string
// Raw token result; populated on ready rows only.
installToken?: InstallTokenResult
// Embedded-script install command; populated only when the token carries scriptBase64.
embeddedCommand?: string
// Failure description; populated on failed rows only.
errorMessage?: string
}
/** Aggregate result of a deploy request: the overall status plus one row per node. */
export interface AgentDeployResult {
status: DeployResultStatus
rows: AgentDeployRow[]
}
/** Service functions injected into createAgentDeployFlow so the flow can run against stubs. */
interface AgentDeployFlowDeps {
// Creates nodes for the given names and resolves with the created entries.
batchCreateNodes: (names: string[]) => Promise<BatchCreateResult[]>
// Issues an install token for one node using the given install options.
createInstallToken: (nodeId: number, input: InstallTokenInput) => Promise<InstallTokenResult>
}
// Upper bound on install-token requests in flight at once when submitting a batch of new nodes.
const TOKEN_CONCURRENCY = 4
/**
 * Creates the agent deploy flow on top of the injected service functions.
 *
 * @param deps - Node-creation and token-issuing functions.
 * @returns An object with three async operations:
 *   - `submitNewNodes`: validates the names, creates the nodes, then issues
 *     a token per node with bounded concurrency;
 *   - `submitExistingNode`: issues a token for one node and wraps it in a result;
 *   - `regenerateNode`: issues a token for one node and returns the bare row.
 *   Token failures never reject; they are reported as rows with status 'failed'.
 */
export function createAgentDeployFlow(deps: AgentDeployFlowDeps) {
  // Issues a token for one node, converting any failure (including command
  // construction) into a failed row rather than a rejection.
  async function issueTokenForNode(node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployRow> {
    try {
      return readyRow(node, await deps.createInstallToken(node.id, input))
    } catch (error) {
      const failedRow: AgentDeployRow = {
        nodeId: node.id,
        nodeName: node.name,
        status: 'failed',
        command: '',
        expiresAt: '',
        errorMessage: resolveErrorMessage(error),
      }
      return failedRow
    }
  }

  const submitNewNodes = async (names: string[], input: InstallTokenInput): Promise<AgentDeployResult> => {
    const createdNodes = await deps.batchCreateNodes(normalizeNodeNames(names))
    const rows = await mapWithConcurrency(
      createdNodes,
      TOKEN_CONCURRENCY,
      (node) => issueTokenForNode(node, input),
    )
    return resultFromRows(rows)
  }

  const submitExistingNode = async (node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployResult> => {
    const singleRow = await issueTokenForNode(node, input)
    return resultFromRows([singleRow])
  }

  const regenerateNode = async (node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployRow> => {
    return issueTokenForNode(node, input)
  }

  return { submitNewNodes, submitExistingNode, regenerateNode }
}
/**
 * React hook returning a deploy flow wired to the real node services.
 * The flow is memoized with an empty dependency list.
 */
export function useAgentDeployFlow() {
  const flow = useMemo(
    () => createAgentDeployFlow({ batchCreateNodes, createInstallToken }),
    [],
  )
  return flow
}
/**
 * Builds the 'ready' row for a node from its freshly issued install token.
 *
 * @param node - Node the token belongs to.
 * @param token - Install token result returned by the token service.
 * @returns A row holding the URL-based install command, the token expiry,
 *   the raw token and, when the token carries `scriptBase64`, the embedded
 *   install command (otherwise `undefined`).
 */
function readyRow(node: AgentDeployNode, token: InstallTokenResult): AgentDeployRow {
  const { url, fallbackUrl, expiresAt, scriptBase64 } = token
  const command = buildAgentInstallCommand(url, fallbackUrl)
  const embeddedCommand = scriptBase64
    ? buildEmbeddedAgentInstallCommand(scriptBase64)
    : undefined
  return {
    nodeId: node.id,
    nodeName: node.name,
    status: 'ready',
    command,
    expiresAt,
    installToken: token,
    embeddedCommand,
  }
}
/**
 * Wraps rows in a result whose status is 'partialFailed' when at least one
 * row failed and 'ready' otherwise (including for an empty list).
 */
function resultFromRows(rows: AgentDeployRow[]): AgentDeployResult {
  const allSucceeded = rows.every((row) => row.status !== 'failed')
  const status: DeployResultStatus = allSucceeded ? 'ready' : 'partialFailed'
  return { status, rows }
}
/**
 * Trims node names, drops blank entries and validates the batch.
 *
 * @param names - Raw node names as entered by the user.
 * @returns The trimmed, non-empty names in their original order.
 * @throws Error when no names remain, when more than 50 remain, or when a
 *   name occurs more than once (checked in that order, case-sensitively).
 */
function normalizeNodeNames(names: string[]) {
  const cleaned = names.map((name) => name.trim()).filter(Boolean)

  if (cleaned.length === 0) {
    throw new Error('请至少输入一个节点名称')
  }
  if (cleaned.length > 50) {
    throw new Error('单次最多创建 50 个节点')
  }

  // Report the first name that repeats an earlier entry.
  const seen = new Set<string>()
  const duplicate = cleaned.find((name) => {
    if (seen.has(name)) {
      return true
    }
    seen.add(name)
    return false
  })
  if (duplicate !== undefined) {
    throw new Error(`批次内重复节点名：${duplicate}`)
  }

  return cleaned
}
/**
 * Maps `items` through an async `mapper`, running at most `concurrency`
 * mapper calls at a time, and resolves with the results in input order.
 *
 * @param items - Values to map; not modified.
 * @param concurrency - Maximum number of mapper calls in flight. Values that
 *   are zero, negative, below 1 or NaN are treated as 1 so that every item
 *   is still processed; fractional values are rounded down.
 * @param mapper - Async function receiving the item and its index.
 * @returns Results positioned at the index of their source item.
 * @throws Rejects with the first mapper rejection (via `Promise.all`).
 */
async function mapWithConcurrency<T, R>(
  items: T[],
  concurrency: number,
  mapper: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length)
  let nextIndex = 0

  // Without this clamp a limit of 0 / negative / NaN would start no workers
  // and the function would silently return an unfilled array.
  const requested = Math.floor(concurrency)
  const limit = requested >= 1 ? requested : 1
  const workerCount = Math.min(limit, items.length)

  // Each worker repeatedly claims the next unprocessed index until none remain.
  const workers = Array.from({ length: workerCount }, async () => {
    while (nextIndex < items.length) {
      const index = nextIndex
      nextIndex += 1
      results[index] = await mapper(items[index], index)
    }
  })

  await Promise.all(workers)
  return results
}
/**
 * Extracts a user-facing message from a caught value.
 *
 * @param error - Anything that was thrown.
 * @returns The error's message when `error` is an Error with a non-empty
 *   message; otherwise the generic fallback text.
 */
function resolveErrorMessage(error: unknown) {
  const fallback = '生成安装命令失败'
  if (!(error instanceof Error)) {
    return fallback
  }
  return error.message || fallback
}

View File

@@ -3,7 +3,7 @@ import { Typography, Button, Space, Collapse, Spin, Message, Tag } from '@arco-d
import { IconCopy, IconRefresh } from '@arco-design/web-react/icon'
import { fetchScriptPreview } from '../../../services/nodes'
import type { InstallTokenResult, InstallMode } from '../../../types/nodes'
import { buildAgentDownloadCommand, buildAgentInstallCommand, buildEmbeddedAgentInstallCommand } from '../installCommands'
import { buildAgentDownloadCommand, buildAgentInstallCommand } from '../installCommands'
const { Text } = Typography
@@ -30,9 +30,8 @@ export function Step3CommandPreview({ nodeId, nodeName, token, mode, previewPara
}, [token.expiresAt])
const expired = remaining === 0
const command = buildAgentInstallCommand(token.url, token.fallbackUrl)
const fallbackCommand = buildAgentDownloadCommand(token.url, token.fallbackUrl)
const embeddedCommand = token.scriptBase64 ? buildEmbeddedAgentInstallCommand(token.scriptBase64) : null
const command = buildAgentInstallCommand(token.url, token.fallbackUrl, token.scriptBase64)
const fallbackCommand = buildAgentDownloadCommand(token.url, token.fallbackUrl, token.scriptBase64)
const dockerComposeCmd = mode === 'docker' && token.composeUrl
? `curl -fsSL ${token.composeUrl} -o docker-compose.yml && docker-compose up -d`
: null
@@ -108,22 +107,8 @@ export function Step3CommandPreview({ nodeId, nodeName, token, mode, previewPara
</div>
)}
{embeddedCommand && (
<div style={{ background: 'var(--color-fill-2)', padding: '12px 14px', borderRadius: 6, marginBottom: 12 }}>
<Text type="secondary" style={{ fontSize: 12, display: 'block', marginBottom: 4 }}>
使
</Text>
<Text style={{ fontFamily: 'monospace', fontSize: 13, wordBreak: 'break-all', userSelect: 'all' }}>
{embeddedCommand}
</Text>
<div style={{ marginTop: 8 }}>
<Button size="small" icon={<IconCopy />} onClick={() => copy(embeddedCommand)}></Button>
</div>
</div>
)}
<Text type="secondary" style={{ fontSize: 12, display: 'block', marginBottom: 8 }}>
install token TTL token
token TTL
</Text>
<Collapse bordered={false} onChange={(_key, keys) => {

View File

@@ -14,19 +14,6 @@ export interface NodeSummary {
/** CSV 节点标签;任务的 NodePoolTag 命中这里任一即会被调度到本节点 */
labels?: string
createdAt: string
queue?: NodeQueueSummary
runningTasks?: number
lastError?: string
health?: 'healthy' | 'degraded' | 'offline'
}
/** Per-node command queue counters reported alongside a node summary. */
export interface NodeQueueSummary {
// Count of queued commands in the pending state.
pending: number
// Count of commands in the dispatched state.
dispatched: number
// Overall queue depth; presumably pending + dispatched — TODO confirm against the backend.
depth: number
// Count of timed-out commands; any non-zero value marks the node as degraded in the UI.
timeouts: number
// NOTE(review): looks like the timestamp of the oldest still-active command — confirm format with the backend.
oldestActiveAt?: string
// Age in seconds of the oldest active command; rendered through formatQueueAge.
oldestActiveAgeSeconds?: number
}
export interface DirEntry {