mirror of
https://github.com/Awuqing/BackupX.git
synced 2026-06-25 03:23:41 +08:00
Compare commits
6 Commits
v2.3.5
...
feat/compl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1e386e1205 | ||
|
|
6e7a884c64 | ||
|
|
0b2263086f | ||
|
|
2f494818cf | ||
|
|
7dfd12254b | ||
|
|
2997e971a6 |
5
.github/workflows/release.yml
vendored
5
.github/workflows/release.yml
vendored
@@ -116,15 +116,12 @@ jobs:
|
||||
fi
|
||||
cp deploy/nginx.conf "${ARCHIVE_NAME}/nginx.conf" 2>/dev/null || true
|
||||
tar czf "${ARCHIVE_NAME}.tar.gz" "${ARCHIVE_NAME}"
|
||||
cp "${ARCHIVE_NAME}.tar.gz" "backupx-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz"
|
||||
|
||||
- name: Upload to GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.VERSION }}
|
||||
files: |
|
||||
backupx-${{ env.VERSION }}-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
|
||||
backupx-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
|
||||
files: backupx-${{ env.VERSION }}-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz
|
||||
generate_release_notes: true
|
||||
|
||||
# ─── Job 3: Docker 多架构 → Docker Hub ───
|
||||
|
||||
@@ -62,8 +62,6 @@ curl -LO https://github.com/Awuqing/BackupX/releases/latest/download/backupx-lin
|
||||
tar xzf backupx-*.tar.gz && cd backupx-* && sudo ./install.sh
|
||||
```
|
||||
|
||||
For ARM64 hosts, use `backupx-linux-arm64.tar.gz`. The archive contains `backupx`, `web/`, `config.example.yaml`, and `install.sh`; run `install.sh` from the extracted directory.
|
||||
|
||||
Open `http://your-server:8340`, create the admin account, then follow the [5-minute Quick Start](https://awuqing.github.io/BackupX/docs/getting-started/quick-start).
|
||||
|
||||
## Documentation
|
||||
|
||||
@@ -62,8 +62,6 @@ curl -LO https://github.com/Awuqing/BackupX/releases/latest/download/backupx-lin
|
||||
tar xzf backupx-*.tar.gz && cd backupx-* && sudo ./install.sh
|
||||
```
|
||||
|
||||
ARM64 主机请下载 `backupx-linux-arm64.tar.gz`。预编译包内包含 `backupx`、`web/`、`config.example.yaml` 和 `install.sh`,请在解压后的目录内执行 `install.sh`。
|
||||
|
||||
打开 `http://your-server:8340`,创建管理员账户,按 [5 分钟快速开始](https://awuqing.github.io/BackupX/zh-Hans/docs/getting-started/quick-start) 完成首次备份。
|
||||
|
||||
## 文档
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "${1:-}" = "agent" ]; then
|
||||
exec /app/bin/backupx "$@"
|
||||
fi
|
||||
|
||||
# Backend listens on internal port 8341, Nginx exposes 8340
|
||||
export BACKUPX_SERVER_PORT="${BACKUPX_SERVER_PORT_INTERNAL:-8341}"
|
||||
|
||||
|
||||
@@ -1,25 +1,17 @@
|
||||
#!/bin/sh
|
||||
set -eu
|
||||
|
||||
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||
PROJECT_ROOT=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)
|
||||
PROJECT_ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
|
||||
PREFIX="${PREFIX:-/opt/backupx}"
|
||||
ETC_DIR="${ETC_DIR:-/etc/backupx}"
|
||||
SERVICE_NAME="backupx"
|
||||
APP_USER="backupx"
|
||||
APP_GROUP="backupx"
|
||||
if [ -f "$SCRIPT_DIR/backupx" ] && [ -d "$SCRIPT_DIR/web" ]; then
|
||||
BIN_SOURCE="${BIN_SOURCE:-$SCRIPT_DIR/backupx}"
|
||||
WEB_SOURCE="${WEB_SOURCE:-$SCRIPT_DIR/web}"
|
||||
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$SCRIPT_DIR/config.example.yaml}"
|
||||
NGINX_SOURCE="${NGINX_SOURCE:-$SCRIPT_DIR/nginx.conf}"
|
||||
else
|
||||
BIN_SOURCE="${BIN_SOURCE:-$PROJECT_ROOT/server/backupx}"
|
||||
WEB_SOURCE="${WEB_SOURCE:-$PROJECT_ROOT/web/dist}"
|
||||
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$PROJECT_ROOT/server/config.example.yaml}"
|
||||
NGINX_SOURCE="${NGINX_SOURCE:-$PROJECT_ROOT/deploy/nginx.conf}"
|
||||
fi
|
||||
BIN_SOURCE="${BIN_SOURCE:-$PROJECT_ROOT/server/backupx}"
|
||||
WEB_SOURCE="${WEB_SOURCE:-$PROJECT_ROOT/web/dist}"
|
||||
CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$PROJECT_ROOT/server/config.example.yaml}"
|
||||
SERVICE_SOURCE="${SERVICE_SOURCE:-$PROJECT_ROOT/deploy/backupx.service}"
|
||||
NGINX_SOURCE="${NGINX_SOURCE:-$PROJECT_ROOT/deploy/nginx.conf}"
|
||||
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
echo "请使用 root 或 sudo 执行安装脚本。" >&2
|
||||
@@ -28,20 +20,13 @@ fi
|
||||
|
||||
if [ ! -f "$BIN_SOURCE" ]; then
|
||||
echo "未找到后端二进制:$BIN_SOURCE" >&2
|
||||
echo "源码树安装请先执行:cd \"$PROJECT_ROOT/server\" && go build -o backupx ./cmd/backupx" >&2
|
||||
echo "发布包安装请确认当前目录包含 ./backupx、./web 和 ./install.sh。" >&2
|
||||
echo "请先执行:cd \"$PROJECT_ROOT/server\" && go build -o backupx ./cmd/backupx" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$WEB_SOURCE" ]; then
|
||||
echo "未找到前端构建产物:$WEB_SOURCE" >&2
|
||||
echo "源码树安装请先执行:cd \"$PROJECT_ROOT/web\" && npm run build" >&2
|
||||
echo "发布包安装请确认当前目录包含 ./web。" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$CONFIG_TEMPLATE" ]; then
|
||||
echo "未找到配置模板:$CONFIG_TEMPLATE" >&2
|
||||
echo "请先执行:cd \"$PROJECT_ROOT/web\" && npm run build" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -62,34 +47,11 @@ if [ ! -f "$ETC_DIR/config.yaml" ]; then
|
||||
install -m 0640 "$CONFIG_TEMPLATE" "$ETC_DIR/config.yaml"
|
||||
fi
|
||||
|
||||
if [ -f "$SERVICE_SOURCE" ]; then
|
||||
install -m 0644 "$SERVICE_SOURCE" "/etc/systemd/system/$SERVICE_NAME.service"
|
||||
else
|
||||
cat > "/etc/systemd/system/$SERVICE_NAME.service" <<UNIT
|
||||
[Unit]
|
||||
Description=BackupX API Service
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=$APP_USER
|
||||
Group=$APP_GROUP
|
||||
WorkingDirectory=$PREFIX
|
||||
ExecStart=$PREFIX/bin/backupx -config $ETC_DIR/config.yaml
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
NoNewPrivileges=true
|
||||
LimitNOFILE=65535
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
UNIT
|
||||
fi
|
||||
install -m 0644 "$SERVICE_SOURCE" "/etc/systemd/system/$SERVICE_NAME.service"
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now "$SERVICE_NAME"
|
||||
|
||||
if [ -d "/etc/nginx/conf.d" ] && [ -f "$NGINX_SOURCE" ]; then
|
||||
if [ -d "/etc/nginx/conf.d" ]; then
|
||||
install -m 0644 "$NGINX_SOURCE" "/etc/nginx/conf.d/$SERVICE_NAME.conf"
|
||||
if command -v nginx >/dev/null 2>&1; then
|
||||
nginx -t
|
||||
|
||||
@@ -22,8 +22,6 @@ services:
|
||||
# - /home/user/data:/mnt/data:ro
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
# 远程 Agent 需要通过公网或可路由地址连接 Master 时,取消注释并改成真实 URL:
|
||||
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
|
||||
# 通过 BACKUPX_ 前缀环境变量覆盖配置:
|
||||
# - BACKUPX_LOG_LEVEL=debug
|
||||
# - BACKUPX_BACKUP_MAX_CONCURRENT=4
|
||||
|
||||
@@ -25,19 +25,6 @@ The installer performs these steps automatically:
|
||||
4. Installs `backupx.service` (systemd), enabled at boot
|
||||
5. (Optional) installs an Nginx site file — see [Nginx Reverse Proxy](./nginx)
|
||||
|
||||
For multi-node clusters, edit `/etc/backupx/config.yaml` after installation and set the Master URL that remote Agents can reach:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
Restart BackupX after changing it:
|
||||
|
||||
```bash
|
||||
sudo systemctl restart backupx
|
||||
```
|
||||
|
||||
## From source
|
||||
|
||||
```bash
|
||||
|
||||
@@ -15,14 +15,13 @@ server:
|
||||
host: "0.0.0.0" # BACKUPX_SERVER_HOST
|
||||
port: 8340 # BACKUPX_SERVER_PORT
|
||||
mode: "release" # release | debug
|
||||
external_url: "" # BACKUPX_SERVER_EXTERNAL_URL — public Master URL for Agent install scripts
|
||||
|
||||
database:
|
||||
path: "./data/backupx.db" # BACKUPX_DATABASE_PATH — embedded SQLite
|
||||
|
||||
security:
|
||||
jwt_secret: "" # BACKUPX_SECURITY_JWT_SECRET — auto-generated if empty
|
||||
jwt_expire: "24h" # BACKUPX_SECURITY_JWT_EXPIRE
|
||||
jwt_expires_in: "24h"
|
||||
encryption_key: "" # AES-256-GCM key for storage config encryption
|
||||
|
||||
backup:
|
||||
@@ -47,20 +46,7 @@ The environment wins when both file and env are set. All dot-paths become unders
|
||||
| Config key | Env variable |
|
||||
|------------|--------------|
|
||||
| `server.port` | `BACKUPX_SERVER_PORT` |
|
||||
| `server.external_url` | `BACKUPX_SERVER_EXTERNAL_URL` |
|
||||
| `security.jwt_expire` | `BACKUPX_SECURITY_JWT_EXPIRE` |
|
||||
| `log.level` | `BACKUPX_LOG_LEVEL` |
|
||||
| `backup.max_concurrent` | `BACKUPX_BACKUP_MAX_CONCURRENT` |
|
||||
| `backup.temp_dir` | `BACKUPX_BACKUP_TEMP_DIR` |
|
||||
| `backup.bandwidth_limit` | `BACKUPX_BACKUP_BANDWIDTH_LIMIT` |
|
||||
|
||||
## Master external URL
|
||||
|
||||
Set `server.external_url` when BackupX is behind Docker, Nginx, a load balancer, or any reverse proxy whose internal Host is not reachable by remote Agents:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
This value is used when BackupX renders one-click Agent install scripts and docker-compose snippets. It must be reachable from every Agent host. Leave it empty only when `X-Forwarded-Proto` / `X-Forwarded-Host` are reliable and point to the same URL that Agents can access.
|
||||
|
||||
@@ -25,8 +25,6 @@ services:
|
||||
- /etc/nginx:/mnt/nginx-conf:ro
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
# Required when remote Agents must connect through a public or routed URL:
|
||||
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
|
||||
- BACKUPX_LOG_LEVEL=info
|
||||
- BACKUPX_BACKUP_MAX_CONCURRENT=2
|
||||
|
||||
@@ -44,17 +42,6 @@ docker compose up -d
|
||||
|
||||
To back up files from the host, mount them into the container. When creating a file-type task in the web UI, point the source path at the mount location (e.g. `/mnt/www`). Make sure the directory is visible inside the container.
|
||||
|
||||
## Multi-node clusters
|
||||
|
||||
When deploying Agents on other machines, set `BACKUPX_SERVER_EXTERNAL_URL` on the Master container to the URL that those Agents can reach:
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
- BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
|
||||
```
|
||||
|
||||
Use an HTTPS URL if Agents cross untrusted networks. The generated one-click install scripts and docker-compose snippets use this value as `BACKUPX_AGENT_MASTER`.
|
||||
|
||||
## Environment variables
|
||||
|
||||
All configuration keys can be overridden with the `BACKUPX_` prefix:
|
||||
|
||||
@@ -8,8 +8,6 @@ description: File, MySQL, PostgreSQL, SQLite and SAP HANA — what they back up
|
||||
|
||||
BackupX supports five built-in backup types. Type determines which runner executes the job.
|
||||
|
||||
When a task is routed to a remote Agent, the source tools and paths are resolved on that Agent host. Multi-target uploads are still tracked per storage target; if at least one target succeeds, the backup record is marked successful and the per-target result table shows partial failures.
|
||||
|
||||
## File / Directory
|
||||
|
||||
Tars (and optionally gzips) one or more filesystem paths.
|
||||
|
||||
@@ -28,19 +28,6 @@ BackupX supports Master-Agent mode: backup tasks can be routed to specific nodes
|
||||
|
||||
## Walkthrough
|
||||
|
||||
### 0. Set the Master URL for production clusters
|
||||
|
||||
Before generating Agent install commands, make sure the Master URL shown to Agents is stable and reachable from every target host.
|
||||
|
||||
If BackupX runs behind Docker, Nginx, a load balancer, or an outer reverse proxy, configure `server.external_url` or `BACKUPX_SERVER_EXTERNAL_URL` on the Master:
|
||||
|
||||
```yaml title="config.yaml"
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
This URL is baked into systemd units, foreground commands, and docker-compose snippets. If it is wrong, Agents will install successfully but stay offline because they keep polling an internal or browser-only address.
|
||||
|
||||
### 1. Open the install wizard
|
||||
|
||||
In the Web Console → **Node Management** → **Add Node**. You'll see a three-step wizard.
|
||||
@@ -62,8 +49,6 @@ The script runs automatically and:
|
||||
5. Runs `systemctl enable --now backupx-agent`
|
||||
6. Polls `/api/v1/agent/self` until the master confirms `status: online` (up to 30 s)
|
||||
|
||||
Docker mode uses the same `BACKUPX_AGENT_MASTER`, `BACKUPX_AGENT_TOKEN`, and `BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp` environment contract. After starting the container, the installer also probes `/api/v1/agent/self`; if the node does not come online, it prints `docker ps` and `docker logs --tail=100 backupx-agent` diagnostics before exiting non-zero.
|
||||
|
||||
If you choose the URL-based fallback command and `curl` prints HTML or the shell reports `Syntax error: newline unexpected`, the install URL is being served by the web console instead of the backend. Ensure either `/api/install/` or `/install/` is forwarded to the BackupX backend, or use the embedded command generated by the console.
|
||||
|
||||
Reruns are idempotent — to upgrade or re-provision, simply generate a new install command and run it again. The one-time install link expires after its TTL or after first consumption, whichever is sooner.
|
||||
@@ -83,15 +68,9 @@ In the **Backup Tasks** page, pick the target node when creating the task. When
|
||||
- Local (`nodeId=0`) → Master executes in-process
|
||||
- Remote node → Master enqueues the command → Agent claims → Agent runs locally → uploads → reports back
|
||||
|
||||
The node table shows the Agent health and command queue state: pending/dispatched depth, running long commands, timeouts, oldest active command age, and the latest Agent-side error. The same queue depth, running-command, and timeout snapshots are exported as Prometheus metrics:
|
||||
|
||||
- `backupx_agent_command_queue_depth`
|
||||
- `backupx_agent_command_running`
|
||||
- `backupx_agent_command_timeout_total`
|
||||
|
||||
## Known limitations
|
||||
|
||||
- **Encrypted backups are Master-only** — the Agent doesn't hold Master's AES-256 key. Creating or updating a task with `encrypt: true` and a remote node or node pool is rejected up front
|
||||
- **Encrypted backups don't work via Agent** — the Agent doesn't hold Master's AES-256 key. Tasks with `encrypt: true` will fail if routed to an Agent
|
||||
- **Directory browser timeout** — remote dir listing is a synchronous RPC through the queue (15s default)
|
||||
- **Dispatched command timeout** — claimed-but-unfinished commands are marked `timeout` after 10 minutes
|
||||
|
||||
|
||||
@@ -42,8 +42,6 @@ Go to **Backup Tasks → New**. Three steps:
|
||||
2. **Source** — paths for file backup (multi-source supported), or connection info for databases
|
||||
3. **Storage & policy** — pick target(s), compression, retention days, encryption on/off
|
||||
|
||||
For Agent-routed tasks, encryption must stay off because the Agent never receives the Master's encryption key. BackupX rejects remote-node or node-pool tasks with encryption enabled during create/update.
|
||||
|
||||
Save, then click **Run Now** to trigger a test. Live logs stream on the **Backup Records** page.
|
||||
|
||||
:::note
|
||||
|
||||
@@ -25,19 +25,6 @@ sudo ./install.sh
|
||||
4. 安装并启用 `backupx.service` systemd 单元
|
||||
5. (可选)生成 Nginx 站点配置 — 参见 [Nginx 反向代理](./nginx)
|
||||
|
||||
如果要部署多节点集群,安装后请编辑 `/etc/backupx/config.yaml`,设置远程 Agent 可访问到的 Master URL:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
修改后重启 BackupX:
|
||||
|
||||
```bash
|
||||
sudo systemctl restart backupx
|
||||
```
|
||||
|
||||
## 从源码构建
|
||||
|
||||
```bash
|
||||
|
||||
@@ -15,14 +15,13 @@ server:
|
||||
host: "0.0.0.0" # BACKUPX_SERVER_HOST
|
||||
port: 8340 # BACKUPX_SERVER_PORT
|
||||
mode: "release" # release | debug
|
||||
external_url: "" # BACKUPX_SERVER_EXTERNAL_URL — Agent 安装脚本使用的 Master 对外 URL
|
||||
|
||||
database:
|
||||
path: "./data/backupx.db" # BACKUPX_DATABASE_PATH — 内嵌 SQLite
|
||||
|
||||
security:
|
||||
jwt_secret: "" # BACKUPX_SECURITY_JWT_SECRET — 留空自动生成
|
||||
jwt_expire: "24h" # BACKUPX_SECURITY_JWT_EXPIRE
|
||||
jwt_expires_in: "24h"
|
||||
encryption_key: "" # 用于加密存储配置的 AES-256-GCM 密钥
|
||||
|
||||
backup:
|
||||
@@ -47,20 +46,7 @@ log:
|
||||
| 配置项 | 环境变量 |
|
||||
|--------|----------|
|
||||
| `server.port` | `BACKUPX_SERVER_PORT` |
|
||||
| `server.external_url` | `BACKUPX_SERVER_EXTERNAL_URL` |
|
||||
| `security.jwt_expire` | `BACKUPX_SECURITY_JWT_EXPIRE` |
|
||||
| `log.level` | `BACKUPX_LOG_LEVEL` |
|
||||
| `backup.max_concurrent` | `BACKUPX_BACKUP_MAX_CONCURRENT` |
|
||||
| `backup.temp_dir` | `BACKUPX_BACKUP_TEMP_DIR` |
|
||||
| `backup.bandwidth_limit` | `BACKUPX_BACKUP_BANDWIDTH_LIMIT` |
|
||||
|
||||
## Master 对外 URL
|
||||
|
||||
当 BackupX 部署在 Docker、Nginx、负载均衡或多层反向代理后面,且后端收到的内部 Host 不是远程 Agent 可访问地址时,请配置 `server.external_url`:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
BackupX 会用这个地址渲染一键 Agent 安装脚本和 docker-compose 片段。该地址必须能被所有 Agent 主机访问。只有在 `X-Forwarded-Proto` / `X-Forwarded-Host` 可靠且正好指向 Agent 可访问地址时,才建议留空。
|
||||
|
||||
@@ -25,8 +25,6 @@ services:
|
||||
- /etc/nginx:/mnt/nginx-conf:ro
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
# 远程 Agent 需要通过公网或可路由地址连接 Master 时必须配置:
|
||||
# - BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
|
||||
- BACKUPX_LOG_LEVEL=info
|
||||
- BACKUPX_BACKUP_MAX_CONCURRENT=2
|
||||
|
||||
@@ -44,17 +42,6 @@ docker compose up -d
|
||||
|
||||
想备份宿主机上的文件,需要将对应路径挂载进容器。在 Web UI 创建文件类型任务时,把源路径指向挂载后的容器内路径(如 `/mnt/www`)。
|
||||
|
||||
## 多节点集群
|
||||
|
||||
如果要在其他机器部署 Agent,请在 Master 容器上设置 `BACKUPX_SERVER_EXTERNAL_URL`,值为所有 Agent 都能访问到的 URL:
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
- BACKUPX_SERVER_EXTERNAL_URL=https://backup.example.com
|
||||
```
|
||||
|
||||
Agent 跨不可信网络访问时建议使用 HTTPS。控制台生成的一键安装脚本和 docker-compose 片段会把这个值写成 `BACKUPX_AGENT_MASTER`。
|
||||
|
||||
## 环境变量
|
||||
|
||||
所有配置项都可以通过 `BACKUPX_` 前缀环境变量覆盖:
|
||||
|
||||
@@ -8,8 +8,6 @@ description: 文件、MySQL、PostgreSQL、SQLite 和 SAP HANA — 各自的能
|
||||
|
||||
BackupX 支持五种内置备份类型,类型决定了用哪个 runner 执行。
|
||||
|
||||
当任务路由到远程 Agent 时,源路径和外部工具都会在该 Agent 主机上解析。多存储目标上传仍会逐目标记录结果;只要至少一个目标上传成功,备份记录即为成功,详情中的目标结果表会展示部分失败。
|
||||
|
||||
## 文件 / 目录
|
||||
|
||||
打包(可选 gzip)一个或多个文件系统路径。
|
||||
|
||||
@@ -28,19 +28,6 @@ BackupX 支持 Master-Agent 模式:备份任务可以指定在哪个节点执
|
||||
|
||||
## 一键部署步骤
|
||||
|
||||
### 0. 为生产集群设置 Master 对外 URL
|
||||
|
||||
生成 Agent 安装命令前,请先确认 Master URL 对所有目标主机稳定可达。
|
||||
|
||||
如果 BackupX 部署在 Docker、Nginx、负载均衡或外层反向代理后面,请在 Master 配置 `server.external_url` 或环境变量 `BACKUPX_SERVER_EXTERNAL_URL`:
|
||||
|
||||
```yaml title="config.yaml"
|
||||
server:
|
||||
external_url: "https://backup.example.com"
|
||||
```
|
||||
|
||||
该 URL 会写入 systemd 单元、前台运行命令和 docker-compose 片段。如果地址不正确,Agent 可能安装成功但始终离线,因为它会持续轮询一个内网地址或仅浏览器可访问的地址。
|
||||
|
||||
### 1. 打开安装向导
|
||||
|
||||
Web 控制台 → **节点管理** → **添加节点**,打开三步向导:
|
||||
@@ -62,8 +49,6 @@ Web 控制台 → **节点管理** → **添加节点**,打开三步向导:
|
||||
5. 执行 `systemctl enable --now backupx-agent`
|
||||
6. 轮询 `/api/v1/agent/self`,直到 Master 确认 `status: online`(最多 30 秒)
|
||||
|
||||
Docker 模式使用同一组环境变量约定:`BACKUPX_AGENT_MASTER`、`BACKUPX_AGENT_TOKEN` 和 `BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp`。容器启动后,安装脚本同样会探测 `/api/v1/agent/self`;如果节点没有上线,会输出 `docker ps` 与 `docker logs --tail=100 backupx-agent` 排查命令,并以非零状态退出。
|
||||
|
||||
如果使用 URL 备用命令时 `curl` 输出 HTML,或 shell 报 `Syntax error: newline unexpected`,说明安装 URL 被 Web 控制台接管而不是转发到后端。需要确保 `/api/install/` 或 `/install/` 至少一个路径能转发到 BackupX 后端,或改用控制台生成的嵌入式命令。
|
||||
|
||||
脚本是幂等的:升级或重装只需重新生成一条安装命令再跑一次。一次性安装链接在 TTL 到期或被首次消费后立即作废。
|
||||
@@ -83,15 +68,9 @@ Docker 模式使用同一组环境变量约定:`BACKUPX_AGENT_MASTER`、`BACKU
|
||||
- 本机 / 未指定(`nodeId=0`):Master 进程内直接执行
|
||||
- 远程节点:Master 写入命令队列 → Agent 拉取 → Agent 本地执行 → 上传 → 回报
|
||||
|
||||
节点列表会展示 Agent 健康与命令队列状态:pending/dispatched 深度、运行中的长任务、超时数、最旧活跃命令年龄和最近 Agent 错误。同样的队列深度、运行中命令数和超时快照会导出为 Prometheus 指标:
|
||||
|
||||
- `backupx_agent_command_queue_depth`
|
||||
- `backupx_agent_command_running`
|
||||
- `backupx_agent_command_timeout_total`
|
||||
|
||||
## 已知限制
|
||||
|
||||
- **加密备份仅支持 Master 本机执行**:Agent 不持有 Master 的 AES-256 密钥。创建或更新任务时,如果 `encrypt: true` 且选择了远程节点或节点池,会在入口直接拒绝
|
||||
- **Agent 不支持加密备份**:Agent 不持有 Master 的 AES-256 密钥。`encrypt: true` 的任务路由到 Agent 时会直接上报失败
|
||||
- **目录浏览超时**:远程目录浏览通过命令队列做同步 RPC,默认 15s 超时
|
||||
- **派发命令超时**:Agent 领取但未完成的命令超过 10 分钟会被置 `timeout`
|
||||
|
||||
|
||||
@@ -42,8 +42,6 @@ description: 部署 BackupX、添加存储目标、创建第一个备份任务
|
||||
2. **源配置** — 文件备份选择源路径(支持多个),数据库备份填写连接信息
|
||||
3. **存储与策略** — 选择存储目标(支持多个)、压缩策略、保留天数、是否加密
|
||||
|
||||
对于路由到 Agent 的任务,加密必须关闭,因为 Agent 不会拿到 Master 的加密密钥。BackupX 会在创建/更新阶段拒绝开启加密的远程节点或节点池任务。
|
||||
|
||||
保存后可点击 **立即执行** 测试,**备份记录** 页面实时查看执行日志。
|
||||
|
||||
:::note
|
||||
|
||||
@@ -3,7 +3,6 @@ server:
|
||||
host: "0.0.0.0"
|
||||
port: 8340
|
||||
mode: "release" # debug | release
|
||||
external_url: "" # 可选:Master 对 Agent 可达的 URL,例如 https://backup.example.com
|
||||
|
||||
database:
|
||||
path: "./data/backupx.db" # SQLite 数据库路径
|
||||
|
||||
@@ -143,24 +143,13 @@ func (c *MasterClient) GetTaskSpec(ctx context.Context, taskID uint) (*TaskSpec,
|
||||
|
||||
// RecordUpdate 与 service.AgentRecordUpdate 对齐
|
||||
type RecordUpdate struct {
|
||||
Status string `json:"status,omitempty"`
|
||||
FileName string `json:"fileName,omitempty"`
|
||||
FileSize int64 `json:"fileSize,omitempty"`
|
||||
Checksum string `json:"checksum,omitempty"`
|
||||
StoragePath string `json:"storagePath,omitempty"`
|
||||
StorageTargetID uint `json:"storageTargetId,omitempty"`
|
||||
StorageUploadResults []StorageResultItem `json:"storageUploadResults,omitempty"`
|
||||
ErrorMessage string `json:"errorMessage,omitempty"`
|
||||
LogAppend string `json:"logAppend,omitempty"`
|
||||
}
|
||||
|
||||
type StorageResultItem struct {
|
||||
StorageTargetID uint `json:"storageTargetId"`
|
||||
StorageTargetName string `json:"storageTargetName"`
|
||||
Status string `json:"status"`
|
||||
StoragePath string `json:"storagePath,omitempty"`
|
||||
FileSize int64 `json:"fileSize,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
FileName string `json:"fileName,omitempty"`
|
||||
FileSize int64 `json:"fileSize,omitempty"`
|
||||
Checksum string `json:"checksum,omitempty"`
|
||||
StoragePath string `json:"storagePath,omitempty"`
|
||||
ErrorMessage string `json:"errorMessage,omitempty"`
|
||||
LogAppend string `json:"logAppend,omitempty"`
|
||||
}
|
||||
|
||||
// UpdateRecord 上报备份记录的状态/日志
|
||||
|
||||
@@ -26,7 +26,7 @@ type Config struct {
|
||||
HeartbeatInterval string `yaml:"heartbeatInterval"`
|
||||
// PollInterval 命令轮询间隔,默认 5s
|
||||
PollInterval string `yaml:"pollInterval"`
|
||||
// TempDir 备份临时目录,默认 /var/lib/backupx-agent/tmp
|
||||
// TempDir 备份临时目录,默认 /tmp/backupx-agent
|
||||
TempDir string `yaml:"tempDir"`
|
||||
// InsecureSkipTLSVerify 测试环境允许跳过 TLS 证书校验
|
||||
InsecureSkipTLSVerify bool `yaml:"insecureSkipTlsVerify"`
|
||||
@@ -98,7 +98,7 @@ func applyConfigDefaults(cfg *Config) (*Config, error) {
|
||||
cfg.PollInterval = "5s"
|
||||
}
|
||||
if cfg.TempDir == "" {
|
||||
cfg.TempDir = "/var/lib/backupx-agent/tmp"
|
||||
cfg.TempDir = "/tmp/backupx-agent"
|
||||
}
|
||||
cfg.Master = strings.TrimRight(strings.TrimSpace(cfg.Master), "/")
|
||||
return cfg, nil
|
||||
|
||||
@@ -50,7 +50,7 @@ func TestLoadConfigDefaults(t *testing.T) {
|
||||
if cfg.HeartbeatInterval != "15s" || cfg.PollInterval != "5s" {
|
||||
t.Errorf("default intervals not applied: %+v", cfg)
|
||||
}
|
||||
if cfg.TempDir != "/var/lib/backupx-agent/tmp" {
|
||||
if cfg.TempDir != "/tmp/backupx-agent" {
|
||||
t.Errorf("default tempdir: %q", cfg.TempDir)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -20,10 +19,10 @@ import (
|
||||
|
||||
// Executor 负责在 Agent 本地执行命令。
|
||||
type Executor struct {
|
||||
client *MasterClient
|
||||
tempDir string
|
||||
backupRegistry *backup.Registry
|
||||
storageRegistry *storage.Registry
|
||||
client *MasterClient
|
||||
tempDir string
|
||||
backupRegistry *backup.Registry
|
||||
storageRegistry *storage.Registry
|
||||
}
|
||||
|
||||
// NewExecutor 构造执行器。预先初始化 backup runner 与 storage registry。
|
||||
@@ -60,11 +59,6 @@ func NewExecutor(client *MasterClient, tempDir string) *Executor {
|
||||
// 注意:Agent 当前不支持 Encrypt=true(加密密钥不下发到 Agent,避免密钥扩散)。
|
||||
// 遇到启用加密的任务会向 Master 上报失败并返回错误。
|
||||
func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) error {
|
||||
if err := e.ensureTempDir(); err != nil {
|
||||
e.reportRecordFailure(ctx, recordID, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
// 1) 拉取任务规格
|
||||
spec, err := e.client.GetTaskSpec(ctx, taskID)
|
||||
if err != nil {
|
||||
@@ -80,6 +74,10 @@ func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) er
|
||||
|
||||
// 2) 构造 backup.TaskSpec 并找对应 runner
|
||||
startedAt := time.Now().UTC()
|
||||
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
|
||||
e.reportRecordFailure(ctx, recordID, fmt.Sprintf("创建临时目录失败: %v", err))
|
||||
return err
|
||||
}
|
||||
backupSpec := buildBackupTaskSpec(spec, startedAt, e.tempDir)
|
||||
runner, err := e.backupRegistry.Runner(backupSpec.Type)
|
||||
if err != nil {
|
||||
@@ -126,52 +124,22 @@ func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) er
|
||||
e.reportRecordFailure(ctx, recordID, "没有关联的存储目标")
|
||||
return fmt.Errorf("no storage targets")
|
||||
}
|
||||
uploadResults := make([]StorageResultItem, 0, len(spec.StorageTargets))
|
||||
selectedStorageTargetID := uint(0)
|
||||
var uploadErrors []string
|
||||
for _, target := range spec.StorageTargets {
|
||||
if err := e.uploadToTarget(ctx, recordID, target, finalPath, storagePath, fileSize, spec.TaskID); err != nil {
|
||||
uploadResults = append(uploadResults, StorageResultItem{
|
||||
StorageTargetID: target.ID,
|
||||
StorageTargetName: target.Name,
|
||||
Status: "failed",
|
||||
Error: err.Error(),
|
||||
})
|
||||
uploadErrors = append(uploadErrors, fmt.Sprintf("%s: %v", target.Name, err))
|
||||
e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 上传到存储目标 %s 失败: %v\n", target.Name, err))
|
||||
continue
|
||||
e.reportRecordFailure(ctx, recordID, fmt.Sprintf("上传到 %s 失败: %v", target.Name, err))
|
||||
return err
|
||||
}
|
||||
if selectedStorageTargetID == 0 {
|
||||
selectedStorageTargetID = target.ID
|
||||
}
|
||||
uploadResults = append(uploadResults, StorageResultItem{
|
||||
StorageTargetID: target.ID,
|
||||
StorageTargetName: target.Name,
|
||||
Status: "success",
|
||||
StoragePath: storagePath,
|
||||
FileSize: fileSize,
|
||||
})
|
||||
e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 已上传到存储目标 %s\n", target.Name))
|
||||
}
|
||||
if selectedStorageTargetID == 0 {
|
||||
msg := strings.Join(uploadErrors, "; ")
|
||||
if msg == "" {
|
||||
msg = "所有存储目标上传均失败"
|
||||
}
|
||||
e.reportRecordFailureWithUploadResults(ctx, recordID, msg, uploadResults)
|
||||
return fmt.Errorf("%s", msg)
|
||||
}
|
||||
|
||||
// 6) 上报最终成功
|
||||
return e.client.UpdateRecord(ctx, recordID, RecordUpdate{
|
||||
Status: "success",
|
||||
FileName: fileName,
|
||||
FileSize: fileSize,
|
||||
Checksum: checksum,
|
||||
StoragePath: storagePath,
|
||||
StorageTargetID: selectedStorageTargetID,
|
||||
StorageUploadResults: uploadResults,
|
||||
LogAppend: fmt.Sprintf("[agent] 任务完成,总计 %d 字节\n", fileSize),
|
||||
Status: "success",
|
||||
FileName: fileName,
|
||||
FileSize: fileSize,
|
||||
Checksum: checksum,
|
||||
StoragePath: storagePath,
|
||||
LogAppend: fmt.Sprintf("[agent] 任务完成,总计 %d 字节\n", fileSize),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -207,22 +175,31 @@ func (e *Executor) appendLog(ctx context.Context, recordID uint, line string) {
|
||||
|
||||
// reportRecordFailure 上报失败状态
|
||||
func (e *Executor) reportRecordFailure(ctx context.Context, recordID uint, msg string) {
|
||||
e.reportRecordFailureWithUploadResults(ctx, recordID, msg, nil)
|
||||
}
|
||||
|
||||
func (e *Executor) reportRecordFailureWithUploadResults(ctx context.Context, recordID uint, msg string, uploadResults []StorageResultItem) {
|
||||
_ = e.client.UpdateRecord(ctx, recordID, RecordUpdate{
|
||||
Status: "failed",
|
||||
ErrorMessage: msg,
|
||||
StorageUploadResults: uploadResults,
|
||||
LogAppend: fmt.Sprintf("[agent] 错误: %s\n", msg),
|
||||
Status: "failed",
|
||||
ErrorMessage: msg,
|
||||
LogAppend: fmt.Sprintf("[agent] 错误: %s\n", msg),
|
||||
})
|
||||
}
|
||||
|
||||
// buildBackupTaskSpec 把 AgentTaskSpec 转换为 backup.TaskSpec。
|
||||
func buildBackupTaskSpec(spec *TaskSpec, startedAt time.Time, tempDir string) backup.TaskSpec {
|
||||
sourcePaths := parseStringListField(spec.SourcePaths)
|
||||
excludes := parseStringListField(spec.ExcludePatterns)
|
||||
var sourcePaths []string
|
||||
if strings.TrimSpace(spec.SourcePaths) != "" {
|
||||
for _, p := range strings.Split(spec.SourcePaths, "\n") {
|
||||
if p = strings.TrimSpace(p); p != "" {
|
||||
sourcePaths = append(sourcePaths, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
var excludes []string
|
||||
if strings.TrimSpace(spec.ExcludePatterns) != "" {
|
||||
for _, p := range strings.Split(spec.ExcludePatterns, "\n") {
|
||||
if p = strings.TrimSpace(p); p != "" {
|
||||
excludes = append(excludes, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
return backup.TaskSpec{
|
||||
ID: spec.TaskID,
|
||||
Name: spec.Name,
|
||||
@@ -245,37 +222,6 @@ func buildBackupTaskSpec(spec *TaskSpec, startedAt time.Time, tempDir string) ba
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Executor) ensureTempDir() error {
|
||||
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create agent temp dir: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseStringListField(value string) []string {
|
||||
trimmed := strings.TrimSpace(value)
|
||||
if trimmed == "" || trimmed == "[]" {
|
||||
return nil
|
||||
}
|
||||
var jsonItems []string
|
||||
if err := json.Unmarshal([]byte(trimmed), &jsonItems); err == nil {
|
||||
return compactStringList(jsonItems)
|
||||
}
|
||||
return compactStringList(strings.FieldsFunc(trimmed, func(r rune) bool {
|
||||
return r == '\n' || r == '\r'
|
||||
}))
|
||||
}
|
||||
|
||||
func compactStringList(items []string) []string {
|
||||
result := make([]string, 0, len(items))
|
||||
for _, item := range items {
|
||||
if trimmed := strings.TrimSpace(item); trimmed != "" {
|
||||
result = append(result, trimmed)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// recordLogger 把 runner 日志回传到 Master 记录。
|
||||
// 实现 backup.LogWriter,每条日志追加到 record.log_content。
|
||||
type recordLogger struct {
|
||||
@@ -294,8 +240,8 @@ func (l *recordLogger) WriteLine(message string) {
|
||||
|
||||
// restoreLogger 把 runner 日志回传到 Master 恢复记录。
|
||||
type restoreLogger struct {
|
||||
ctx context.Context
|
||||
client *MasterClient
|
||||
ctx context.Context
|
||||
client *MasterClient
|
||||
restoreID uint
|
||||
}
|
||||
|
||||
@@ -324,11 +270,6 @@ func (e *Executor) DeleteStorageObject(ctx context.Context, targetType string, t
|
||||
// - 执行:backup.Registry.Runner(spec.Type).Restore
|
||||
// - 上报:通过 UpdateRestore(status/logAppend)
|
||||
func (e *Executor) ExecuteRestore(ctx context.Context, restoreRecordID uint) error {
|
||||
if err := e.ensureTempDir(); err != nil {
|
||||
e.reportRestoreFailure(ctx, restoreRecordID, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
spec, err := e.client.GetRestoreSpec(ctx, restoreRecordID)
|
||||
if err != nil {
|
||||
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("拉取恢复规格失败: %v", err))
|
||||
@@ -341,6 +282,10 @@ func (e *Executor) ExecuteRestore(ctx context.Context, restoreRecordID uint) err
|
||||
}
|
||||
e.appendRestoreLog(ctx, restoreRecordID, fmt.Sprintf("[agent] 开始恢复 %s (type=%s)\n", spec.TaskName, spec.Type))
|
||||
|
||||
if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
|
||||
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建临时目录失败: %v", err))
|
||||
return err
|
||||
}
|
||||
tmpDir, err := os.MkdirTemp(e.tempDir, "restore-*")
|
||||
if err != nil {
|
||||
e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建恢复临时目录失败: %v", err))
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"backupx/server/internal/storage"
|
||||
)
|
||||
|
||||
func TestBuildBackupTaskSpecParsesJSONSourcePaths(t *testing.T) {
|
||||
spec := &TaskSpec{
|
||||
TaskID: 7,
|
||||
Name: "root-files",
|
||||
Type: "file",
|
||||
SourcePaths: `["/root","/etc"]`,
|
||||
ExcludePatterns: `["*.log","tmp"]`,
|
||||
}
|
||||
|
||||
got := buildBackupTaskSpec(spec, time.Unix(0, 0), "/var/lib/backupx-agent/tmp")
|
||||
|
||||
if !reflect.DeepEqual(got.SourcePaths, []string{"/root", "/etc"}) {
|
||||
t.Fatalf("source paths = %#v", got.SourcePaths)
|
||||
}
|
||||
if !reflect.DeepEqual(got.ExcludePatterns, []string{"*.log", "tmp"}) {
|
||||
t.Fatalf("exclude patterns = %#v", got.ExcludePatterns)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStringListFieldKeepsLegacyLineFormat(t *testing.T) {
|
||||
got := parseStringListField("/root\n /etc \n")
|
||||
want := []string{"/root", "/etc"}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("paths = %#v, want %#v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteRunTaskRecordsPerTargetUploadResults(t *testing.T) {
|
||||
sourceDir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(sourceDir, "index.html"), []byte("hello"), 0o644); err != nil {
|
||||
t.Fatalf("WriteFile returned error: %v", err)
|
||||
}
|
||||
var finalUpdate RecordUpdate
|
||||
var updates []RecordUpdate
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/agent/tasks/1":
|
||||
writeAgentEnvelope(t, w, TaskSpec{
|
||||
TaskID: 1,
|
||||
Name: "site",
|
||||
Type: "file",
|
||||
SourcePath: sourceDir,
|
||||
Compression: "gzip",
|
||||
StorageTargets: []StorageTargetConfig{
|
||||
{ID: 11, Name: "broken", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken"}`)},
|
||||
{ID: 12, Name: "ok", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"ok"}`)},
|
||||
},
|
||||
})
|
||||
case r.Method == http.MethodPost && r.URL.Path == "/api/agent/records/99":
|
||||
var update RecordUpdate
|
||||
if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
|
||||
t.Fatalf("Decode update returned error: %v", err)
|
||||
}
|
||||
updates = append(updates, update)
|
||||
if update.Status != "" {
|
||||
finalUpdate = update
|
||||
}
|
||||
writeAgentEnvelope(t, w, map[string]string{"status": "ok"})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewExecutor(NewMasterClient(server.URL, "token", false), filepath.Join(t.TempDir(), "tmp"))
|
||||
executor.storageRegistry = storage.NewRegistry(&agentTestStorageFactory{
|
||||
providers: map[string]*agentTestStorageProvider{
|
||||
"broken": {name: "broken", failUpload: true},
|
||||
"ok": {name: "ok", objects: map[string][]byte{}},
|
||||
},
|
||||
})
|
||||
|
||||
if err := executor.ExecuteRunTask(context.Background(), 1, 99); err != nil {
|
||||
t.Fatalf("ExecuteRunTask returned error: %v", err)
|
||||
}
|
||||
if len(updates) == 0 || finalUpdate.Status != "success" {
|
||||
t.Fatalf("expected final success update, got updates=%#v final=%#v", updates, finalUpdate)
|
||||
}
|
||||
if finalUpdate.StorageTargetID != 12 {
|
||||
t.Fatalf("expected first successful target 12, got %d", finalUpdate.StorageTargetID)
|
||||
}
|
||||
if len(finalUpdate.StorageUploadResults) != 2 {
|
||||
t.Fatalf("expected two upload results, got %#v", finalUpdate.StorageUploadResults)
|
||||
}
|
||||
if finalUpdate.StorageUploadResults[0].Status != "failed" || finalUpdate.StorageUploadResults[1].Status != "success" {
|
||||
t.Fatalf("unexpected upload results: %#v", finalUpdate.StorageUploadResults)
|
||||
}
|
||||
if finalUpdate.StoragePath == "" || finalUpdate.FileSize <= 0 || finalUpdate.Checksum == "" {
|
||||
t.Fatalf("expected artifact metadata in final update, got %#v", finalUpdate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteRunTaskReportsPerTargetUploadResultsWhenAllTargetsFail(t *testing.T) {
|
||||
sourceDir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(sourceDir, "index.html"), []byte("hello"), 0o644); err != nil {
|
||||
t.Fatalf("WriteFile returned error: %v", err)
|
||||
}
|
||||
var finalUpdate RecordUpdate
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/agent/tasks/1":
|
||||
writeAgentEnvelope(t, w, TaskSpec{
|
||||
TaskID: 1,
|
||||
Name: "site",
|
||||
Type: "file",
|
||||
SourcePath: sourceDir,
|
||||
Compression: "gzip",
|
||||
StorageTargets: []StorageTargetConfig{
|
||||
{ID: 11, Name: "broken-a", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken-a"}`)},
|
||||
{ID: 12, Name: "broken-b", Type: "agent_test_storage", Config: json.RawMessage(`{"name":"broken-b"}`)},
|
||||
},
|
||||
})
|
||||
case r.Method == http.MethodPost && r.URL.Path == "/api/agent/records/99":
|
||||
var update RecordUpdate
|
||||
if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
|
||||
t.Fatalf("Decode update returned error: %v", err)
|
||||
}
|
||||
if update.Status != "" {
|
||||
finalUpdate = update
|
||||
}
|
||||
writeAgentEnvelope(t, w, map[string]string{"status": "ok"})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewExecutor(NewMasterClient(server.URL, "token", false), filepath.Join(t.TempDir(), "tmp"))
|
||||
executor.storageRegistry = storage.NewRegistry(&agentTestStorageFactory{
|
||||
providers: map[string]*agentTestStorageProvider{
|
||||
"broken-a": {name: "broken-a", failUpload: true},
|
||||
"broken-b": {name: "broken-b", failUpload: true},
|
||||
},
|
||||
})
|
||||
|
||||
if err := executor.ExecuteRunTask(context.Background(), 1, 99); err == nil {
|
||||
t.Fatal("expected ExecuteRunTask to return upload failure")
|
||||
}
|
||||
if finalUpdate.Status != "failed" {
|
||||
t.Fatalf("expected final failed update, got %#v", finalUpdate)
|
||||
}
|
||||
if len(finalUpdate.StorageUploadResults) != 2 {
|
||||
t.Fatalf("expected failed update to keep per-target results, got %#v", finalUpdate.StorageUploadResults)
|
||||
}
|
||||
for _, item := range finalUpdate.StorageUploadResults {
|
||||
if item.Status != "failed" || item.Error == "" {
|
||||
t.Fatalf("unexpected upload result: %#v", item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type agentTestStorageFactory struct {
|
||||
providers map[string]*agentTestStorageProvider
|
||||
}
|
||||
|
||||
func (f *agentTestStorageFactory) Type() storage.ProviderType {
|
||||
return "agent_test_storage"
|
||||
}
|
||||
|
||||
func (f *agentTestStorageFactory) New(_ context.Context, config map[string]any) (storage.StorageProvider, error) {
|
||||
name, _ := config["name"].(string)
|
||||
provider := f.providers[name]
|
||||
if provider == nil {
|
||||
return nil, fmt.Errorf("unknown provider %q", name)
|
||||
}
|
||||
return provider, nil
|
||||
}
|
||||
|
||||
type agentTestStorageProvider struct {
|
||||
name string
|
||||
failUpload bool
|
||||
objects map[string][]byte
|
||||
}
|
||||
|
||||
func (p *agentTestStorageProvider) Type() storage.ProviderType { return "agent_test_storage" }
|
||||
func (p *agentTestStorageProvider) TestConnection(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (p *agentTestStorageProvider) Upload(_ context.Context, objectKey string, reader io.Reader, _ int64, _ map[string]string) error {
|
||||
if p.failUpload {
|
||||
return fmt.Errorf("upload failed for %s", p.name)
|
||||
}
|
||||
data, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p.objects == nil {
|
||||
p.objects = map[string][]byte{}
|
||||
}
|
||||
p.objects[objectKey] = data
|
||||
return nil
|
||||
}
|
||||
func (p *agentTestStorageProvider) Download(_ context.Context, objectKey string) (io.ReadCloser, error) {
|
||||
data, ok := p.objects[objectKey]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("object %s not found", objectKey)
|
||||
}
|
||||
return io.NopCloser(strings.NewReader(string(data))), nil
|
||||
}
|
||||
func (p *agentTestStorageProvider) Delete(_ context.Context, objectKey string) error {
|
||||
delete(p.objects, objectKey)
|
||||
return nil
|
||||
}
|
||||
func (p *agentTestStorageProvider) List(context.Context, string) ([]storage.ObjectInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func writeAgentEnvelope(t *testing.T, w http.ResponseWriter, data any) {
|
||||
t.Helper()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(map[string]any{"code": "OK", "data": data}); err != nil {
|
||||
t.Fatalf("Encode response returned error: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DirEntry Agent 返回给 Master 的目录项。
|
||||
@@ -18,8 +17,8 @@ type DirEntry struct {
|
||||
|
||||
// listLocalDir 列出 Agent 所在机器的指定路径。
|
||||
func listLocalDir(path string) ([]DirEntry, error) {
|
||||
cleaned := filepath.Clean(strings.TrimSpace(path))
|
||||
if strings.TrimSpace(path) == "" || cleaned == "." {
|
||||
cleaned := filepath.Clean(path)
|
||||
if cleaned == "" {
|
||||
cleaned = "/"
|
||||
}
|
||||
entries, err := os.ReadDir(cleaned)
|
||||
|
||||
@@ -36,21 +36,6 @@ func TestListLocalDir(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestListLocalDirEmptyPathUsesRoot(t *testing.T) {
|
||||
entries, err := listLocalDir("")
|
||||
if err != nil {
|
||||
t.Fatalf("list root: %v", err)
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
t.Fatalf("expected root entries")
|
||||
}
|
||||
for _, entry := range entries {
|
||||
if !filepath.IsAbs(entry.Path) {
|
||||
t.Fatalf("entry path should be absolute: %+v", entry)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitCommaOrNewline(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
|
||||
@@ -131,7 +131,6 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
|
||||
|
||||
// Agent 协议服务:命令队列 + 任务下发 + 记录上报
|
||||
agentCmdRepo := repository.NewAgentCommandRepository(db)
|
||||
nodeService.SetAgentCommandRepository(agentCmdRepo)
|
||||
agentService := service.NewAgentService(nodeRepo, backupTaskRepo, backupRecordRepo, storageTargetRepo, agentCmdRepo, configCipher)
|
||||
agentService.SetRestoreRepository(restoreRecordRepo)
|
||||
agentService.StartCommandTimeoutMonitor(ctx, 30*time.Second, 10*time.Minute)
|
||||
@@ -241,7 +240,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
|
||||
replicationService.SetMetrics(appMetrics)
|
||||
metricsCollector := metrics.NewCollector(
|
||||
appMetrics,
|
||||
metrics.NewRepoSource(storageTargetRepo, backupRecordRepo, nodeRepo, backupTaskRepo, agentCmdRepo),
|
||||
metrics.NewRepoSource(storageTargetRepo, backupRecordRepo, nodeRepo, backupTaskRepo),
|
||||
30*time.Second,
|
||||
)
|
||||
metricsCollector.Start(ctx)
|
||||
@@ -277,7 +276,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
|
||||
UserRepository: userRepo,
|
||||
SystemConfigRepo: systemConfigRepo,
|
||||
InstallTokenService: installTokenService,
|
||||
MasterExternalURL: cfg.Server.ExternalURL,
|
||||
MasterExternalURL: "", // 如需覆盖 URL,可扩展 cfg.Server 增字段;目前留空依赖 X-Forwarded-* / Request.Host
|
||||
DB: db,
|
||||
Metrics: appMetrics,
|
||||
})
|
||||
|
||||
@@ -24,9 +24,6 @@ func (r *fakeRecordRepository) List(context.Context, repository.BackupRecordList
|
||||
func (r *fakeRecordRepository) FindByID(context.Context, uint) (*model.BackupRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (r *fakeRecordRepository) FindRunningByTaskAndNode(context.Context, uint, uint) (*model.BackupRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (r *fakeRecordRepository) Create(context.Context, *model.BackupRecord) error { return nil }
|
||||
func (r *fakeRecordRepository) Update(context.Context, *model.BackupRecord) error { return nil }
|
||||
func (r *fakeRecordRepository) Delete(_ context.Context, id uint) error {
|
||||
|
||||
@@ -17,10 +17,9 @@ type Config struct {
|
||||
}
|
||||
|
||||
type ServerConfig struct {
|
||||
Host string `mapstructure:"host"`
|
||||
Port int `mapstructure:"port"`
|
||||
Mode string `mapstructure:"mode"`
|
||||
ExternalURL string `mapstructure:"external_url"`
|
||||
Host string `mapstructure:"host"`
|
||||
Port int `mapstructure:"port"`
|
||||
Mode string `mapstructure:"mode"`
|
||||
}
|
||||
|
||||
type DatabaseConfig struct {
|
||||
@@ -137,7 +136,6 @@ func applyDefaults(v *viper.Viper) {
|
||||
v.SetDefault("server.host", "0.0.0.0")
|
||||
v.SetDefault("server.port", 8340)
|
||||
v.SetDefault("server.mode", "release")
|
||||
v.SetDefault("server.external_url", "")
|
||||
v.SetDefault("database.path", "./data/backupx.db")
|
||||
v.SetDefault("security.jwt_expire", "24h")
|
||||
v.SetDefault("backup.temp_dir", "/tmp/backupx")
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
import "testing"
|
||||
|
||||
func TestLoadUsesDefaultsWithoutConfigFile(t *testing.T) {
|
||||
cfg, err := Load("")
|
||||
@@ -22,33 +18,3 @@ func TestLoadUsesDefaultsWithoutConfigFile(t *testing.T) {
|
||||
t.Fatalf("expected default database path, got %s", cfg.Database.Path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadReadsServerExternalURLFromFile(t *testing.T) {
|
||||
configPath := filepath.Join(t.TempDir(), "config.yaml")
|
||||
content := []byte("server:\n external_url: \"https://backup.example.com\"\n")
|
||||
if err := os.WriteFile(configPath, content, 0o600); err != nil {
|
||||
t.Fatalf("write config: %v", err)
|
||||
}
|
||||
|
||||
cfg, err := Load(configPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Load returned error: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Server.ExternalURL != "https://backup.example.com" {
|
||||
t.Fatalf("expected external URL from config, got %q", cfg.Server.ExternalURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadReadsServerExternalURLFromEnv(t *testing.T) {
|
||||
t.Setenv("BACKUPX_SERVER_EXTERNAL_URL", "https://env-backup.example.com")
|
||||
|
||||
cfg, err := Load("")
|
||||
if err != nil {
|
||||
t.Fatalf("Load returned error: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Server.ExternalURL != "https://env-backup.example.com" {
|
||||
t.Fatalf("expected external URL from env, got %q", cfg.Server.ExternalURL)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,14 +25,10 @@ import (
|
||||
// setupInstallFlowRouter 构造一个 Node + Agent + InstallToken 全量依赖的 router,
|
||||
// 并返回已登录管理员 JWT。
|
||||
func setupInstallFlowRouter(t *testing.T) (http.Handler, string) {
|
||||
return setupInstallFlowRouterWithExternalURL(t, "")
|
||||
}
|
||||
|
||||
func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (http.Handler, string) {
|
||||
t.Helper()
|
||||
tempDir := t.TempDir()
|
||||
cfg := config.Config{
|
||||
Server: config.ServerConfig{Host: "127.0.0.1", Port: 8340, Mode: "test", ExternalURL: externalURL},
|
||||
Server: config.ServerConfig{Host: "127.0.0.1", Port: 8340, Mode: "test"},
|
||||
Database: config.DatabaseConfig{Path: filepath.Join(tempDir, "backupx.db")},
|
||||
Security: config.SecurityConfig{JWTExpire: "24h"},
|
||||
Log: config.LogConfig{Level: "error"},
|
||||
@@ -72,6 +68,9 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
|
||||
installTokenRepo := repository.NewAgentInstallTokenRepository(db)
|
||||
installTokenSvc := service.NewInstallTokenService(installTokenRepo, nodeRepo)
|
||||
|
||||
auditLogRepo := repository.NewAuditLogRepository(db)
|
||||
auditSvc := service.NewAuditService(auditLogRepo)
|
||||
|
||||
// 用 cancelable ctx,测试结束时停掉 handler 启动的后台 GC 协程,
|
||||
// 避免 goroutine 持有 map 导致 tempdir 清理失败。
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -86,7 +85,7 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
|
||||
SystemService: systemSvc,
|
||||
NodeService: nodeSvc,
|
||||
InstallTokenService: installTokenSvc,
|
||||
MasterExternalURL: cfg.Server.ExternalURL,
|
||||
AuditService: auditSvc,
|
||||
JWTManager: jwtMgr,
|
||||
UserRepository: userRepo,
|
||||
SystemConfigRepo: systemConfigRepo,
|
||||
@@ -115,73 +114,6 @@ func setupInstallFlowRouterWithExternalURL(t *testing.T, externalURL string) (ht
|
||||
return router, setupResp.Data.Token
|
||||
}
|
||||
|
||||
func TestInstallTokenUsesConfiguredExternalURL(t *testing.T) {
|
||||
const externalURL = "https://public.example.com/base"
|
||||
router, jwt := setupInstallFlowRouterWithExternalURL(t, externalURL)
|
||||
|
||||
batchBody, _ := json.Marshal(map[string][]string{"names": {"external-url-node"}})
|
||||
batchReq := httptest.NewRequest(http.MethodPost, "/api/nodes/batch", bytes.NewBuffer(batchBody))
|
||||
batchReq.Header.Set("Content-Type", "application/json")
|
||||
batchReq.Header.Set("Authorization", "Bearer "+jwt)
|
||||
batchRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(batchRec, batchReq)
|
||||
if batchRec.Code != 200 {
|
||||
t.Fatalf("batch create failed: %d %s", batchRec.Code, batchRec.Body.String())
|
||||
}
|
||||
var batchResp struct {
|
||||
Data []struct {
|
||||
ID uint `json:"id"`
|
||||
} `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(batchRec.Body.Bytes(), &batchResp); err != nil {
|
||||
t.Fatalf("unmarshal batch: %v", err)
|
||||
}
|
||||
if len(batchResp.Data) != 1 {
|
||||
t.Fatalf("expected 1 node, got %d", len(batchResp.Data))
|
||||
}
|
||||
|
||||
genBody, _ := json.Marshal(map[string]any{
|
||||
"mode": "systemd",
|
||||
"arch": "auto",
|
||||
"agentVersion": "v1.7.0",
|
||||
"downloadSrc": "github",
|
||||
"ttlSeconds": 900,
|
||||
})
|
||||
genReq := httptest.NewRequest(http.MethodPost,
|
||||
"/api/nodes/"+formatUint(batchResp.Data[0].ID)+"/install-tokens", bytes.NewBuffer(genBody))
|
||||
genReq.Header.Set("Content-Type", "application/json")
|
||||
genReq.Header.Set("Authorization", "Bearer "+jwt)
|
||||
genRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(genRec, genReq)
|
||||
if genRec.Code != 200 {
|
||||
t.Fatalf("install-tokens failed: %d %s", genRec.Code, genRec.Body.String())
|
||||
}
|
||||
var genResp struct {
|
||||
Data struct {
|
||||
InstallToken string `json:"installToken"`
|
||||
URL string `json:"url"`
|
||||
FallbackURL string `json:"fallbackUrl"`
|
||||
ScriptBase64 string `json:"scriptBase64"`
|
||||
} `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(genRec.Body.Bytes(), &genResp); err != nil {
|
||||
t.Fatalf("unmarshal gen: %v", err)
|
||||
}
|
||||
if genResp.Data.URL != externalURL+"/api/install/"+genResp.Data.InstallToken {
|
||||
t.Fatalf("url should use external URL, got %q", genResp.Data.URL)
|
||||
}
|
||||
if genResp.Data.FallbackURL != externalURL+"/install/"+genResp.Data.InstallToken {
|
||||
t.Fatalf("fallbackUrl should use external URL, got %q", genResp.Data.FallbackURL)
|
||||
}
|
||||
decodedScript, err := base64.StdEncoding.DecodeString(genResp.Data.ScriptBase64)
|
||||
if err != nil {
|
||||
t.Fatalf("scriptBase64 should be valid base64: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(decodedScript), `MASTER_URL="`+externalURL+`"`) {
|
||||
t.Fatalf("script should use external MASTER_URL:\n%s", string(decodedScript))
|
||||
}
|
||||
}
|
||||
|
||||
func TestOneClickInstallFlow(t *testing.T) {
|
||||
router, jwt := setupInstallFlowRouter(t)
|
||||
|
||||
@@ -496,76 +428,6 @@ func TestInstallFlowComposeModeMismatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallFlowComposeSuccessConsumesToken(t *testing.T) {
|
||||
router, jwt := setupInstallFlowRouter(t)
|
||||
|
||||
batchBody, _ := json.Marshal(map[string][]string{"names": {"compose-ok"}})
|
||||
batchReq := httptest.NewRequest(http.MethodPost, "/api/nodes/batch", bytes.NewBuffer(batchBody))
|
||||
batchReq.Header.Set("Content-Type", "application/json")
|
||||
batchReq.Header.Set("Authorization", "Bearer "+jwt)
|
||||
batchRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(batchRec, batchReq)
|
||||
if batchRec.Code != 200 {
|
||||
t.Fatalf("batch create failed: %d %s", batchRec.Code, batchRec.Body.String())
|
||||
}
|
||||
var batchResp struct {
|
||||
Data []struct {
|
||||
ID uint `json:"id"`
|
||||
} `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(batchRec.Body.Bytes(), &batchResp); err != nil {
|
||||
t.Fatalf("unmarshal batch: %v", err)
|
||||
}
|
||||
if len(batchResp.Data) != 1 {
|
||||
t.Fatalf("expected 1 node, got %d", len(batchResp.Data))
|
||||
}
|
||||
|
||||
genBody, _ := json.Marshal(map[string]any{
|
||||
"mode": "docker",
|
||||
"arch": "auto",
|
||||
"agentVersion": "v1.7.0",
|
||||
"downloadSrc": "github",
|
||||
"ttlSeconds": 900,
|
||||
})
|
||||
genReq := httptest.NewRequest(http.MethodPost,
|
||||
"/api/nodes/"+formatUint(batchResp.Data[0].ID)+"/install-tokens", bytes.NewBuffer(genBody))
|
||||
genReq.Header.Set("Content-Type", "application/json")
|
||||
genReq.Header.Set("Authorization", "Bearer "+jwt)
|
||||
genRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(genRec, genReq)
|
||||
if genRec.Code != 200 {
|
||||
t.Fatalf("install-tokens failed: %d %s", genRec.Code, genRec.Body.String())
|
||||
}
|
||||
var genResp struct {
|
||||
Data struct {
|
||||
InstallToken string `json:"installToken"`
|
||||
} `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(genRec.Body.Bytes(), &genResp); err != nil {
|
||||
t.Fatalf("unmarshal gen: %v", err)
|
||||
}
|
||||
if genResp.Data.InstallToken == "" {
|
||||
t.Fatalf("missing installToken")
|
||||
}
|
||||
|
||||
composeReq := httptest.NewRequest(http.MethodGet, "/api/install/"+genResp.Data.InstallToken+"/compose.yml", nil)
|
||||
composeRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(composeRec, composeReq)
|
||||
if composeRec.Code != 200 {
|
||||
t.Fatalf("compose fetch failed: %d %s", composeRec.Code, composeRec.Body.String())
|
||||
}
|
||||
if !strings.Contains(composeRec.Body.String(), "BACKUPX_AGENT_TOKEN") {
|
||||
t.Fatalf("compose missing token env:\n%s", composeRec.Body.String())
|
||||
}
|
||||
|
||||
scriptReq := httptest.NewRequest(http.MethodGet, "/api/install/"+genResp.Data.InstallToken, nil)
|
||||
scriptRec := httptest.NewRecorder()
|
||||
router.ServeHTTP(scriptRec, scriptReq)
|
||||
if scriptRec.Code != http.StatusGone {
|
||||
t.Fatalf("script after compose should be 410, got %d: %s", scriptRec.Code, scriptRec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// formatUint 小工具:uint → 十进制字符串(无需引入 strconv)。
|
||||
func formatUint(u uint) string {
|
||||
if u == 0 {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
stdhttp "net/http"
|
||||
"strconv"
|
||||
@@ -244,17 +245,14 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
|
||||
input.TTLSeconds = 900
|
||||
}
|
||||
|
||||
out, err := h.installTokenSvc.CreateCommand(c.Request.Context(), service.InstallCommandInput{
|
||||
InstallTokenInput: service.InstallTokenInput{
|
||||
NodeID: uint(id),
|
||||
Mode: input.Mode,
|
||||
Arch: input.Arch,
|
||||
AgentVersion: input.AgentVersion,
|
||||
DownloadSrc: input.DownloadSrc,
|
||||
TTLSeconds: input.TTLSeconds,
|
||||
CreatedByID: h.resolveCurrentUserID(c),
|
||||
},
|
||||
MasterURL: resolveMasterURL(c, h.externalURL),
|
||||
out, err := h.installTokenSvc.Create(c.Request.Context(), service.InstallTokenInput{
|
||||
NodeID: uint(id),
|
||||
Mode: input.Mode,
|
||||
Arch: input.Arch,
|
||||
AgentVersion: input.AgentVersion,
|
||||
DownloadSrc: input.DownloadSrc,
|
||||
TTLSeconds: input.TTLSeconds,
|
||||
CreatedByID: h.resolveCurrentUserID(c),
|
||||
})
|
||||
if err != nil {
|
||||
response.Error(c, err)
|
||||
@@ -264,6 +262,12 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
|
||||
fmt.Sprintf("%d", id), out.Node.Name,
|
||||
fmt.Sprintf("生成 %s/%s install token TTL=%ds", input.Mode, input.Arch, input.TTLSeconds))
|
||||
|
||||
masterURL := resolveMasterURL(c, h.externalURL)
|
||||
script, err := renderInstallScript(masterURL, out.Node, out.Record)
|
||||
if err != nil {
|
||||
response.Error(c, err)
|
||||
return
|
||||
}
|
||||
// 使用 /api/install/... 而非 /install/... —— 让反向代理的 /api/ 转发规则
|
||||
// 自动接管,避免 SPA fallback 把请求当成前端路由返回 index.html(issue #46)。
|
||||
// 同时返回 /install/... 备用地址,兼容会剥离 /api 前缀的外层反向代理。
|
||||
@@ -272,11 +276,15 @@ func (h *NodeHandler) CreateInstallToken(c *gin.Context) {
|
||||
body := gin.H{
|
||||
"installToken": out.Token,
|
||||
"expiresAt": out.ExpiresAt,
|
||||
"url": out.URL,
|
||||
"fallbackUrl": out.FallbackURL,
|
||||
"scriptBase64": out.ScriptBase64,
|
||||
"composeUrl": out.ComposeURL,
|
||||
"fallbackComposeUrl": out.FallbackComposeURL,
|
||||
"url": masterURL + "/api/install/" + out.Token,
|
||||
"fallbackUrl": masterURL + "/install/" + out.Token,
|
||||
"scriptBase64": base64.StdEncoding.EncodeToString([]byte(script)),
|
||||
"composeUrl": "",
|
||||
"fallbackComposeUrl": "",
|
||||
}
|
||||
if input.Mode == "docker" {
|
||||
body["composeUrl"] = masterURL + "/api/install/" + out.Token + "/compose.yml"
|
||||
body["fallbackComposeUrl"] = masterURL + "/install/" + out.Token + "/compose.yml"
|
||||
}
|
||||
response.Success(c, body)
|
||||
}
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
package installscript
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDeployInstallScriptSyntax(t *testing.T) {
|
||||
scriptPath := filepath.Join("..", "..", "..", "deploy", "install.sh")
|
||||
cmd := exec.Command("sh", "-n", scriptPath)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("install.sh syntax invalid: %v\n%s", err, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployInstallScriptSupportsReleasePackageLayout(t *testing.T) {
|
||||
scriptPath := filepath.Join("..", "..", "..", "deploy", "install.sh")
|
||||
data, err := os.ReadFile(scriptPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
script := string(data)
|
||||
for _, want := range []string{
|
||||
`SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)`,
|
||||
`if [ -f "$SCRIPT_DIR/backupx" ] && [ -d "$SCRIPT_DIR/web" ]; then`,
|
||||
`BIN_SOURCE="${BIN_SOURCE:-$SCRIPT_DIR/backupx}"`,
|
||||
`WEB_SOURCE="${WEB_SOURCE:-$SCRIPT_DIR/web}"`,
|
||||
`CONFIG_TEMPLATE="${CONFIG_TEMPLATE:-$SCRIPT_DIR/config.example.yaml}"`,
|
||||
`发布包安装请确认当前目录包含 ./backupx、./web 和 ./install.sh。`,
|
||||
`cat > "/etc/systemd/system/$SERVICE_NAME.service" <<UNIT`,
|
||||
`if [ -d "/etc/nginx/conf.d" ] && [ -f "$NGINX_SOURCE" ]; then`,
|
||||
} {
|
||||
if !strings.Contains(script, want) {
|
||||
t.Fatalf("install.sh missing %q", want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -37,22 +37,19 @@ func TestRenderScriptBashBootstrap(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderScriptUsesRootForBareMetalBackups(t *testing.T) {
|
||||
func TestRenderScriptCreatesBackupXUserAndGroup(t *testing.T) {
|
||||
got, err := RenderScript(testCtx)
|
||||
if err != nil {
|
||||
t.Fatalf("render err: %v", err)
|
||||
}
|
||||
for _, want := range []string{
|
||||
"/var/lib/backupx-agent/tmp",
|
||||
"install -d -m 0700 /var/lib/backupx-agent /var/lib/backupx-agent/tmp",
|
||||
"getent group backupx",
|
||||
"groupadd --system backupx",
|
||||
"useradd --system --gid backupx",
|
||||
"Group=backupx",
|
||||
} {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("script missing %q:\n%s", want, got)
|
||||
}
|
||||
}
|
||||
for _, forbidden := range []string{"User=backupx", "Group=backupx", "NoNewPrivileges=true"} {
|
||||
if strings.Contains(got, forbidden) {
|
||||
t.Errorf("script should not contain %q for bare-metal backups:\n%s", forbidden, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package installscript
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@@ -29,10 +27,8 @@ func TestRenderScriptSystemd(t *testing.T) {
|
||||
mustContain := []string{
|
||||
"BACKUPX_AGENT_MASTER=${MASTER_URL}",
|
||||
`Environment="BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}"`,
|
||||
"/var/lib/backupx-agent/tmp",
|
||||
"systemctl daemon-reload",
|
||||
"systemctl enable --now backupx-agent",
|
||||
"systemctl status backupx-agent",
|
||||
"X-Agent-Token: ${AGENT_TOKEN}",
|
||||
"MASTER_URL=\"https://master.example.com\"",
|
||||
"AGENT_TOKEN=\"deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef\"",
|
||||
@@ -60,9 +56,6 @@ func TestRenderScriptForeground(t *testing.T) {
|
||||
if !strings.Contains(got, `exec "${INSTALL_PREFIX}/backupx" agent`) {
|
||||
t.Errorf("foreground script missing exec line:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, "/var/lib/backupx-agent/tmp") {
|
||||
t.Errorf("foreground script missing dedicated temp dir:\n%s", got)
|
||||
}
|
||||
if strings.Contains(got, "systemctl daemon-reload") {
|
||||
t.Errorf("foreground script should not reference systemctl:\n%s", got)
|
||||
}
|
||||
@@ -81,44 +74,14 @@ func TestRenderScriptDocker(t *testing.T) {
|
||||
if !strings.Contains(got, "docker run") {
|
||||
t.Errorf("docker script missing `docker run`:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, "/var/lib/backupx-agent:/var/lib/backupx-agent") {
|
||||
t.Errorf("docker script missing agent data volume:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, "awuqing/backupx:${AGENT_VERSION}") {
|
||||
t.Errorf("docker script missing image tag reference:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"awuqing/backupx:${AGENT_VERSION}" agent`) {
|
||||
t.Errorf("docker script must start image in agent mode:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `-e "BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp"`) {
|
||||
t.Errorf("docker script missing temp dir env:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `docker logs --tail=100 backupx-agent`) {
|
||||
t.Errorf("docker script missing diagnostic log command:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `grep -q '"status":"online"'`) {
|
||||
t.Errorf("docker script missing online probe:\n%s", got)
|
||||
}
|
||||
if strings.Contains(got, "systemctl daemon-reload") {
|
||||
t.Errorf("docker script should not reference systemctl:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerEntrypointForwardsAgentSubcommand(t *testing.T) {
|
||||
entrypointPath := filepath.Join("..", "..", "..", "deploy", "docker", "entrypoint.sh")
|
||||
got, err := os.ReadFile(entrypointPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read docker entrypoint: %v", err)
|
||||
}
|
||||
script := string(got)
|
||||
if !strings.Contains(script, `"${1:-}" = "agent"`) {
|
||||
t.Fatalf("entrypoint must detect the agent subcommand before starting server:\n%s", script)
|
||||
}
|
||||
if !strings.Contains(script, `exec /app/bin/backupx "$@"`) {
|
||||
t.Fatalf("entrypoint must exec backupx with forwarded args:\n%s", script)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderComposeYaml(t *testing.T) {
|
||||
ctx := testCtx
|
||||
ctx.Mode = model.InstallModeDocker
|
||||
@@ -129,26 +92,17 @@ func TestRenderComposeYaml(t *testing.T) {
|
||||
if !strings.Contains(got, "image: awuqing/backupx:v1.7.0") {
|
||||
t.Errorf("compose missing image:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `command: ["agent"]`) {
|
||||
t.Errorf("compose must start image in agent mode:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `BACKUPX_AGENT_TOKEN: "deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef"`) {
|
||||
t.Errorf("compose missing token env:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `BACKUPX_AGENT_TEMP_DIR: "/var/lib/backupx-agent/tmp"`) {
|
||||
t.Errorf("compose missing temp dir env:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, "/var/lib/backupx-agent:/var/lib/backupx-agent") {
|
||||
t.Errorf("compose missing agent data volume:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderScriptRejectsInjectedMasterURL(t *testing.T) {
|
||||
bad := []string{
|
||||
"https://example.com\" other: inject", // 含引号和空格
|
||||
"javascript:alert(1)", // scheme 非法
|
||||
"https://example.com\n- privileged", // 含换行,YAML 注入经典 payload
|
||||
"", // 空
|
||||
"javascript:alert(1)", // scheme 非法
|
||||
"https://example.com\n- privileged", // 含换行,YAML 注入经典 payload
|
||||
"", // 空
|
||||
}
|
||||
for _, u := range bad {
|
||||
ctx := testCtx
|
||||
@@ -207,8 +161,8 @@ func TestDownloadBaseMapping(t *testing.T) {
|
||||
|
||||
func TestRenderScriptDefaultsApplied(t *testing.T) {
|
||||
ctx := testCtx
|
||||
ctx.InstallPrefix = "" // 应被默认为 /opt/backupx-agent
|
||||
ctx.DownloadBase = "" // 应被默认为 github
|
||||
ctx.InstallPrefix = "" // 应被默认为 /opt/backupx-agent
|
||||
ctx.DownloadBase = "" // 应被默认为 github
|
||||
got, err := RenderScript(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("render err: %v", err)
|
||||
|
||||
@@ -9,6 +9,5 @@ services:
|
||||
environment:
|
||||
BACKUPX_AGENT_MASTER: "{{.MasterURL}}"
|
||||
BACKUPX_AGENT_TOKEN: "{{.AgentToken}}"
|
||||
BACKUPX_AGENT_TEMP_DIR: "/var/lib/backupx-agent/tmp"
|
||||
volumes:
|
||||
- /var/lib/backupx-agent:/var/lib/backupx-agent
|
||||
- /var/lib/backupx-agent:/tmp/backupx-agent
|
||||
|
||||
@@ -47,10 +47,30 @@ else
|
||||
fi
|
||||
tar xzf "$TMPDIR/pkg.tar.gz" -C "$TMPDIR"
|
||||
|
||||
# 4. 安装二进制 + 数据目录
|
||||
# 4. 安装二进制 + 用户
|
||||
echo "[2/4] 安装到 ${INSTALL_PREFIX}"
|
||||
install -d -m 0755 "$INSTALL_PREFIX"
|
||||
install -d -m 0700 /var/lib/backupx-agent /var/lib/backupx-agent/tmp
|
||||
if ! getent group backupx >/dev/null 2>&1; then
|
||||
if command -v groupadd >/dev/null 2>&1; then
|
||||
groupadd --system backupx
|
||||
elif command -v addgroup >/dev/null 2>&1; then
|
||||
addgroup --system backupx
|
||||
else
|
||||
echo "需要 groupadd 或 addgroup 来创建 backupx 组" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
if ! id backupx >/dev/null 2>&1; then
|
||||
if command -v useradd >/dev/null 2>&1; then
|
||||
useradd --system --gid backupx --home-dir "$INSTALL_PREFIX" --shell /usr/sbin/nologin backupx
|
||||
elif command -v adduser >/dev/null 2>&1; then
|
||||
adduser --system --ingroup backupx --home "$INSTALL_PREFIX" --shell /usr/sbin/nologin backupx
|
||||
else
|
||||
echo "需要 useradd 或 adduser 来创建 backupx 用户" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
id backupx >/dev/null 2>&1 || { echo "backupx 用户创建失败" >&2; exit 1; }
|
||||
install -d -o backupx -g backupx "$INSTALL_PREFIX" /var/lib/backupx-agent
|
||||
install -m 0755 "$TMPDIR/backupx-${AGENT_VERSION}-linux-${ARCH}/backupx" "$INSTALL_PREFIX/backupx"
|
||||
{{end}}
|
||||
|
||||
@@ -65,11 +85,14 @@ Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=backupx
|
||||
Group=backupx
|
||||
Environment="BACKUPX_AGENT_MASTER=${MASTER_URL}"
|
||||
Environment="BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}"
|
||||
ExecStart=${INSTALL_PREFIX}/backupx agent --temp-dir /var/lib/backupx-agent/tmp
|
||||
ExecStart=${INSTALL_PREFIX}/backupx agent --temp-dir /var/lib/backupx-agent
|
||||
Restart=on-failure
|
||||
RestartSec=10s
|
||||
NoNewPrivileges=true
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -88,7 +111,6 @@ for i in $(seq 1 15); do
|
||||
fi
|
||||
done
|
||||
echo "⚠ 30s 内未收到上线心跳,请检查防火墙或 journalctl -u backupx-agent"
|
||||
echo "提示:systemd 服务名是 backupx-agent,可执行 systemctl status backupx-agent 查看状态。"
|
||||
exit 2
|
||||
{{end}}
|
||||
|
||||
@@ -97,7 +119,7 @@ exit 2
|
||||
echo "[3/3] 前台启动 agent(Ctrl+C 退出)"
|
||||
export BACKUPX_AGENT_MASTER="${MASTER_URL}"
|
||||
export BACKUPX_AGENT_TOKEN="${AGENT_TOKEN}"
|
||||
exec "${INSTALL_PREFIX}/backupx" agent --temp-dir /var/lib/backupx-agent/tmp
|
||||
exec "${INSTALL_PREFIX}/backupx" agent --temp-dir /var/lib/backupx-agent
|
||||
{{end}}
|
||||
|
||||
{{if eq .Mode "docker"}}
|
||||
@@ -109,20 +131,7 @@ docker rm -f backupx-agent >/dev/null 2>&1 || true
|
||||
docker run -d --name backupx-agent --restart=unless-stopped \
|
||||
-e "BACKUPX_AGENT_MASTER=${MASTER_URL}" \
|
||||
-e "BACKUPX_AGENT_TOKEN=${AGENT_TOKEN}" \
|
||||
-e "BACKUPX_AGENT_TEMP_DIR=/var/lib/backupx-agent/tmp" \
|
||||
-v /var/lib/backupx-agent:/var/lib/backupx-agent \
|
||||
-v /var/lib/backupx-agent:/tmp/backupx-agent \
|
||||
"awuqing/backupx:${AGENT_VERSION}" agent
|
||||
echo "✓ 容器已启动,等待节点上线"
|
||||
for i in $(seq 1 15); do
|
||||
sleep 2
|
||||
if curl -fsSL -H "X-Agent-Token: ${AGENT_TOKEN}" "${MASTER_URL}/api/v1/agent/self" 2>/dev/null \
|
||||
| grep -q '"status":"online"'; then
|
||||
echo "✓ 节点已上线"
|
||||
exit 0
|
||||
fi
|
||||
done
|
||||
echo "⚠ 30s 内未收到上线心跳,请检查容器状态、网络与 Master URL。"
|
||||
echo "排查命令:docker ps -a --filter name=backupx-agent"
|
||||
echo "排查命令:docker logs --tail=100 backupx-agent"
|
||||
exit 2
|
||||
echo "✓ 容器已启动"
|
||||
{{end}}
|
||||
|
||||
@@ -13,18 +13,16 @@ type SampleSource interface {
|
||||
ListStorageTargets(ctx context.Context) ([]model.StorageTarget, error)
|
||||
StorageUsage(ctx context.Context) ([]repository.BackupStorageUsageItem, error)
|
||||
ListNodes(ctx context.Context) ([]model.Node, error)
|
||||
AgentQueueSummaries(ctx context.Context) (map[uint]repository.AgentCommandQueueSummary, error)
|
||||
CountSLABreach(ctx context.Context) (int, error)
|
||||
}
|
||||
|
||||
// repoSource 把 repository 适配到 SampleSource。
|
||||
type repoSource struct {
|
||||
targets repository.StorageTargetRepository
|
||||
records repository.BackupRecordRepository
|
||||
nodes repository.NodeRepository
|
||||
tasks repository.BackupTaskRepository
|
||||
commands repository.AgentCommandRepository
|
||||
now func() time.Time
|
||||
targets repository.StorageTargetRepository
|
||||
records repository.BackupRecordRepository
|
||||
nodes repository.NodeRepository
|
||||
tasks repository.BackupTaskRepository
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// NewRepoSource 用仓储实例构造 SampleSource。
|
||||
@@ -33,15 +31,13 @@ func NewRepoSource(
|
||||
records repository.BackupRecordRepository,
|
||||
nodes repository.NodeRepository,
|
||||
tasks repository.BackupTaskRepository,
|
||||
commands repository.AgentCommandRepository,
|
||||
) SampleSource {
|
||||
return &repoSource{
|
||||
targets: targets,
|
||||
records: records,
|
||||
nodes: nodes,
|
||||
tasks: tasks,
|
||||
commands: commands,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
targets: targets,
|
||||
records: records,
|
||||
nodes: nodes,
|
||||
tasks: tasks,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,13 +53,6 @@ func (s *repoSource) ListNodes(ctx context.Context) ([]model.Node, error) {
|
||||
return s.nodes.List(ctx)
|
||||
}
|
||||
|
||||
func (s *repoSource) AgentQueueSummaries(ctx context.Context) (map[uint]repository.AgentCommandQueueSummary, error) {
|
||||
if s.commands == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return s.commands.NodeQueueSummaries(ctx)
|
||||
}
|
||||
|
||||
// CountSLABreach 统计当前违反 RPO 的任务:
|
||||
// - 任务启用且配置了 SLAHoursRPO > 0
|
||||
// - 最近一次成功备份距今超出 SLA 时间窗,或从未成功过
|
||||
@@ -147,9 +136,7 @@ func (c *Collector) collect(ctx context.Context) {
|
||||
}
|
||||
// 节点在线状态:role 约定为 master / agent
|
||||
if nodes, err := c.source.ListNodes(ctx); err == nil {
|
||||
queueByNode, _ := c.source.AgentQueueSummaries(ctx)
|
||||
c.metrics.ResetNodeOnline()
|
||||
c.metrics.ResetAgentQueue()
|
||||
for i := range nodes {
|
||||
n := &nodes[i]
|
||||
role := "agent"
|
||||
@@ -157,8 +144,6 @@ func (c *Collector) collect(ctx context.Context) {
|
||||
role = "master"
|
||||
}
|
||||
c.metrics.SetNodeOnline(n.Name, role, n.Status == model.NodeStatusOnline)
|
||||
queue := queueByNode[n.ID]
|
||||
c.metrics.SetAgentQueue(n.Name, role, queue.Depth, queue.Running, queue.Timeouts)
|
||||
}
|
||||
}
|
||||
if breach, err := c.source.CountSLABreach(ctx); err == nil {
|
||||
|
||||
@@ -31,12 +31,6 @@ type Metrics struct {
|
||||
StorageUsedBytes *prometheus.GaugeVec
|
||||
// 节点在线状态(labels: node_name, role;value: 0/1)
|
||||
NodeOnline *prometheus.GaugeVec
|
||||
// Agent 命令队列深度(labels: node_name, role)
|
||||
AgentCommandQueueDepth *prometheus.GaugeVec
|
||||
// Agent 正在执行的长命令数(labels: node_name, role)
|
||||
AgentCommandRunning *prometheus.GaugeVec
|
||||
// Agent 命令超时累计数快照(labels: node_name, role)
|
||||
AgentCommandTimeoutTotal *prometheus.GaugeVec
|
||||
// 验证演练结果(labels: status)
|
||||
VerifyRunTotal *prometheus.CounterVec
|
||||
// 恢复操作结果(labels: status)
|
||||
@@ -84,18 +78,6 @@ func New(version string) *Metrics {
|
||||
Name: "backupx_node_online",
|
||||
Help: "集群节点在线状态(1 在线 / 0 离线)",
|
||||
}, []string{"node_name", "role"}),
|
||||
AgentCommandQueueDepth: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "backupx_agent_command_queue_depth",
|
||||
Help: "Agent 当前 pending/dispatched 命令总数",
|
||||
}, []string{"node_name", "role"}),
|
||||
AgentCommandRunning: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "backupx_agent_command_running",
|
||||
Help: "Agent 当前正在执行的长命令数",
|
||||
}, []string{"node_name", "role"}),
|
||||
AgentCommandTimeoutTotal: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "backupx_agent_command_timeout_total",
|
||||
Help: "Agent 已超时命令数快照",
|
||||
}, []string{"node_name", "role"}),
|
||||
VerifyRunTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "backupx_verify_run_total",
|
||||
Help: "备份验证演练执行总数",
|
||||
@@ -124,9 +106,6 @@ func New(version string) *Metrics {
|
||||
m.TaskRunningGauge,
|
||||
m.StorageUsedBytes,
|
||||
m.NodeOnline,
|
||||
m.AgentCommandQueueDepth,
|
||||
m.AgentCommandRunning,
|
||||
m.AgentCommandTimeoutTotal,
|
||||
m.VerifyRunTotal,
|
||||
m.RestoreRunTotal,
|
||||
m.ReplicationRunTotal,
|
||||
@@ -229,24 +208,6 @@ func (m *Metrics) ResetNodeOnline() {
|
||||
m.NodeOnline.Reset()
|
||||
}
|
||||
|
||||
func (m *Metrics) SetAgentQueue(name, role string, depth, running, timeoutCount int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.AgentCommandQueueDepth.WithLabelValues(name, role).Set(float64(depth))
|
||||
m.AgentCommandRunning.WithLabelValues(name, role).Set(float64(running))
|
||||
m.AgentCommandTimeoutTotal.WithLabelValues(name, role).Set(float64(timeoutCount))
|
||||
}
|
||||
|
||||
func (m *Metrics) ResetAgentQueue() {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.AgentCommandQueueDepth.Reset()
|
||||
m.AgentCommandRunning.Reset()
|
||||
m.AgentCommandTimeoutTotal.Reset()
|
||||
}
|
||||
|
||||
// ResetStorageUsed 清空存储目标 gauge。
|
||||
func (m *Metrics) ResetStorageUsed() {
|
||||
if m == nil {
|
||||
|
||||
@@ -41,11 +41,9 @@ func TestObserveTaskRun_NilReceiverIsSafe(t *testing.T) {
|
||||
m.DecTaskRunning()
|
||||
m.SetStorageUsed("a", "s3", 1)
|
||||
m.SetNodeOnline("n1", "master", true)
|
||||
m.SetAgentQueue("n1", "agent", 2, 1, 3)
|
||||
m.SetSLABreach(3)
|
||||
m.ResetNodeOnline()
|
||||
m.ResetStorageUsed()
|
||||
m.ResetAgentQueue()
|
||||
// no panic -> pass
|
||||
}
|
||||
|
||||
@@ -53,7 +51,6 @@ func TestHandler_ExposesBackupxMetrics(t *testing.T) {
|
||||
m := New("0.0.0-test")
|
||||
m.ObserveTaskRun("file", "success", 1.0, 2048)
|
||||
m.SetNodeOnline("n1", "master", true)
|
||||
m.SetAgentQueue("edge-a", "agent", 3, 1, 2)
|
||||
m.SetSLABreach(1)
|
||||
|
||||
recorder := httptest.NewRecorder()
|
||||
@@ -69,9 +66,6 @@ func TestHandler_ExposesBackupxMetrics(t *testing.T) {
|
||||
"backupx_task_run_total",
|
||||
"backupx_task_run_duration_seconds",
|
||||
"backupx_node_online",
|
||||
"backupx_agent_command_queue_depth",
|
||||
"backupx_agent_command_running",
|
||||
"backupx_agent_command_timeout_total",
|
||||
"backupx_sla_breach_tasks",
|
||||
"backupx_app_info",
|
||||
} {
|
||||
|
||||
@@ -17,46 +17,15 @@ type AgentCommandRepository interface {
|
||||
// 并返回领取到的命令。无命令时返回 (nil, nil)。
|
||||
ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error)
|
||||
Update(ctx context.Context, cmd *model.AgentCommand) error
|
||||
// CompleteDispatched 只在命令仍处于 dispatched 时写入终态。
|
||||
// 返回 false 表示命令已被超时监控或其它流程终结,调用方不应覆盖。
|
||||
CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error)
|
||||
// MarkStaleTimeout 把 dispatched 状态但超时未完成的命令标记为 timeout。
|
||||
// 返回被标记的行数。不返回具体命令(供背景监控简单调用)。
|
||||
MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error)
|
||||
// TimeoutActive 只在命令仍处于 pending/dispatched 时写入 timeout。
|
||||
// 返回 false 表示命令已被 Agent 回写为终态,调用方不应覆盖。
|
||||
TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error)
|
||||
// ListStaleDispatched 列出 dispatched 但已超时、尚未被标记的命令。
|
||||
// 调用方需要把它们逐一标记 timeout 并联动关联记录状态。
|
||||
ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
|
||||
// ListStaleActive 列出 pending/dispatched 但已超时、尚未完成的命令。
|
||||
// pending 使用 created_at 判定,dispatched 使用 dispatched_at 判定。
|
||||
ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
|
||||
// ListPendingByNode 列出某节点下的所有 pending/dispatched 命令。
|
||||
// 用于删除节点或节点离线时的清理。
|
||||
ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error)
|
||||
NodeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error)
|
||||
}
|
||||
|
||||
type AgentCommandQueueSummary struct {
|
||||
NodeID uint `json:"nodeId"`
|
||||
Pending int `json:"pending"`
|
||||
Dispatched int `json:"dispatched"`
|
||||
Running int `json:"running"`
|
||||
Depth int `json:"depth"`
|
||||
Timeouts int `json:"timeouts"`
|
||||
LastError string `json:"lastError,omitempty"`
|
||||
OldestActiveAt *time.Time `json:"oldestActiveAt,omitempty"`
|
||||
}
|
||||
|
||||
type agentCommandTimeoutCount struct {
|
||||
NodeID uint
|
||||
Count int
|
||||
}
|
||||
|
||||
type agentCommandLastError struct {
|
||||
NodeID uint
|
||||
ErrorMessage string
|
||||
}
|
||||
|
||||
type GormAgentCommandRepository struct {
|
||||
@@ -125,21 +94,6 @@ func (r *GormAgentCommandRepository) Update(ctx context.Context, cmd *model.Agen
|
||||
return r.db.WithContext(ctx).Save(cmd).Error
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
|
||||
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
|
||||
Where("id = ? AND node_id = ? AND status = ?", cmd.ID, cmd.NodeID, model.AgentCommandStatusDispatched).
|
||||
Updates(map[string]any{
|
||||
"status": cmd.Status,
|
||||
"error_message": cmd.ErrorMessage,
|
||||
"result": cmd.Result,
|
||||
"completed_at": cmd.CompletedAt,
|
||||
})
|
||||
if result.Error != nil {
|
||||
return false, result.Error
|
||||
}
|
||||
return result.RowsAffected > 0, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error) {
|
||||
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
|
||||
Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
|
||||
@@ -153,20 +107,6 @@ func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, thres
|
||||
return result.RowsAffected, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
|
||||
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
|
||||
Where("id = ? AND status IN ?", cmd.ID, []string{model.AgentCommandStatusPending, model.AgentCommandStatusDispatched}).
|
||||
Updates(map[string]any{
|
||||
"status": model.AgentCommandStatusTimeout,
|
||||
"error_message": cmd.ErrorMessage,
|
||||
"completed_at": cmd.CompletedAt,
|
||||
})
|
||||
if result.Error != nil {
|
||||
return false, result.Error
|
||||
}
|
||||
return result.RowsAffected > 0, nil
|
||||
}
|
||||
|
||||
// ListStaleDispatched 列出 dispatched 但 dispatched_at 早于 threshold 的命令。
|
||||
func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
|
||||
var items []model.AgentCommand
|
||||
@@ -179,21 +119,6 @@ func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, th
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
|
||||
var items []model.AgentCommand
|
||||
if err := r.db.WithContext(ctx).
|
||||
Where(
|
||||
"(status = ? AND created_at < ?) OR (status = ? AND dispatched_at < ?)",
|
||||
model.AgentCommandStatusPending, threshold,
|
||||
model.AgentCommandStatusDispatched, threshold,
|
||||
).
|
||||
Order("id asc").
|
||||
Find(&items).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// ListPendingByNode 列出某节点下所有待执行(pending 或 dispatched)命令。
|
||||
func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error) {
|
||||
var items []model.AgentCommand
|
||||
@@ -208,114 +133,3 @@ func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, node
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) NodeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error) {
|
||||
summaries, err := r.activeQueueSummaries(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := r.applyTerminalQueueStats(ctx, summaries); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return summaries, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) activeQueueSummaries(ctx context.Context) (map[uint]AgentCommandQueueSummary, error) {
|
||||
var items []model.AgentCommand
|
||||
if err := r.db.WithContext(ctx).
|
||||
Where("status IN ?", []string{
|
||||
model.AgentCommandStatusPending,
|
||||
model.AgentCommandStatusDispatched,
|
||||
}).
|
||||
Order("node_id asc, id asc").
|
||||
Find(&items).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
summaries := make(map[uint]AgentCommandQueueSummary)
|
||||
for i := range items {
|
||||
cmd := &items[i]
|
||||
summary := summaries[cmd.NodeID]
|
||||
summary.NodeID = cmd.NodeID
|
||||
switch cmd.Status {
|
||||
case model.AgentCommandStatusPending:
|
||||
summary.Pending++
|
||||
summary.Depth++
|
||||
summary.OldestActiveAt = oldestTime(summary.OldestActiveAt, &cmd.CreatedAt)
|
||||
case model.AgentCommandStatusDispatched:
|
||||
summary.Dispatched++
|
||||
summary.Depth++
|
||||
if isLongRunningAgentCommand(cmd.Type) {
|
||||
summary.Running++
|
||||
}
|
||||
summary.OldestActiveAt = oldestTime(summary.OldestActiveAt, cmd.DispatchedAt)
|
||||
}
|
||||
summaries[cmd.NodeID] = summary
|
||||
}
|
||||
return summaries, nil
|
||||
}
|
||||
|
||||
func (r *GormAgentCommandRepository) applyTerminalQueueStats(ctx context.Context, summaries map[uint]AgentCommandQueueSummary) error {
|
||||
var timeoutCounts []agentCommandTimeoutCount
|
||||
if err := r.db.WithContext(ctx).
|
||||
Model(&model.AgentCommand{}).
|
||||
Select("node_id, COUNT(*) AS count").
|
||||
Where("status = ?", model.AgentCommandStatusTimeout).
|
||||
Group("node_id").
|
||||
Scan(&timeoutCounts).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
for _, item := range timeoutCounts {
|
||||
summary := summaries[item.NodeID]
|
||||
summary.NodeID = item.NodeID
|
||||
summary.Timeouts = item.Count
|
||||
summaries[item.NodeID] = summary
|
||||
}
|
||||
|
||||
terminalStatuses := []string{
|
||||
model.AgentCommandStatusFailed,
|
||||
model.AgentCommandStatusTimeout,
|
||||
}
|
||||
latestByNode := r.db.WithContext(ctx).
|
||||
Model(&model.AgentCommand{}).
|
||||
Select("node_id, MAX(COALESCE(completed_at, updated_at, created_at)) AS last_error_at").
|
||||
Where("status IN ? AND error_message <> ''", terminalStatuses).
|
||||
Group("node_id")
|
||||
|
||||
var lastErrors []agentCommandLastError
|
||||
if err := r.db.WithContext(ctx).
|
||||
Table("agent_commands AS cmd").
|
||||
Select("cmd.node_id, cmd.error_message").
|
||||
Joins("JOIN (?) latest ON latest.node_id = cmd.node_id AND latest.last_error_at = COALESCE(cmd.completed_at, cmd.updated_at, cmd.created_at)", latestByNode).
|
||||
Where("cmd.status IN ? AND cmd.error_message <> ''", terminalStatuses).
|
||||
Order("cmd.node_id asc, cmd.id desc").
|
||||
Scan(&lastErrors).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
seenLastError := make(map[uint]struct{}, len(lastErrors))
|
||||
for _, item := range lastErrors {
|
||||
if _, ok := seenLastError[item.NodeID]; ok {
|
||||
continue
|
||||
}
|
||||
summary := summaries[item.NodeID]
|
||||
summary.NodeID = item.NodeID
|
||||
summary.LastError = item.ErrorMessage
|
||||
summaries[item.NodeID] = summary
|
||||
seenLastError[item.NodeID] = struct{}{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func oldestTime(current *time.Time, candidate *time.Time) *time.Time {
|
||||
if candidate == nil {
|
||||
return current
|
||||
}
|
||||
if current == nil || candidate.Before(*current) {
|
||||
value := *candidate
|
||||
return &value
|
||||
}
|
||||
return current
|
||||
}
|
||||
|
||||
func isLongRunningAgentCommand(commandType string) bool {
|
||||
return commandType == model.AgentCommandTypeRunTask || commandType == model.AgentCommandTypeRestoreRecord
|
||||
}
|
||||
|
||||
@@ -90,78 +90,6 @@ func TestAgentCommandRepository_Update(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentCommandRepository_CompleteDispatchedOnlyUpdatesDispatchedCommand(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
repo := NewAgentCommandRepository(db)
|
||||
ctx := context.Background()
|
||||
dispatched := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusDispatched}
|
||||
timeout := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusTimeout, ErrorMessage: "timeout"}
|
||||
if err := repo.Create(ctx, dispatched); err != nil {
|
||||
t.Fatalf("Create dispatched returned error: %v", err)
|
||||
}
|
||||
if err := repo.Create(ctx, timeout); err != nil {
|
||||
t.Fatalf("Create timeout returned error: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
dispatched.Status = model.AgentCommandStatusSucceeded
|
||||
dispatched.Result = `{"ok":true}`
|
||||
dispatched.CompletedAt = &now
|
||||
updated, err := repo.CompleteDispatched(ctx, dispatched)
|
||||
if err != nil {
|
||||
t.Fatalf("CompleteDispatched returned error: %v", err)
|
||||
}
|
||||
if !updated {
|
||||
t.Fatal("expected dispatched command to be updated")
|
||||
}
|
||||
|
||||
timeout.Status = model.AgentCommandStatusSucceeded
|
||||
timeout.Result = `{"late":true}`
|
||||
timeout.CompletedAt = &now
|
||||
updated, err = repo.CompleteDispatched(ctx, timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("CompleteDispatched terminal returned error: %v", err)
|
||||
}
|
||||
if updated {
|
||||
t.Fatal("expected terminal command not to be updated")
|
||||
}
|
||||
gotTimeout, err := repo.FindByID(ctx, timeout.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID timeout returned error: %v", err)
|
||||
}
|
||||
if gotTimeout.Status != model.AgentCommandStatusTimeout || gotTimeout.Result != "" {
|
||||
t.Fatalf("expected timeout command unchanged, got %#v", gotTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentCommandRepository_TimeoutActiveDoesNotOverwriteTerminalCommand(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
repo := NewAgentCommandRepository(db)
|
||||
ctx := context.Background()
|
||||
succeeded := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusSucceeded, Result: `{"ok":true}`}
|
||||
if err := repo.Create(ctx, succeeded); err != nil {
|
||||
t.Fatalf("Create succeeded returned error: %v", err)
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
succeeded.ErrorMessage = "timeout"
|
||||
succeeded.CompletedAt = &now
|
||||
updated, err := repo.TimeoutActive(ctx, succeeded)
|
||||
if err != nil {
|
||||
t.Fatalf("TimeoutActive returned error: %v", err)
|
||||
}
|
||||
if updated {
|
||||
t.Fatal("expected terminal command not to be timed out")
|
||||
}
|
||||
got, err := repo.FindByID(ctx, succeeded.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID returned error: %v", err)
|
||||
}
|
||||
if got.Status != model.AgentCommandStatusSucceeded || got.ErrorMessage != "" || got.Result != `{"ok":true}` {
|
||||
t.Fatalf("expected succeeded command unchanged, got %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentCommandRepository_MarkStaleTimeout(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
repo := NewAgentCommandRepository(db)
|
||||
@@ -190,72 +118,3 @@ func TestAgentCommandRepository_MarkStaleTimeout(t *testing.T) {
|
||||
t.Errorf("new should stay dispatched: %+v", newGot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentCommandRepository_ListStaleActiveIncludesPendingAndDispatched(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
repo := NewAgentCommandRepository(db)
|
||||
ctx := context.Background()
|
||||
old := time.Now().Add(-time.Hour)
|
||||
recent := time.Now()
|
||||
oldPending := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusPending, CreatedAt: old}
|
||||
oldDispatched := &model.AgentCommand{NodeID: 1, Type: "restore_record", Status: model.AgentCommandStatusDispatched, DispatchedAt: &old}
|
||||
recentPending := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusPending, CreatedAt: recent}
|
||||
succeeded := &model.AgentCommand{NodeID: 1, Type: "run_task", Status: model.AgentCommandStatusSucceeded, CreatedAt: old}
|
||||
for _, cmd := range []*model.AgentCommand{oldPending, oldDispatched, recentPending, succeeded} {
|
||||
if err := repo.Create(ctx, cmd); err != nil {
|
||||
t.Fatalf("Create returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
items, err := repo.ListStaleActive(ctx, time.Now().Add(-30*time.Minute))
|
||||
if err != nil {
|
||||
t.Fatalf("ListStaleActive returned error: %v", err)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Fatalf("expected 2 stale active commands, got %#v", items)
|
||||
}
|
||||
if items[0].ID != oldPending.ID || items[1].ID != oldDispatched.ID {
|
||||
t.Fatalf("unexpected stale active order/items: %#v", items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentCommandRepository_NodeQueueSummaries(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
repo := NewAgentCommandRepository(db)
|
||||
ctx := context.Background()
|
||||
old := time.Now().UTC().Add(-20 * time.Minute)
|
||||
recent := time.Now().UTC().Add(-2 * time.Minute)
|
||||
dispatchedAt := time.Now().UTC().Add(-5 * time.Minute)
|
||||
completedAt := time.Now().UTC().Add(-1 * time.Minute)
|
||||
commands := []*model.AgentCommand{
|
||||
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old},
|
||||
{NodeID: 1, Type: model.AgentCommandTypeRestoreRecord, Status: model.AgentCommandStatusPending, CreatedAt: recent},
|
||||
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusDispatched, DispatchedAt: &dispatchedAt},
|
||||
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusFailed, ErrorMessage: "boom", CompletedAt: &completedAt},
|
||||
{NodeID: 1, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusTimeout, ErrorMessage: "late", CompletedAt: &recent},
|
||||
{NodeID: 2, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old},
|
||||
}
|
||||
for _, cmd := range commands {
|
||||
if err := repo.Create(ctx, cmd); err != nil {
|
||||
t.Fatalf("Create returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
summaries, err := repo.NodeQueueSummaries(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("NodeQueueSummaries returned error: %v", err)
|
||||
}
|
||||
nodeOne := summaries[1]
|
||||
if nodeOne.Pending != 2 || nodeOne.Dispatched != 1 || nodeOne.Running != 1 || nodeOne.Depth != 3 {
|
||||
t.Fatalf("unexpected node 1 summary: %#v", nodeOne)
|
||||
}
|
||||
if nodeOne.Timeouts != 1 || nodeOne.LastError != "boom" {
|
||||
t.Fatalf("expected terminal timeout and latest error in summary, got %#v", nodeOne)
|
||||
}
|
||||
if nodeOne.OldestActiveAt == nil || !nodeOne.OldestActiveAt.Equal(old) {
|
||||
t.Fatalf("expected oldest active at %s, got %#v", old, nodeOne.OldestActiveAt)
|
||||
}
|
||||
if nodeTwo := summaries[2]; nodeTwo.Pending != 1 || nodeTwo.Depth != 1 || nodeTwo.Timeouts != 0 || nodeTwo.LastError != "" {
|
||||
t.Fatalf("unexpected node 2 summary: %#v", nodeTwo)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package repository
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -84,59 +83,6 @@ func TestInstallTokenConsumeExpired(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallTokenConsumeConcurrentOnlyOneWins(t *testing.T) {
|
||||
db := openTestInstallTokenDB(t)
|
||||
repo := NewAgentInstallTokenRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
tok := &model.AgentInstallToken{
|
||||
Token: "concurrent", NodeID: 1, Mode: model.InstallModeSystemd,
|
||||
Arch: model.InstallArchAuto, AgentVer: "v1.7.0",
|
||||
DownloadSrc: model.InstallSourceGitHub,
|
||||
ExpiresAt: time.Now().UTC().Add(15 * time.Minute),
|
||||
CreatedByID: 1,
|
||||
}
|
||||
if err := repo.Create(ctx, tok); err != nil {
|
||||
t.Fatalf("create: %v", err)
|
||||
}
|
||||
|
||||
const workers = 8
|
||||
var wg sync.WaitGroup
|
||||
start := make(chan struct{})
|
||||
results := make(chan *model.AgentInstallToken, workers)
|
||||
errs := make(chan error, workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-start
|
||||
got, err := repo.ConsumeByToken(ctx, "concurrent")
|
||||
if err != nil {
|
||||
errs <- err
|
||||
return
|
||||
}
|
||||
results <- got
|
||||
}()
|
||||
}
|
||||
close(start)
|
||||
wg.Wait()
|
||||
close(results)
|
||||
close(errs)
|
||||
|
||||
for err := range errs {
|
||||
t.Fatalf("consume err: %v", err)
|
||||
}
|
||||
success := 0
|
||||
for got := range results {
|
||||
if got != nil {
|
||||
success++
|
||||
}
|
||||
}
|
||||
if success != 1 {
|
||||
t.Fatalf("expected exactly one successful consume, got %d", success)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallTokenGC(t *testing.T) {
|
||||
db := openTestInstallTokenDB(t)
|
||||
repo := NewAgentInstallTokenRepository(db)
|
||||
|
||||
@@ -33,7 +33,6 @@ type BackupStorageUsageItem struct {
|
||||
type BackupRecordRepository interface {
|
||||
List(context.Context, BackupRecordListOptions) ([]model.BackupRecord, error)
|
||||
FindByID(context.Context, uint) (*model.BackupRecord, error)
|
||||
FindRunningByTaskAndNode(context.Context, uint, uint) (*model.BackupRecord, error)
|
||||
Create(context.Context, *model.BackupRecord) error
|
||||
Update(context.Context, *model.BackupRecord) error
|
||||
Delete(context.Context, uint) error
|
||||
@@ -94,20 +93,6 @@ func (r *GormBackupRecordRepository) FindByID(ctx context.Context, id uint) (*mo
|
||||
return &item, nil
|
||||
}
|
||||
|
||||
func (r *GormBackupRecordRepository) FindRunningByTaskAndNode(ctx context.Context, taskID uint, nodeID uint) (*model.BackupRecord, error) {
|
||||
var item model.BackupRecord
|
||||
if err := r.db.WithContext(ctx).
|
||||
Where("task_id = ? AND node_id = ? AND status = ?", taskID, nodeID, model.BackupRecordStatusRunning).
|
||||
Order("id desc").
|
||||
First(&item).Error; err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &item, nil
|
||||
}
|
||||
|
||||
func (r *GormBackupRecordRepository) Create(ctx context.Context, item *model.BackupRecord) error {
|
||||
return r.db.WithContext(ctx).Create(item).Error
|
||||
}
|
||||
|
||||
@@ -226,7 +226,7 @@ func (r *GormBackupTaskRepository) Create(ctx context.Context, item *model.Backu
|
||||
}
|
||||
|
||||
func (r *GormBackupTaskRepository) Update(ctx context.Context, item *model.BackupTask) error {
|
||||
if err := r.db.WithContext(ctx).Omit("StorageTarget", "StorageTargets", "Node").Save(item).Error; err != nil {
|
||||
if err := r.db.WithContext(ctx).Save(item).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
if len(item.StorageTargets) > 0 {
|
||||
|
||||
@@ -92,49 +92,3 @@ func TestBackupTaskRepositoryCRUD(t *testing.T) {
|
||||
t.Fatalf("expected task deleted, got %#v", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupTaskRepositoryUpdateCanClearNodeIDAfterPreload(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := newBackupTaskTestRepository(t)
|
||||
remoteNode := &model.Node{Name: "edge-1", Token: "edge-token", Status: model.NodeStatusOnline, IsLocal: false}
|
||||
if err := repo.db.WithContext(ctx).Create(remoteNode).Error; err != nil {
|
||||
t.Fatalf("create node: %v", err)
|
||||
}
|
||||
task := &model.BackupTask{
|
||||
Name: "pooled-source",
|
||||
Type: "file",
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/www/site",
|
||||
StorageTargetID: 1,
|
||||
NodeID: remoteNode.ID,
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
LastStatus: "idle",
|
||||
}
|
||||
if err := repo.Create(ctx, task); err != nil {
|
||||
t.Fatalf("Create returned error: %v", err)
|
||||
}
|
||||
loaded, err := repo.FindByID(ctx, task.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID returned error: %v", err)
|
||||
}
|
||||
if loaded == nil || loaded.Node.ID != remoteNode.ID {
|
||||
t.Fatalf("expected preloaded node %d, got %#v", remoteNode.ID, loaded)
|
||||
}
|
||||
loaded.NodeID = 0
|
||||
loaded.NodePoolTag = "db"
|
||||
if err := repo.Update(ctx, loaded); err != nil {
|
||||
t.Fatalf("Update returned error: %v", err)
|
||||
}
|
||||
stored, err := repo.FindByID(ctx, task.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID after update returned error: %v", err)
|
||||
}
|
||||
if stored.NodeID != 0 {
|
||||
t.Fatalf("expected NodeID to be cleared, got %d", stored.NodeID)
|
||||
}
|
||||
if stored.NodePoolTag != "db" {
|
||||
t.Fatalf("expected NodePoolTag db, got %q", stored.NodePoolTag)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ type Service struct {
|
||||
func NewService(tasks repository.BackupTaskRepository, runner TaskRunner, logger *zap.Logger) *Service {
|
||||
parser := cron.NewParser(cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)
|
||||
return &Service{
|
||||
cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.Local)),
|
||||
cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.UTC)),
|
||||
tasks: tasks,
|
||||
runner: runner,
|
||||
logger: logger,
|
||||
|
||||
@@ -68,37 +68,3 @@ func TestServiceSyncTaskAndTrigger(t *testing.T) {
|
||||
t.Fatalf("expected scheduled runner to be triggered")
|
||||
}
|
||||
}
|
||||
|
||||
func TestServiceSchedulesTasksInLocalTimezone(t *testing.T) {
|
||||
location, err := time.LoadLocation("Asia/Shanghai")
|
||||
if err != nil {
|
||||
t.Fatalf("LoadLocation returned error: %v", err)
|
||||
}
|
||||
originalLocal := time.Local
|
||||
time.Local = location
|
||||
t.Cleanup(func() {
|
||||
time.Local = originalLocal
|
||||
})
|
||||
|
||||
service := NewService(&fakeTaskRepository{}, &fakeRunner{}, nil)
|
||||
if got := service.cron.Location(); got != location {
|
||||
t.Fatalf("cron location = %v, want %v", got, location)
|
||||
}
|
||||
|
||||
task := &model.BackupTask{ID: 1, Enabled: true, CronExpr: "0 5 * * *"}
|
||||
if err := service.SyncTask(context.Background(), task); err != nil {
|
||||
t.Fatalf("SyncTask returned error: %v", err)
|
||||
}
|
||||
entryID, ok := service.entries[task.ID]
|
||||
if !ok {
|
||||
t.Fatalf("expected cron entry for task %d", task.ID)
|
||||
}
|
||||
|
||||
entry := service.cron.Entry(entryID)
|
||||
now := time.Date(2026, 4, 30, 4, 0, 0, 0, location)
|
||||
got := entry.Schedule.Next(now)
|
||||
want := time.Date(2026, 4, 30, 5, 0, 0, 0, location)
|
||||
if !got.Equal(want) {
|
||||
t.Fatalf("next run = %s, want %s", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,8 +118,7 @@ func (s *AgentService) SubmitCommandResult(ctx context.Context, node *model.Node
|
||||
cmd.Result = string(result.Result)
|
||||
}
|
||||
cmd.CompletedAt = &now
|
||||
_, err = s.cmdRepo.CompleteDispatched(ctx, cmd)
|
||||
return err
|
||||
return s.cmdRepo.Update(ctx, cmd)
|
||||
}
|
||||
|
||||
// AgentTaskSpec 给 Agent 返回的任务规格,包含解密后的存储配置,供 Agent 直接执行。
|
||||
@@ -160,8 +159,8 @@ func (s *AgentService) GetTaskSpec(ctx context.Context, node *model.Node, taskID
|
||||
if task == nil {
|
||||
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "任务不存在", nil)
|
||||
}
|
||||
if err := s.ensureTaskSpecAccess(ctx, node, task); err != nil {
|
||||
return nil, err
|
||||
if task.NodeID != node.ID {
|
||||
return nil, apperror.Unauthorized("BACKUP_TASK_FORBIDDEN", "任务不属于当前节点", nil)
|
||||
}
|
||||
// 解密数据库密码(若有)
|
||||
dbPassword := ""
|
||||
@@ -214,31 +213,15 @@ func (s *AgentService) GetTaskSpec(ctx context.Context, node *model.Node, taskID
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *AgentService) ensureTaskSpecAccess(ctx context.Context, node *model.Node, task *model.BackupTask) error {
|
||||
if task.NodeID == node.ID {
|
||||
return nil
|
||||
}
|
||||
record, err := s.recordRepo.FindRunningByTaskAndNode(ctx, task.ID, node.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if record == nil {
|
||||
return apperror.Unauthorized("BACKUP_TASK_FORBIDDEN", "任务不属于当前节点", nil)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AgentRecordUpdate Agent 上报备份记录的最终状态。
|
||||
type AgentRecordUpdate struct {
|
||||
Status string `json:"status"` // running | success | failed
|
||||
FileName string `json:"fileName,omitempty"`
|
||||
FileSize int64 `json:"fileSize,omitempty"`
|
||||
Checksum string `json:"checksum,omitempty"`
|
||||
StoragePath string `json:"storagePath,omitempty"`
|
||||
StorageTargetID uint `json:"storageTargetId,omitempty"`
|
||||
StorageUploadResults []StorageUploadResultItem `json:"storageUploadResults,omitempty"`
|
||||
ErrorMessage string `json:"errorMessage,omitempty"`
|
||||
LogAppend string `json:"logAppend,omitempty"` // 增量日志,追加到 record.log_content
|
||||
Status string `json:"status"` // running | success | failed
|
||||
FileName string `json:"fileName,omitempty"`
|
||||
FileSize int64 `json:"fileSize,omitempty"`
|
||||
Checksum string `json:"checksum,omitempty"`
|
||||
StoragePath string `json:"storagePath,omitempty"`
|
||||
ErrorMessage string `json:"errorMessage,omitempty"`
|
||||
LogAppend string `json:"logAppend,omitempty"` // 增量日志,追加到 record.log_content
|
||||
}
|
||||
|
||||
// UpdateRecord 更新备份记录的状态/日志。Agent 在执行过程中可多次调用。
|
||||
@@ -250,16 +233,14 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
|
||||
if record == nil {
|
||||
return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "记录不存在", nil)
|
||||
}
|
||||
// 通过 task.NodeID 判断是否属于当前 agent
|
||||
task, err := s.taskRepo.FindByID(ctx, record.TaskID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if task == nil || !recordBelongsToNode(record, task, node.ID) {
|
||||
if task == nil || task.NodeID != node.ID {
|
||||
return apperror.Unauthorized("BACKUP_RECORD_FORBIDDEN", "记录不属于当前节点", nil)
|
||||
}
|
||||
if isBackupRecordTerminal(record.Status) {
|
||||
return nil
|
||||
}
|
||||
if update.Status != "" {
|
||||
record.Status = update.Status
|
||||
}
|
||||
@@ -275,14 +256,6 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
|
||||
if update.StoragePath != "" {
|
||||
record.StoragePath = update.StoragePath
|
||||
}
|
||||
if update.StorageTargetID > 0 {
|
||||
record.StorageTargetID = update.StorageTargetID
|
||||
}
|
||||
if len(update.StorageUploadResults) > 0 {
|
||||
if resultsJSON, marshalErr := json.Marshal(update.StorageUploadResults); marshalErr == nil {
|
||||
record.StorageUploadResults = string(resultsJSON)
|
||||
}
|
||||
}
|
||||
if update.ErrorMessage != "" {
|
||||
record.ErrorMessage = update.ErrorMessage
|
||||
}
|
||||
@@ -304,25 +277,11 @@ func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recor
|
||||
// 同步更新任务的 last_status
|
||||
if update.Status == model.BackupRecordStatusSuccess || update.Status == model.BackupRecordStatusFailed {
|
||||
task.LastStatus = update.Status
|
||||
task.LastRunAt = &record.StartedAt
|
||||
if err := s.taskRepo.Update(ctx, task); err != nil {
|
||||
return fmt.Errorf("update backup task summary: %w", err)
|
||||
}
|
||||
_ = s.taskRepo.Update(ctx, task)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func recordBelongsToNode(record *model.BackupRecord, task *model.BackupTask, nodeID uint) bool {
|
||||
if record.NodeID != 0 {
|
||||
return record.NodeID == nodeID
|
||||
}
|
||||
return task.NodeID == nodeID
|
||||
}
|
||||
|
||||
func isBackupRecordTerminal(status string) bool {
|
||||
return status == model.BackupRecordStatusSuccess || status == model.BackupRecordStatusFailed
|
||||
}
|
||||
|
||||
// EnqueueCommand Master 端调用:给指定节点插入一条待执行命令。
|
||||
// 返回命令 ID。
|
||||
func (s *AgentService) EnqueueCommand(ctx context.Context, nodeID uint, cmdType string, payload any) (uint, error) {
|
||||
@@ -397,84 +356,25 @@ func (s *AgentService) StartCommandTimeoutMonitor(ctx context.Context, interval
|
||||
}()
|
||||
}
|
||||
|
||||
// processStaleCommands 扫描已超时的 pending/dispatched 命令并联动关联记录。
|
||||
// 流程:先取超时候选 → 条件式把命令置为 timeout → 对抢到的命令联动 backup/restore 记录。
|
||||
// processStaleCommands 扫描已超时的 dispatched 命令并联动关联记录。
|
||||
// 流程:先取超时候选 → 对每条联动 backup/restore 记录 → 把命令置为 timeout。
|
||||
// 单条失败不影响后续处理。
|
||||
func (s *AgentService) processStaleCommands(ctx context.Context, threshold time.Time) {
|
||||
commands, err := s.cmdRepo.ListStaleActive(ctx, threshold)
|
||||
commands, err := s.cmdRepo.ListStaleDispatched(ctx, threshold)
|
||||
if err != nil || len(commands) == 0 {
|
||||
return
|
||||
}
|
||||
for i := range commands {
|
||||
cmd := commands[i]
|
||||
if s.commandStillActive(ctx, &cmd, threshold) {
|
||||
continue
|
||||
}
|
||||
s.failLinkedRecord(ctx, &cmd)
|
||||
now := time.Now().UTC()
|
||||
cmd.Status = model.AgentCommandStatusTimeout
|
||||
cmd.ErrorMessage = "agent did not report result before timeout"
|
||||
cmd.CompletedAt = &now
|
||||
timedOut, err := s.cmdRepo.TimeoutActive(ctx, &cmd)
|
||||
if err != nil || !timedOut {
|
||||
continue
|
||||
}
|
||||
s.failLinkedRecord(ctx, &cmd)
|
||||
_ = s.cmdRepo.Update(ctx, &cmd)
|
||||
}
|
||||
}
|
||||
|
||||
// commandStillActive 用关联记录状态、记录更新时间和节点心跳作为长任务续租信号。
|
||||
// 仅 run_task / restore_record 允许续租,避免短 RPC 命令被在线节点长期保留。
|
||||
func (s *AgentService) commandStillActive(ctx context.Context, cmd *model.AgentCommand, threshold time.Time) bool {
|
||||
if cmd.Status != model.AgentCommandStatusDispatched {
|
||||
return false
|
||||
}
|
||||
switch cmd.Type {
|
||||
case model.AgentCommandTypeRunTask:
|
||||
var payload struct {
|
||||
RecordID uint `json:"recordId"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(cmd.Payload), &payload); err != nil || payload.RecordID == 0 {
|
||||
return false
|
||||
}
|
||||
record, err := s.recordRepo.FindByID(ctx, payload.RecordID)
|
||||
if err != nil || record == nil || record.Status != model.BackupRecordStatusRunning {
|
||||
return false
|
||||
}
|
||||
if s.nodeRecentlySeen(ctx, cmd.NodeID, threshold) {
|
||||
return true
|
||||
}
|
||||
return record.UpdatedAt.After(threshold)
|
||||
case model.AgentCommandTypeRestoreRecord:
|
||||
if s.restoreRepo == nil {
|
||||
return false
|
||||
}
|
||||
var payload struct {
|
||||
RestoreRecordID uint `json:"restoreRecordId"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(cmd.Payload), &payload); err != nil || payload.RestoreRecordID == 0 {
|
||||
return false
|
||||
}
|
||||
restore, err := s.restoreRepo.FindByID(ctx, payload.RestoreRecordID)
|
||||
if err != nil || restore == nil || restore.Status != model.RestoreRecordStatusRunning {
|
||||
return false
|
||||
}
|
||||
if s.nodeRecentlySeen(ctx, cmd.NodeID, threshold) {
|
||||
return true
|
||||
}
|
||||
return restore.UpdatedAt.After(threshold)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (s *AgentService) nodeRecentlySeen(ctx context.Context, nodeID uint, threshold time.Time) bool {
|
||||
node, err := s.nodeRepo.FindByID(ctx, nodeID)
|
||||
if err != nil || node == nil {
|
||||
return false
|
||||
}
|
||||
return node.Status == model.NodeStatusOnline && node.LastSeen.After(threshold)
|
||||
}
|
||||
|
||||
// failLinkedRecord 根据命令类型把关联记录标记为 failed。
|
||||
// 只对仍然处于 running 状态的记录生效,避免覆盖已完成的结果。
|
||||
func (s *AgentService) failLinkedRecord(ctx context.Context, cmd *model.AgentCommand) {
|
||||
|
||||
@@ -1,654 +0,0 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"backupx/server/internal/config"
|
||||
"backupx/server/internal/database"
|
||||
"backupx/server/internal/logger"
|
||||
"backupx/server/internal/model"
|
||||
"backupx/server/internal/repository"
|
||||
"backupx/server/internal/storage/codec"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
func newAgentServicePoolTestHarness(t *testing.T) (*AgentService, *gorm.DB, repository.BackupRecordRepository, repository.AgentCommandRepository, *model.Node, *model.Node) {
|
||||
t.Helper()
|
||||
log, err := logger.New(config.LogConfig{Level: "error"})
|
||||
if err != nil {
|
||||
t.Fatalf("logger.New returned error: %v", err)
|
||||
}
|
||||
db, err := database.Open(config.DatabaseConfig{Path: filepath.Join(t.TempDir(), "backupx.db")}, log)
|
||||
if err != nil {
|
||||
t.Fatalf("database.Open returned error: %v", err)
|
||||
}
|
||||
cipher := codec.NewConfigCipher("agent-service-secret")
|
||||
nodeRepo := repository.NewNodeRepository(db)
|
||||
taskRepo := repository.NewBackupTaskRepository(db)
|
||||
recordRepo := repository.NewBackupRecordRepository(db)
|
||||
storageRepo := repository.NewStorageTargetRepository(db)
|
||||
cmdRepo := repository.NewAgentCommandRepository(db)
|
||||
|
||||
owner := &model.Node{Name: "edge-owner", Token: "owner-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
|
||||
other := &model.Node{Name: "edge-other", Token: "other-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
|
||||
if err := nodeRepo.Create(context.Background(), owner); err != nil {
|
||||
t.Fatalf("create owner node: %v", err)
|
||||
}
|
||||
if err := nodeRepo.Create(context.Background(), other); err != nil {
|
||||
t.Fatalf("create other node: %v", err)
|
||||
}
|
||||
targetConfig, err := cipher.EncryptJSON(map[string]any{"basePath": t.TempDir()})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON returned error: %v", err)
|
||||
}
|
||||
target := &model.StorageTarget{Name: "local", Type: "local_disk", Enabled: true, ConfigCiphertext: targetConfig, ConfigVersion: 1, LastTestStatus: "unknown"}
|
||||
if err := storageRepo.Create(context.Background(), target); err != nil {
|
||||
t.Fatalf("create storage target: %v", err)
|
||||
}
|
||||
task := &model.BackupTask{
|
||||
Name: "pooled-task",
|
||||
Type: "file",
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/data",
|
||||
StorageTargetID: target.ID,
|
||||
NodeID: 0,
|
||||
NodePoolTag: "db",
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
LastStatus: "running",
|
||||
}
|
||||
if err := taskRepo.Create(context.Background(), task); err != nil {
|
||||
t.Fatalf("create task: %v", err)
|
||||
}
|
||||
record := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: target.ID,
|
||||
NodeID: owner.ID,
|
||||
Status: model.BackupRecordStatusRunning,
|
||||
StartedAt: time.Now().UTC(),
|
||||
}
|
||||
if err := recordRepo.Create(context.Background(), record); err != nil {
|
||||
t.Fatalf("create record: %v", err)
|
||||
}
|
||||
return NewAgentService(nodeRepo, taskRepo, recordRepo, storageRepo, cmdRepo, cipher), db, recordRepo, cmdRepo, owner, other
|
||||
}
|
||||
|
||||
func TestAgentServicePooledTaskUsesRecordNodeForSpecAndRecordUpdates(t *testing.T) {
|
||||
svc, _, records, _, owner, other := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
|
||||
spec, err := svc.GetTaskSpec(ctx, owner, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("owner GetTaskSpec returned error: %v", err)
|
||||
}
|
||||
if spec.TaskID != 1 || len(spec.StorageTargets) != 1 {
|
||||
t.Fatalf("unexpected spec: %#v", spec)
|
||||
}
|
||||
if _, err := svc.GetTaskSpec(ctx, other, 1); err == nil {
|
||||
t.Fatal("expected non-owner node to be forbidden from pooled task spec")
|
||||
}
|
||||
|
||||
if err := svc.UpdateRecord(ctx, owner, 1, AgentRecordUpdate{
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "backup.tar.gz",
|
||||
FileSize: 123,
|
||||
StoragePath: "tasks/1/backup.tar.gz",
|
||||
StorageTargetID: 2,
|
||||
StorageUploadResults: []StorageUploadResultItem{
|
||||
{StorageTargetID: 1, StorageTargetName: "first", Status: "failed", Error: "boom"},
|
||||
{StorageTargetID: 2, StorageTargetName: "second", Status: "success", StoragePath: "tasks/1/backup.tar.gz", FileSize: 123},
|
||||
},
|
||||
}); err != nil {
|
||||
t.Fatalf("owner UpdateRecord returned error: %v", err)
|
||||
}
|
||||
updated, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID returned error: %v", err)
|
||||
}
|
||||
if updated.Status != model.BackupRecordStatusSuccess || updated.NodeID != owner.ID {
|
||||
t.Fatalf("unexpected updated record: %#v", updated)
|
||||
}
|
||||
if updated.StorageTargetID != 2 {
|
||||
t.Fatalf("expected successful storage target id 2, got %d", updated.StorageTargetID)
|
||||
}
|
||||
if !strings.Contains(updated.StorageUploadResults, `"storageTargetName":"second"`) {
|
||||
t.Fatalf("expected upload results to be persisted, got %q", updated.StorageUploadResults)
|
||||
}
|
||||
if err := svc.UpdateRecord(ctx, other, 1, AgentRecordUpdate{LogAppend: "bad"}); err == nil {
|
||||
t.Fatal("expected non-owner node to be forbidden from record update")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceUpdateRecordRefreshesTaskSummaryOnTerminalStatus(t *testing.T) {
|
||||
for _, status := range []string{model.BackupRecordStatusSuccess, model.BackupRecordStatusFailed} {
|
||||
t.Run(status, func(t *testing.T) {
|
||||
svc, _, records, _, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
record, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
|
||||
if err := svc.UpdateRecord(ctx, owner, record.ID, AgentRecordUpdate{Status: status}); err != nil {
|
||||
t.Fatalf("UpdateRecord returned error: %v", err)
|
||||
}
|
||||
|
||||
task, err := svc.taskRepo.FindByID(ctx, record.TaskID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
if task.LastStatus != status {
|
||||
t.Fatalf("expected task LastStatus %q, got %q", status, task.LastStatus)
|
||||
}
|
||||
if task.LastRunAt == nil || !task.LastRunAt.Equal(record.StartedAt) {
|
||||
t.Fatalf("expected task LastRunAt to match record startedAt %s, got %#v", record.StartedAt, task.LastRunAt)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceUpdateRecordReturnsTaskSummaryUpdateError(t *testing.T) {
|
||||
svc, _, _, _, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
expectedErr := errors.New("task update failed")
|
||||
svc.taskRepo = &failingUpdateTaskRepo{
|
||||
BackupTaskRepository: svc.taskRepo,
|
||||
err: expectedErr,
|
||||
}
|
||||
|
||||
err := svc.UpdateRecord(ctx, owner, 1, AgentRecordUpdate{Status: model.BackupRecordStatusSuccess})
|
||||
if !errors.Is(err, expectedErr) {
|
||||
t.Fatalf("expected task update error %v, got %v", expectedErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsFailsPendingRunTaskRecord(t *testing.T) {
|
||||
svc, _, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusPending,
|
||||
Payload: `{"recordId":1}`,
|
||||
CreatedAt: time.Now().UTC().Add(-time.Hour),
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected command timeout, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRecord, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if updatedRecord.Status != model.BackupRecordStatusFailed {
|
||||
t.Fatalf("expected record failed, got %#v", updatedRecord)
|
||||
}
|
||||
if updatedRecord.CompletedAt == nil {
|
||||
t.Fatal("expected failed record completedAt to be set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsFailsPendingRestoreRecord(t *testing.T) {
|
||||
svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
restoreRepo := repository.NewRestoreRecordRepository(db)
|
||||
restore := &model.RestoreRecord{
|
||||
BackupRecordID: 1,
|
||||
TaskID: 1,
|
||||
NodeID: owner.ID,
|
||||
Status: model.RestoreRecordStatusRunning,
|
||||
StartedAt: time.Now().UTC().Add(-time.Hour),
|
||||
}
|
||||
if err := restoreRepo.Create(ctx, restore); err != nil {
|
||||
t.Fatalf("Create restore returned error: %v", err)
|
||||
}
|
||||
svc.SetRestoreRepository(restoreRepo)
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRestoreRecord,
|
||||
Status: model.AgentCommandStatusPending,
|
||||
Payload: `{"restoreRecordId":1}`,
|
||||
CreatedAt: time.Now().UTC().Add(-time.Hour),
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected command timeout, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRestore, err := restoreRepo.FindByID(ctx, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID restore returned error: %v", err)
|
||||
}
|
||||
if updatedRestore.Status != model.RestoreRecordStatusFailed {
|
||||
t.Fatalf("expected restore failed, got %#v", updatedRestore)
|
||||
}
|
||||
if updatedRestore.CompletedAt == nil {
|
||||
t.Fatal("expected failed restore completedAt to be set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRunTaskRecord(t *testing.T) {
|
||||
svc, _, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"recordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusDispatched {
|
||||
t.Fatalf("expected active command to remain dispatched, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRecord, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if updatedRecord.Status != model.BackupRecordStatusRunning {
|
||||
t.Fatalf("expected active record to remain running, got %#v", updatedRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsKeepsDispatchedRunTaskWhenNodeHeartbeatIsFresh(t *testing.T) {
|
||||
svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := setBackupRecordUpdatedAt(db, 1, dispatchedAt); err != nil {
|
||||
t.Fatalf("set backup record updated_at: %v", err)
|
||||
}
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"recordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusDispatched {
|
||||
t.Fatalf("expected command to remain dispatched while node heartbeat is fresh, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRecord, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if updatedRecord.Status != model.BackupRecordStatusRunning {
|
||||
t.Fatalf("expected record to remain running while node heartbeat is fresh, got %#v", updatedRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsTimesOutShortCommandEvenWhenNodeHeartbeatIsFresh(t *testing.T) {
|
||||
svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeListDir,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"path":"/srv"}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected stale short command timeout, got %#v", updatedCommand)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsTimesOutDispatchedRunTaskWhenRecordIsTerminalEvenWithFreshHeartbeat(t *testing.T) {
|
||||
svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
record, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
completedAt := time.Now().UTC().Add(-time.Minute)
|
||||
record.Status = model.BackupRecordStatusFailed
|
||||
record.CompletedAt = &completedAt
|
||||
if err := records.Update(ctx, record); err != nil {
|
||||
t.Fatalf("Update terminal record returned error: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"recordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected command timeout when linked record is terminal, got %#v", updatedCommand)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRunTaskRecord(t *testing.T) {
|
||||
svc, db, records, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := setBackupRecordUpdatedAt(db, 1, dispatchedAt); err != nil {
|
||||
t.Fatalf("set backup record updated_at: %v", err)
|
||||
}
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", dispatchedAt).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"recordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected inactive command timeout, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRecord, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if updatedRecord.Status != model.BackupRecordStatusFailed {
|
||||
t.Fatalf("expected inactive record failed, got %#v", updatedRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsKeepsActiveDispatchedRestoreRecord(t *testing.T) {
|
||||
svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
restoreRepo := repository.NewRestoreRecordRepository(db)
|
||||
restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
|
||||
svc.SetRestoreRepository(restoreRepo)
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRestoreRecord,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"restoreRecordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusDispatched {
|
||||
t.Fatalf("expected active restore command to remain dispatched, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRestore, err := restoreRepo.FindByID(ctx, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID restore returned error: %v", err)
|
||||
}
|
||||
if updatedRestore.Status != model.RestoreRecordStatusRunning {
|
||||
t.Fatalf("expected active restore to remain running, got %#v", updatedRestore)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsKeepsDispatchedRestoreWhenNodeHeartbeatIsFresh(t *testing.T) {
|
||||
svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
restoreRepo := repository.NewRestoreRecordRepository(db)
|
||||
restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
|
||||
svc.SetRestoreRepository(restoreRepo)
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := setRestoreRecordUpdatedAt(db, restore.ID, dispatchedAt); err != nil {
|
||||
t.Fatalf("set restore record updated_at: %v", err)
|
||||
}
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", time.Now().UTC()).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRestoreRecord,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"restoreRecordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusDispatched {
|
||||
t.Fatalf("expected restore command to remain dispatched while node heartbeat is fresh, got %#v", updatedCommand)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceProcessStaleCommandsTimesOutInactiveDispatchedRestoreRecord(t *testing.T) {
|
||||
svc, db, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
restoreRepo := repository.NewRestoreRecordRepository(db)
|
||||
restore := createAgentServiceRestoreRecord(t, restoreRepo, owner.ID)
|
||||
svc.SetRestoreRepository(restoreRepo)
|
||||
dispatchedAt := time.Now().UTC().Add(-time.Hour)
|
||||
if err := setRestoreRecordUpdatedAt(db, restore.ID, dispatchedAt); err != nil {
|
||||
t.Fatalf("set restore record updated_at: %v", err)
|
||||
}
|
||||
if err := db.Model(&model.Node{}).Where("id = ?", owner.ID).UpdateColumn("last_seen", dispatchedAt).Error; err != nil {
|
||||
t.Fatalf("set owner last_seen: %v", err)
|
||||
}
|
||||
oldCommand := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRestoreRecord,
|
||||
Status: model.AgentCommandStatusDispatched,
|
||||
Payload: `{"restoreRecordId":1}`,
|
||||
CreatedAt: dispatchedAt,
|
||||
DispatchedAt: &dispatchedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, oldCommand); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
svc.processStaleCommands(ctx, time.Now().UTC().Add(-30*time.Minute))
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, oldCommand.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected inactive restore command timeout, got %#v", updatedCommand)
|
||||
}
|
||||
updatedRestore, err := restoreRepo.FindByID(ctx, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID restore returned error: %v", err)
|
||||
}
|
||||
if updatedRestore.Status != model.RestoreRecordStatusFailed {
|
||||
t.Fatalf("expected inactive restore failed, got %#v", updatedRestore)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceSubmitCommandResultDoesNotOverwriteTerminalCommand(t *testing.T) {
|
||||
svc, _, _, commands, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
completedAt := time.Now().UTC().Add(-time.Minute)
|
||||
command := &model.AgentCommand{
|
||||
NodeID: owner.ID,
|
||||
Type: model.AgentCommandTypeRunTask,
|
||||
Status: model.AgentCommandStatusTimeout,
|
||||
Payload: `{"recordId":1}`,
|
||||
ErrorMessage: "timeout",
|
||||
CompletedAt: &completedAt,
|
||||
}
|
||||
if err := commands.Create(ctx, command); err != nil {
|
||||
t.Fatalf("Create command returned error: %v", err)
|
||||
}
|
||||
|
||||
if err := svc.SubmitCommandResult(ctx, owner, command.ID, AgentCommandResult{Success: true, Result: []byte(`{"ok":true}`)}); err != nil {
|
||||
t.Fatalf("SubmitCommandResult returned error: %v", err)
|
||||
}
|
||||
|
||||
updatedCommand, err := commands.FindByID(ctx, command.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID command returned error: %v", err)
|
||||
}
|
||||
if updatedCommand.Status != model.AgentCommandStatusTimeout {
|
||||
t.Fatalf("expected terminal command status to remain timeout, got %#v", updatedCommand)
|
||||
}
|
||||
if updatedCommand.Result != "" {
|
||||
t.Fatalf("expected terminal command result to remain empty, got %q", updatedCommand.Result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentServiceUpdateRecordDoesNotOverwriteTerminalRecord(t *testing.T) {
|
||||
svc, _, records, _, owner, _ := newAgentServicePoolTestHarness(t)
|
||||
ctx := context.Background()
|
||||
record, err := records.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
completedAt := time.Now().UTC().Add(-time.Minute)
|
||||
record.Status = model.BackupRecordStatusFailed
|
||||
record.ErrorMessage = "timeout"
|
||||
record.CompletedAt = &completedAt
|
||||
if err := records.Update(ctx, record); err != nil {
|
||||
t.Fatalf("Update record returned error: %v", err)
|
||||
}
|
||||
|
||||
if err := svc.UpdateRecord(ctx, owner, record.ID, AgentRecordUpdate{
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "late.tar.gz",
|
||||
FileSize: 42,
|
||||
Checksum: "late",
|
||||
StoragePath: "late/path",
|
||||
ErrorMessage: "late success",
|
||||
LogAppend: "late log\n",
|
||||
}); err != nil {
|
||||
t.Fatalf("UpdateRecord returned error: %v", err)
|
||||
}
|
||||
|
||||
updatedRecord, err := records.FindByID(ctx, record.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID updated record returned error: %v", err)
|
||||
}
|
||||
if updatedRecord.Status != model.BackupRecordStatusFailed {
|
||||
t.Fatalf("expected terminal record status to remain failed, got %#v", updatedRecord)
|
||||
}
|
||||
if updatedRecord.FileName != "" || updatedRecord.StoragePath != "" || updatedRecord.ErrorMessage != "timeout" {
|
||||
t.Fatalf("expected terminal record fields to remain unchanged, got %#v", updatedRecord)
|
||||
}
|
||||
}
|
||||
|
||||
func createAgentServiceRestoreRecord(t *testing.T, repo repository.RestoreRecordRepository, nodeID uint) *model.RestoreRecord {
|
||||
t.Helper()
|
||||
restore := &model.RestoreRecord{
|
||||
BackupRecordID: 1,
|
||||
TaskID: 1,
|
||||
NodeID: nodeID,
|
||||
Status: model.RestoreRecordStatusRunning,
|
||||
StartedAt: time.Now().UTC().Add(-time.Hour),
|
||||
}
|
||||
if err := repo.Create(context.Background(), restore); err != nil {
|
||||
t.Fatalf("Create restore returned error: %v", err)
|
||||
}
|
||||
return restore
|
||||
}
|
||||
|
||||
func setBackupRecordUpdatedAt(db *gorm.DB, id uint, updatedAt time.Time) error {
|
||||
return db.Model(&model.BackupRecord{}).Where("id = ?", id).UpdateColumn("updated_at", updatedAt).Error
|
||||
}
|
||||
|
||||
func setRestoreRecordUpdatedAt(db *gorm.DB, id uint, updatedAt time.Time) error {
|
||||
return db.Model(&model.RestoreRecord{}).Where("id = ?", id).UpdateColumn("updated_at", updatedAt).Error
|
||||
}
|
||||
|
||||
type failingUpdateTaskRepo struct {
|
||||
repository.BackupTaskRepository
|
||||
err error
|
||||
}
|
||||
|
||||
func (r *failingUpdateTaskRepo) Update(context.Context, *model.BackupTask) error {
|
||||
return r.err
|
||||
}
|
||||
@@ -52,11 +52,6 @@ type StorageUploadResultItem struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
const (
|
||||
uploadMaxAttempts = 3
|
||||
uploadRetryBackoff = 10 * time.Second
|
||||
)
|
||||
|
||||
type DownloadedArtifact struct {
|
||||
FileName string
|
||||
Reader io.ReadCloser
|
||||
@@ -78,30 +73,29 @@ func collectTargetIDs(task *model.BackupTask) []uint {
|
||||
}
|
||||
|
||||
type BackupExecutionService struct {
|
||||
tasks repository.BackupTaskRepository
|
||||
records repository.BackupRecordRepository
|
||||
targets repository.StorageTargetRepository
|
||||
nodeRepo repository.NodeRepository
|
||||
storageRegistry *storage.Registry
|
||||
runnerRegistry *backup.Registry
|
||||
logHub *backup.LogHub
|
||||
retention *backupretention.Service
|
||||
cipher *codec.ConfigCipher
|
||||
tasks repository.BackupTaskRepository
|
||||
records repository.BackupRecordRepository
|
||||
targets repository.StorageTargetRepository
|
||||
nodeRepo repository.NodeRepository
|
||||
storageRegistry *storage.Registry
|
||||
runnerRegistry *backup.Registry
|
||||
logHub *backup.LogHub
|
||||
retention *backupretention.Service
|
||||
cipher *codec.ConfigCipher
|
||||
notifier BackupResultNotifier
|
||||
agentDispatcher AgentDispatcher
|
||||
replicationHook ReplicationTrigger
|
||||
dependentsResolver DependentsResolver
|
||||
async func(func())
|
||||
now func() time.Time
|
||||
tempDir string
|
||||
semaphore chan struct{}
|
||||
async func(func())
|
||||
now func() time.Time
|
||||
tempDir string
|
||||
semaphore chan struct{}
|
||||
// nodeSemaphores 节点级并发限制(按 NodeID 映射)。
|
||||
// 没命中的 NodeID 走全局 semaphore,节点配置 MaxConcurrent>0 时按该节点独立排队。
|
||||
nodeSemaphores sync.Map
|
||||
retries int // rclone 底层重试次数
|
||||
bandwidthLimit string // rclone 带宽限制(全局默认,节点配置可覆盖)
|
||||
retries int // rclone 底层重试次数
|
||||
bandwidthLimit string // rclone 带宽限制(全局默认,节点配置可覆盖)
|
||||
metrics *metrics.Metrics
|
||||
taskLocks sync.Map
|
||||
}
|
||||
|
||||
// SetMetrics 注入 Prometheus 采集器。nil 时所有埋点退化为 no-op。
|
||||
@@ -276,9 +270,11 @@ func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint
|
||||
if record == nil {
|
||||
return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
|
||||
}
|
||||
if remote, err := s.deleteRemoteLocalDiskObject(ctx, record); err != nil {
|
||||
// 集群场景保护:跨节点 local_disk 文件 Master 无法远程删除,拒绝操作以避免存储泄漏的错觉
|
||||
if err := s.validateClusterAccessible(ctx, record); err != nil {
|
||||
return err
|
||||
} else if !remote && strings.TrimSpace(record.StoragePath) != "" {
|
||||
}
|
||||
if strings.TrimSpace(record.StoragePath) != "" {
|
||||
provider, err := s.resolveProvider(ctx, record.StorageTargetID)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -293,40 +289,6 @@ func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) deleteRemoteLocalDiskObject(ctx context.Context, record *model.BackupRecord) (bool, error) {
|
||||
if strings.TrimSpace(record.StoragePath) == "" || s.nodeRepo == nil {
|
||||
return false, nil
|
||||
}
|
||||
node, err := s.nodeRepo.FindByID(ctx, record.NodeID)
|
||||
if err != nil || node == nil || node.IsLocal {
|
||||
return false, nil
|
||||
}
|
||||
target, err := s.targets.FindByID(ctx, record.StorageTargetID)
|
||||
if err != nil {
|
||||
return false, apperror.Internal("BACKUP_STORAGE_TARGET_GET_FAILED", "无法获取存储目标详情", err)
|
||||
}
|
||||
if target == nil || !strings.EqualFold(target.Type, "local_disk") {
|
||||
return false, nil
|
||||
}
|
||||
if s.agentDispatcher == nil {
|
||||
return true, apperror.BadRequest("BACKUP_RECORD_CROSS_NODE_LOCAL_DISK",
|
||||
fmt.Sprintf("该备份位于节点 %s 的本地磁盘(local_disk),Master 无法跨节点删除。请确保 Agent 在线后再操作。", node.Name),
|
||||
nil)
|
||||
}
|
||||
configMap := map[string]any{}
|
||||
if err := s.cipher.DecryptJSON(target.ConfigCiphertext, &configMap); err != nil {
|
||||
return true, apperror.Internal("BACKUP_STORAGE_TARGET_DECRYPT_FAILED", "无法解密存储目标配置", err)
|
||||
}
|
||||
if _, err := s.agentDispatcher.EnqueueCommand(ctx, record.NodeID, model.AgentCommandTypeDeleteStorageObject, map[string]any{
|
||||
"targetType": target.Type,
|
||||
"targetConfig": configMap,
|
||||
"storagePath": record.StoragePath,
|
||||
}); err != nil {
|
||||
return true, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发远程备份文件删除命令", err)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// validateClusterAccessible 在跨节点 + local_disk 场景下拒绝 Master 端直接访问。
|
||||
// 场景说明:远程 Agent 把备份写到其本机磁盘(local_disk basePath)时,Master 的
|
||||
// provider 指向的是 Master 本机的同名路径,访问会静默取错文件或 404。明确拒绝
|
||||
@@ -364,11 +326,6 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
|
||||
if task == nil {
|
||||
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "备份任务不存在", fmt.Errorf("backup task %d not found", id))
|
||||
}
|
||||
unlock := s.acquireTaskStartLock(task.ID)
|
||||
defer unlock()
|
||||
if err := s.ensureTaskNotRunning(ctx, task); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 维护窗口校验:手动执行同样尊重窗口,避免业务高峰期误触发。
|
||||
if strings.TrimSpace(task.MaintenanceWindows) != "" {
|
||||
windows := backup.ParseMaintenanceWindows(task.MaintenanceWindows)
|
||||
@@ -399,8 +356,8 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
|
||||
if err := s.records.Create(ctx, record); err != nil {
|
||||
return nil, apperror.Internal("BACKUP_RECORD_CREATE_FAILED", "无法创建备份记录", err)
|
||||
}
|
||||
runTask := *task
|
||||
runTask.NodeID = resolvedNodeID
|
||||
// 用池选出的节点 ID 复写 task 副本,使后续路由/执行沿用
|
||||
task.NodeID = resolvedNodeID
|
||||
task.LastRunAt = &startedAt
|
||||
task.LastStatus = "running"
|
||||
if err := s.tasks.Update(ctx, task); err != nil {
|
||||
@@ -408,27 +365,27 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
|
||||
}
|
||||
// 多节点路由:task.NodeID 指向远程节点时,把执行任务入队给 Agent;
|
||||
// NodeID=0 或本机节点时由 Master 直接执行。
|
||||
if remoteNode := s.resolveRemoteNode(ctx, resolvedNodeID); remoteNode != nil {
|
||||
if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
|
||||
// 节点离线 → 立即把刚创建的 running 记录标记 failed,返回明确错误
|
||||
if remoteNode.Status != model.NodeStatusOnline {
|
||||
offlineMsg := fmt.Sprintf("节点 %s 当前离线,无法执行备份任务", remoteNode.Name)
|
||||
_ = s.finalizeRecord(ctx, &runTask, record.ID, startedAt, model.BackupRecordStatusFailed,
|
||||
offlineMsg, "", "", 0, "", "", primaryTargetID)
|
||||
_ = s.finalizeRecord(ctx, task, record.ID, startedAt, model.BackupRecordStatusFailed,
|
||||
offlineMsg, "", "", 0, "", "")
|
||||
return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
|
||||
}
|
||||
if _, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, resolvedNodeID, model.AgentCommandTypeRunTask, map[string]any{
|
||||
if _, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRunTask, map[string]any{
|
||||
"taskId": task.ID,
|
||||
"recordId": record.ID,
|
||||
}); enqueueErr != nil {
|
||||
// 入队失败 → 在记录中标记失败,继续返回详情
|
||||
_ = s.finalizeRecord(ctx, &runTask, record.ID, startedAt, model.BackupRecordStatusFailed,
|
||||
"无法下发任务到远程节点: "+enqueueErr.Error(), "", "", 0, "", "", primaryTargetID)
|
||||
_ = s.finalizeRecord(ctx, task, record.ID, startedAt, model.BackupRecordStatusFailed,
|
||||
"无法下发任务到远程节点: "+enqueueErr.Error(), "", "", 0, "", "")
|
||||
return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发任务到远程节点", enqueueErr)
|
||||
}
|
||||
return s.getRecordDetail(ctx, record.ID)
|
||||
}
|
||||
run := func() {
|
||||
s.executeTask(context.Background(), &runTask, record.ID, startedAt)
|
||||
s.executeTask(context.Background(), task, record.ID, startedAt)
|
||||
}
|
||||
if async {
|
||||
s.async(run)
|
||||
@@ -438,27 +395,6 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
|
||||
return s.getRecordDetail(ctx, record.ID)
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) acquireTaskStartLock(taskID uint) func() {
|
||||
value, _ := s.taskLocks.LoadOrStore(taskID, &sync.Mutex{})
|
||||
mu := value.(*sync.Mutex)
|
||||
mu.Lock()
|
||||
return mu.Unlock
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) ensureTaskNotRunning(ctx context.Context, task *model.BackupTask) error {
|
||||
taskID := task.ID
|
||||
items, err := s.records.List(ctx, repository.BackupRecordListOptions{TaskID: &taskID, Status: model.BackupRecordStatusRunning})
|
||||
if err != nil {
|
||||
return apperror.Internal("BACKUP_RECORD_LIST_FAILED", "无法检查任务运行状态", err)
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return nil
|
||||
}
|
||||
return apperror.BadRequest("BACKUP_TASK_ALREADY_RUNNING",
|
||||
fmt.Sprintf("任务「%s」正在运行(记录 #%d),请等待完成后再触发。", task.Name, items[0].ID),
|
||||
nil)
|
||||
}
|
||||
|
||||
// shouldNotify 按任务的告警策略决定是否发送本次通知。
|
||||
// 成功结果:始终发送(方便用户确认备份状态)。
|
||||
// 失败结果:仅当"最近 N 条记录(含本次)均为 failed"时发送,N = AlertOnConsecutiveFails。
|
||||
@@ -625,10 +561,9 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
var fileSize int64
|
||||
var checksum string
|
||||
var storagePath string
|
||||
selectedStorageTargetID := task.StorageTargetID
|
||||
var uploadResults []StorageUploadResultItem
|
||||
completeRecord := func() {
|
||||
if finalizeErr := s.finalizeRecord(ctx, task, recordID, startedAt, status, errMessage, logger.String(), fileName, fileSize, checksum, storagePath, selectedStorageTargetID); finalizeErr != nil {
|
||||
if finalizeErr := s.finalizeRecord(ctx, task, recordID, startedAt, status, errMessage, logger.String(), fileName, fileSize, checksum, storagePath); finalizeErr != nil {
|
||||
logger.Errorf("写回备份记录失败:%v", finalizeErr)
|
||||
}
|
||||
// 采集任务执行结果到 Prometheus(耗时 + 产出字节 + 状态计数)
|
||||
@@ -710,11 +645,6 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
logger.Errorf("没有关联的存储目标")
|
||||
return
|
||||
}
|
||||
storageUsage, err := s.storageUsageSnapshot(ctx)
|
||||
if err != nil {
|
||||
logger.Warnf("读取存储目标用量失败,跳过本次软配额校验:%v", err)
|
||||
storageUsage = map[uint]int64{}
|
||||
}
|
||||
|
||||
// 并行上传到所有目标
|
||||
uploadResults = make([]StorageUploadResultItem, len(targetIDs))
|
||||
@@ -738,7 +668,15 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
}
|
||||
// 软限额校验:QuotaBytes > 0 时,已累计 + 本次 > 配额 → 拒绝上传
|
||||
if target != nil && target.QuotaBytes > 0 {
|
||||
currentUsed := storageUsage[targetID]
|
||||
currentUsed := int64(0)
|
||||
if items, err := s.records.StorageUsage(ctx); err == nil {
|
||||
for _, it := range items {
|
||||
if it.StorageTargetID == targetID {
|
||||
currentUsed = it.TotalSize
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if currentUsed+fileSize > target.QuotaBytes {
|
||||
quotaMsg := fmt.Sprintf("超出存储目标 %s 的配额(%d + %d > %d)", targetName, currentUsed, fileSize, target.QuotaBytes)
|
||||
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: quotaMsg}
|
||||
@@ -747,18 +685,15 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
}
|
||||
}
|
||||
logger.Infof("开始上传备份到存储目标:%s", targetName)
|
||||
// 上传级重试:最多 3 次,等待时间随 context 取消及时退出。
|
||||
// 上传级重试:最多 3 次,指数退避(10s, 30s, 90s)
|
||||
maxAttempts := 3
|
||||
var lastUploadErr error
|
||||
var hr *hashingReader
|
||||
for attempt := 1; attempt <= uploadMaxAttempts; attempt++ {
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
if attempt > 1 {
|
||||
backoff := time.Duration(attempt-1) * uploadRetryBackoff
|
||||
backoff := time.Duration(attempt*attempt) * 10 * time.Second
|
||||
logger.Warnf("存储目标 %s 第 %d 次重试(等待 %v):%v", targetName, attempt, backoff, lastUploadErr)
|
||||
if waitErr := waitForUploadRetry(ctx, backoff); waitErr != nil {
|
||||
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: waitErr.Error()}
|
||||
logger.Warnf("存储目标 %s 上传重试已取消:%v", targetName, waitErr)
|
||||
return
|
||||
}
|
||||
time.Sleep(backoff)
|
||||
}
|
||||
artifact, openErr := os.Open(finalPath)
|
||||
if openErr != nil {
|
||||
@@ -788,7 +723,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
}
|
||||
if lastUploadErr != nil {
|
||||
uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: lastUploadErr.Error()}
|
||||
logger.Warnf("存储目标 %s 上传失败(已重试 %d 次):%v", targetName, uploadMaxAttempts, lastUploadErr)
|
||||
logger.Warnf("存储目标 %s 上传失败(已重试 %d 次):%v", targetName, maxAttempts, lastUploadErr)
|
||||
return
|
||||
}
|
||||
// 完整性校验:对比实际传输字节数
|
||||
@@ -824,9 +759,6 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
for _, r := range uploadResults {
|
||||
if r.Status == "success" {
|
||||
anySuccess = true
|
||||
if selectedStorageTargetID == task.StorageTargetID {
|
||||
selectedStorageTargetID = r.StorageTargetID
|
||||
}
|
||||
} else if r.Error != "" {
|
||||
failedMessages = append(failedMessages, fmt.Sprintf("%s: %s", r.StorageTargetName, r.Error))
|
||||
}
|
||||
@@ -859,7 +791,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
record := &model.BackupRecord{
|
||||
ID: recordID,
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: selectedStorageTargetID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: task.NodeID,
|
||||
Status: "success",
|
||||
FileName: fileName,
|
||||
@@ -884,7 +816,7 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
|
||||
}
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model.BackupTask, recordID uint, startedAt time.Time, status string, errorMessage string, logContent string, fileName string, fileSize int64, checksum string, storagePath string, storageTargetID uint) error {
|
||||
func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model.BackupTask, recordID uint, startedAt time.Time, status string, errorMessage string, logContent string, fileName string, fileSize int64, checksum string, storagePath string) error {
|
||||
record, err := s.records.FindByID(ctx, recordID)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -894,9 +826,6 @@ func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model
|
||||
}
|
||||
completedAt := s.now()
|
||||
record.Status = status
|
||||
if storageTargetID > 0 {
|
||||
record.StorageTargetID = storageTargetID
|
||||
}
|
||||
record.FileName = fileName
|
||||
record.FileSize = fileSize
|
||||
record.Checksum = checksum
|
||||
@@ -913,32 +842,6 @@ func (s *BackupExecutionService) finalizeRecord(ctx context.Context, task *model
|
||||
return s.tasks.Update(ctx, task)
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) storageUsageSnapshot(ctx context.Context) (map[uint]int64, error) {
|
||||
items, err := s.records.StorageUsage(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("storage usage snapshot: %w", err)
|
||||
}
|
||||
usage := make(map[uint]int64, len(items))
|
||||
for _, item := range items {
|
||||
usage[item.StorageTargetID] = item.TotalSize
|
||||
}
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
func waitForUploadRetry(ctx context.Context, delay time.Duration) error {
|
||||
if delay <= 0 {
|
||||
return nil
|
||||
}
|
||||
timer := time.NewTimer(delay)
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (s *BackupExecutionService) resolveProvider(ctx context.Context, targetID uint) (storage.StorageProvider, error) {
|
||||
return s.resolveProviderForNode(ctx, targetID, 0)
|
||||
}
|
||||
@@ -1054,9 +957,6 @@ func (s *BackupExecutionService) loadRecordProvider(ctx context.Context, recordI
|
||||
if record == nil {
|
||||
return nil, nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
|
||||
}
|
||||
if err := s.validateClusterAccessible(ctx, record); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
provider, err := s.resolveProvider(ctx, record.StorageTargetID)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
||||
@@ -2,15 +2,9 @@ package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"backupx/server/internal/backup"
|
||||
backupretention "backupx/server/internal/backup/retention"
|
||||
@@ -24,70 +18,6 @@ import (
|
||||
storageRclone "backupx/server/internal/storage/rclone"
|
||||
)
|
||||
|
||||
type testStorageFactory struct {
|
||||
providers map[string]*testStorageProvider
|
||||
}
|
||||
|
||||
func (f *testStorageFactory) Type() storage.ProviderType {
|
||||
return "test_storage"
|
||||
}
|
||||
|
||||
func (f *testStorageFactory) New(_ context.Context, config map[string]any) (storage.StorageProvider, error) {
|
||||
name, _ := config["name"].(string)
|
||||
provider := f.providers[name]
|
||||
if provider == nil {
|
||||
return nil, fmt.Errorf("unknown provider %q", name)
|
||||
}
|
||||
return provider, nil
|
||||
}
|
||||
|
||||
type testStorageProvider struct {
|
||||
name string
|
||||
failUpload bool
|
||||
blockUpload <-chan struct{}
|
||||
onUpload func()
|
||||
objects map[string][]byte
|
||||
}
|
||||
|
||||
func (p *testStorageProvider) Type() storage.ProviderType { return "test_storage" }
|
||||
func (p *testStorageProvider) TestConnection(context.Context) error {
|
||||
return nil
|
||||
}
|
||||
func (p *testStorageProvider) Upload(_ context.Context, objectKey string, reader io.Reader, _ int64, _ map[string]string) error {
|
||||
if p.blockUpload != nil {
|
||||
<-p.blockUpload
|
||||
}
|
||||
if p.onUpload != nil {
|
||||
p.onUpload()
|
||||
}
|
||||
if p.failUpload {
|
||||
return fmt.Errorf("upload failed for %s", p.name)
|
||||
}
|
||||
data, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p.objects == nil {
|
||||
p.objects = map[string][]byte{}
|
||||
}
|
||||
p.objects[objectKey] = data
|
||||
return nil
|
||||
}
|
||||
func (p *testStorageProvider) Download(_ context.Context, objectKey string) (io.ReadCloser, error) {
|
||||
data, ok := p.objects[objectKey]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("object %s not found", objectKey)
|
||||
}
|
||||
return io.NopCloser(strings.NewReader(string(data))), nil
|
||||
}
|
||||
func (p *testStorageProvider) Delete(_ context.Context, objectKey string) error {
|
||||
delete(p.objects, objectKey)
|
||||
return nil
|
||||
}
|
||||
func (p *testStorageProvider) List(context.Context, string) ([]storage.ObjectInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func newExecutionTestServices(t *testing.T) (*BackupExecutionService, *BackupRecordService, repository.BackupTaskRepository, repository.StorageTargetRepository, repository.BackupRecordRepository, string, string) {
|
||||
t.Helper()
|
||||
baseDir := t.TempDir()
|
||||
@@ -155,377 +85,6 @@ func TestBackupExecutionServiceRunTaskByIDSync(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceNodePoolSelectionDoesNotPersistTaskNodeID(t *testing.T) {
|
||||
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
|
||||
nodeRepo := &nodeRepoStub{nodes: []model.Node{
|
||||
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline, Labels: "prod,db"},
|
||||
{ID: 11, Name: "edge-b", Token: "edge-b-token", Status: model.NodeStatusOnline, Labels: "prod,db"},
|
||||
}}
|
||||
dispatcher := &fakeDispatcher{}
|
||||
executionService.SetClusterDependencies(nodeRepo, dispatcher)
|
||||
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID returned error: %v", err)
|
||||
}
|
||||
task.NodeID = 0
|
||||
task.NodePoolTag = "db"
|
||||
if err := tasks.Update(ctx, task); err != nil {
|
||||
t.Fatalf("Update task returned error: %v", err)
|
||||
}
|
||||
|
||||
detail, err := executionService.RunTaskByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunTaskByID returned error: %v", err)
|
||||
}
|
||||
storedTask, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID after run returned error: %v", err)
|
||||
}
|
||||
if storedTask.NodeID != 0 {
|
||||
t.Fatalf("expected pooled task NodeID to remain 0, got %d", storedTask.NodeID)
|
||||
}
|
||||
if storedTask.NodePoolTag != "db" {
|
||||
t.Fatalf("expected pooled task tag to remain db, got %q", storedTask.NodePoolTag)
|
||||
}
|
||||
storedRecord, err := records.FindByID(ctx, detail.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if storedRecord == nil || storedRecord.NodeID != 10 {
|
||||
t.Fatalf("expected record to keep selected node 10, got %#v", storedRecord)
|
||||
}
|
||||
calls := dispatcher.snapshot()
|
||||
if len(calls) != 1 || calls[0].NodeID != 10 || calls[0].CmdType != model.AgentCommandTypeRunTask {
|
||||
t.Fatalf("unexpected dispatcher calls: %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceRejectsDuplicateRunningTask(t *testing.T) {
|
||||
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
startedAt := time.Now().UTC()
|
||||
running := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: 0,
|
||||
Status: model.BackupRecordStatusRunning,
|
||||
StartedAt: startedAt,
|
||||
}
|
||||
if err := records.Create(ctx, running); err != nil {
|
||||
t.Fatalf("Create running record returned error: %v", err)
|
||||
}
|
||||
|
||||
_, err = executionService.RunTaskByIDSync(ctx, task.ID)
|
||||
if err == nil || !strings.Contains(err.Error(), "正在运行") {
|
||||
t.Fatalf("expected duplicate running task to be rejected, got %v", err)
|
||||
}
|
||||
items, err := records.List(ctx, repository.BackupRecordListOptions{Status: model.BackupRecordStatusRunning})
|
||||
if err != nil {
|
||||
t.Fatalf("List running records returned error: %v", err)
|
||||
}
|
||||
if len(items) != 1 || items[0].ID != running.ID {
|
||||
t.Fatalf("expected only the original running record, got %#v", items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceDeleteRecordDispatchesRemoteLocalDiskCleanup(t *testing.T) {
|
||||
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
nodeRepo := &nodeRepoStub{nodes: []model.Node{
|
||||
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline},
|
||||
}}
|
||||
dispatcher := &fakeDispatcher{}
|
||||
executionService.SetClusterDependencies(nodeRepo, dispatcher)
|
||||
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
completedAt := time.Now().UTC()
|
||||
record := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: 10,
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "remote.tar.gz",
|
||||
StoragePath: "file/2026/05/09/remote.tar.gz",
|
||||
StartedAt: completedAt.Add(-time.Second),
|
||||
CompletedAt: &completedAt,
|
||||
}
|
||||
if err := records.Create(ctx, record); err != nil {
|
||||
t.Fatalf("Create record returned error: %v", err)
|
||||
}
|
||||
|
||||
if err := executionService.DeleteRecord(ctx, record.ID); err != nil {
|
||||
t.Fatalf("DeleteRecord returned error: %v", err)
|
||||
}
|
||||
deleted, err := records.FindByID(ctx, record.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if deleted != nil {
|
||||
t.Fatalf("expected record deleted, got %#v", deleted)
|
||||
}
|
||||
calls := dispatcher.snapshot()
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one dispatcher call, got %#v", calls)
|
||||
}
|
||||
if calls[0].NodeID != 10 || calls[0].CmdType != model.AgentCommandTypeDeleteStorageObject {
|
||||
t.Fatalf("unexpected dispatcher call: %#v", calls[0])
|
||||
}
|
||||
if calls[0].Payload["storagePath"] != record.StoragePath {
|
||||
t.Fatalf("expected storagePath %q, got %#v", record.StoragePath, calls[0].Payload)
|
||||
}
|
||||
if calls[0].Payload["targetType"] != string(storage.ProviderTypeLocalDisk) {
|
||||
t.Fatalf("expected local_disk targetType, got %#v", calls[0].Payload)
|
||||
}
|
||||
if _, ok := calls[0].Payload["targetConfig"].(map[string]any); !ok {
|
||||
t.Fatalf("expected targetConfig map, got %#v", calls[0].Payload["targetConfig"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceRestoreRecordRejectsRemoteLocalDisk(t *testing.T) {
|
||||
executionService, _, tasks, _, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
executionService.SetClusterDependencies(&nodeRepoStub{nodes: []model.Node{
|
||||
{ID: 10, Name: "edge-a", Token: "edge-a-token", Status: model.NodeStatusOnline},
|
||||
}}, &fakeDispatcher{})
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
completedAt := time.Now().UTC()
|
||||
record := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: 10,
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "remote.tar.gz",
|
||||
StoragePath: "file/2026/05/09/remote.tar.gz",
|
||||
StartedAt: completedAt.Add(-time.Second),
|
||||
CompletedAt: &completedAt,
|
||||
}
|
||||
if err := records.Create(ctx, record); err != nil {
|
||||
t.Fatalf("Create record returned error: %v", err)
|
||||
}
|
||||
|
||||
err = executionService.RestoreRecord(ctx, record.ID)
|
||||
if err == nil {
|
||||
t.Fatal("expected remote local_disk restore to be rejected")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "Master 无法跨节点访问") {
|
||||
t.Fatalf("expected cross-node local_disk error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceRecordsFirstSuccessfulStorageTarget(t *testing.T) {
|
||||
executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
second := &testStorageProvider{name: "second", objects: map[string][]byte{}}
|
||||
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
|
||||
"second": second,
|
||||
}})
|
||||
cipher := codec.NewConfigCipher("execution-secret")
|
||||
firstConfig, err := cipher.EncryptJSON(map[string]any{"name": "missing"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON first returned error: %v", err)
|
||||
}
|
||||
secondConfig, err := cipher.EncryptJSON(map[string]any{"name": "second"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON second returned error: %v", err)
|
||||
}
|
||||
if err := targets.Create(ctx, &model.StorageTarget{Name: "first", Type: "test_storage", Enabled: true, ConfigCiphertext: firstConfig, ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
|
||||
t.Fatalf("Create first target returned error: %v", err)
|
||||
}
|
||||
if err := targets.Create(ctx, &model.StorageTarget{Name: "second", Type: "test_storage", Enabled: true, ConfigCiphertext: secondConfig, ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
|
||||
t.Fatalf("Create second target returned error: %v", err)
|
||||
}
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
task.StorageTargetID = 2
|
||||
task.StorageTargets = []model.StorageTarget{{ID: 2}, {ID: 3}}
|
||||
if err := tasks.Update(ctx, task); err != nil {
|
||||
t.Fatalf("Update task returned error: %v", err)
|
||||
}
|
||||
|
||||
detail, err := executionService.RunTaskByIDSync(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunTaskByIDSync returned error: %v", err)
|
||||
}
|
||||
if detail.Status != model.BackupRecordStatusSuccess {
|
||||
t.Fatalf("expected success, got %#v", detail)
|
||||
}
|
||||
storedRecord, err := records.FindByID(ctx, detail.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID record returned error: %v", err)
|
||||
}
|
||||
if storedRecord.StorageTargetID != 3 {
|
||||
t.Fatalf("expected record StorageTargetID to point at successful target 3, got %d", storedRecord.StorageTargetID)
|
||||
}
|
||||
if _, ok := second.objects[storedRecord.StoragePath]; !ok {
|
||||
t.Fatalf("expected object in successful provider at %q", storedRecord.StoragePath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceUploadRetryStopsWhenContextCancelled(t *testing.T) {
|
||||
executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
var cancelOnce sync.Once
|
||||
failing := &testStorageProvider{
|
||||
name: "failing",
|
||||
failUpload: true,
|
||||
onUpload: func() {
|
||||
cancelOnce.Do(cancel)
|
||||
},
|
||||
}
|
||||
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
|
||||
"failing": failing,
|
||||
}})
|
||||
cipher := codec.NewConfigCipher("execution-secret")
|
||||
failingConfig, err := cipher.EncryptJSON(map[string]any{"name": "failing"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON returned error: %v", err)
|
||||
}
|
||||
if err := targets.Update(ctx, &model.StorageTarget{
|
||||
ID: 1,
|
||||
Name: "local",
|
||||
Type: "test_storage",
|
||||
Enabled: true,
|
||||
ConfigCiphertext: failingConfig,
|
||||
ConfigVersion: 1,
|
||||
LastTestStatus: "unknown",
|
||||
}); err != nil {
|
||||
t.Fatalf("Update target returned error: %v", err)
|
||||
}
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
startedAt := time.Now().UTC()
|
||||
record := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
Status: model.BackupRecordStatusRunning,
|
||||
StartedAt: startedAt,
|
||||
}
|
||||
if err := records.Create(ctx, record); err != nil {
|
||||
t.Fatalf("Create record returned error: %v", err)
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
executionService.executeTask(ctx, task, record.ID, startedAt)
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("expected cancelled upload retry to stop without waiting for backoff sleep")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceReadsStorageUsageOnceForMultiTargetQuotaChecks(t *testing.T) {
|
||||
executionService, _, tasks, targets, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
first := &testStorageProvider{name: "first", objects: map[string][]byte{}}
|
||||
second := &testStorageProvider{name: "second", objects: map[string][]byte{}}
|
||||
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
|
||||
"first": first,
|
||||
"second": second,
|
||||
}})
|
||||
cipher := codec.NewConfigCipher("execution-secret")
|
||||
firstConfig, err := cipher.EncryptJSON(map[string]any{"name": "first"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON first returned error: %v", err)
|
||||
}
|
||||
secondConfig, err := cipher.EncryptJSON(map[string]any{"name": "second"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON second returned error: %v", err)
|
||||
}
|
||||
if err := targets.Update(ctx, &model.StorageTarget{ID: 1, Name: "local", Type: "test_storage", Enabled: true, ConfigCiphertext: firstConfig, ConfigVersion: 1, LastTestStatus: "unknown", QuotaBytes: 1 << 30}); err != nil {
|
||||
t.Fatalf("Update first target returned error: %v", err)
|
||||
}
|
||||
if err := targets.Create(ctx, &model.StorageTarget{Name: "second", Type: "test_storage", Enabled: true, ConfigCiphertext: secondConfig, ConfigVersion: 1, LastTestStatus: "unknown", QuotaBytes: 1 << 30}); err != nil {
|
||||
t.Fatalf("Create second target returned error: %v", err)
|
||||
}
|
||||
task, err := tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task returned error: %v", err)
|
||||
}
|
||||
task.StorageTargets = []model.StorageTarget{{ID: 1}, {ID: 2}}
|
||||
if err := tasks.Update(ctx, task); err != nil {
|
||||
t.Fatalf("Update task returned error: %v", err)
|
||||
}
|
||||
executionService.records = &storageUsageCountingRecordRepo{BackupRecordRepository: records}
|
||||
|
||||
detail, err := executionService.RunTaskByIDSync(ctx, task.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("RunTaskByIDSync returned error: %v", err)
|
||||
}
|
||||
if detail.Status != model.BackupRecordStatusSuccess {
|
||||
t.Fatalf("expected success, got %#v", detail)
|
||||
}
|
||||
countingRepo := executionService.records.(*storageUsageCountingRecordRepo)
|
||||
if countingRepo.usageCalls != 1 {
|
||||
t.Fatalf("expected StorageUsage to be called once for quota snapshot, got %d", countingRepo.usageCalls)
|
||||
}
|
||||
if len(first.objects) != 1 || len(second.objects) != 1 {
|
||||
t.Fatalf("expected both targets to receive upload, got first=%d second=%d", len(first.objects), len(second.objects))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupExecutionServiceContinuesWhenStorageUsageSnapshotFails(t *testing.T) {
|
||||
executionService, _, _, targets, records, _, _ := newExecutionTestServices(t)
|
||||
ctx := context.Background()
|
||||
provider := &testStorageProvider{name: "primary", objects: map[string][]byte{}}
|
||||
executionService.storageRegistry = storage.NewRegistry(&testStorageFactory{providers: map[string]*testStorageProvider{
|
||||
"primary": provider,
|
||||
}})
|
||||
cipher := codec.NewConfigCipher("execution-secret")
|
||||
configCiphertext, err := cipher.EncryptJSON(map[string]any{"name": "primary"})
|
||||
if err != nil {
|
||||
t.Fatalf("EncryptJSON returned error: %v", err)
|
||||
}
|
||||
if err := targets.Update(ctx, &model.StorageTarget{
|
||||
ID: 1,
|
||||
Name: "local",
|
||||
Type: "test_storage",
|
||||
Enabled: true,
|
||||
ConfigCiphertext: configCiphertext,
|
||||
ConfigVersion: 1,
|
||||
LastTestStatus: "unknown",
|
||||
QuotaBytes: 1 << 30,
|
||||
}); err != nil {
|
||||
t.Fatalf("Update target returned error: %v", err)
|
||||
}
|
||||
executionService.records = &storageUsageFailingRecordRepo{
|
||||
BackupRecordRepository: records,
|
||||
err: errStorageUsageFailed,
|
||||
}
|
||||
|
||||
detail, err := executionService.RunTaskByIDSync(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("RunTaskByIDSync returned error: %v", err)
|
||||
}
|
||||
if detail.Status != model.BackupRecordStatusSuccess {
|
||||
t.Fatalf("expected success despite soft quota usage snapshot error, got %#v", detail)
|
||||
}
|
||||
if len(provider.objects) != 1 {
|
||||
t.Fatalf("expected upload to proceed, got %d uploaded objects", len(provider.objects))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupRecordServiceRestore(t *testing.T) {
|
||||
executionService, recordService, _, _, _, sourceDir, _ := newExecutionTestServices(t)
|
||||
detail, err := executionService.RunTaskByIDSync(context.Background(), 1)
|
||||
@@ -546,27 +105,3 @@ func TestBackupRecordServiceRestore(t *testing.T) {
|
||||
t.Fatalf("unexpected restored content: %s", string(content))
|
||||
}
|
||||
}
|
||||
|
||||
type storageUsageCountingRecordRepo struct {
|
||||
repository.BackupRecordRepository
|
||||
mu sync.Mutex
|
||||
usageCalls int
|
||||
}
|
||||
|
||||
func (r *storageUsageCountingRecordRepo) StorageUsage(ctx context.Context) ([]repository.BackupStorageUsageItem, error) {
|
||||
r.mu.Lock()
|
||||
r.usageCalls++
|
||||
r.mu.Unlock()
|
||||
return r.BackupRecordRepository.StorageUsage(ctx)
|
||||
}
|
||||
|
||||
type storageUsageFailingRecordRepo struct {
|
||||
repository.BackupRecordRepository
|
||||
err error
|
||||
}
|
||||
|
||||
func (r *storageUsageFailingRecordRepo) StorageUsage(context.Context) ([]repository.BackupStorageUsageItem, error) {
|
||||
return nil, r.err
|
||||
}
|
||||
|
||||
var errStorageUsageFailed = errors.New("storage usage failed")
|
||||
|
||||
@@ -33,16 +33,16 @@ type BackupTaskUpsertInput struct {
|
||||
DBPassword string `json:"dbPassword" binding:"max=255"`
|
||||
DBName string `json:"dbName" binding:"max=255"`
|
||||
DBPath string `json:"dbPath" binding:"max=500"`
|
||||
StorageTargetID uint `json:"storageTargetId"` // deprecated: 向后兼容
|
||||
StorageTargetIDs []uint `json:"storageTargetIds"` // 新增:多存储目标
|
||||
NodeID uint `json:"nodeId"` // 执行节点(0 = 本机 Master 或节点池)
|
||||
StorageTargetID uint `json:"storageTargetId"` // deprecated: 向后兼容
|
||||
StorageTargetIDs []uint `json:"storageTargetIds"` // 新增:多存储目标
|
||||
NodeID uint `json:"nodeId"` // 执行节点(0 = 本机 Master 或节点池)
|
||||
// NodePoolTag 节点池标签。NodeID=0 且本字段非空时,调度器动态从 Labels 命中的在线节点中选负载最低者。
|
||||
NodePoolTag string `json:"nodePoolTag" binding:"max=64"`
|
||||
Tags string `json:"tags" binding:"max=500"` // 逗号分隔标签
|
||||
RetentionDays int `json:"retentionDays"`
|
||||
Compression string `json:"compression" binding:"omitempty,oneof=gzip none"`
|
||||
Encrypt bool `json:"encrypt"`
|
||||
MaxBackups int `json:"maxBackups"`
|
||||
NodePoolTag string `json:"nodePoolTag" binding:"max=64"`
|
||||
Tags string `json:"tags" binding:"max=500"` // 逗号分隔标签
|
||||
RetentionDays int `json:"retentionDays"`
|
||||
Compression string `json:"compression" binding:"omitempty,oneof=gzip none"`
|
||||
Encrypt bool `json:"encrypt"`
|
||||
MaxBackups int `json:"maxBackups"`
|
||||
// ExtraConfig 类型特有扩展配置(如 SAP HANA 的 backupLevel/backupChannels)
|
||||
ExtraConfig map[string]any `json:"extraConfig"`
|
||||
// 验证(恢复演练)配置
|
||||
@@ -70,8 +70,8 @@ type BackupTaskSummary struct {
|
||||
Type string `json:"type"`
|
||||
Enabled bool `json:"enabled"`
|
||||
CronExpr string `json:"cronExpr"`
|
||||
StorageTargetID uint `json:"storageTargetId"` // deprecated: 取第一个
|
||||
StorageTargetName string `json:"storageTargetName"` // deprecated: 取第一个
|
||||
StorageTargetID uint `json:"storageTargetId"` // deprecated: 取第一个
|
||||
StorageTargetName string `json:"storageTargetName"` // deprecated: 取第一个
|
||||
StorageTargetIDs []uint `json:"storageTargetIds"`
|
||||
StorageTargetNames []string `json:"storageTargetNames"`
|
||||
NodeID uint `json:"nodeId"`
|
||||
@@ -91,10 +91,10 @@ type BackupTaskSummary struct {
|
||||
SLAHoursRPO int `json:"slaHoursRpo"`
|
||||
AlertOnConsecutiveFails int `json:"alertOnConsecutiveFails"`
|
||||
// 备份复制目标(3-2-1)
|
||||
ReplicationTargetIDs []uint `json:"replicationTargetIds"`
|
||||
MaintenanceWindows string `json:"maintenanceWindows"`
|
||||
DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
ReplicationTargetIDs []uint `json:"replicationTargetIds"`
|
||||
MaintenanceWindows string `json:"maintenanceWindows"`
|
||||
DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
}
|
||||
|
||||
type BackupTaskDetail struct {
|
||||
@@ -488,7 +488,6 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
|
||||
return apperror.BadRequest("BACKUP_STORAGE_TARGET_INVALID", fmt.Sprintf("关联的存储目标 %d 不存在", tid), nil)
|
||||
}
|
||||
}
|
||||
var fixedNode *model.Node
|
||||
if input.NodeID > 0 && s.nodes != nil {
|
||||
node, err := s.nodes.FindByID(ctx, input.NodeID)
|
||||
if err != nil {
|
||||
@@ -497,17 +496,12 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
|
||||
if node == nil {
|
||||
return apperror.BadRequest("BACKUP_TASK_INVALID", "所选执行节点不存在", nil)
|
||||
}
|
||||
fixedNode = node
|
||||
}
|
||||
// 节点池与固定节点互斥:固定节点已确定执行位置,不再动态调度
|
||||
if input.NodeID > 0 && strings.TrimSpace(input.NodePoolTag) != "" {
|
||||
return apperror.BadRequest("BACKUP_TASK_INVALID",
|
||||
"固定执行节点与节点池标签只能选其一", nil)
|
||||
}
|
||||
if input.Encrypt && (strings.TrimSpace(input.NodePoolTag) != "" || (fixedNode != nil && !fixedNode.IsLocal)) {
|
||||
return apperror.BadRequest("BACKUP_TASK_REMOTE_ENCRYPT_UNSUPPORTED",
|
||||
"远程节点暂不支持加密备份。请关闭加密,或将任务固定在 Master 本机执行。", nil)
|
||||
}
|
||||
if input.RetentionDays < 0 {
|
||||
return apperror.BadRequest("BACKUP_TASK_INVALID", "保留天数不能小于 0", nil)
|
||||
}
|
||||
@@ -645,38 +639,38 @@ func (s *BackupTaskService) buildTask(existing *model.BackupTask, input BackupTa
|
||||
return nil, apperror.BadRequest("BACKUP_TASK_INVALID", "扩展配置格式不合法", err)
|
||||
}
|
||||
item := &model.BackupTask{
|
||||
Name: strings.TrimSpace(input.Name),
|
||||
Type: normalizeBackupTaskType(input.Type),
|
||||
Enabled: input.Enabled,
|
||||
CronExpr: strings.TrimSpace(input.CronExpr),
|
||||
SourcePath: primarySourcePath,
|
||||
SourcePaths: sourcePathsJSON,
|
||||
ExcludePatterns: excludePatterns,
|
||||
DBHost: strings.TrimSpace(input.DBHost),
|
||||
DBPort: input.DBPort,
|
||||
DBUser: strings.TrimSpace(input.DBUser),
|
||||
DBPasswordCiphertext: passwordCiphertext,
|
||||
DBName: strings.TrimSpace(input.DBName),
|
||||
DBPath: strings.TrimSpace(input.DBPath),
|
||||
ExtraConfig: extraConfigJSON,
|
||||
StorageTargetID: primaryTargetID,
|
||||
StorageTargets: storageTargets,
|
||||
NodeID: input.NodeID,
|
||||
NodePoolTag: strings.TrimSpace(input.NodePoolTag),
|
||||
Tags: strings.TrimSpace(input.Tags),
|
||||
RetentionDays: input.RetentionDays,
|
||||
Compression: compression,
|
||||
Encrypt: input.Encrypt,
|
||||
MaxBackups: maxBackups,
|
||||
LastStatus: "idle",
|
||||
VerifyEnabled: input.VerifyEnabled,
|
||||
VerifyCronExpr: strings.TrimSpace(input.VerifyCronExpr),
|
||||
VerifyMode: normalizeVerifyMode(input.VerifyMode),
|
||||
SLAHoursRPO: maxInt(0, input.SLAHoursRPO),
|
||||
Name: strings.TrimSpace(input.Name),
|
||||
Type: normalizeBackupTaskType(input.Type),
|
||||
Enabled: input.Enabled,
|
||||
CronExpr: strings.TrimSpace(input.CronExpr),
|
||||
SourcePath: primarySourcePath,
|
||||
SourcePaths: sourcePathsJSON,
|
||||
ExcludePatterns: excludePatterns,
|
||||
DBHost: strings.TrimSpace(input.DBHost),
|
||||
DBPort: input.DBPort,
|
||||
DBUser: strings.TrimSpace(input.DBUser),
|
||||
DBPasswordCiphertext: passwordCiphertext,
|
||||
DBName: strings.TrimSpace(input.DBName),
|
||||
DBPath: strings.TrimSpace(input.DBPath),
|
||||
ExtraConfig: extraConfigJSON,
|
||||
StorageTargetID: primaryTargetID,
|
||||
StorageTargets: storageTargets,
|
||||
NodeID: input.NodeID,
|
||||
NodePoolTag: strings.TrimSpace(input.NodePoolTag),
|
||||
Tags: strings.TrimSpace(input.Tags),
|
||||
RetentionDays: input.RetentionDays,
|
||||
Compression: compression,
|
||||
Encrypt: input.Encrypt,
|
||||
MaxBackups: maxBackups,
|
||||
LastStatus: "idle",
|
||||
VerifyEnabled: input.VerifyEnabled,
|
||||
VerifyCronExpr: strings.TrimSpace(input.VerifyCronExpr),
|
||||
VerifyMode: normalizeVerifyMode(input.VerifyMode),
|
||||
SLAHoursRPO: maxInt(0, input.SLAHoursRPO),
|
||||
AlertOnConsecutiveFails: alertThreshold(input.AlertOnConsecutiveFails),
|
||||
ReplicationTargetIDs: encodeUintCSV(input.ReplicationTargetIDs),
|
||||
MaintenanceWindows: strings.TrimSpace(input.MaintenanceWindows),
|
||||
DependsOnTaskIDs: encodeUintCSV(input.DependsOnTaskIDs),
|
||||
ReplicationTargetIDs: encodeUintCSV(input.ReplicationTargetIDs),
|
||||
MaintenanceWindows: strings.TrimSpace(input.MaintenanceWindows),
|
||||
DependsOnTaskIDs: encodeUintCSV(input.DependsOnTaskIDs),
|
||||
}
|
||||
if existing != nil {
|
||||
item.LastRunAt = existing.LastRunAt
|
||||
@@ -742,25 +736,25 @@ func toBackupTaskSummary(item *model.BackupTask) BackupTaskSummary {
|
||||
primaryName = targetNames[0]
|
||||
}
|
||||
return BackupTaskSummary{
|
||||
ID: item.ID,
|
||||
Name: item.Name,
|
||||
Type: normalizeBackupTaskType(item.Type),
|
||||
Enabled: item.Enabled,
|
||||
CronExpr: item.CronExpr,
|
||||
StorageTargetID: primaryID,
|
||||
StorageTargetName: primaryName,
|
||||
StorageTargetIDs: targetIDs,
|
||||
StorageTargetNames: targetNames,
|
||||
NodeID: item.NodeID,
|
||||
NodeName: item.Node.Name,
|
||||
NodePoolTag: item.NodePoolTag,
|
||||
Tags: item.Tags,
|
||||
RetentionDays: item.RetentionDays,
|
||||
Compression: item.Compression,
|
||||
Encrypt: item.Encrypt,
|
||||
MaxBackups: item.MaxBackups,
|
||||
LastRunAt: item.LastRunAt,
|
||||
LastStatus: item.LastStatus,
|
||||
ID: item.ID,
|
||||
Name: item.Name,
|
||||
Type: normalizeBackupTaskType(item.Type),
|
||||
Enabled: item.Enabled,
|
||||
CronExpr: item.CronExpr,
|
||||
StorageTargetID: primaryID,
|
||||
StorageTargetName: primaryName,
|
||||
StorageTargetIDs: targetIDs,
|
||||
StorageTargetNames: targetNames,
|
||||
NodeID: item.NodeID,
|
||||
NodeName: item.Node.Name,
|
||||
NodePoolTag: item.NodePoolTag,
|
||||
Tags: item.Tags,
|
||||
RetentionDays: item.RetentionDays,
|
||||
Compression: item.Compression,
|
||||
Encrypt: item.Encrypt,
|
||||
MaxBackups: item.MaxBackups,
|
||||
LastRunAt: item.LastRunAt,
|
||||
LastStatus: item.LastStatus,
|
||||
VerifyEnabled: item.VerifyEnabled,
|
||||
VerifyCronExpr: item.VerifyCronExpr,
|
||||
VerifyMode: item.VerifyMode,
|
||||
@@ -769,7 +763,7 @@ func toBackupTaskSummary(item *model.BackupTask) BackupTaskSummary {
|
||||
ReplicationTargetIDs: parseUintCSV(item.ReplicationTargetIDs),
|
||||
MaintenanceWindows: item.MaintenanceWindows,
|
||||
DependsOnTaskIDs: parseUintCSV(item.DependsOnTaskIDs),
|
||||
UpdatedAt: item.UpdatedAt,
|
||||
UpdatedAt: item.UpdatedAt,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"backupx/server/internal/config"
|
||||
@@ -30,82 +29,6 @@ func newBackupTaskServiceForTest(t *testing.T) (*BackupTaskService, repository.S
|
||||
return service, targets, tasks
|
||||
}
|
||||
|
||||
func TestBackupTaskServiceRejectsEncryptedRemoteTasks(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
service, targets, _ := newBackupTaskServiceForTest(t)
|
||||
service.SetNodeRepository(&nodeRepoStub{nodes: []model.Node{
|
||||
{ID: 41, Name: "master", Token: "master-token", Status: model.NodeStatusOnline, IsLocal: true},
|
||||
{ID: 42, Name: "edge", Token: "edge-token", Status: model.NodeStatusOnline, IsLocal: false},
|
||||
}})
|
||||
if err := targets.Create(ctx, &model.StorageTarget{Name: "local", Type: "local_disk", Enabled: true, ConfigCiphertext: "ciphertext", ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
|
||||
t.Fatalf("seed storage target error: %v", err)
|
||||
}
|
||||
|
||||
_, err := service.Create(ctx, BackupTaskUpsertInput{
|
||||
Name: "encrypted-node-pool",
|
||||
Type: "file",
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/site",
|
||||
StorageTargetID: 1,
|
||||
NodePoolTag: "db",
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
Encrypt: true,
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "远程节点暂不支持加密备份") {
|
||||
t.Fatalf("expected encrypted node-pool task to be rejected, got %v", err)
|
||||
}
|
||||
|
||||
created, err := service.Create(ctx, BackupTaskUpsertInput{
|
||||
Name: "local-encrypted",
|
||||
Type: "file",
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/site",
|
||||
StorageTargetID: 1,
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
Encrypt: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Create local encrypted task returned error: %v", err)
|
||||
}
|
||||
localNodeTask, err := service.Create(ctx, BackupTaskUpsertInput{
|
||||
Name: "local-node-encrypted",
|
||||
Type: "file",
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/site",
|
||||
StorageTargetID: 1,
|
||||
NodeID: 41,
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
Encrypt: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Create encrypted task pinned to local node returned error: %v", err)
|
||||
}
|
||||
if localNodeTask.NodeID != 41 || !localNodeTask.Encrypt {
|
||||
t.Fatalf("expected encrypted task to keep local node, got %#v", localNodeTask)
|
||||
}
|
||||
_, err = service.Update(ctx, created.ID, BackupTaskUpsertInput{
|
||||
Name: created.Name,
|
||||
Type: created.Type,
|
||||
Enabled: true,
|
||||
SourcePath: "/srv/site",
|
||||
StorageTargetID: 1,
|
||||
NodeID: 42,
|
||||
RetentionDays: 30,
|
||||
Compression: "gzip",
|
||||
MaxBackups: 10,
|
||||
Encrypt: true,
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "远程节点暂不支持加密备份") {
|
||||
t.Fatalf("expected encrypted fixed-node update to be rejected, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupTaskServiceCreateAndGet(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
service, targets, _ := newBackupTaskServiceForTest(t)
|
||||
|
||||
@@ -3,14 +3,12 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"backupx/server/internal/apperror"
|
||||
"backupx/server/internal/installscript"
|
||||
"backupx/server/internal/model"
|
||||
"backupx/server/internal/repository"
|
||||
)
|
||||
@@ -44,25 +42,6 @@ type InstallTokenOutput struct {
|
||||
Record *model.AgentInstallToken
|
||||
}
|
||||
|
||||
// InstallCommandInput is the complete business input required to generate a
// displayable install command.
|
||||
type InstallCommandInput struct {
|
||||
// Embedded token-creation parameters; CreateCommand validates them and passes
// them to Create.
InstallTokenInput
|
||||
// MasterURL is the base URL that CreateCommand trims (whitespace and trailing
// "/") and uses as the prefix of the generated install URLs. Must be non-empty.
MasterURL string
|
||||
}
|
||||
|
||||
// InstallCommandOutput is the complete business output the UI needs to build an
// install command.
|
||||
type InstallCommandOutput struct {
|
||||
// Token, ExpiresAt, Node and Record are copied from the result of Create.
Token string
|
||||
ExpiresAt time.Time
|
||||
Node *model.Node
|
||||
Record *model.AgentInstallToken
|
||||
// URL is masterURL + "/api/install/" + Token.
URL string
|
||||
// FallbackURL is masterURL + "/install/" + Token.
FallbackURL string
|
||||
// ComposeURL and FallbackComposeURL are URL / FallbackURL with "/compose.yml"
// appended; CreateCommand sets them only when Record.Mode is the docker mode.
ComposeURL string
|
||||
FallbackComposeURL string
|
||||
// ScriptBase64 is the rendered install script, standard-base64 encoded.
ScriptBase64 string
|
||||
}
|
||||
|
||||
// ConsumedInstallToken 消费成功后返回给 handler 的组合体。
|
||||
type ConsumedInstallToken struct {
|
||||
Record *model.AgentInstallToken
|
||||
@@ -127,67 +106,6 @@ func (s *InstallTokenService) Create(ctx context.Context, in InstallTokenInput)
|
||||
return &InstallTokenOutput{Token: token, ExpiresAt: expiresAt, Node: node, Record: record}, nil
|
||||
}
|
||||
|
||||
// CreateCommand 创建 install token,并返回 UI 展示安装命令所需的 URL 与嵌入式脚本。
|
||||
func (s *InstallTokenService) CreateCommand(ctx context.Context, in InstallCommandInput) (*InstallCommandOutput, error) {
|
||||
masterURL := strings.TrimRight(strings.TrimSpace(in.MasterURL), "/")
|
||||
if masterURL == "" {
|
||||
return nil, apperror.BadRequest("INSTALL_TOKEN_INVALID", "masterURL 必填", nil)
|
||||
}
|
||||
if err := s.validate(in.InstallTokenInput); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
node, err := s.nodeRepo.FindByID(ctx, in.NodeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if node == nil {
|
||||
return nil, apperror.New(404, "NODE_NOT_FOUND", "节点不存在", nil)
|
||||
}
|
||||
if _, err := renderInstallCommandScript(masterURL, node, &model.AgentInstallToken{
|
||||
Mode: in.Mode,
|
||||
Arch: in.Arch,
|
||||
AgentVer: in.AgentVersion,
|
||||
DownloadSrc: in.DownloadSrc,
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out, err := s.Create(ctx, in.InstallTokenInput)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
script, err := renderInstallCommandScript(masterURL, out.Node, out.Record)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := &InstallCommandOutput{
|
||||
Token: out.Token,
|
||||
ExpiresAt: out.ExpiresAt,
|
||||
Node: out.Node,
|
||||
Record: out.Record,
|
||||
URL: masterURL + "/api/install/" + out.Token,
|
||||
FallbackURL: masterURL + "/install/" + out.Token,
|
||||
ScriptBase64: base64.StdEncoding.EncodeToString([]byte(script)),
|
||||
}
|
||||
if out.Record.Mode == model.InstallModeDocker {
|
||||
result.ComposeURL = masterURL + "/api/install/" + out.Token + "/compose.yml"
|
||||
result.FallbackComposeURL = masterURL + "/install/" + out.Token + "/compose.yml"
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func renderInstallCommandScript(masterURL string, node *model.Node, record *model.AgentInstallToken) (string, error) {
|
||||
return installscript.RenderScript(installscript.Context{
|
||||
MasterURL: masterURL,
|
||||
AgentToken: node.Token,
|
||||
AgentVersion: record.AgentVer,
|
||||
Mode: record.Mode,
|
||||
Arch: record.Arch,
|
||||
DownloadBase: installscript.DownloadBaseFor(record.DownloadSrc),
|
||||
InstallPrefix: "/opt/backupx-agent",
|
||||
NodeID: node.ID,
|
||||
})
|
||||
}
|
||||
|
||||
// Consume 原子消费令牌。未命中/已过期/已消费均返回 (nil, nil)。
|
||||
func (s *InstallTokenService) Consume(ctx context.Context, token string) (*ConsumedInstallToken, error) {
|
||||
if strings.TrimSpace(token) == "" {
|
||||
@@ -252,8 +170,8 @@ func (s *InstallTokenService) validate(in InstallTokenInput) error {
|
||||
if !validInstallSources[in.DownloadSrc] {
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "downloadSrc 非法", nil)
|
||||
}
|
||||
if err := validateInstallAgentVersion(in.AgentVersion); err != nil {
|
||||
return err
|
||||
if strings.TrimSpace(in.AgentVersion) == "" {
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 必填", nil)
|
||||
}
|
||||
if in.TTLSeconds < InstallTokenMinTTL || in.TTLSeconds > InstallTokenMaxTTL {
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID",
|
||||
@@ -262,27 +180,6 @@ func (s *InstallTokenService) validate(in InstallTokenInput) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateInstallAgentVersion(v string) error {
|
||||
v = strings.TrimSpace(v)
|
||||
if v == "" {
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 必填", nil)
|
||||
}
|
||||
if len(v) > 64 {
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 不能超过 64 字符", nil)
|
||||
}
|
||||
for _, c := range v {
|
||||
switch {
|
||||
case c >= '0' && c <= '9':
|
||||
case c >= 'a' && c <= 'z':
|
||||
case c >= 'A' && c <= 'Z':
|
||||
case c == '.' || c == '-' || c == '_' || c == '+':
|
||||
default:
|
||||
return apperror.BadRequest("INSTALL_TOKEN_INVALID", "agentVersion 包含非法字符", nil)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateInstallToken() (string, error) {
|
||||
b := make([]byte, 32)
|
||||
if _, err := rand.Read(b); err != nil {
|
||||
|
||||
@@ -131,79 +131,6 @@ func TestInstallTokenServiceValidatesInput(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallTokenServiceRejectsInvalidAgentVersionBeforeCreate(t *testing.T) {
|
||||
db := openInstallTokenTestDB(t)
|
||||
nodeRepo := repository.NewNodeRepository(db)
|
||||
node := &model.Node{Name: "invalid-version", Token: "feedface"}
|
||||
if err := nodeRepo.Create(context.Background(), node); err != nil {
|
||||
t.Fatalf("create node: %v", err)
|
||||
}
|
||||
tokenRepo := repository.NewAgentInstallTokenRepository(db)
|
||||
svc := NewInstallTokenService(tokenRepo, nodeRepo)
|
||||
|
||||
_, err := svc.Create(context.Background(), InstallTokenInput{
|
||||
NodeID: node.ID,
|
||||
Mode: model.InstallModeSystemd,
|
||||
Arch: model.InstallArchAuto,
|
||||
AgentVersion: "v1 && rm -rf /",
|
||||
DownloadSrc: model.InstallSourceGitHub,
|
||||
TTLSeconds: 900,
|
||||
CreatedByID: 1,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatalf("expected invalid version error")
|
||||
}
|
||||
count, err := tokenRepo.CountCreatedSince(context.Background(), node.ID, time.Now().UTC().Add(-time.Hour))
|
||||
if err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if count != 0 {
|
||||
t.Fatalf("invalid request created %d token records", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallTokenServiceCreateCommandBuildsURLsAndScript(t *testing.T) {
|
||||
db := openInstallTokenTestDB(t)
|
||||
nodeRepo := repository.NewNodeRepository(db)
|
||||
node := &model.Node{
|
||||
Name: "command-node",
|
||||
Token: "deadbeefcafebabe0123456789abcdef0123456789abcdef0123456789abcdef",
|
||||
}
|
||||
if err := nodeRepo.Create(context.Background(), node); err != nil {
|
||||
t.Fatalf("create node: %v", err)
|
||||
}
|
||||
tokenRepo := repository.NewAgentInstallTokenRepository(db)
|
||||
svc := NewInstallTokenService(tokenRepo, nodeRepo)
|
||||
|
||||
out, err := svc.CreateCommand(context.Background(), InstallCommandInput{
|
||||
InstallTokenInput: InstallTokenInput{
|
||||
NodeID: node.ID,
|
||||
Mode: model.InstallModeDocker,
|
||||
Arch: model.InstallArchAuto,
|
||||
AgentVersion: "v1.7.0",
|
||||
DownloadSrc: model.InstallSourceGitHub,
|
||||
TTLSeconds: 900,
|
||||
CreatedByID: 1,
|
||||
},
|
||||
MasterURL: "https://public.example.com/base",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create command: %v", err)
|
||||
}
|
||||
if out.Token == "" || out.ScriptBase64 == "" {
|
||||
t.Fatalf("missing token or script: %+v", out)
|
||||
}
|
||||
if out.URL != "https://public.example.com/base/api/install/"+out.Token {
|
||||
t.Fatalf("bad url: %s", out.URL)
|
||||
}
|
||||
if out.FallbackURL != "https://public.example.com/base/install/"+out.Token {
|
||||
t.Fatalf("bad fallback url: %s", out.FallbackURL)
|
||||
}
|
||||
if out.ComposeURL != "https://public.example.com/base/api/install/"+out.Token+"/compose.yml" {
|
||||
t.Fatalf("bad compose url: %s", out.ComposeURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInstallTokenServiceRateLimit(t *testing.T) {
|
||||
db := openInstallTokenTestDB(t)
|
||||
nodeRepo := repository.NewNodeRepository(db)
|
||||
|
||||
@@ -36,19 +36,6 @@ type NodeSummary struct {
|
||||
BandwidthLimit string `json:"bandwidthLimit"`
|
||||
Labels string `json:"labels"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
Queue NodeQueue `json:"queue"`
|
||||
RunningTasks int `json:"runningTasks"`
|
||||
LastError string `json:"lastError,omitempty"`
|
||||
Health string `json:"health"`
|
||||
}
|
||||
|
||||
// NodeQueue is the JSON shape of a node's agent-command queue statistics.
type NodeQueue struct {
	// Pending is serialized as "pending".
	Pending int `json:"pending"`
	// Dispatched is serialized as "dispatched".
	Dispatched int `json:"dispatched"`
	// Depth is serialized as "depth".
	// NOTE(review): presumably the number of still-active commands — confirm against the repository query.
	Depth int `json:"depth"`
	// Timeouts is serialized as "timeouts".
	Timeouts int `json:"timeouts"`
	// OldestActiveAt is optional and omitted from the JSON output when nil.
	OldestActiveAt *time.Time `json:"oldestActiveAt,omitempty"`
	// OldestActiveAgeS is serialized as "oldestActiveAgeSeconds".
	OldestActiveAgeS int `json:"oldestActiveAgeSeconds"`
}
|
||||
|
||||
// NodeCreateInput is the input for creating a new remote node.
|
||||
@@ -67,11 +54,10 @@ type NodeUpdateInput struct {
|
||||
|
||||
// NodeService manages the cluster nodes.
|
||||
type NodeService struct {
|
||||
repo repository.NodeRepository
|
||||
taskRepo repository.BackupTaskRepository
|
||||
agentRPC NodeAgentRPC
|
||||
cmdRepo repository.AgentCommandRepository
|
||||
version string
|
||||
repo repository.NodeRepository
|
||||
taskRepo repository.BackupTaskRepository
|
||||
agentRPC NodeAgentRPC
|
||||
version string
|
||||
}
|
||||
|
||||
// NodeAgentRPC 抽象 Agent 远程调用能力(避免 service 内循环依赖)。
|
||||
@@ -95,10 +81,6 @@ func (s *NodeService) SetAgentRPC(rpc NodeAgentRPC) {
|
||||
s.agentRPC = rpc
|
||||
}
|
||||
|
||||
func (s *NodeService) SetAgentCommandRepository(cmdRepo repository.AgentCommandRepository) {
|
||||
s.cmdRepo = cmdRepo
|
||||
}
|
||||
|
||||
// EnsureLocalNode creates the default "local" node if it does not exist.
|
||||
func (s *NodeService) EnsureLocalNode(ctx context.Context) error {
|
||||
existing, err := s.repo.FindLocal(ctx)
|
||||
@@ -138,10 +120,24 @@ func (s *NodeService) List(ctx context.Context) ([]NodeSummary, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
queueByNode := s.loadQueueSummaries(ctx)
|
||||
result := make([]NodeSummary, len(nodes))
|
||||
for i, n := range nodes {
|
||||
result[i] = s.toNodeSummary(&n, queueByNode[n.ID])
|
||||
result[i] = NodeSummary{
|
||||
ID: n.ID,
|
||||
Name: n.Name,
|
||||
Hostname: n.Hostname,
|
||||
IPAddress: n.IPAddress,
|
||||
Status: n.Status,
|
||||
IsLocal: n.IsLocal,
|
||||
OS: n.OS,
|
||||
Arch: n.Arch,
|
||||
AgentVersion: n.AgentVer,
|
||||
LastSeen: n.LastSeen,
|
||||
MaxConcurrent: n.MaxConcurrent,
|
||||
BandwidthLimit: n.BandwidthLimit,
|
||||
Labels: n.Labels,
|
||||
CreatedAt: n.CreatedAt,
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -154,24 +150,7 @@ func (s *NodeService) Get(ctx context.Context, id uint) (*NodeSummary, error) {
|
||||
if node == nil {
|
||||
return nil, apperror.New(http.StatusNotFound, "NODE_NOT_FOUND", "节点不存在", nil)
|
||||
}
|
||||
queueByNode := s.loadQueueSummaries(ctx)
|
||||
summary := s.toNodeSummary(node, queueByNode[node.ID])
|
||||
return &summary, nil
|
||||
}
|
||||
|
||||
func (s *NodeService) loadQueueSummaries(ctx context.Context) map[uint]repository.AgentCommandQueueSummary {
|
||||
if s.cmdRepo == nil {
|
||||
return nil
|
||||
}
|
||||
summaries, err := s.cmdRepo.NodeQueueSummaries(ctx)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return summaries
|
||||
}
|
||||
|
||||
func (s *NodeService) toNodeSummary(node *model.Node, queue repository.AgentCommandQueueSummary) NodeSummary {
|
||||
summary := NodeSummary{
|
||||
return &NodeSummary{
|
||||
ID: node.ID,
|
||||
Name: node.Name,
|
||||
Hostname: node.Hostname,
|
||||
@@ -186,31 +165,7 @@ func (s *NodeService) toNodeSummary(node *model.Node, queue repository.AgentComm
|
||||
BandwidthLimit: node.BandwidthLimit,
|
||||
Labels: node.Labels,
|
||||
CreatedAt: node.CreatedAt,
|
||||
Queue: NodeQueue{
|
||||
Pending: queue.Pending,
|
||||
Dispatched: queue.Dispatched,
|
||||
Depth: queue.Depth,
|
||||
Timeouts: queue.Timeouts,
|
||||
OldestActiveAt: queue.OldestActiveAt,
|
||||
},
|
||||
RunningTasks: queue.Running,
|
||||
LastError: queue.LastError,
|
||||
Health: nodeHealth(node, queue),
|
||||
}
|
||||
if queue.OldestActiveAt != nil {
|
||||
summary.Queue.OldestActiveAgeS = int(time.Since(*queue.OldestActiveAt).Seconds())
|
||||
}
|
||||
return summary
|
||||
}
|
||||
|
||||
func nodeHealth(node *model.Node, queue repository.AgentCommandQueueSummary) string {
|
||||
if node.Status != model.NodeStatusOnline {
|
||||
return "offline"
|
||||
}
|
||||
if queue.Timeouts > 0 || strings.TrimSpace(queue.LastError) != "" {
|
||||
return "degraded"
|
||||
}
|
||||
return "healthy"
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Create registers a new remote node and returns its authentication token.
|
||||
|
||||
@@ -23,9 +23,6 @@ func openNodeServiceDB(t *testing.T) *gorm.DB {
|
||||
if err := db.AutoMigrate(&model.Node{}); err != nil {
|
||||
t.Fatalf("migrate: %v", err)
|
||||
}
|
||||
if err := db.AutoMigrate(&model.AgentCommand{}); err != nil {
|
||||
t.Fatalf("migrate agent commands: %v", err)
|
||||
}
|
||||
return db
|
||||
}
|
||||
|
||||
@@ -160,48 +157,3 @@ func TestRotateTokenNotFound(t *testing.T) {
|
||||
t.Fatalf("expected not found error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeServiceListIncludesQueueHealthSummary(t *testing.T) {
|
||||
db := openNodeServiceDB(t)
|
||||
nodeRepo := repository.NewNodeRepository(db)
|
||||
cmdRepo := repository.NewAgentCommandRepository(db)
|
||||
svc := NewNodeService(nodeRepo, "test")
|
||||
svc.SetAgentCommandRepository(cmdRepo)
|
||||
ctx := context.Background()
|
||||
node := &model.Node{
|
||||
Name: "edge-a",
|
||||
Token: "edge-token",
|
||||
Status: model.NodeStatusOnline,
|
||||
IsLocal: false,
|
||||
LastSeen: time.Now().UTC(),
|
||||
}
|
||||
if err := nodeRepo.Create(ctx, node); err != nil {
|
||||
t.Fatalf("Create node returned error: %v", err)
|
||||
}
|
||||
old := time.Now().UTC().Add(-time.Minute)
|
||||
if err := cmdRepo.Create(ctx, &model.AgentCommand{NodeID: node.ID, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusPending, CreatedAt: old}); err != nil {
|
||||
t.Fatalf("Create pending command returned error: %v", err)
|
||||
}
|
||||
completedAt := time.Now().UTC()
|
||||
if err := cmdRepo.Create(ctx, &model.AgentCommand{NodeID: node.ID, Type: model.AgentCommandTypeRunTask, Status: model.AgentCommandStatusTimeout, ErrorMessage: "agent timeout", CompletedAt: &completedAt}); err != nil {
|
||||
t.Fatalf("Create timeout command returned error: %v", err)
|
||||
}
|
||||
|
||||
items, err := svc.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("List returned error: %v", err)
|
||||
}
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("expected one node, got %#v", items)
|
||||
}
|
||||
got := items[0]
|
||||
if got.Queue.Pending != 1 || got.Queue.Depth != 1 || got.Queue.Timeouts != 1 {
|
||||
t.Fatalf("unexpected queue summary: %#v", got.Queue)
|
||||
}
|
||||
if got.Health != "degraded" || got.LastError != "agent timeout" {
|
||||
t.Fatalf("expected terminal command errors to degrade healthy node, got %#v", got)
|
||||
}
|
||||
if got.Queue.OldestActiveAt == nil || got.Queue.OldestActiveAgeS <= 0 {
|
||||
t.Fatalf("expected oldest active metadata, got %#v", got.Queue)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,11 +141,10 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
|
||||
}
|
||||
|
||||
startedAt := s.now()
|
||||
restoreNodeID := s.resolveRestoreNodeID(record, task)
|
||||
restore := &model.RestoreRecord{
|
||||
BackupRecordID: backupRecordID,
|
||||
TaskID: record.TaskID,
|
||||
NodeID: restoreNodeID,
|
||||
NodeID: task.NodeID,
|
||||
Status: model.RestoreRecordStatusRunning,
|
||||
StartedAt: startedAt,
|
||||
TriggeredBy: strings.TrimSpace(triggeredBy),
|
||||
@@ -155,7 +154,7 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
|
||||
}
|
||||
|
||||
// 远程节点路由
|
||||
if remoteNode := s.resolveRemoteNode(ctx, restoreNodeID); remoteNode != nil {
|
||||
if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
|
||||
if s.dispatcher == nil {
|
||||
return nil, apperror.Internal("RESTORE_DISPATCH_UNAVAILABLE", "Agent 下发通道未就绪", nil)
|
||||
}
|
||||
@@ -167,14 +166,14 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
|
||||
s.logHub.Complete(restore.ID, model.RestoreRecordStatusFailed)
|
||||
return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
|
||||
}
|
||||
if _, dispatchErr := s.dispatcher.EnqueueCommand(ctx, restoreNodeID, model.AgentCommandTypeRestoreRecord, map[string]any{
|
||||
if _, dispatchErr := s.dispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRestoreRecord, map[string]any{
|
||||
"restoreRecordId": restore.ID,
|
||||
}); dispatchErr != nil {
|
||||
_ = s.finalize(ctx, restore.ID, model.RestoreRecordStatusFailed,
|
||||
"下发恢复任务到远程节点失败: "+dispatchErr.Error())
|
||||
return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发恢复任务到远程节点", dispatchErr)
|
||||
}
|
||||
s.logHub.Append(restore.ID, "info", fmt.Sprintf("已下发恢复任务到节点 %s(#%d),等待 Agent 执行", remoteNode.Name, restoreNodeID))
|
||||
s.logHub.Append(restore.ID, "info", fmt.Sprintf("已下发恢复任务到节点 %s(#%d),等待 Agent 执行", remoteNode.Name, task.NodeID))
|
||||
return s.getDetail(ctx, restore.ID)
|
||||
}
|
||||
|
||||
@@ -186,16 +185,6 @@ func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, trigger
|
||||
return s.getDetail(ctx, restore.ID)
|
||||
}
|
||||
|
||||
func (s *RestoreService) resolveRestoreNodeID(record *model.BackupRecord, task *model.BackupTask) uint {
|
||||
if record != nil && record.NodeID != 0 {
|
||||
return record.NodeID
|
||||
}
|
||||
if task != nil {
|
||||
return task.NodeID
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// isRemoteNode 判断 NodeID 是否指向有效的远程节点。
|
||||
func (s *RestoreService) isRemoteNode(ctx context.Context, nodeID uint) bool {
|
||||
return s.resolveRemoteNode(ctx, nodeID) != nil
|
||||
@@ -640,9 +629,6 @@ func (s *RestoreService) UpdateAgentRestore(ctx context.Context, node *model.Nod
|
||||
if restore.NodeID != node.ID {
|
||||
return apperror.Unauthorized("RESTORE_RECORD_FORBIDDEN", "恢复记录不属于当前节点", nil)
|
||||
}
|
||||
if isRestoreRecordTerminal(restore.Status) {
|
||||
return nil
|
||||
}
|
||||
// 追加日志到 LogHub + DB
|
||||
if strings.TrimSpace(update.LogAppend) != "" {
|
||||
for _, line := range strings.Split(update.LogAppend, "\n") {
|
||||
@@ -681,10 +667,6 @@ func (s *RestoreService) UpdateAgentRestore(ctx context.Context, node *model.Nod
|
||||
return nil
|
||||
}
|
||||
|
||||
func isRestoreRecordTerminal(status string) bool {
|
||||
return status == model.RestoreRecordStatusSuccess || status == model.RestoreRecordStatusFailed
|
||||
}
|
||||
|
||||
// --- 内部辅助 ---
|
||||
|
||||
func (s *RestoreService) getDetail(ctx context.Context, restoreID uint) (*RestoreRecordDetail, error) {
|
||||
|
||||
@@ -51,15 +51,15 @@ func (f *fakeDispatcher) snapshot() []dispatcherCall {
|
||||
}
|
||||
|
||||
type restoreTestHarness struct {
|
||||
service *RestoreService
|
||||
execution *BackupExecutionService
|
||||
records repository.BackupRecordRepository
|
||||
restores repository.RestoreRecordRepository
|
||||
tasks repository.BackupTaskRepository
|
||||
nodes repository.NodeRepository
|
||||
dispatcher *fakeDispatcher
|
||||
sourceDir string
|
||||
storageDir string
|
||||
service *RestoreService
|
||||
execution *BackupExecutionService
|
||||
records repository.BackupRecordRepository
|
||||
restores repository.RestoreRecordRepository
|
||||
tasks repository.BackupTaskRepository
|
||||
nodes repository.NodeRepository
|
||||
dispatcher *fakeDispatcher
|
||||
sourceDir string
|
||||
storageDir string
|
||||
}
|
||||
|
||||
func newRestoreTestHarness(t *testing.T, remoteNode bool) *restoreTestHarness {
|
||||
@@ -228,179 +228,6 @@ func TestRestoreServiceStart_RemoteNodeEnqueuesCommand(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestoreServiceStart_UsesBackupRecordNodeForPooledTask(t *testing.T) {
|
||||
h := newRestoreTestHarness(t, true)
|
||||
ctx := context.Background()
|
||||
|
||||
task, err := h.tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task: %v", err)
|
||||
}
|
||||
remoteNodeID := task.NodeID
|
||||
task.NodeID = 0
|
||||
task.NodePoolTag = "db"
|
||||
if err := h.tasks.Update(ctx, task); err != nil {
|
||||
t.Fatalf("Update task: %v", err)
|
||||
}
|
||||
storedTask, err := h.tasks.FindByID(ctx, task.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID stored task: %v", err)
|
||||
}
|
||||
if storedTask.NodeID != 0 {
|
||||
t.Fatalf("expected stored task NodeID to be reset to 0, got %d", storedTask.NodeID)
|
||||
}
|
||||
|
||||
startedAt := time.Now().UTC()
|
||||
completedAt := startedAt.Add(time.Second)
|
||||
backupRecord := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: remoteNodeID,
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "pooled.tar.gz",
|
||||
StoragePath: "file/2026/05/09/pooled.tar.gz",
|
||||
StartedAt: startedAt,
|
||||
CompletedAt: &completedAt,
|
||||
}
|
||||
if err := h.records.Create(ctx, backupRecord); err != nil {
|
||||
t.Fatalf("Create backup record: %v", err)
|
||||
}
|
||||
|
||||
detail, err := h.service.Start(ctx, backupRecord.ID, "tester-pool")
|
||||
if err != nil {
|
||||
t.Fatalf("Start: %v", err)
|
||||
}
|
||||
if detail.NodeID != remoteNodeID {
|
||||
t.Fatalf("expected restore node %d, got %d", remoteNodeID, detail.NodeID)
|
||||
}
|
||||
calls := h.dispatcher.snapshot()
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected exactly 1 dispatcher call, got %d", len(calls))
|
||||
}
|
||||
if calls[0].NodeID != remoteNodeID {
|
||||
t.Fatalf("expected dispatch to node %d, got %d", remoteNodeID, calls[0].NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestoreServiceAgentRestoreAccessUsesRestoreRecordNode(t *testing.T) {
|
||||
h := newRestoreTestHarness(t, true)
|
||||
ctx := context.Background()
|
||||
|
||||
task, err := h.tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task: %v", err)
|
||||
}
|
||||
owner, err := h.nodes.FindByID(ctx, task.NodeID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID owner node: %v", err)
|
||||
}
|
||||
other := &model.Node{Name: "edge-2", Token: "other-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
|
||||
if err := h.nodes.Create(ctx, other); err != nil {
|
||||
t.Fatalf("Create other node: %v", err)
|
||||
}
|
||||
startedAt := time.Now().UTC()
|
||||
completedAt := startedAt.Add(time.Second)
|
||||
backupRecord := &model.BackupRecord{
|
||||
TaskID: task.ID,
|
||||
StorageTargetID: task.StorageTargetID,
|
||||
NodeID: owner.ID,
|
||||
Status: model.BackupRecordStatusSuccess,
|
||||
FileName: "remote.tar.gz",
|
||||
StoragePath: "file/2026/05/09/remote.tar.gz",
|
||||
StartedAt: startedAt,
|
||||
CompletedAt: &completedAt,
|
||||
}
|
||||
if err := h.records.Create(ctx, backupRecord); err != nil {
|
||||
t.Fatalf("Create backup record: %v", err)
|
||||
}
|
||||
restore := &model.RestoreRecord{
|
||||
BackupRecordID: backupRecord.ID,
|
||||
TaskID: task.ID,
|
||||
NodeID: owner.ID,
|
||||
Status: model.RestoreRecordStatusRunning,
|
||||
StartedAt: startedAt,
|
||||
TriggeredBy: "agent-test",
|
||||
}
|
||||
if err := h.restores.Create(ctx, restore); err != nil {
|
||||
t.Fatalf("Create restore record: %v", err)
|
||||
}
|
||||
|
||||
spec, err := h.service.GetAgentRestoreSpec(ctx, owner, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("owner GetAgentRestoreSpec returned error: %v", err)
|
||||
}
|
||||
if spec.RestoreRecordID != restore.ID || spec.StoragePath != backupRecord.StoragePath {
|
||||
t.Fatalf("unexpected restore spec: %#v", spec)
|
||||
}
|
||||
if _, err := h.service.GetAgentRestoreSpec(ctx, other, restore.ID); err == nil {
|
||||
t.Fatal("expected non-owner node to be forbidden from restore spec")
|
||||
}
|
||||
if err := h.service.UpdateAgentRestore(ctx, owner, restore.ID, AgentRestoreUpdate{
|
||||
Status: model.RestoreRecordStatusSuccess,
|
||||
LogAppend: "done\n",
|
||||
}); err != nil {
|
||||
t.Fatalf("owner UpdateAgentRestore returned error: %v", err)
|
||||
}
|
||||
updated, err := h.restores.FindByID(ctx, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID restore returned error: %v", err)
|
||||
}
|
||||
if updated.Status != model.RestoreRecordStatusSuccess || updated.NodeID != owner.ID {
|
||||
t.Fatalf("unexpected updated restore record: %#v", updated)
|
||||
}
|
||||
if err := h.service.UpdateAgentRestore(ctx, other, restore.ID, AgentRestoreUpdate{LogAppend: "bad\n"}); err == nil {
|
||||
t.Fatal("expected non-owner node to be forbidden from restore update")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestoreServiceUpdateAgentRestoreDoesNotOverwriteTerminalRecord(t *testing.T) {
|
||||
h := newRestoreTestHarness(t, true)
|
||||
ctx := context.Background()
|
||||
|
||||
task, err := h.tasks.FindByID(ctx, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID task: %v", err)
|
||||
}
|
||||
owner, err := h.nodes.FindByID(ctx, task.NodeID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID owner node: %v", err)
|
||||
}
|
||||
startedAt := time.Now().UTC().Add(-time.Hour)
|
||||
completedAt := time.Now().UTC().Add(-time.Minute)
|
||||
restore := &model.RestoreRecord{
|
||||
BackupRecordID: 1,
|
||||
TaskID: task.ID,
|
||||
NodeID: owner.ID,
|
||||
Status: model.RestoreRecordStatusFailed,
|
||||
ErrorMessage: "timeout",
|
||||
StartedAt: startedAt,
|
||||
CompletedAt: &completedAt,
|
||||
TriggeredBy: "agent-test",
|
||||
}
|
||||
if err := h.restores.Create(ctx, restore); err != nil {
|
||||
t.Fatalf("Create restore record: %v", err)
|
||||
}
|
||||
|
||||
if err := h.service.UpdateAgentRestore(ctx, owner, restore.ID, AgentRestoreUpdate{
|
||||
Status: model.RestoreRecordStatusSuccess,
|
||||
ErrorMessage: "late success",
|
||||
LogAppend: "late log\n",
|
||||
}); err != nil {
|
||||
t.Fatalf("UpdateAgentRestore returned error: %v", err)
|
||||
}
|
||||
|
||||
updated, err := h.restores.FindByID(ctx, restore.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindByID restore returned error: %v", err)
|
||||
}
|
||||
if updated.Status != model.RestoreRecordStatusFailed {
|
||||
t.Fatalf("expected terminal restore status to remain failed, got %#v", updated)
|
||||
}
|
||||
if updated.ErrorMessage != "timeout" {
|
||||
t.Fatalf("expected terminal restore error to remain unchanged, got %q", updated.ErrorMessage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestoreServiceStart_FailsOnNonSuccessBackup(t *testing.T) {
|
||||
h := newRestoreTestHarness(t, false)
|
||||
ctx := context.Background()
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
import React, { useEffect, useRef, useState } from 'react'
|
||||
import { Modal, Steps, Button, Space, Message, Spin } from '@arco-design/web-react'
|
||||
import { Modal, Steps, Button, Space, Message, Spin, Progress } from '@arco-design/web-react'
|
||||
import { Step1NodeName, type Mode } from './wizard/Step1NodeName'
|
||||
import { Step2DeployOptions, type DeployOptions } from './wizard/Step2DeployOptions'
|
||||
import { Step3CommandPreview } from './wizard/Step3CommandPreview'
|
||||
import { BatchCommandTable, type BatchCommandRow } from './BatchCommandTable'
|
||||
import { batchCreateNodes, createInstallToken } from '../../services/nodes'
|
||||
import type { InstallTokenResult } from '../../types/nodes'
|
||||
import { useAgentDeployFlow, type AgentDeployRow } from './useAgentDeployFlow'
|
||||
import { buildAgentInstallCommand } from './installCommands'
|
||||
|
||||
const Step = Steps.Step
|
||||
|
||||
@@ -24,7 +25,9 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
const [mode, setMode] = useState<Mode>('single')
|
||||
const [singleName, setSingleName] = useState('')
|
||||
const [batchText, setBatchText] = useState('')
|
||||
const deployFlow = useAgentDeployFlow()
|
||||
|
||||
// 批量进度(已生成 / 总数)
|
||||
const [batchProgress, setBatchProgress] = useState<{ done: number; total: number } | null>(null)
|
||||
|
||||
const [deploy, setDeploy] = useState<DeployOptions>({
|
||||
mode: 'systemd',
|
||||
@@ -63,6 +66,7 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
setSingleToken(null)
|
||||
setSingleNodeInfo(null)
|
||||
setBatchRows([])
|
||||
setBatchProgress(null)
|
||||
}
|
||||
|
||||
const handleClose = () => {
|
||||
@@ -98,21 +102,71 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
Message.warning('请填写 Agent 版本号(形如 v1.7.0)')
|
||||
return
|
||||
}
|
||||
// 步骤 1 的批次内去重在前端先提示一次,再由后端最终校验
|
||||
if (mode === 'batch' && !fixedNode) {
|
||||
const names = parseBatchNames()
|
||||
const seen = new Set<string>()
|
||||
const dups: string[] = []
|
||||
for (const n of names) {
|
||||
if (seen.has(n)) dups.push(n)
|
||||
seen.add(n)
|
||||
}
|
||||
if (dups.length > 0) {
|
||||
Message.warning(`批次内有重复节点名:${Array.from(new Set(dups)).join(', ')}`)
|
||||
return
|
||||
}
|
||||
}
|
||||
setSubmitting(true)
|
||||
try {
|
||||
if (fixedNode) {
|
||||
const result = await deployFlow.submitExistingNode(fixedNode, deploy)
|
||||
applySingleOrTableResult(result.rows, fixedNode)
|
||||
const tok = await createInstallToken(fixedNode.id, {
|
||||
mode: deploy.mode,
|
||||
arch: deploy.arch,
|
||||
agentVersion: deploy.agentVersion,
|
||||
downloadSrc: deploy.downloadSrc,
|
||||
ttlSeconds: deploy.ttlSeconds,
|
||||
})
|
||||
setSingleNodeInfo(fixedNode)
|
||||
setSingleToken(tok)
|
||||
} else if (mode === 'single') {
|
||||
const result = await deployFlow.submitNewNodes([singleName.trim()], deploy)
|
||||
applySingleOrTableResult(result.rows)
|
||||
const created = await batchCreateNodes([singleName.trim()])
|
||||
const one = created[0]
|
||||
const tok = await createInstallToken(one.id, {
|
||||
mode: deploy.mode,
|
||||
arch: deploy.arch,
|
||||
agentVersion: deploy.agentVersion,
|
||||
downloadSrc: deploy.downloadSrc,
|
||||
ttlSeconds: deploy.ttlSeconds,
|
||||
})
|
||||
setSingleNodeInfo({ id: one.id, name: one.name })
|
||||
setSingleToken(tok)
|
||||
} else {
|
||||
const names = parseBatchNames()
|
||||
const result = await deployFlow.submitNewNodes(names, deploy)
|
||||
if (mountedRef.current) setBatchRows(toBatchRows(result.rows))
|
||||
if (result.status === 'partialFailed') {
|
||||
Message.warning('部分节点安装命令生成失败,可在结果表中查看')
|
||||
}
|
||||
const created = await batchCreateNodes(names)
|
||||
setBatchProgress({ done: 0, total: created.length })
|
||||
// 并发生成 install token(Promise.all),每完成一个递增 done 计数
|
||||
let done = 0
|
||||
const tokens = await Promise.all(
|
||||
created.map(async (c) => {
|
||||
const tok = await createInstallToken(c.id, {
|
||||
mode: deploy.mode,
|
||||
arch: deploy.arch,
|
||||
agentVersion: deploy.agentVersion,
|
||||
downloadSrc: deploy.downloadSrc,
|
||||
ttlSeconds: deploy.ttlSeconds,
|
||||
})
|
||||
done += 1
|
||||
if (mountedRef.current) setBatchProgress({ done, total: created.length })
|
||||
return { c, tok }
|
||||
}),
|
||||
)
|
||||
const rows: BatchCommandRow[] = tokens.map(({ c, tok }) => ({
|
||||
nodeId: c.id,
|
||||
nodeName: c.name,
|
||||
command: buildAgentInstallCommand(tok.url, tok.fallbackUrl, tok.scriptBase64),
|
||||
expiresAt: tok.expiresAt,
|
||||
}))
|
||||
if (mountedRef.current) setBatchRows(rows)
|
||||
}
|
||||
setStep(2)
|
||||
onSuccess()
|
||||
@@ -127,12 +181,14 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
if (!singleNodeInfo) return
|
||||
setSubmitting(true)
|
||||
try {
|
||||
const row = await deployFlow.regenerateNode(singleNodeInfo, deploy)
|
||||
if (row.status === 'ready' && row.installToken) {
|
||||
setSingleToken(row.installToken)
|
||||
} else {
|
||||
Message.error(row.errorMessage || '重新生成失败')
|
||||
}
|
||||
const tok = await createInstallToken(singleNodeInfo.id, {
|
||||
mode: deploy.mode,
|
||||
arch: deploy.arch,
|
||||
agentVersion: deploy.agentVersion,
|
||||
downloadSrc: deploy.downloadSrc,
|
||||
ttlSeconds: deploy.ttlSeconds,
|
||||
})
|
||||
setSingleToken(tok)
|
||||
} catch (e: any) {
|
||||
Message.error(e?.message || '重新生成失败')
|
||||
} finally {
|
||||
@@ -140,25 +196,6 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
}
|
||||
}
|
||||
|
||||
// Regenerates the install command for a single row of the batch table and
// swaps the refreshed row in place. The submitting flag is raised for the
// duration of the request and always cleared afterwards.
const retryBatchNode = async (row: BatchCommandRow) => {
  setSubmitting(true)
  try {
    const target = { id: row.nodeId, name: row.nodeName }
    const next = await deployFlow.regenerateNode(target, deploy)
    setBatchRows((rows) =>
      rows.map((item) => {
        if (item.nodeId !== row.nodeId) return item
        return toBatchRows([next])[0]
      }),
    )
    if (next.status !== 'ready') {
      Message.error(next.errorMessage || '重试失败')
    } else {
      Message.success(`节点「${row.nodeName}」安装命令已重新生成`)
    }
  } catch (e: any) {
    Message.error(e?.message || '重试失败')
  } finally {
    setSubmitting(false)
  }
}
|
||||
|
||||
const previewParams = {
|
||||
mode: deploy.mode,
|
||||
arch: deploy.arch,
|
||||
@@ -188,6 +225,17 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
{submitting && (
|
||||
<div style={{ textAlign: 'center', padding: 32 }}>
|
||||
<Spin />
|
||||
{batchProgress && (
|
||||
<div style={{ marginTop: 16, maxWidth: 360, marginLeft: 'auto', marginRight: 'auto' }}>
|
||||
<div style={{ fontSize: 13, marginBottom: 6 }}>
|
||||
正在生成安装命令 {batchProgress.done} / {batchProgress.total}
|
||||
</div>
|
||||
<Progress
|
||||
percent={Math.round((batchProgress.done / batchProgress.total) * 100)}
|
||||
showText
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -241,7 +289,7 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
onRegenerate={regenerateSingle}
|
||||
/>
|
||||
)}
|
||||
{batchRows.length > 0 && <BatchCommandTable rows={batchRows} onRetryNode={retryBatchNode} />}
|
||||
{batchRows.length > 0 && <BatchCommandTable rows={batchRows} />}
|
||||
<div style={{ marginTop: 24, textAlign: 'right' }}>
|
||||
<Button type="primary" onClick={handleClose}>
|
||||
完成
|
||||
@@ -251,31 +299,4 @@ export function AgentInstallWizard({ visible, onClose, onSuccess, masterVersion,
|
||||
)}
|
||||
</Modal>
|
||||
)
|
||||
|
||||
// Applies the outcome of a one-node deploy request to the wizard state.
// Only the first row is inspected. A ready row that carries an install token
// populates the single-node view and clears the batch table; any other row
// clears the single-node view, shows all rows in the batch table and reports
// the row's error message. An empty result leaves the state untouched.
function applySingleOrTableResult(rows: AgentDeployRow[], fallbackNode?: { id: number; name: string }) {
  const [first] = rows
  if (!first) return

  if (!(first.status === 'ready' && first.installToken)) {
    setSingleNodeInfo(null)
    setSingleToken(null)
    setBatchRows(toBatchRows(rows))
    Message.error(first.errorMessage || '安装命令生成失败')
    return
  }

  // Fall back to the caller-provided node when the row lacks an id or a name.
  const id = first.nodeId || fallbackNode?.id || 0
  const name = first.nodeName || fallbackNode?.name || ''
  setSingleNodeInfo({ id, name })
  setSingleToken(first.installToken)
  setBatchRows([])
}
|
||||
}
|
||||
|
||||
// Projects deploy-flow rows onto the subset of fields the batch command table
// renders; any other property on the input rows is dropped.
function toBatchRows(rows: AgentDeployRow[]): BatchCommandRow[] {
  return rows.map(
    ({ nodeId, nodeName, status, command, expiresAt, errorMessage, embeddedCommand }) => ({
      nodeId,
      nodeName,
      status,
      command,
      expiresAt,
      errorMessage,
      embeddedCommand,
    }),
  )
}
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import type { BatchCommandRow } from './BatchCommandTable'
|
||||
import { getExportableBatchRows } from './BatchCommandTable'
|
||||
|
||||
// Builds a BatchCommandRow fixture: a ready row with a far-future expiry,
// with any fields in `patch` overriding the defaults.
function row(patch: Partial<BatchCommandRow>): BatchCommandRow {
  const defaults: BatchCommandRow = {
    nodeId: 1,
    nodeName: 'prod-a',
    status: 'ready',
    command: 'curl install',
    expiresAt: '2099-01-01T00:00:00Z',
  }
  return { ...defaults, ...patch }
}
|
||||
|
||||
describe('getExportableBatchRows', () => {
  // With the clock pinned to 2026-05-09T00:00:00Z, only the row that is both
  // ready and not yet expired is expected in the export.
  it('excludes failed and expired commands from batch export', () => {
    vi.useFakeTimers()
    try {
      vi.setSystemTime(new Date('2026-05-09T00:00:00Z'))
      const rows = [
        row({ nodeId: 1, nodeName: 'ready', expiresAt: '2026-05-09T00:05:00Z' }),
        row({ nodeId: 2, nodeName: 'failed', status: 'failed', errorMessage: 'failed' }),
        row({ nodeId: 3, nodeName: 'expired', expiresAt: '2026-05-08T23:59:59Z' }),
      ]

      expect(getExportableBatchRows(rows).map((item) => item.nodeName)).toEqual(['ready'])
    } finally {
      // Restore the real clock even when the assertion throws, so the fake
      // timers cannot leak into tests that run afterwards.
      vi.useRealTimers()
    }
  })
})
|
||||
@@ -1,32 +1,29 @@
|
||||
import React, { useEffect, useState } from 'react'
|
||||
import { Table, Button, Space, Message, Typography, Tag } from '@arco-design/web-react'
|
||||
import { IconCopy, IconDownload, IconRefresh } from '@arco-design/web-react/icon'
|
||||
import { Table, Button, Space, Message, Typography } from '@arco-design/web-react'
|
||||
import { IconCopy, IconDownload } from '@arco-design/web-react/icon'
|
||||
|
||||
const { Text } = Typography
|
||||
|
||||
export interface BatchCommandRow {
|
||||
nodeId: number
|
||||
nodeName: string
|
||||
status: 'ready' | 'failed'
|
||||
command: string
|
||||
expiresAt: string
|
||||
errorMessage?: string
|
||||
embeddedCommand?: string
|
||||
}
|
||||
|
||||
interface Props {
|
||||
rows: BatchCommandRow[]
|
||||
onRetryNode?: (row: BatchCommandRow) => void
|
||||
}
|
||||
|
||||
export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
export function BatchCommandTable({ rows }: Props) {
|
||||
const [remaining, setRemaining] = useState<Record<number, number>>({})
|
||||
|
||||
useEffect(() => {
|
||||
const tick = () => {
|
||||
const next: Record<number, number> = {}
|
||||
rows.forEach((r) => {
|
||||
next[r.nodeId] = secondsLeft(r.expiresAt)
|
||||
const exp = new Date(r.expiresAt).getTime()
|
||||
next[r.nodeId] = Math.max(0, Math.floor((exp - Date.now()) / 1000))
|
||||
})
|
||||
setRemaining(next)
|
||||
}
|
||||
@@ -41,13 +38,12 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
}
|
||||
|
||||
const exportAll = () => {
|
||||
const exportRows = getExportableBatchRows(rows)
|
||||
const content = [
|
||||
'#!/bin/sh',
|
||||
'# BackupX Agent 批量部署脚本',
|
||||
'# 使用方法:在目标机逐个执行下面对应节点命令',
|
||||
'',
|
||||
...exportRows.map((r) => `# --- ${r.nodeName} ---\n${r.command}`),
|
||||
...rows.map((r) => `# --- ${r.nodeName} ---\n${r.command}`),
|
||||
].join('\n\n')
|
||||
const blob = new Blob([content], { type: 'text/x-shellscript' })
|
||||
const url = URL.createObjectURL(blob)
|
||||
@@ -65,20 +61,11 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
pagination={false}
|
||||
columns={[
|
||||
{ title: '节点', dataIndex: 'nodeName', width: 140 },
|
||||
{
|
||||
title: '状态', dataIndex: 'status', width: 90,
|
||||
render: (status: BatchCommandRow['status']) => (
|
||||
status === 'ready' ? <Tag color="green">可执行</Tag> : <Tag color="red">失败</Tag>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: '安装命令',
|
||||
dataIndex: 'command',
|
||||
render: (cmd: unknown, row: BatchCommandRow) => {
|
||||
const left = remaining[row.nodeId] ?? 0
|
||||
if (row.status === 'failed') {
|
||||
return <Text type="error" style={{ fontSize: 12 }}>{row.errorMessage || '生成安装命令失败'}</Text>
|
||||
}
|
||||
return (
|
||||
<Text style={{
|
||||
fontFamily: 'monospace', fontSize: 12, wordBreak: 'break-all',
|
||||
@@ -93,9 +80,6 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
title: '剩余', dataIndex: 'expiresAt', width: 90,
|
||||
render: (_v: unknown, row: BatchCommandRow) => {
|
||||
const left = remaining[row.nodeId] ?? 0
|
||||
if (row.status === 'failed') {
|
||||
return <Text type="secondary" style={{ fontSize: 12 }}>-</Text>
|
||||
}
|
||||
return (
|
||||
<Text type={left === 0 ? 'secondary' : 'primary'} style={{ fontSize: 12 }}>
|
||||
{left === 0 ? '已过期' : `${Math.floor(left / 60)}:${String(left % 60).padStart(2, '0')}`}
|
||||
@@ -104,17 +88,10 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
},
|
||||
},
|
||||
{
|
||||
title: '操作', width: 110,
|
||||
title: '操作', width: 80,
|
||||
render: (_v: unknown, row: BatchCommandRow) => (
|
||||
<Space>
|
||||
{row.status === 'ready' && (
|
||||
<Button size="small" icon={<IconCopy />} onClick={() => copy(row.command)}
|
||||
disabled={(remaining[row.nodeId] ?? 0) === 0}>复制</Button>
|
||||
)}
|
||||
{row.status === 'failed' && onRetryNode && (
|
||||
<Button size="small" icon={<IconRefresh />} onClick={() => onRetryNode(row)}>重试</Button>
|
||||
)}
|
||||
</Space>
|
||||
<Button size="small" icon={<IconCopy />} onClick={() => copy(row.command)}
|
||||
disabled={(remaining[row.nodeId] ?? 0) === 0}>复制</Button>
|
||||
),
|
||||
},
|
||||
]}
|
||||
@@ -123,22 +100,9 @@ export function BatchCommandTable({ rows, onRetryNode }: Props) {
|
||||
/>
|
||||
<div style={{ marginTop: 12, textAlign: 'right' }}>
|
||||
<Space>
|
||||
<Button icon={<IconDownload />} onClick={exportAll}
|
||||
disabled={getExportableBatchRows(rows).length === 0}>导出 .sh</Button>
|
||||
<Button icon={<IconDownload />} onClick={exportAll}>导出 .sh</Button>
|
||||
</Space>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Whole seconds remaining until `expiresAt`, never negative.
 *
 * @param expiresAt Timestamp string parsed with `new Date(...)`.
 * @returns 0 when the value is empty, unparseable, or already in the past;
 *          otherwise the floored number of seconds from now until expiry.
 */
function secondsLeft(expiresAt: string) {
  if (!expiresAt) {
    return 0
  }
  const expiresAtMs = new Date(expiresAt).getTime()
  // An unparseable timestamp yields NaN, and Math.max(0, NaN) is NaN, which
  // would surface as "NaN:NaN" in the countdown. Treat it as already expired.
  if (Number.isNaN(expiresAtMs)) {
    return 0
  }
  return Math.max(0, Math.floor((expiresAtMs - Date.now()) / 1000))
}
|
||||
|
||||
/**
 * Selects the rows whose command may be exported: status is `ready` and the
 * command still has time left before `expiresAt`.
 *
 * @param rows All rows shown in the batch table.
 * @returns The exportable subset, in the original order.
 */
export function getExportableBatchRows(rows: BatchCommandRow[]) {
  const isExportable = (candidate: BatchCommandRow) => {
    if (candidate.status !== 'ready') {
      return false
    }
    return secondsLeft(candidate.expiresAt) > 0
  }
  return rows.filter(isExportable)
}
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import type { UserInfo } from '../../services/auth'
|
||||
import { canManageNodes, formatQueueAge, getNodeHealthView } from './NodesPage'
|
||||
import type { NodeSummary } from '../../types/nodes'
|
||||
|
||||
/**
 * Test fixture: a user whose username and display name both mirror the role.
 *
 * @param role Role string assigned to the user.
 */
function user(role: string): UserInfo {
  const fixture: UserInfo = {
    id: 1,
    username: role,
    displayName: role,
    role,
  }
  return fixture
}
|
||||
|
||||
describe('canManageNodes', () => {
|
||||
it('allows only admins to manage deployment operations', () => {
|
||||
expect(canManageNodes(user('admin'))).toBe(true)
|
||||
expect(canManageNodes(user('operator'))).toBe(false)
|
||||
expect(canManageNodes(user('viewer'))).toBe(false)
|
||||
expect(canManageNodes(null)).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe('node diagnostics helpers', () => {
|
||||
it('formats queue age and health status from backend summaries', () => {
|
||||
const node: NodeSummary = {
|
||||
id: 1,
|
||||
name: 'edge-a',
|
||||
hostname: '',
|
||||
ipAddress: '',
|
||||
status: 'online',
|
||||
isLocal: false,
|
||||
os: 'linux',
|
||||
arch: 'amd64',
|
||||
agentVersion: 'v1',
|
||||
lastSeen: '2026-05-12T00:00:00Z',
|
||||
createdAt: '2026-05-12T00:00:00Z',
|
||||
health: 'degraded',
|
||||
lastError: 'agent timeout',
|
||||
runningTasks: 1,
|
||||
queue: {
|
||||
pending: 2,
|
||||
dispatched: 1,
|
||||
depth: 3,
|
||||
timeouts: 1,
|
||||
oldestActiveAgeSeconds: 125,
|
||||
},
|
||||
}
|
||||
|
||||
expect(formatQueueAge(node.queue?.oldestActiveAgeSeconds)).toBe('2m')
|
||||
expect(getNodeHealthView(node)).toEqual({
|
||||
text: '异常',
|
||||
badgeStatus: 'warning',
|
||||
tagColor: 'orangered',
|
||||
tooltip: 'agent timeout',
|
||||
})
|
||||
})
|
||||
|
||||
it('treats offline nodes as offline even without queue errors', () => {
|
||||
const node = {
|
||||
id: 2,
|
||||
name: 'edge-b',
|
||||
hostname: '',
|
||||
ipAddress: '',
|
||||
status: 'offline',
|
||||
isLocal: false,
|
||||
os: '',
|
||||
arch: '',
|
||||
agentVersion: '',
|
||||
lastSeen: '',
|
||||
createdAt: '',
|
||||
} satisfies NodeSummary
|
||||
|
||||
expect(formatQueueAge(0)).toBe('-')
|
||||
expect(getNodeHealthView(node).text).toBe('离线')
|
||||
})
|
||||
})
|
||||
@@ -10,43 +10,12 @@ import type { NodeSummary } from '../../types/nodes'
|
||||
import { listNodes, deleteNode, updateNode, rotateNodeToken } from '../../services/nodes'
|
||||
import { fetchSystemInfo } from '../../services/system'
|
||||
import { AgentInstallWizard } from './AgentInstallWizard'
|
||||
import { useAuthStore } from '../../stores/auth'
|
||||
import { isAdmin } from '../../utils/permissions'
|
||||
import type { UserInfo } from '../../services/auth'
|
||||
|
||||
const { Text } = Typography
|
||||
|
||||
/**
 * Whether the given user may perform node management operations.
 * The decision is delegated entirely to `isAdmin`.
 *
 * @param user Current user, or null/undefined when nobody is signed in.
 */
export function canManageNodes(user: UserInfo | null | undefined): boolean {
  const allowed = isAdmin(user)
  return allowed
}
|
||||
|
||||
/**
 * Renders a queue age as a compact label using the largest fitting unit.
 *
 * @param seconds Age in seconds; missing, zero, or negative values mean "no age".
 * @returns '-' for no age, otherwise `<n>s`, `<n>m`, or `<n>h` (minutes and
 *          hours are floored).
 */
export function formatQueueAge(seconds?: number): string {
  if (!seconds || seconds <= 0) {
    return '-'
  }
  if (seconds >= 3600) {
    return `${Math.floor(seconds / 3600)}h`
  }
  if (seconds >= 60) {
    return `${Math.floor(seconds / 60)}m`
  }
  return `${seconds}s`
}
|
||||
|
||||
/**
 * Maps a node summary to the label, badge status, tag colour, and tooltip
 * shown in the health column.
 *
 * Offline is checked first, so a node that is not online is reported as
 * offline even if it also carries an error. A node counts as degraded when its
 * health says so, when its queue has timeouts, or when it has a last error.
 *
 * @param node Node summary to classify.
 */
export function getNodeHealthView(node: NodeSummary) {
  const isOffline = node.status !== 'online' || node.health === 'offline'
  if (isOffline) {
    return { text: '离线', badgeStatus: 'default' as const, tagColor: 'gray', tooltip: '节点未在线' }
  }

  const isDegraded = node.health === 'degraded' || node.queue?.timeouts || node.lastError
  if (!isDegraded) {
    return { text: '健康', badgeStatus: 'success' as const, tagColor: 'green', tooltip: 'Agent 心跳与队列状态正常' }
  }

  // Prefer the concrete error text; fall back to the generic explanation.
  const tooltip = node.lastError || '存在超时或失败的 Agent 命令'
  return {
    text: '异常',
    badgeStatus: 'warning' as const,
    tagColor: 'orangered',
    tooltip,
  }
}
|
||||
|
||||
export default function NodesPage() {
|
||||
const [nodes, setNodes] = useState<NodeSummary[]>([])
|
||||
const [loading, setLoading] = useState(false)
|
||||
const currentUser = useAuthStore((state) => state.user)
|
||||
const manageable = canManageNodes(currentUser)
|
||||
|
||||
const [wizardVisible, setWizardVisible] = useState(false)
|
||||
const [wizardFixedNode, setWizardFixedNode] = useState<{ id: number; name: string } | undefined>()
|
||||
@@ -144,18 +113,10 @@ export default function NodesPage() {
|
||||
),
|
||||
},
|
||||
{
|
||||
title: '健康', dataIndex: 'health', width: 150,
|
||||
render: (_: string, record: NodeSummary) => {
|
||||
const health = getNodeHealthView(record)
|
||||
return (
|
||||
<Tooltip content={health.tooltip}>
|
||||
<Space size={6}>
|
||||
<Badge status={health.badgeStatus} />
|
||||
<Tag color={health.tagColor}>{health.text}</Tag>
|
||||
</Space>
|
||||
</Tooltip>
|
||||
)
|
||||
},
|
||||
title: '状态', dataIndex: 'status', width: 100,
|
||||
render: (status: string) => status === 'online'
|
||||
? <Badge status="success" text="在线" />
|
||||
: <Badge status="default" text="离线" />,
|
||||
},
|
||||
{ title: '主机名', dataIndex: 'hostname', render: (v: string) => v || '-' },
|
||||
{ title: 'IP 地址', dataIndex: 'ipAddress', render: (v: string) => v || '-' },
|
||||
@@ -168,27 +129,6 @@ export default function NodesPage() {
|
||||
title: 'Agent 版本', dataIndex: 'agentVersion', width: 140,
|
||||
render: (v: string) => renderAgentVersion(v, masterVersion),
|
||||
},
|
||||
{
|
||||
title: '队列', dataIndex: 'queue', width: 160,
|
||||
render: (_: unknown, record: NodeSummary) => {
|
||||
const queue = record.queue
|
||||
if (!queue || queue.depth === 0) {
|
||||
return <Text type="secondary">空闲</Text>
|
||||
}
|
||||
return (
|
||||
<Tooltip content={`pending ${queue.pending} / dispatched ${queue.dispatched} / oldest ${formatQueueAge(queue.oldestActiveAgeSeconds)}`}>
|
||||
<Space size={4}>
|
||||
<Tag color="arcoblue">深度 {queue.depth}</Tag>
|
||||
{queue.timeouts > 0 && <Tag color="orangered">超时 {queue.timeouts}</Tag>}
|
||||
</Space>
|
||||
</Tooltip>
|
||||
)
|
||||
},
|
||||
},
|
||||
{
|
||||
title: '运行中', dataIndex: 'runningTasks', width: 90,
|
||||
render: (v: number | undefined) => v && v > 0 ? <Tag color="green">{v}</Tag> : <Text type="secondary">0</Text>,
|
||||
},
|
||||
{
|
||||
title: '标签 / 节点池', dataIndex: 'labels', width: 180,
|
||||
render: (v: string) => {
|
||||
@@ -203,43 +143,38 @@ export default function NodesPage() {
|
||||
},
|
||||
{
|
||||
title: '操作', width: 180,
|
||||
render: (_: unknown, record: NodeSummary) => {
|
||||
if (!manageable) {
|
||||
return <Text type="secondary">-</Text>
|
||||
}
|
||||
return (
|
||||
<Space>
|
||||
<Button type="text" icon={<IconEdit />} size="small"
|
||||
onClick={() => {
|
||||
setEditNode(record); setEditName(record.name)
|
||||
setEditLabels(record.labels || '')
|
||||
setEditMaxConcurrent(record.maxConcurrent || 0)
|
||||
setEditBandwidthLimit(record.bandwidthLimit || '')
|
||||
setEditVisible(true)
|
||||
}} />
|
||||
{!record.isLocal && (
|
||||
<>
|
||||
<Dropdown trigger="click" droplist={(
|
||||
<Menu>
|
||||
<Menu.Item key="install"
|
||||
onClick={() => { setWizardFixedNode({ id: record.id, name: record.name }); setWizardVisible(true) }}>
|
||||
生成安装命令
|
||||
</Menu.Item>
|
||||
<Menu.Item key="rotate" onClick={() => handleRotate(record)}>
|
||||
重新生成 Token
|
||||
</Menu.Item>
|
||||
</Menu>
|
||||
)}>
|
||||
<Button type="text" icon={<IconMore />} size="small" />
|
||||
</Dropdown>
|
||||
<Popconfirm title="确定删除该节点?" onOk={() => handleDelete(record.id)}>
|
||||
<Button type="text" status="danger" icon={<IconDelete />} size="small" />
|
||||
</Popconfirm>
|
||||
</>
|
||||
)}
|
||||
</Space>
|
||||
)
|
||||
},
|
||||
render: (_: unknown, record: NodeSummary) => (
|
||||
<Space>
|
||||
<Button type="text" icon={<IconEdit />} size="small"
|
||||
onClick={() => {
|
||||
setEditNode(record); setEditName(record.name)
|
||||
setEditLabels(record.labels || '')
|
||||
setEditMaxConcurrent(record.maxConcurrent || 0)
|
||||
setEditBandwidthLimit(record.bandwidthLimit || '')
|
||||
setEditVisible(true)
|
||||
}} />
|
||||
{!record.isLocal && (
|
||||
<>
|
||||
<Dropdown trigger="click" droplist={(
|
||||
<Menu>
|
||||
<Menu.Item key="install"
|
||||
onClick={() => { setWizardFixedNode({ id: record.id, name: record.name }); setWizardVisible(true) }}>
|
||||
生成安装命令
|
||||
</Menu.Item>
|
||||
<Menu.Item key="rotate" onClick={() => handleRotate(record)}>
|
||||
重新生成 Token
|
||||
</Menu.Item>
|
||||
</Menu>
|
||||
)}>
|
||||
<Button type="text" icon={<IconMore />} size="small" />
|
||||
</Dropdown>
|
||||
<Popconfirm title="确定删除该节点?" onOk={() => handleDelete(record.id)}>
|
||||
<Button type="text" status="danger" icon={<IconDelete />} size="small" />
|
||||
</Popconfirm>
|
||||
</>
|
||||
)}
|
||||
</Space>
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
@@ -248,12 +183,12 @@ export default function NodesPage() {
|
||||
<PageHeader
|
||||
title="节点管理"
|
||||
subTitle="管理集群中的服务器节点"
|
||||
extra={manageable ? (
|
||||
extra={
|
||||
<Button type="primary" icon={<IconPlus />}
|
||||
onClick={() => { setWizardFixedNode(undefined); setWizardVisible(true) }}>
|
||||
添加节点
|
||||
</Button>
|
||||
) : undefined}
|
||||
}
|
||||
/>
|
||||
|
||||
<Card style={{ marginTop: 16 }}>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { buildAgentDownloadCommand, buildAgentInstallCommand, buildEmbeddedAgentInstallCommand } from './installCommands'
|
||||
import { buildAgentDownloadCommand, buildAgentInstallCommand } from './installCommands'
|
||||
|
||||
describe('install command builders', () => {
|
||||
it('adds script marker validation and fallback install path', () => {
|
||||
@@ -22,24 +22,16 @@ describe('install command builders', () => {
|
||||
expect(cmd).toContain('non-script content')
|
||||
})
|
||||
|
||||
it('keeps URL install command as primary even when embedded script is available', () => {
|
||||
it('prefers embedded script content when available', () => {
|
||||
const cmd = buildAgentInstallCommand(
|
||||
'https://master.example.com/api/install/abc',
|
||||
'https://master.example.com/install/abc',
|
||||
'IyEvYmluL3NoCg==',
|
||||
)
|
||||
|
||||
expect(cmd).toContain('https://master.example.com/api/install/abc')
|
||||
expect(cmd).toContain('https://master.example.com/install/abc')
|
||||
expect(cmd).not.toContain('IyEvYmluL3NoCg==')
|
||||
})
|
||||
|
||||
it('builds embedded fallback command explicitly', () => {
|
||||
const cmd = buildEmbeddedAgentInstallCommand('IyEvYmluL3NoCg==')
|
||||
|
||||
expect(cmd).toContain('base64 -d')
|
||||
expect(cmd).toContain('base64 -D')
|
||||
expect(cmd).toContain('BACKUPX_AGENT_INSTALL_V1')
|
||||
expect(cmd).toContain("'IyEvYmluL3NoCg=='")
|
||||
expect(cmd).not.toContain('https://master.example.com/api/install/abc')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -12,7 +12,19 @@ function runScriptCommand(path: string) {
|
||||
return `if [ "$(id -u)" -eq 0 ]; then sh ${path}; else sudo sh ${path}; fi`
|
||||
}
|
||||
|
||||
export function buildAgentInstallCommand(url: string, fallbackUrl?: string, _scriptBase64?: string) {
|
||||
export function buildAgentInstallCommand(url: string, fallbackUrl?: string, scriptBase64?: string) {
|
||||
if (scriptBase64?.trim()) {
|
||||
const marker = shellQuote(INSTALL_MAGIC_MARKER)
|
||||
return [
|
||||
'enc=$(mktemp)',
|
||||
'tmp=$(mktemp)',
|
||||
`printf %s ${shellQuote(scriptBase64.trim())} > "$enc"`,
|
||||
'(base64 -d < "$enc" > "$tmp" 2>/dev/null || base64 -D < "$enc" > "$tmp")',
|
||||
`{ grep -q ${marker} "$tmp" || { echo 'BackupX embedded installer is invalid.' >&2; head -5 "$tmp" >&2; false; }; }`,
|
||||
runScriptCommand('"$tmp"'),
|
||||
].join(' && ') + '; rc=$?; rm -f "$enc" "$tmp"; test $rc -eq 0'
|
||||
}
|
||||
|
||||
const primary = url.trim()
|
||||
const fallback = (fallbackUrl || legacyInstallUrl(primary)).trim()
|
||||
const urls = fallback && fallback !== primary ? [primary, fallback] : [primary]
|
||||
@@ -29,7 +41,17 @@ export function buildAgentInstallCommand(url: string, fallbackUrl?: string, _scr
|
||||
].join(' && ') + '; rc=$?; rm -f "$tmp"; test $rc -eq 0'
|
||||
}
|
||||
|
||||
export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, _scriptBase64?: string) {
|
||||
export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, scriptBase64?: string) {
|
||||
if (scriptBase64?.trim()) {
|
||||
const marker = shellQuote(INSTALL_MAGIC_MARKER)
|
||||
return [
|
||||
`printf %s ${shellQuote(scriptBase64.trim())} > /tmp/bx-agent-install.b64`,
|
||||
'(base64 -d < /tmp/bx-agent-install.b64 > /tmp/bx-agent-install.sh 2>/dev/null || base64 -D < /tmp/bx-agent-install.b64 > /tmp/bx-agent-install.sh)',
|
||||
`{ grep -q ${marker} /tmp/bx-agent-install.sh || { echo 'BackupX embedded installer is invalid.' >&2; head -5 /tmp/bx-agent-install.sh >&2; false; }; }`,
|
||||
runScriptCommand('/tmp/bx-agent-install.sh'),
|
||||
].join(' && ')
|
||||
}
|
||||
|
||||
const primary = url.trim()
|
||||
const fallback = (fallbackUrl || legacyInstallUrl(primary)).trim()
|
||||
const marker = shellQuote(INSTALL_MAGIC_MARKER)
|
||||
@@ -43,15 +65,3 @@ export function buildAgentDownloadCommand(url: string, fallbackUrl?: string, _sc
|
||||
runScriptCommand('/tmp/bx-agent-install.sh'),
|
||||
].join(' && ')
|
||||
}
|
||||
|
||||
/**
 * Builds a one-line shell command that installs the agent from an installer
 * script embedded as base64, without downloading anything.
 *
 * The command writes the payload to a temp file, decodes it (tries
 * `base64 -d`, then `base64 -D`), checks the decoded script for the install
 * marker, runs it, and finally removes both temp files while preserving the
 * exit status of the preceding steps.
 *
 * @param scriptBase64 Base64-encoded installer script; surrounding whitespace is trimmed.
 * @returns The shell command string.
 */
export function buildEmbeddedAgentInstallCommand(scriptBase64: string) {
  const marker = shellQuote(INSTALL_MAGIC_MARKER)
  const payload = shellQuote(scriptBase64.trim())
  const steps = [
    'enc=$(mktemp)',
    'tmp=$(mktemp)',
    `printf %s ${payload} > "$enc"`,
    '(base64 -d < "$enc" > "$tmp" 2>/dev/null || base64 -D < "$enc" > "$tmp")',
    `{ grep -q ${marker} "$tmp" || { echo 'BackupX embedded installer is invalid.' >&2; head -5 "$tmp" >&2; false; }; }`,
    runScriptCommand('"$tmp"'),
  ]
  // Cleanup runs unconditionally (after `;`), then re-raises the saved status.
  const cleanup = '; rc=$?; rm -f "$enc" "$tmp"; test $rc -eq 0'
  return steps.join(' && ') + cleanup
}
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import type { InstallTokenInput, InstallTokenResult } from '../../types/nodes'
|
||||
import { createAgentDeployFlow } from './useAgentDeployFlow'
|
||||
|
||||
/**
 * Test fixture: a fixed set of install-token options (systemd mode, automatic
 * architecture, GitHub download source, 900-second TTL).
 */
function deployOptions(): InstallTokenInput {
  const options: InstallTokenInput = {
    mode: 'systemd',
    arch: 'auto',
    agentVersion: 'v2.3.1',
    downloadSrc: 'github',
    ttlSeconds: 900,
  }
  return options
}
|
||||
|
||||
/**
 * Test fixture: a complete install-token result with far-future expiry, with
 * the caller's overrides applied on top of the defaults.
 *
 * @param overrides Fields that replace the defaults; omitted means no overrides.
 */
function tokenResult(overrides: Partial<InstallTokenResult> = {}): InstallTokenResult {
  const defaults: InstallTokenResult = {
    installToken: 'install-token',
    expiresAt: '2099-01-01T00:00:00Z',
    url: 'https://master.example.com/api/install/install-token',
    fallbackUrl: 'https://master.example.com/install/install-token',
    scriptBase64: 'IyEvYmluL3NoCg==',
    composeUrl: '',
    fallbackComposeUrl: '',
  }
  return { ...defaults, ...overrides }
}
|
||||
|
||||
describe('createAgentDeployFlow', () => {
|
||||
it('creates one node then issues one install token', async () => {
|
||||
const calls: string[] = []
|
||||
const flow = createAgentDeployFlow({
|
||||
batchCreateNodes: async (names) => {
|
||||
calls.push(`batch:${names.join(',')}`)
|
||||
return [{ id: 7, name: names[0] }]
|
||||
},
|
||||
createInstallToken: async (nodeId) => {
|
||||
calls.push(`token:${nodeId}`)
|
||||
return tokenResult()
|
||||
},
|
||||
})
|
||||
|
||||
const result = await flow.submitNewNodes(['prod-a'], deployOptions())
|
||||
|
||||
expect(calls).toEqual(['batch:prod-a', 'token:7'])
|
||||
expect(result.status).toBe('ready')
|
||||
expect(result.rows).toHaveLength(1)
|
||||
expect(result.rows[0]).toMatchObject({
|
||||
nodeId: 7,
|
||||
nodeName: 'prod-a',
|
||||
status: 'ready',
|
||||
})
|
||||
expect(result.rows[0].command).toContain('/api/install/install-token')
|
||||
expect(result.rows[0].embeddedCommand).toContain('IyEvYmluL3NoCg==')
|
||||
})
|
||||
|
||||
it('returns partialFailed when one batch token request fails', async () => {
|
||||
const flow = createAgentDeployFlow({
|
||||
batchCreateNodes: async (names) => names.map((name, index) => ({ id: index + 1, name })),
|
||||
createInstallToken: async (nodeId) => {
|
||||
if (nodeId === 2) {
|
||||
throw new Error('token service unavailable')
|
||||
}
|
||||
return tokenResult({ installToken: `tok-${nodeId}`, url: `https://master.example.com/api/install/tok-${nodeId}` })
|
||||
},
|
||||
})
|
||||
|
||||
const result = await flow.submitNewNodes(['prod-a', 'prod-b', 'prod-c'], deployOptions())
|
||||
|
||||
expect(result.status).toBe('partialFailed')
|
||||
expect(result.rows.map((row) => row.status)).toEqual(['ready', 'failed', 'ready'])
|
||||
expect(result.rows[1]).toMatchObject({
|
||||
nodeId: 2,
|
||||
nodeName: 'prod-b',
|
||||
status: 'failed',
|
||||
errorMessage: 'token service unavailable',
|
||||
})
|
||||
})
|
||||
|
||||
it('rejects duplicate names before creating nodes', async () => {
|
||||
const flow = createAgentDeployFlow({
|
||||
batchCreateNodes: async () => {
|
||||
throw new Error('should not call batchCreateNodes')
|
||||
},
|
||||
createInstallToken: async () => tokenResult(),
|
||||
})
|
||||
|
||||
await expect(flow.submitNewNodes(['prod-a', ' prod-a '], deployOptions()))
|
||||
.rejects.toThrow('批次内重复节点名')
|
||||
})
|
||||
})
|
||||
@@ -1,146 +0,0 @@
|
||||
import { useMemo } from 'react'
|
||||
import type { BatchCreateResult, InstallTokenInput, InstallTokenResult } from '../../types/nodes'
|
||||
import { batchCreateNodes, createInstallToken } from '../../services/nodes'
|
||||
import {
|
||||
buildAgentInstallCommand,
|
||||
buildEmbeddedAgentInstallCommand,
|
||||
} from './installCommands'
|
||||
|
||||
export type DeployRowStatus = 'ready' | 'failed'
|
||||
export type DeployResultStatus = 'ready' | 'partialFailed'
|
||||
|
||||
export interface AgentDeployNode {
|
||||
id: number
|
||||
name: string
|
||||
}
|
||||
|
||||
export interface AgentDeployRow {
|
||||
nodeId: number
|
||||
nodeName: string
|
||||
status: DeployRowStatus
|
||||
command: string
|
||||
expiresAt: string
|
||||
installToken?: InstallTokenResult
|
||||
embeddedCommand?: string
|
||||
errorMessage?: string
|
||||
}
|
||||
|
||||
export interface AgentDeployResult {
|
||||
status: DeployResultStatus
|
||||
rows: AgentDeployRow[]
|
||||
}
|
||||
|
||||
interface AgentDeployFlowDeps {
|
||||
batchCreateNodes: (names: string[]) => Promise<BatchCreateResult[]>
|
||||
createInstallToken: (nodeId: number, input: InstallTokenInput) => Promise<InstallTokenResult>
|
||||
}
|
||||
|
||||
const TOKEN_CONCURRENCY = 4
|
||||
|
||||
/**
 * Creates the agent deployment flow on top of injected service functions, so
 * the flow can be exercised without the real node services.
 *
 * A failure while issuing a token for one node (or building its row) does not
 * reject: it is captured as a `failed` row carrying the error message.
 * Failures from name validation or `batchCreateNodes` do reject.
 *
 * @param deps Service functions used to create nodes and issue install tokens.
 * @returns An object with `submitNewNodes`, `submitExistingNode`, and `regenerateNode`.
 */
export function createAgentDeployFlow(deps: AgentDeployFlowDeps) {
  // Row recorded for a node whose install command could not be produced.
  const failedRow = (node: AgentDeployNode, error: unknown): AgentDeployRow => ({
    nodeId: node.id,
    nodeName: node.name,
    status: 'failed',
    command: '',
    expiresAt: '',
    errorMessage: resolveErrorMessage(error),
  })

  // Issues a token for a single node and always resolves to a row.
  async function issueTokenForNode(node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployRow> {
    try {
      const issued = await deps.createInstallToken(node.id, input)
      return readyRow(node, issued)
    } catch (error) {
      return failedRow(node, error)
    }
  }

  // Validates the names, creates the nodes in one batch, then issues tokens
  // with at most TOKEN_CONCURRENCY requests in flight.
  async function submitNewNodes(names: string[], input: InstallTokenInput): Promise<AgentDeployResult> {
    const validNames = normalizeNodeNames(names)
    const created = await deps.batchCreateNodes(validNames)
    const issuedRows = await mapWithConcurrency(
      created,
      TOKEN_CONCURRENCY,
      (node) => issueTokenForNode(node, input),
    )
    return resultFromRows(issuedRows)
  }

  // Issues a token for a node that already exists and wraps it as a result.
  async function submitExistingNode(node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployResult> {
    const single = await issueTokenForNode(node, input)
    return resultFromRows([single])
  }

  // Re-issues a token for one node and returns just its row.
  async function regenerateNode(node: AgentDeployNode, input: InstallTokenInput): Promise<AgentDeployRow> {
    return issueTokenForNode(node, input)
  }

  return { submitNewNodes, submitExistingNode, regenerateNode }
}
|
||||
|
||||
/**
 * React hook that returns the deploy flow wired to the real node services.
 * The flow is memoized with an empty dependency list.
 */
export function useAgentDeployFlow() {
  const buildFlow = () => createAgentDeployFlow({ batchCreateNodes, createInstallToken })
  return useMemo(buildFlow, [])
}
|
||||
|
||||
/**
 * Builds the `ready` row for a node whose install token was issued.
 *
 * The primary command is built from the token's URL and fallback URL. The
 * embedded command is only present when the token carries a base64 script.
 *
 * @param node Node the token belongs to.
 * @param token Issued install token.
 */
function readyRow(node: AgentDeployNode, token: InstallTokenResult): AgentDeployRow {
  const command = buildAgentInstallCommand(token.url, token.fallbackUrl)
  const embeddedCommand = token.scriptBase64
    ? buildEmbeddedAgentInstallCommand(token.scriptBase64)
    : undefined

  return {
    nodeId: node.id,
    nodeName: node.name,
    status: 'ready',
    command,
    expiresAt: token.expiresAt,
    installToken: token,
    embeddedCommand,
  }
}
|
||||
|
||||
/**
 * Wraps rows in a result whose overall status is `partialFailed` if any row
 * failed and `ready` otherwise (including for an empty list).
 *
 * @param rows Per-node rows; returned by reference, not copied.
 */
function resultFromRows(rows: AgentDeployRow[]): AgentDeployResult {
  const hasFailure = rows.some((row) => row.status === 'failed')
  const status: DeployResultStatus = hasFailure ? 'partialFailed' : 'ready'
  return { status, rows }
}
|
||||
|
||||
/**
 * Trims node names, drops blank entries, and validates the batch.
 *
 * @param names Raw names as entered by the user.
 * @returns The trimmed, non-empty names in their original order.
 * @throws Error when no name remains, when more than 50 remain, or when a
 *         name appears twice after trimming (the first repeat is reported).
 */
function normalizeNodeNames(names: string[]) {
  const cleaned: string[] = []
  for (const raw of names) {
    const trimmed = raw.trim()
    if (trimmed) {
      cleaned.push(trimmed)
    }
  }

  if (cleaned.length === 0) {
    throw new Error('请至少输入一个节点名称')
  }
  if (cleaned.length > 50) {
    throw new Error('单次最多创建 50 个节点')
  }

  const seen = new Set<string>()
  cleaned.forEach((name) => {
    if (seen.has(name)) {
      throw new Error(`批次内重复节点名:${name}`)
    }
    seen.add(name)
  })
  return cleaned
}
|
||||
|
||||
/**
 * Maps `items` through an async `mapper` with a cap on how many calls are in
 * flight at once, returning results in input order.
 *
 * A fixed set of workers pull the next unclaimed index from a shared counter
 * until the list is exhausted. If any `mapper` call rejects, the returned
 * promise rejects with that error.
 *
 * @param items Values to map.
 * @param concurrency Maximum number of concurrent `mapper` calls. Values below
 *        1 (or NaN) are treated as 1 so that every item is still processed.
 * @param mapper Async function called with each item and its index.
 * @returns Results positioned at the index of their input item.
 */
async function mapWithConcurrency<T, R>(
  items: T[],
  concurrency: number,
  mapper: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length)
  let nextIndex = 0
  // A limit of 0, a negative number, or NaN would start no workers and return
  // an array of holes without ever calling `mapper`; clamp to at least one.
  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency))
  const workerCount = Math.min(limit, items.length)
  const workers = Array.from({ length: workerCount }, async () => {
    for (;;) {
      // Claiming the index is synchronous, so no two workers take the same one.
      const index = nextIndex
      nextIndex += 1
      if (index >= items.length) {
        return
      }
      results[index] = await mapper(items[index], index)
    }
  })
  await Promise.all(workers)
  return results
}
|
||||
|
||||
/**
 * Extracts a user-facing message from a caught value.
 *
 * @param error Anything thrown by the token request.
 * @returns The `Error` message when there is a non-empty one, otherwise the
 *          generic "failed to generate install command" text.
 */
function resolveErrorMessage(error: unknown) {
  const message = error instanceof Error ? error.message : ''
  return message || '生成安装命令失败'
}
|
||||
@@ -3,7 +3,7 @@ import { Typography, Button, Space, Collapse, Spin, Message, Tag } from '@arco-d
|
||||
import { IconCopy, IconRefresh } from '@arco-design/web-react/icon'
|
||||
import { fetchScriptPreview } from '../../../services/nodes'
|
||||
import type { InstallTokenResult, InstallMode } from '../../../types/nodes'
|
||||
import { buildAgentDownloadCommand, buildAgentInstallCommand, buildEmbeddedAgentInstallCommand } from '../installCommands'
|
||||
import { buildAgentDownloadCommand, buildAgentInstallCommand } from '../installCommands'
|
||||
|
||||
const { Text } = Typography
|
||||
|
||||
@@ -30,9 +30,8 @@ export function Step3CommandPreview({ nodeId, nodeName, token, mode, previewPara
|
||||
}, [token.expiresAt])
|
||||
|
||||
const expired = remaining === 0
|
||||
const command = buildAgentInstallCommand(token.url, token.fallbackUrl)
|
||||
const fallbackCommand = buildAgentDownloadCommand(token.url, token.fallbackUrl)
|
||||
const embeddedCommand = token.scriptBase64 ? buildEmbeddedAgentInstallCommand(token.scriptBase64) : null
|
||||
const command = buildAgentInstallCommand(token.url, token.fallbackUrl, token.scriptBase64)
|
||||
const fallbackCommand = buildAgentDownloadCommand(token.url, token.fallbackUrl, token.scriptBase64)
|
||||
const dockerComposeCmd = mode === 'docker' && token.composeUrl
|
||||
? `curl -fsSL ${token.composeUrl} -o docker-compose.yml && docker-compose up -d`
|
||||
: null
|
||||
@@ -108,22 +107,8 @@ export function Step3CommandPreview({ nodeId, nodeName, token, mode, previewPara
|
||||
</div>
|
||||
)}
|
||||
|
||||
{embeddedCommand && (
|
||||
<div style={{ background: 'var(--color-fill-2)', padding: '12px 14px', borderRadius: 6, marginBottom: 12 }}>
|
||||
<Text type="secondary" style={{ fontSize: 12, display: 'block', marginBottom: 4 }}>
|
||||
代理异常时使用嵌入式备用命令:
|
||||
</Text>
|
||||
<Text style={{ fontFamily: 'monospace', fontSize: 13, wordBreak: 'break-all', userSelect: 'all' }}>
|
||||
{embeddedCommand}
|
||||
</Text>
|
||||
<div style={{ marginTop: 8 }}>
|
||||
<Button size="small" icon={<IconCopy />} onClick={() => copy(embeddedCommand)}>复制</Button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<Text type="secondary" style={{ fontSize: 12, display: 'block', marginBottom: 8 }}>
|
||||
主安装命令包含公开 install token,会在 TTL 到期或首次消费后作废;嵌入式备用命令包含完整节点 token,不依赖公开链接消费状态,请仅在目标机执行并妥善保存。
|
||||
安装命令包含节点 token,请仅在目标机执行并妥善保存;公开安装链接会在 TTL 到期或首次消费后作废。
|
||||
</Text>
|
||||
|
||||
<Collapse bordered={false} onChange={(_key, keys) => {
|
||||
|
||||
@@ -14,19 +14,6 @@ export interface NodeSummary {
|
||||
/** CSV 节点标签;任务的 NodePoolTag 命中这里任一即会被调度到本节点 */
|
||||
labels?: string
|
||||
createdAt: string
|
||||
queue?: NodeQueueSummary
|
||||
runningTasks?: number
|
||||
lastError?: string
|
||||
health?: 'healthy' | 'degraded' | 'offline'
|
||||
}
|
||||
|
||||
export interface NodeQueueSummary {
|
||||
pending: number
|
||||
dispatched: number
|
||||
depth: number
|
||||
timeouts: number
|
||||
oldestActiveAt?: string
|
||||
oldestActiveAgeSeconds?: number
|
||||
}
|
||||
|
||||
export interface DirEntry {
|
||||
|
||||
Reference in New Issue
Block a user