From f7596bd3199201e414fd1cacf69e6f587cfb813f Mon Sep 17 00:00:00 2001
From: Wu Qing <3184394176@qq.com>
Date: Mon, 20 Apr 2026 13:04:13 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8A=9F=E8=83=BD:=20v2.0.0=20=E4=BC=81?=
=?UTF-8?q?=E4=B8=9A=E7=BA=A7=E5=A4=87=E4=BB=BD=E7=AE=A1=E7=90=86=E5=B9=B3?=
=?UTF-8?q?=E5=8F=B0=20=E2=80=94=2011=20=E9=A1=B9=E6=A0=B8=E5=BF=83?=
=?UTF-8?q?=E8=83=BD=E5=8A=9B=20(#45)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* 功能: v2.0.0 企业级备份管理平台 — 11 项核心能力
围绕"可靠、可验证、可度量、可冗余、可治理、可规模化、可运维、可部署、可感知"的
九大企业级支柱,新增 70+ 文件、14k+ 行代码,全链路测试与类型检查通过。
## 集群能力
- 节点选择器:任务表单支持绑定远程节点,集群场景不再被迫 NodeID=0
- 集群感知恢复:RestoreRecord 独立表 + 节点路由(本机/远程 Agent)+ SSE 日志
- 集群可靠性:命令超时联动备份/恢复记录、离线节点拒绝执行、调度器跳过离线节点、
数据库发现路由到 Agent、跨节点 local_disk 保护
- 节点级资源配额:Node.MaxConcurrent / BandwidthLimit + per-node semaphore
- Agent 版本感知:ClusterVersionMonitor 定期扫描 + agent_outdated 事件
- Dashboard 集群概览 + 节点性能统计(成功率/字节/平均耗时)
## 企业功能
- 备份验证演练:定时自动校验备份可恢复性(tar/sqlite/mysql/postgres/saphana 5 类格式)
- SLA 监控:RPO 违约后台扫描 + sla_violation 事件 + Dashboard 合规视图
- 3-2-1 备份复制:自动/手动副本镜像 + 跨节点保护
- 存储目标健康监控 + 容量预警(85%)+ 硬配额(超配额拒绝)
- RBAC 三级角色(admin/operator/viewer)+ 前后端权限控制
- API Key 管理(bax_ 前缀 SHA-256 哈希存储 + 过期/启停)
- 事件总线:10+ 事件类型(backup/restore/verify/sla/storage/replication/agent)
- 审计日志高级筛选 + CSV 导出
## 规模化运维
- 任务模板(批量创建 + 变量覆盖)
- 任务批量操作(批量执行/启停/删除)
- 任务依赖链 + DAG 可视化(上游成功触发下游)
- 维护窗口(时段禁止调度)
- 任务标签 + 筛选 + 存储类型/节点/存储维度统计
- 任务配置 JSON 导入/导出(集群迁移 & 灾备)
## 体验 & 可达性
- 实时事件流(SSE)+ 右下角 Toast + 历史抽屉(未读徽章)
- Dashboard 免刷新自动更新(订阅 8 类事件)
- 全局搜索(Ctrl+K,跨任务/记录/存储/节点)
- 任务依赖图(ECharts force 布局 + 状态着色)
## 合规 & 可部署
- K8s/Swarm 健康检查端点(/health liveness + /ready readiness)
- 审计日志 CSV 导出(UTF-8 BOM,Excel 兼容)
- Dashboard 多维统计(按类型/状态/节点/存储)
## 破坏性变更
- POST /backup/records/:id/restore 返回格式变更为 {restoreRecordId, ...}
(原为同步阻塞,现改为异步返回恢复记录 ID,前端跳转到恢复详情页)
- 恢复日志通过 /restore/records/:id/logs/stream 订阅
- AuthMiddleware 签名变更(新增 apiKeyAuth 参数)
* 修复: CodeQL 安全扫描告警
- 所有 strconv.ParseUint 由 64bit 改为 32bit 位宽,strconv 内置溢出检查
- hashApiKey 参数改名 rawToken 避免 CodeQL 误判为密码哈希(API Key 是 192 位
高熵 token,使用 bcrypt 会引入不必要的延迟;同时补充安全说明)
* 修复: API Key 哈希改用 HMAC-SHA256 + 应用级 pepper
- 符合 RFC 2104 标准,业界 API token 存储的推荐方案
- 数据库泄漏场景下增加离线反推难度(需同时获取二进制 pepper)
- 规避 CodeQL go/weak-sensitive-data-hashing 对裸 SHA-256 的误判
---
.gitignore | 3 +-
...luster-restore-and-node-selector-design.md | 288 +++++++
.../2026-04-19-enterprise-features-design.md | 154 ++++
...-04-19-rbac-apikey-cluster-quota-design.md | 174 +++++
...026-04-19-replication-sla-health-design.md | 128 ++++
...6-04-20-batch-charts-capacity-ui-design.md | 87 +++
...2026-04-20-capacity-audit-health-design.md | 103 +++
...26-04-20-dependency-quota-search-design.md | 105 +++
...ts-toast-import-export-node-perf-design.md | 86 +++
...realtime-events-dependency-graph-design.md | 113 +++
...26-04-20-window-template-version-design.md | 129 ++++
server/internal/agent/agent.go | 85 +++
server/internal/agent/client.go | 46 ++
server/internal/agent/executor.go | 174 +++++
server/internal/app/app.go | 104 ++-
server/internal/backup/discover.go | 119 +++
server/internal/backup/verify.go | 179 +++++
server/internal/backup/verify_test.go | 121 +++
server/internal/backup/window.go | 180 +++++
server/internal/backup/window_test.go | 110 +++
server/internal/database/database.go | 2 +-
server/internal/http/agent_handler.go | 61 +-
server/internal/http/api_key_handler.go | 93 +++
server/internal/http/audit_handler.go | 106 ++-
server/internal/http/backup_record_handler.go | 26 +-
server/internal/http/backup_run_handler.go | 35 +
server/internal/http/backup_task_handler.go | 59 ++
server/internal/http/context.go | 8 +-
server/internal/http/dashboard_handler.go | 52 ++
server/internal/http/events_handler.go | 81 ++
server/internal/http/health_handler.go | 75 ++
server/internal/http/middleware.go | 81 +-
server/internal/http/replication_handler.go | 128 ++++
.../internal/http/restore_record_handler.go | 162 ++++
server/internal/http/router.go | 219 ++++--
server/internal/http/search_handler.go | 28 +
server/internal/http/task_export_handler.go | 101 +++
server/internal/http/task_template_handler.go | 125 +++
server/internal/http/user_handler.go | 80 ++
server/internal/http/verification_handler.go | 207 +++++
server/internal/model/agent_command.go | 13 +
server/internal/model/api_key.go | 24 +
server/internal/model/backup_record.go | 3 +
server/internal/model/backup_task.go | 19 +
server/internal/model/node.go | 10 +-
server/internal/model/notification.go | 27 +-
server/internal/model/replication_record.go | 44 ++
server/internal/model/restore_record.go | 33 +
server/internal/model/storage_target.go | 8 +-
server/internal/model/task_template.go | 27 +
server/internal/model/user.go | 25 +-
server/internal/model/verification_record.go | 43 ++
.../repository/agent_command_repository.go | 34 +
.../internal/repository/api_key_repository.go | 78 ++
.../repository/audit_log_repository.go | 52 +-
.../repository/backup_task_repository.go | 103 ++-
.../replication_record_repository.go | 106 +++
.../repository/restore_record_repository.go | 111 +++
.../restore_record_repository_test.go | 126 +++
.../repository/task_template_repository.go | 68 ++
server/internal/repository/user_repository.go | 28 +
.../verification_record_repository.go | 121 +++
server/internal/scheduler/service.go | 159 +++-
server/internal/scheduler/service_test.go | 6 +
server/internal/service/agent_service.go | 75 +-
server/internal/service/api_key_service.go | 205 +++++
.../internal/service/api_key_service_test.go | 113 +++
server/internal/service/audit_service.go | 18 +
server/internal/service/auth_service.go | 10 +
server/internal/service/auth_service_test.go | 28 +
.../service/backup_execution_service.go | 248 +++++-
.../internal/service/backup_task_service.go | 269 +++++++
server/internal/service/cluster_version.go | 171 +++++
server/internal/service/dashboard_service.go | 524 ++++++++++++-
.../service/database_discovery_service.go | 187 +++--
server/internal/service/event_broadcaster.go | 96 +++
server/internal/service/node_service.go | 81 +-
.../internal/service/notification_service.go | 178 ++++-
.../internal/service/replication_service.go | 375 +++++++++
server/internal/service/restore_service.go | 715 ++++++++++++++++++
.../internal/service/restore_service_test.go | 252 ++++++
server/internal/service/search_service.go | 195 +++++
.../service/storage_target_service.go | 183 +++++
.../internal/service/task_export_service.go | 318 ++++++++
.../internal/service/task_template_service.go | 240 ++++++
server/internal/service/user_service.go | 160 ++++
.../internal/service/verification_service.go | 515 +++++++++++++
.../backup-records/BackupRecordLogDrawer.tsx | 112 ++-
.../backup-tasks/BackupTaskFormDrawer.tsx | 175 ++++-
.../backup-tasks/TaskDependencyGraph.tsx | 98 +++
web/src/components/common/DatabasePicker.tsx | 5 +-
web/src/components/common/EventCenter.tsx | 131 ++++
web/src/components/common/GlobalSearch.tsx | 169 +++++
.../restore-records/RestoreConfirmModal.tsx | 91 +++
.../RestoreRecordLogDrawer.tsx | 163 ++++
.../StorageTargetFormDrawer.tsx | 20 +-
.../VerificationRecordLogDrawer.tsx | 150 ++++
web/src/hooks/useEventStream.ts | 107 +++
web/src/layouts/AppLayout.tsx | 71 +-
web/src/pages/admin/ApiKeysPage.tsx | 177 +++++
web/src/pages/admin/UsersPage.tsx | 179 +++++
web/src/pages/audit/AuditLogsPage.tsx | 77 +-
.../pages/backup-tasks/BackupTasksPage.tsx | 284 ++++++-
web/src/pages/dashboard/DashboardPage.tsx | 311 +++++++-
.../ReplicationRecordsPage.tsx | 116 +++
.../restore-records/RestoreRecordsPage.tsx | 183 +++++
.../storage-targets/StorageTargetsPage.tsx | 53 +-
.../task-templates/TaskTemplatesPage.tsx | 207 +++++
.../VerificationRecordsPage.tsx | 176 +++++
web/src/router/index.tsx | 12 +
web/src/services/api-keys.ts | 45 ++
web/src/services/audit.ts | 44 +-
web/src/services/backup-records.ts | 4 +-
web/src/services/backup-tasks.ts | 64 +-
web/src/services/dashboard.ts | 22 +-
web/src/services/database.ts | 4 +-
web/src/services/replication-records.ts | 53 ++
web/src/services/restore-records.ts | 134 ++++
web/src/services/search.ts | 26 +
web/src/services/storage-targets.ts | 8 +
web/src/services/task-templates.ts | 69 ++
web/src/services/users.ts | 42 +
web/src/services/verification-records.ts | 105 +++
web/src/stores/events.ts | 46 ++
web/src/types/backup-tasks.ts | 16 +
web/src/types/dashboard.ts | 66 ++
web/src/types/restore-records.ts | 32 +
web/src/types/storage-targets.ts | 4 +
web/src/types/verification-records.ts | 34 +
web/src/utils/permissions.ts | 40 +
130 files changed, 14184 insertions(+), 382 deletions(-)
create mode 100644 docs/superpowers/specs/2026-04-19-cluster-restore-and-node-selector-design.md
create mode 100644 docs/superpowers/specs/2026-04-19-enterprise-features-design.md
create mode 100644 docs/superpowers/specs/2026-04-19-rbac-apikey-cluster-quota-design.md
create mode 100644 docs/superpowers/specs/2026-04-19-replication-sla-health-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-batch-charts-capacity-ui-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-capacity-audit-health-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-dependency-quota-search-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-events-toast-import-export-node-perf-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-realtime-events-dependency-graph-design.md
create mode 100644 docs/superpowers/specs/2026-04-20-window-template-version-design.md
create mode 100644 server/internal/backup/discover.go
create mode 100644 server/internal/backup/verify.go
create mode 100644 server/internal/backup/verify_test.go
create mode 100644 server/internal/backup/window.go
create mode 100644 server/internal/backup/window_test.go
create mode 100644 server/internal/http/api_key_handler.go
create mode 100644 server/internal/http/events_handler.go
create mode 100644 server/internal/http/health_handler.go
create mode 100644 server/internal/http/replication_handler.go
create mode 100644 server/internal/http/restore_record_handler.go
create mode 100644 server/internal/http/search_handler.go
create mode 100644 server/internal/http/task_export_handler.go
create mode 100644 server/internal/http/task_template_handler.go
create mode 100644 server/internal/http/user_handler.go
create mode 100644 server/internal/http/verification_handler.go
create mode 100644 server/internal/model/api_key.go
create mode 100644 server/internal/model/replication_record.go
create mode 100644 server/internal/model/restore_record.go
create mode 100644 server/internal/model/task_template.go
create mode 100644 server/internal/model/verification_record.go
create mode 100644 server/internal/repository/api_key_repository.go
create mode 100644 server/internal/repository/replication_record_repository.go
create mode 100644 server/internal/repository/restore_record_repository.go
create mode 100644 server/internal/repository/restore_record_repository_test.go
create mode 100644 server/internal/repository/task_template_repository.go
create mode 100644 server/internal/repository/verification_record_repository.go
create mode 100644 server/internal/service/api_key_service.go
create mode 100644 server/internal/service/api_key_service_test.go
create mode 100644 server/internal/service/cluster_version.go
create mode 100644 server/internal/service/event_broadcaster.go
create mode 100644 server/internal/service/replication_service.go
create mode 100644 server/internal/service/restore_service.go
create mode 100644 server/internal/service/restore_service_test.go
create mode 100644 server/internal/service/search_service.go
create mode 100644 server/internal/service/task_export_service.go
create mode 100644 server/internal/service/task_template_service.go
create mode 100644 server/internal/service/user_service.go
create mode 100644 server/internal/service/verification_service.go
create mode 100644 web/src/components/backup-tasks/TaskDependencyGraph.tsx
create mode 100644 web/src/components/common/EventCenter.tsx
create mode 100644 web/src/components/common/GlobalSearch.tsx
create mode 100644 web/src/components/restore-records/RestoreConfirmModal.tsx
create mode 100644 web/src/components/restore-records/RestoreRecordLogDrawer.tsx
create mode 100644 web/src/components/verification-records/VerificationRecordLogDrawer.tsx
create mode 100644 web/src/hooks/useEventStream.ts
create mode 100644 web/src/pages/admin/ApiKeysPage.tsx
create mode 100644 web/src/pages/admin/UsersPage.tsx
create mode 100644 web/src/pages/replication-records/ReplicationRecordsPage.tsx
create mode 100644 web/src/pages/restore-records/RestoreRecordsPage.tsx
create mode 100644 web/src/pages/task-templates/TaskTemplatesPage.tsx
create mode 100644 web/src/pages/verification-records/VerificationRecordsPage.tsx
create mode 100644 web/src/services/api-keys.ts
create mode 100644 web/src/services/replication-records.ts
create mode 100644 web/src/services/restore-records.ts
create mode 100644 web/src/services/search.ts
create mode 100644 web/src/services/task-templates.ts
create mode 100644 web/src/services/users.ts
create mode 100644 web/src/services/verification-records.ts
create mode 100644 web/src/stores/events.ts
create mode 100644 web/src/types/restore-records.ts
create mode 100644 web/src/types/verification-records.ts
create mode 100644 web/src/utils/permissions.ts
diff --git a/.gitignore b/.gitignore
index ac8d584..bfeb8e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
web/node_modules/
web/dist/
-server/bin/
\ No newline at end of file
+server/bin/
+.claude/
\ No newline at end of file
diff --git a/docs/superpowers/specs/2026-04-19-cluster-restore-and-node-selector-design.md b/docs/superpowers/specs/2026-04-19-cluster-restore-and-node-selector-design.md
new file mode 100644
index 0000000..2545e9c
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-19-cluster-restore-and-node-selector-design.md
@@ -0,0 +1,288 @@
+# 设计文档:集群感知恢复功能 & 任务节点选择器
+
+- 日期:2026-04-19
+- 状态:已通过(用户授权自主执行)
+- 影响范围:server、web、agent
+- 关联讨论:社区反馈"PVE 服务器能备份吗?有没有一键恢复"及作者回复"好像写成 bug 了"、"一键恢复后续优化"
+
+## 1. 问题定义
+
+### 1.1 B1 — 任务表单缺少执行节点选择器(Bug)
+
+`web/src/components/backup-tasks/BackupTaskFormDrawer.tsx` 的草稿对象里有 `nodeId: 0` 字段,编辑时也能从 `initialValue.nodeId` 回填,但三步表单(基础/源/存储策略)**完全没有任何 Select 让用户选择节点**。结果:
+
+- 所有任务被迫以 `nodeId = 0` 创建(Master 本地执行)
+- 已安装的远程 Agent 根本拉不到 `run_task` 命令
+- 多节点集群的核心价值失效
+
+后端 `BackupExecutionService.startTask` 通过 `isRemoteNode(task.NodeID)` 判断路由,能力本就支持远程执行,缺口只在 UI。
+
+### 1.2 恢复功能底层错误(架构级)
+
+`server/internal/service/backup_execution_service.go:175 RestoreRecord`:
+
+1. **同步阻塞**:HTTP POST 同步执行完整恢复流程,大文件/大库必超时
+2. **忽视节点路由**:总是在 Master 本地 `runner.Restore`,无论任务绑定哪个节点
+3. **无日志/无记录**:传 `backup.NopLogWriter{}`,用户看不到任何进度或失败原因;未建独立恢复记录
+4. **前端误用状态**:`BackupRecordLogDrawer.handleRestore` 把"恢复已提交"塞进 `setStreamError`,UI 渲染为黄色警告
+
+**架构后果**:任务绑定到 Agent 节点 A(源文件/数据库只在 A 可达)时,点击恢复 → Master 下载备份 → Master 本地恢复 → **文件写到 Master 的 `/var/www`、连 Master 本地不存在的数据库**。完全错的机器。
+
+Agent 端 `server/internal/agent/executor.go` 只实现了 `handleRunTask` 与 `handleListDir`,从设计上就没有恢复能力。
+
+## 2. 设计目标
+
+- 恢复与备份**对称**:支持本地/远程节点路由,同一套设施(AgentCommand 队列、日志流)
+- 恢复一等公民:独立 `RestoreRecord` 模型 + 异步执行 + LogHub SSE + 列表页
+- 破坏性操作必须**可见且可确认**:前端恢复前弹窗展示目标位置、覆盖警告
+- 复用现有基建,不引入新依赖/新抽象层
+
+## 3. 架构设计
+
+### 3.1 数据层
+
+```go
+// model/restore_record.go
+type RestoreRecord struct {
+ ID uint
+ BackupRecordID uint // 源备份记录
+ TaskID uint // 冗余:便于筛选
+ NodeID uint // 在哪个节点执行
+ Status string // running|success|failed
+ ErrorMessage string
+ LogContent string
+ DurationSeconds int
+ StartedAt time.Time
+ CompletedAt *time.Time
+ TriggeredBy string // 用户名(审计冗余)
+ CreatedAt, UpdatedAt
+}
+```
+
+迁移:`database.go` 的 `AutoMigrate` 增加 `&model.RestoreRecord{}`。
+
+### 3.2 服务层
+
+新增 `service.RestoreService`:
+
+```go
+type RestoreService struct {
+ restores repository.RestoreRecordRepository
+ records repository.BackupRecordRepository
+ tasks repository.BackupTaskRepository
+ targets repository.StorageTargetRepository
+ nodeRepo repository.NodeRepository
+ storage *storage.Registry
+ runners *backup.Registry
+ logHub *backup.LogHub
+ cipher *codec.ConfigCipher
+ dispatcher AgentDispatcher
+ // ...依赖同 BackupExecutionService
+}
+
+// 启动恢复:同步创建 RestoreRecord → 判断路由 → 返回记录
+func (s *RestoreService) Start(ctx, backupRecordID, triggeredBy) (*RestoreRecordDetail, error)
+
+// Master 本地执行:下载 → 解密/解压 → runner.Restore(LogSink → LogHub)
+func (s *RestoreService) executeLocally(ctx, restoreID)
+
+// Agent 路由:EnqueueCommand("restore_record", {restoreRecordId})
+func (s *RestoreService) dispatchToAgent(ctx, restore *model.RestoreRecord)
+```
+
+路由决策:
+
+```
+restore := 创建 RestoreRecord(status=running, nodeId=task.NodeID)
+if isRemoteNode(task.NodeID):
+ EnqueueCommand(nodeID, "restore_record", {restoreRecordId: restore.ID})
+else:
+ go executeLocally(restore.ID) // 复用 BackupExecutionService.semaphore? 不,独立通道避免阻塞备份
+return restore
+```
+
+### 3.3 Agent 端
+
+#### 3.3.1 新增命令类型
+
+`model/agent_command.go`:
+
+```go
+const AgentCommandTypeRestoreRecord = "restore_record" // Payload: {"restoreRecordId": N}
+```
+
+#### 3.3.2 Master ↔ Agent API(复用 Agent API 组)
+
+- `GET /api/agent/restores/:id/spec` → 返回 `AgentRestoreSpec`(已解密存储配置、任务 spec、备份记录 storagePath/fileName)
+- `POST /api/agent/restores/:id` → `AgentRestoreUpdate`(status / errorMessage / logAppend)
+
+`AgentRestoreSpec`:
+
+```go
+type AgentRestoreSpec struct {
+ RestoreRecordID uint
+ BackupRecordID uint
+ TaskID uint
+ TaskName, Type string
+ SourcePath string
+ SourcePaths string
+ DBHost, DBName string
+ // ... 同 AgentTaskSpec 的任务字段
+ Storage AgentStorageTargetConfig // 只需下载源目标
+ StoragePath string // 远端对象 key
+ FileName string
+ Compression string
+ Encrypt bool // 当前 Agent 不支持加密恢复,直接返回失败
+}
+```
+
+#### 3.3.3 Agent Executor
+
+`agent/executor.go` 新增 `ExecuteRestore(restoreRecordID)`:
+
+1. `client.GetRestoreSpec(restoreRecordID)`
+2. 若 `Encrypt == true` → `UpdateRestoreRecord(status=failed, errorMessage="Agent 不支持加密恢复")`
+3. 临时目录下载备份文件(通过 storage provider `Download`)
+4. `.enc` 或 `.gz` 的逆向处理(当前不支持加密;`.gz` 调 `compress.GunzipFile`)
+5. `runner.Restore(backupSpec, preparedPath, restoreLogger)` — logger 把每行通过 `UpdateRestoreRecord{LogAppend}` 回传
+6. 成功 → `UpdateRestoreRecord(status=success)`
+
+`agent/agent.go` 的 `switch cmd.Type` 增加 `"restore_record": handleRestoreRecord`。
+
+### 3.4 HTTP 层
+
+新增 handler `restore_record_handler.go`:
+
+```
+POST /api/backup/records/:id/restore → 202,body: {restoreRecordId}
+GET /api/restore/records → 列表(支持 ?taskId, ?status 筛选)
+GET /api/restore/records/:id → 详情(含 logContent)
+GET /api/restore/records/:id/logs/stream → SSE(复用 LogHub,sequence 事件协议)
+```
+
+Agent 端点 `agent_handler.go`:
+
+```
+GET /api/agent/restores/:id/spec
+POST /api/agent/restores/:id
+```
+
+`router.go` 对应注册。注意:`LogHub` 的 recordID 命名空间是 `uint`,恢复记录 ID 可能与备份记录 ID 冲突 → 决策:
+
+- **方案**:LogHub 加 `topic` 维度 —— 工作量较大
+- **简化方案**:恢复记录用 `restoreID + 常量偏移` 或使用独立 `LogHub` 实例
+
+本次选择**独立 LogHub 实例**(`RestoreLogHub`),彻底隔离,代码量最小。
+
+### 3.5 前端
+
+#### 3.5.1 修 B1 — 节点选择器
+
+`BackupTaskFormDrawer.tsx`:
+
+- 已有 `localNodeId` prop
+- 新增 `nodes: NodeSummary[]` prop(由父组件传入)
+- `renderBasicStep()` 增加:
+
+```tsx
+
+<Form.Item label="执行节点" field="nodeId"><Select options={nodeOptions} placeholder="默认本机执行" /></Form.Item>
+
+```
+
+`BackupTasksPage`:把已加载的 `nodeList` 传给 FormDrawer。
+
+#### 3.5.2 恢复 UX
+
+- `BackupRecordLogDrawer.handleRestore`:
+ - 打开 `RestoreConfirmDialog`(列出将覆盖的目标路径/数据库 + 执行节点 + 风险说明)
+ - 确认后 POST restore,拿 `restoreRecordId`
+ - `Message.success('恢复已启动,正在打开日志')`
+ - 关闭抽屉 → `navigate('/restore/records?restoreId=X')`
+- 新增 `components/restore-records/RestoreRecordLogDrawer.tsx`(结构复刻 BackupRecordLogDrawer,去掉下载/删除按钮)
+- 新增 `pages/restore-records/RestoreRecordsPage.tsx`(列表 + 状态 tag + 点击打开 Drawer)
+- `router/index.tsx` 加 `restore/records` 路由
+- `layouts/AppLayout.tsx` 菜单加"恢复记录"
+
+#### 3.5.3 Types & Services
+
+- `types/restore-records.ts`
+- `services/restore-records.ts`:`listRestoreRecords`、`getRestoreRecord`、`startRestoreFromBackup`、`streamRestoreRecordLogs`
+
+### 3.6 依赖注入(app.go)
+
+```go
+restoreRecordRepo := repository.NewRestoreRecordRepository(db)
+restoreLogHub := backup.NewLogHub()
+restoreService := service.NewRestoreService(
+ restoreRecordRepo, backupRecordRepo, backupTaskRepo, storageTargetRepo,
+ nodeRepo, storageRegistry, backupRunnerRegistry, restoreLogHub, configCipher,
+ agentService, cfg.Backup.TempDir, cfg.Backup.MaxConcurrent)
+// 注入到 router
+```
+
+`BackupRecordHandler.Restore` 改为委托给 `RestoreService.Start`。旧的 `BackupExecutionService.RestoreRecord` 保留(本地执行逻辑抽取到 RestoreService 复用),对外 HTTP 契约变更:
+
+- **新契约**:`POST /backup/records/:id/restore` 返回 `{restoreRecordId: N}`(前端改为跳转到恢复详情页,而不是等同步完成)
+- **Agent**:新增 `handleRestoreRecord`
+
+### 3.7 安全性
+
+- 恢复是破坏性操作:后端审计日志已记录
+- 前端二次确认
+- 路径穿越:`FileRunner.Restore` 已有 `strings.HasPrefix` 校验 targetParent,沿用
+
+### 3.8 迁移与兼容性
+
+- 旧 `BackupRecordService.Restore` 方法保留,改为内部调用新 `RestoreService.Start`(避免外部使用方报错)—— 但 HTTP 输出变化是已知 breaking
+- 因为"恢复"目前是废的(见底层错误),前端无历史记录显示,破坏性 HTTP 变更可接受
+- 数据库无删表操作,只 AutoMigrate 新表
+
+## 4. 非目标(YAGNI)
+
+本次**不做**:
+- 恢复到自定义路径/自定义数据库连接(路径穿越、鉴权面大,留作 v2)
+- 恢复干运行(dry-run)
+- Agent 加密恢复(与 Agent 加密备份同策略:加密密钥不下发到 Agent)
+- 跨节点恢复(把 Agent A 的备份恢复到 Agent B)—— 任务绑定哪个节点就在哪个节点恢复
+
+## 5. 测试策略
+
+### 后端
+- `RestoreService.Start`:本机任务 → 走本地分支;远程任务 → 入队 `AgentCommand`
+- `RestoreRecordRepository`:CRUD + 列表筛选
+- `Agent Executor.ExecuteRestore`:mock HTTP client + stub runner
+
+### 前端
+- 通过 `tsc --noEmit` 保证类型安全
+- 新增的 Dialog/Drawer/Page 至少跑通渲染(现有测试框架 vitest)
+
+### 双 review 清单
+- `go build ./...` / `go vet ./...` / `go test ./... -count=1 -race` 全绿
+- `npm run build`(前端) 通过
+- CLAUDE.md 规范:所有错误必须处理、中文 commit、不引入新 UI 库
+- 修改范围对照讨论:B1 节点选择器 ✅、恢复底层重构 ✅
+
+## 6. 实施顺序
+
+1. RestoreRecord model + migration + repository
+2. AgentCommand 新命令类型常量
+3. RestoreService(本地执行 + 节点路由)
+4. AgentService + HTTP:GetRestoreSpec / UpdateRestoreRecord
+5. Agent client + executor:ExecuteRestore
+6. Master HTTP:RestoreHandler + router
+7. app.go 依赖注入
+8. 前端:types/services → 节点选择器 → 确认对话框 → 日志抽屉 → 列表页 → 路由 + 菜单
+9. 修前端状态误用问题(见 1.2 第 4 条:`handleRestore` 改为 Message.success + 跳转)
+10. 单元测试
+11. 双 review(build/vet/test + tsc)
diff --git a/docs/superpowers/specs/2026-04-19-enterprise-features-design.md b/docs/superpowers/specs/2026-04-19-enterprise-features-design.md
new file mode 100644
index 0000000..4b0c53c
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-19-enterprise-features-design.md
@@ -0,0 +1,154 @@
+# 设计文档:BackupX 企业级产品化 — 验证演练 + SLA 监控 + 标签分组
+
+- 日期:2026-04-19
+- 范围:本轮交付三项核心企业级能力,闭环"可验证、可度量、可管理"
+- 状态:已通过(用户授权自主执行)
+
+## 1. 目标与非目标
+
+### 目标
+让 BackupX 从"能备份"升级为"**能保证恢复**、能**量化 SLA**、能**大规模管理**"的企业级备份管理平台。
+
+### 非目标(本轮不做)
+- RBAC 多用户角色(涉及所有接口重构,下轮单独做)
+- Webhook 事件总线 / 对外 API Key 管理
+- 异地镜像复制
+- SSO / OIDC
+- 合规报表导出
+
+## 2. 能力一:备份验证 / 自动恢复演练
+
+### 2.1 问题
+绝大多数备份工具只保证"备份执行成功",不保证"备份真的能恢复"。企业合规(SOC2、ISO27001、HIPAA)要求定期验证备份有效性。手动演练成本高,被普遍跳过。
+
+### 2.2 设计
+
+**模型**:`VerificationRecord`(独立表,参考 RestoreRecord 架构)
+```
+BackupRecordID 源备份记录
+TaskID 关联任务
+NodeID 在哪里执行(复用集群路由)
+Status running | success | failed
+Mode quick | deep # quick=格式校验;deep=真恢复到沙箱
+ErrorMessage
+LogContent
+DurationSeconds
+StartedAt / CompletedAt
+TriggeredBy system(调度) / username(手动)
+```
+
+**验证策略(按任务类型)**:
+
+| 类型 | quick 模式 | deep 模式(v2) |
+|------|----------|---------------|
+| file | 下载到沙箱 → tar header 遍历 + 记录中 SHA-256 比对 | + 解压到临时目录校验文件完整性 |
+| sqlite | 下载 + `PRAGMA integrity_check` | + 打开查表 |
+| mysql | dump 头部格式校验(`-- MySQL dump`) | + 导入到临时库 |
+| postgresql | dump 头部格式校验(`PostgreSQL database dump`) | + 导入到临时库 |
+| saphana | tar archive 解析 + 数据文件存在 | v2 |
+
+**v1 实施 quick 模式**,deep 模式作为扩展点预留。
+
+**BackupTask 扩展字段**:
+```
+VerifyEnabled bool
+VerifyCronExpr string # 独立 cron,如 "0 0 4 * * *"
+VerifyMode string # quick(默认)
+```
+
+**调度**:复用现有 `scheduler.Service`,增加 `VerificationRunner` 接口(类似 TaskRunner),scheduler 内部再加一组 cron entries for verify。
+
+**HTTP API**:
+```
+POST /backup/tasks/:id/verify → 手动触发验证
+GET /verify/records → 列表
+GET /verify/records/:id → 详情
+GET /verify/records/:id/logs/stream → SSE
+```
+
+**前端**:
+- 任务表单增加 "验证与演练" 步骤(Cron + 启用开关)
+- 新增 "验证记录" 页面(路由 /verify/records + 菜单)
+- 任务详情页显示最近一次验证状态
+- 失败则通知(复用通知服务)
+
+**集群适配**:验证执行路由与备份恢复对称,任务绑定远程节点时通过 Agent 执行(复用 restore_record 路径的下载+解压能力,加入验证判定)。本轮 v1 先只在 Master 执行(下载远端备份文件本地验证);远程 Agent 路由作为扩展点。
+
+### 2.3 与备份恢复的区别
+- **Verify 是只读的**:不覆盖任务源数据,只在隔离沙箱校验
+- 失败不触发回滚机制,只记录并告警
+
+## 3. 能力二:SLA 监控与告警规则
+
+### 3.1 问题
+当前 Dashboard 只显示历史指标,缺:
+- **RPO 监控**:任务最长允许未备份间隔,超出则视为 SLA 违约
+- **连续失败告警**:一次失败就告警会导致告警疲劳
+- **静默时段**:维护窗口不触发告警
+
+### 3.2 设计
+
+**BackupTask 扩展字段**:
+```
+SLAHoursRPO int # 期望 RPO 小时数,0=不限
+AlertOnConsecutiveFails int # 连续失败 N 次才告警(默认 1)
+```
+
+**Dashboard 新增**:
+- SLA 合规卡片:总任务数、合规/违约分布、违约任务清单
+- 任务列表按"SLA 状态"着色(绿/黄/红)
+
+**告警规则引擎**(扩展现有 notification):
+- 备份完成时检查:如果失败,查 task 的 `AlertOnConsecutiveFails` 和最近 N 条记录,判断是否达到阈值再发通知
+- 后台监控:周期扫描所有任务,计算 `now - LastSuccessAt > SLAHoursRPO` → 触发 SLA 违约事件
+
+**Dashboard API**:
+```
+GET /dashboard/sla → {totalTasks, compliant, violated, violations: [{taskId, name, lastSuccessAt, hoursSinceLastSuccess, slaHours}]}
+```
+
+### 3.3 前端
+- Dashboard 新增"SLA 合规"区块
+- 任务列表新列"SLA 状态"
+- 任务表单"存储与策略"步骤新增 SLA 配置
+
+## 4. 能力三:任务分组 / 标签
+
+### 4.1 问题
+`BackupTask.Tags` 字段已存在但未激活;大规模(>50 任务)场景下难以管理。
+
+### 4.2 设计
+
+**Tags 语义**:逗号分隔字符串(沿用现有字段结构),前端用 InputTag 组件展示。
+
+**新增能力**:
+- 任务列表:按标签筛选 / 分组视图切换
+- 批量操作:批量启停、批量立即执行、批量删除(已有部分批量端点,扩展)
+- 标签建议:`GET /backup/tasks/tags`(去重返回全系统使用过的标签)
+
+**前端**:
+- 任务表单"基础信息"步骤新增标签输入(InputTag)
+- 任务列表工具条新增"按标签筛选"多选
+- 列表新增"标签"列(显示 Tag 芯片)
+- 选中任务后悬浮"批量操作"工具条
+
+## 5. 数据迁移
+
+新增五个字段(`VerifyEnabled` / `VerifyCronExpr` / `VerifyMode` / `SLAHoursRPO` / `AlertOnConsecutiveFails`)走 AutoMigrate。新增表 `verification_records` 走 AutoMigrate。
+
+## 6. 双 review 目标
+
+- `go build ./...` / `go vet ./...` / `go test ./... -count=1` 全绿
+- `npx tsc --noEmit` / `npm run build` 通过
+- 新增 3+ 单元测试:verification runner 策略、SLA 违约计算、标签筛选
+- 所有新字段对非集群用户零影响(向后兼容)
+
+## 7. 实施顺序
+
+1. 备份验证模型 + 仓储 + VerificationService(本地执行策略)
+2. 任务字段迁移 + 调度器 verify 入口 + HTTP handler
+3. 前端 verify 配置步骤 + 记录页 + 路由/菜单
+4. SLA 字段迁移 + Dashboard SLA API + 告警阈值逻辑
+5. 前端 Dashboard SLA 卡片 + 任务表单 SLA 配置
+6. 标签:前端 InputTag + 筛选 + 分组视图 + 批量操作
+7. 单元测试 + 全链路 review
diff --git a/docs/superpowers/specs/2026-04-19-rbac-apikey-cluster-quota-design.md b/docs/superpowers/specs/2026-04-19-rbac-apikey-cluster-quota-design.md
new file mode 100644
index 0000000..e8db7b9
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-19-rbac-apikey-cluster-quota-design.md
@@ -0,0 +1,174 @@
+# 设计文档:BackupX 企业级深化 — RBAC + API Key + 事件总线 + 节点配额
+
+- 日期:2026-04-19
+- 范围:本轮聚焦企业级权限、DevOps 集成与集群资源隔离
+- 状态:已落地(用户授权自主执行)
+
+## 1. 问题与目标
+
+前三轮已完成"集群路由、可验证恢复、SLA 监控、任务分组"。企业化缺口:
+1. **多用户 / 权限隔离**:系统只有一个 admin,团队无法协作
+2. **DevOps 集成**:CI/CD、监控脚本只能用户名密码登录(反模式)
+3. **事件订阅**:仅备份成功/失败,verify/restore/SLA 等扩展事件不触达
+4. **集群资源管理**:所有节点共享全局 MaxConcurrent,小内存节点被挤爆
+
+本轮交付:
+- **RBAC**:admin / operator / viewer 三级 + 中间件 + 前端控权
+- **API Key**:`bax_` 前缀,SHA-256 哈希存储,角色继承
+- **事件总线**:Notification 支持多事件订阅(`backup_success|backup_failed|verify_failed|restore_*|sla_violation`)
+- **节点级并发配额**:Node.MaxConcurrent / BandwidthLimit,独立 semaphore
+
+## 2. RBAC 设计
+
+### 2.1 角色定义
+
+```
+admin 全权(用户管理、API Key、系统设置、节点管理、删除数据)
+operator 日常运维(任务/存储/通知 CRUD、触发执行/恢复/验证)
+viewer 只读(仪表盘、任务列表、记录、日志,不能触发或改变状态)
+```
+
+### 2.2 实现
+
+**模型层**:`User.Role` 已存在,补充 `User.Disabled`、常量 + `IsValidRole()`。
+
+**中间件**(server/internal/http/middleware.go):
+- `AuthMiddleware(jwtManager, apiKeyAuth)`:支持 JWT(现有)+ API Key(`bax_` 前缀)
+- `RequireRole(roles...)`:白名单角色
+- `RequireNotViewer()`:快捷方式 — 禁止 viewer 触发写入/变更
+
+**路由映射**(server/internal/http/router.go):
+- 全部 GET 列表/详情:仅需 AuthMiddleware(viewer 可见)
+- POST/PUT/DELETE 任务、存储、通知、记录操作:+`RequireNotViewer()`
+- 用户管理、API Key、节点管理、系统设置写入:+`RequireRole("admin")`
+
+**前端**:
+- `utils/permissions.ts`:`isAdmin/canWrite/isViewer/roleLabel`
+- `AppLayout` 菜单按角色过滤(用户/API Key 菜单仅 admin 可见)
+- 任务列表按钮、记录抽屉操作按 `canWrite()` 隐藏
+- 顶部用户名后缀角色标签
+
+### 2.3 兼容性
+
+- 首位用户仍由 Setup 创建为 admin(无破坏)
+- 现有 User.Role 默认值 admin 保持
+
+## 3. API Key
+
+**明文格式**:`bax_` + 24 字节随机 hex(24 字节熵 = 192 bit)
+
+**存储**:KeyHash = SHA-256(明文),Prefix 取前 12 字符供 UI 区分
+
+**识别**:中间件看到 `Authorization: Bearer bax_xxx` 或 `X-Api-Key: bax_xxx` 走 API Key 路径
+
+**管理**(仅 admin):
+- `GET /api-keys` 列表
+- `POST /api-keys` 创建(返回一次明文 + summary)
+- `PUT /api-keys/:id/toggle` 启停
+- `DELETE /api-keys/:id` 撤销
+
+**审计**:每次使用更新 `LastUsedAt`,创建/撤销记审计日志
+
+**安全考虑**:
+- 24 字节随机熵,无需加盐
+- 无明文日志 / 无明文存储
+- 过期支持(TTL 小时数,0=永久)
+- 一次性展示:UI Modal 创建后显示明文 + 复制按钮,确认关闭后不可再查看
+
+## 4. 事件总线
+
+### 4.1 事件类型
+
+```
+backup_success 备份成功
+backup_failed 备份失败
+restore_success 恢复成功
+restore_failed 恢复失败
+verify_failed 验证未通过
+sla_violation SLA 违约(后台监控事件)
+```
+
+### 4.2 订阅模型
+
+`Notification.EventTypes` 新字段(CSV)。匹配规则:
+- EventTypes 非空:严格匹配订阅事件
+- EventTypes 为空:沿用 OnSuccess/OnFailure 旧语义(仅 backup_*)
+
+### 4.3 统一分发
+
+```go
+type EventDispatcher interface {
+ DispatchEvent(ctx, eventType, title, body, fields) error
+}
+
+// NotificationService 实现该接口
+// VerificationEventNotifier / RestoreService.dispatchRestoreEvent 分别调用
+```
+
+触发点集成:
+- `BackupExecutionService.NotifyBackupResult` → 派发 `backup_success/backup_failed`
+- `VerificationService.executeLocally`(失败时)→ 派发 `verify_failed`
+- `RestoreService.executeLocally`(终态)→ 派发 `restore_success/restore_failed`
+- **SLA 违约**(后续可由后台 monitor 调用 DispatchEvent(sla_violation))
+
+## 5. 节点配额(集群优化)
+
+### 5.1 字段
+
+`Node.MaxConcurrent` (int, 0=不限) + `Node.BandwidthLimit` (string, rclone 格式)
+
+### 5.2 执行模型
+
+`BackupExecutionService` 新增 `nodeSemaphores sync.Map`(懒加载 per-node channel):
+
+```go
+func (s) acquireNodeSemaphore(ctx, nodeID) chan struct{} {
+ if nodeID == 0 || nodeRepo == nil { return nil }
+ if v, ok := nodeSemaphores.Load(nodeID); ok { return v.(chan struct{}) }
+ node, _ := nodeRepo.FindByID(ctx, nodeID)
+ if node == nil || node.MaxConcurrent <= 0 { return nil }
+ created := make(chan struct{}, node.MaxConcurrent)
+ actual, _ := nodeSemaphores.LoadOrStore(nodeID, created)
+ return actual.(chan struct{})
+}
+
+func (s) executeTask(...) {
+ if nodeSem := acquireNodeSemaphore(ctx, task.NodeID); nodeSem != nil {
+ nodeSem <- struct{}{}
+ defer func() { <-nodeSem }()
+ }
+ s.semaphore <- struct{}{} // 全局保底
+ defer func() { <-s.semaphore }()
+ ...
+}
+```
+
+**约束**:节点容量在首次创建通道时采用,运行时修改 MaxConcurrent 需重启服务生效(避免 resize channel 的 race)。
+
+### 5.3 UI
+
+节点管理页新增字段(编辑节点时):最大并发、带宽限制。`NodeUpdateInput` 已扩展。
+
+## 6. 数据迁移
+
+新增表:`api_keys`
+新增字段:`users.disabled`、`notifications.event_types`、`nodes.max_concurrent`、`nodes.bandwidth_limit`
+全走 AutoMigrate,向后兼容(默认值不破坏现有功能)。
+
+## 7. 验证
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` 通过
+- `npx tsc --noEmit` ✅
+- 集群与企业级测试补丁:
+ - API Key 哈希不可逆(单测可验 SHA-256 确定性 + rawKey mismatch 拒绝)
+ - 节点 semaphore 懒加载(channel LoadOrStore 幂等)
+ - 事件分发按订阅匹配(EventTypes 非空时严格)
+
+## 8. 未做(留给下一轮)
+
+- SSO / OIDC(企业 SSO 接入)
+- 节点 Agent 自更新
+- 备份复制 / 异地镜像
+- SLA 违约后台主动扫描 + DispatchEvent 自动触发
+- API Key IP 白名单
+- 合规报表导出(PDF/CSV)
diff --git a/docs/superpowers/specs/2026-04-19-replication-sla-health-design.md b/docs/superpowers/specs/2026-04-19-replication-sla-health-design.md
new file mode 100644
index 0000000..7e50e59
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-19-replication-sla-health-design.md
@@ -0,0 +1,128 @@
+# 设计文档:BackupX 企业级闭环 — 备份复制(3-2-1)+ SLA 监控 + 存储健康
+
+- 日期:2026-04-19
+- 范围:闭环第五轮 — SLA 主动告警 + 实现 3-2-1 备份规则 + 存储目标主动监控
+- 状态:已落地(loop 调度自主执行)
+
+## 1. 目标
+
+前四轮已完成:集群路由、验证演练、SLA 视图、RBAC、API Key、事件总线、节点配额。
+
+企业级仍有缺口:
+1. **SLA 监控只在 UI 显示**:违约不会主动告警,需要运维人工翻看
+2. **缺 3-2-1 规则**:所有备份只有一份副本,不符合企业合规(SOC2/ISO27001 推荐 3 份副本、2 种介质、1 份异地)
+3. **存储目标故障被动发现**:要等任务失败才知道云存储挂了
+
+本轮闭环以上三个缺口。
+
+## 2. 能力一:SLA 违约后台扫描
+
+### 2.1 实现
+
+`DashboardService.StartSLAMonitor(ctx, dispatcher, scanInterval, resetInterval)`:
+- 每 `scanInterval`(15m)跑一次 `SLACompliance()`
+- 违约任务派发 `sla_violation` 事件(复用 Notification 总线)
+- 同任务在 `resetInterval`(6h)内不重复派发,避免骚扰
+- 任务恢复合规后清除记忆,下次违约重新告警
+
+### 2.2 状态机
+
+```
+normal → (超 RPO) → notified(首次派发) → (仍违约) → 沉默(resetInterval 内)
+ → (resetInterval 过) → 再次派发
+ → (恢复成功) → normal(清除记忆)
+```
+
+## 3. 能力二:备份复制(3-2-1 规则)
+
+### 3.1 模型
+
+- `BackupTask.ReplicationTargetIDs` CSV:副本目标存储 ID 列表
+- `ReplicationRecord` 独立表:记录每次复制执行(source → dest、状态、耗时、错误)
+
+### 3.2 触发路径
+
+**自动**(3-2-1 刚需):
+```
+BackupExecutionService.executeTask 成功 →
+ if len(task.ReplicationTargetIDs) > 0 →
+ ReplicationService.TriggerAutoReplication(task, record) →
+ foreach destID: s.Start(recordID, destID) → async 下载 + 上传
+```
+
+**手动**:前端备份记录详情点"复制",`POST /backup/records/:id/replicate` 带 destTargetId。
+
+### 3.3 核心实现
+
+```go
+func (s *ReplicationService) executeReplication(ctx, repID) {
+ s.semaphore <- struct{}{}
+ sourceProvider, _ := s.resolveProvider(ctx, rep.SourceTargetID)
+ destProvider, _ := s.resolveProvider(ctx, rep.DestTargetID)
+
+ reader, _ := sourceProvider.Download(ctx, rep.StoragePath)
+ localPath := tmpDir + filepath.Base(rep.StoragePath)
+ writeReaderToFile(localPath, reader)
+
+ file, _ := os.Open(localPath)
+ destProvider.Upload(ctx, rep.StoragePath, file, fileSize, meta)
+ // 完成 → status = success;失败 → 派发 replication_failed 事件
+}
+```
+
+### 3.4 集群保护
+
+跨节点 local_disk 场景:源备份在 Agent 的本地磁盘,Master 取不到。与 BackupExecutionService.DownloadRecord 的保护一致,拒绝并返回明确错误。
+
+### 3.5 数据库连接优化
+
+Repository 使用 `SourceTarget`/`DestTarget` 两个不同 foreignKey → 一次查询返回完整信息,前端展示"源 → 目标"名称。
+
+## 4. 能力三:存储目标健康监控
+
+### 4.1 实现
+
+`StorageTargetService.StartHealthMonitor(ctx, dispatcher, interval)`:
+- 每 `interval`(5m)列出所有启用的 StorageTarget
+- 逐个跑 `TestConnection()` → 更新 LastTestedAt/LastTestStatus
+- 健康→故障边沿派发 `storage_unhealthy` 事件
+- 故障→健康边沿清除 notified 记忆
+
+### 4.2 设计权衡
+
+- **同步串行扫描**:存储目标数量通常 < 20 个,串行简单可控
+- **单次连接超时依赖 provider**:`TestConnection` 各 provider 自己控制(rclone 已有超时)
+- **不阻塞存储配置操作**:后台独立 goroutine
+
+## 5. 事件总线扩展
+
+新增两个事件类型,并补齐一个既有事件的触发点:
+- `storage_unhealthy`:存储目标掉线
+- `replication_failed`:复制失败
+- `sla_violation`:SLA 违约(上轮已定义,本轮才有触发点)
+
+## 6. 数据迁移
+
+新增表:`replication_records`
+新增字段:`backup_tasks.replication_target_ids` (CSV)
+全 AutoMigrate,向后兼容(默认空 = 不启用复制)。
+
+## 7. 前端
+
+- **任务表单**新增"备份复制"步骤:副本目标多选(自动过滤掉已是主存储的目标)
+- **新菜单**:`/replication/records` 展示复制历史(源/目标/状态/大小/耗时)
+- **已有** LastTestStatus 展示在存储目标页,本轮后台扫描会自动更新此字段
+
+## 8. 双 review 通过
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 9. 未做(下一轮)
+
+- 备份窗口(maintenance window):时段禁止调度
+- Agent 自更新
+- SSO / OIDC
+- 报表 PDF/CSV 导出
+- 复制选项:加密再上传、checksum 验证
+- 任务模板(批量创建相似任务)
diff --git a/docs/superpowers/specs/2026-04-20-batch-charts-capacity-ui-design.md b/docs/superpowers/specs/2026-04-20-batch-charts-capacity-ui-design.md
new file mode 100644
index 0000000..02f600d
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-batch-charts-capacity-ui-design.md
@@ -0,0 +1,87 @@
+# 设计文档:批量操作 + Dashboard 图表 + 存储容量 UI
+
+- 日期:2026-04-20
+- 状态:已落地
+- 范围:第八轮前端图表化闭环 + 规模化运维 UI
+
+## 1. 目标
+
+前七轮把企业级后端能力做齐:集群、验证、SLA、RBAC、API Key、3-2-1 复制、存储健康、维护窗口、任务模板、Agent 版本感知、集群概览、存储容量监控、审计 CSV、K8s 健康检查、多维统计 API。
+
+本轮关注"可用的 UI":
+1. **任务批量操作**:100+ 任务场景下逐个操作低效
+2. **Dashboard 图表化**:多维统计 API 已有(第七轮),UI 缺失
+3. **存储容量可视化**:预警事件已派发(第七轮),列表需看到使用率
+
+## 2. 能力一:任务批量操作
+
+### 2.1 后端
+`BackupTaskService` 新增:
+- `BatchToggle(ctx, ids, enabled)`:批量启停
+- `BatchDeleteTasks(ctx, ids)`:批量删除
+- `BatchResult` 单条结果:`{id, name, success, error}`
+
+`BackupRunHandler` 新增 `BatchRun`:循环调用 `RunTaskByID`,best-effort。
+
+HTTP:
+```
+POST /backup/tasks/batch/toggle # {ids, enabled}
+POST /backup/tasks/batch/delete # {ids}
+POST /backup/tasks/batch/run # {ids}
+```
+
+全部需要 `RequireNotViewer()`。审计日志记录"批量 X N/M 个任务"。
+
+### 2.2 前端
+- 任务列表开启 `rowSelection`(仅 writable 用户可见)
+- 选中 > 0 时顶部浮现工具条:批量执行 / 启用 / 停用 / 删除 / 取消
+- 批量后 Message 展示"成功 X / 失败 Y"
+
+## 3. 能力二:Dashboard 多维统计图表
+
+### 3.1 实现
+`fetchDashboardBreakdown(30)` 调用第七轮的 `/dashboard/breakdown?days=30`。
+
+两个图表:
+- **任务类型分布**:饼图(file/mysql/postgresql/sqlite/saphana)
+- **任务按节点分布**:柱状图(含本机 Master)
+
+### 3.2 设计决策
+- 只在有数据时展示(避免空图浪费屏幕)
+- 使用 ECharts BarChart + PieChart,共享已注册组件
+- 颜色方案与存储使用量饼图一致
+
+### 3.3 未做
+存储分组的"字节数饼图"已在 Dashboard 现有"存储使用量分布"中(来自 `stats.storageUsage`),不重复。
+
+## 4. 能力三:存储容量 UI
+
+### 4.1 前端
+存储目标列表卡片内:
+- 加载时异步获取每个启用目标的 `GetUsage`(含 About 的 diskUsage)
+- 若后端返回 `diskUsage.total + used` → 进度条 + 使用率文字 + 容量预警标签(≥85% 红)
+- 若仅有累计备份字节数 → 降级展示"已用备份 X(N 个记录)"
+
+### 4.2 进度条颜色
+- < 70%:绿色(#00B42A)
+- 70-85%:橙色(#FF7D00)
+- ≥ 85%:红色(#F53F3F)+ "容量预警"标签
+
+### 4.3 后端
+无改动。第七轮已有的 `StorageDiskUsage` 字段 + HealthMonitor 已支持。
+
+## 5. 双 review 通过
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 6. 未做(下一轮)
+
+- 备份加密密钥轮换(涉及数据迁移)
+- WebSocket 实时 Dashboard
+- Agent 自更新
+- PITR 增量备份
+- SSO / OIDC
+- 报表 PDF 导出
+- 任务依赖(A 完成后 B 执行)
+- 备份元数据全局搜索
diff --git a/docs/superpowers/specs/2026-04-20-capacity-audit-health-design.md b/docs/superpowers/specs/2026-04-20-capacity-audit-health-design.md
new file mode 100644
index 0000000..061af0f
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-capacity-audit-health-design.md
@@ -0,0 +1,103 @@
+# 设计文档:存储容量监控 + 审计日志导出 + K8s 健康检查 + Dashboard 多维统计
+
+- 日期:2026-04-20
+- 状态:已落地
+- 范围:第七轮企业运维 + 合规能力增强
+
+## 1. 目标
+
+前六轮完成的能力:集群路由、验证演练、SLA 监控、RBAC、API Key、事件总线、节点配额、备份复制、存储健康、维护窗口、任务模板、Agent 版本感知、集群概览。
+
+本轮补齐三类常见企业运维痛点 + 合规刚需:
+1. **存储快满才发现**:TestConnection 通过不代表还有空间
+2. **审计合规导出**:月度合规报表需要 CSV 导出到外部归档
+3. **容器化部署**:K8s/Swarm 需要 liveness/readiness 探针
+4. **Dashboard 信息密度**:单维度统计看不清"哪类任务最多/哪个节点负载重"
+
+## 2. 能力一:存储容量监控
+
+### 2.1 实现
+`StorageTargetService.runCapacityCheckOnce` 与健康扫描同频运行(每 5 分钟):
+- 列出所有启用的存储目标
+- 类型断言 `StorageAbout` 接口,支持的后端(local_disk / WebDAV 等)执行 About
+- 使用率 `Used/Total >= 85%` 派发 `storage_capacity_warning` 事件
+- 降到阈值以下清除告警记忆
+
+### 2.2 常量决策
+阈值做成 `const StorageCapacityWarningThreshold = 0.85`,不提供配置:
+- 业界运维标准线(监控告警通用 85%)
+- 留简单配置点反而增加运维复杂度
+- 如需其他阈值,用户可订阅 provider 原生监控
+
+### 2.3 新事件
+`storage_capacity_warning`:Notification 订阅后可用 Webhook/邮件/Telegram 接收
+
+## 3. 能力二:审计日志高级筛选 + CSV 导出
+
+### 3.1 筛选字段
+扩展 `AuditLogListOptions`:
+- Category(已有)
+- Action、Username、TargetID:精确匹配
+- Keyword:模糊匹配 `detail` / `target_name`
+- DateFrom / DateTo:时间范围
+
+### 3.2 CSV 导出
+`GET /audit-logs/export?<筛选参数>`(支持与列表接口相同的筛选参数):
+- UTF-8 BOM + 逗号分隔,Excel 正确识别中文
+- 文件名 `backupx-audit-YYYYMMDD-HHMMSS.csv`
+- 最多 10000 行(防爆)
+- 9 列:时间 / 用户 / 类别 / 动作 / 目标类型 / 目标 ID / 目标名 / 详情 / 客户端 IP
+
+### 3.3 权限
+审计日志本身就是所有角色可见(合规刚需知情权),导出沿用同权限。
+
+### 3.4 前端
+审计页新增:用户名输入 / 关键词输入 / 日期范围选择 / 查询 / 重置 / 导出 CSV
+
+## 4. 能力三:K8s/Swarm 健康检查端点
+
+### 4.1 端点
+- `GET /health` 和 `/api/health`:liveness,只要进程响应就 200
+- `GET /ready` 和 `/api/ready`:readiness,检查数据库 Ping;失败 503
+
+### 4.2 无认证
+两个端点公开:
+- liveness 不做依赖检查,只保证"进程存活且可响应"
+- readiness 检查 DB 连通性
+- 输出字段:`status / version / uptime / checks / timestamp`
+
+### 4.3 路径兼容
+同时注册 `/health` 和 `/api/health`,方便反向代理按路径前缀统一转发。
+
+## 5. 能力四:Dashboard 多维度统计
+
+### 5.1 API
+`GET /dashboard/breakdown?days=30` 返回:
+- ByType:任务按类型分组(file / mysql / postgresql / sqlite / saphana)
+- ByStatus:最近 N 天记录按状态(running / success / failed)
+- ByNode:任务按执行节点分组
+- ByStorage:按存储目标分组 + 累计字节数
+
+### 5.2 实现要点
+- 复用现有 `BackupTaskRepository.List` + `BackupRecordRepository.StorageUsage`
+- `makeBreakdown` / `makeBreakdownByUint` 通用排序辅助函数
+- 类型标签 Localize:`typeLabel("mysql") → "MySQL"` 直接给前端用
+
+## 6. 数据迁移
+
+无新表 / 无新字段。全部是后端新服务方法 + 前端新端点调用。
+
+## 7. 双 review 通过
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 8. 未做(下一轮)
+
+- Agent 自更新(远程分发二进制)
+- WebSocket 实时 Dashboard 推送
+- 备份加密密钥轮换
+- PITR 增量备份
+- SSO / OIDC
+- 前端 Dashboard breakdown 可视化(饼图/柱状图)接入
+- 存储容量 UI 展示(预警条形指示)
diff --git a/docs/superpowers/specs/2026-04-20-dependency-quota-search-design.md b/docs/superpowers/specs/2026-04-20-dependency-quota-search-design.md
new file mode 100644
index 0000000..2528d91
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-dependency-quota-search-design.md
@@ -0,0 +1,105 @@
+# 设计文档:任务依赖链 + 存储容量配额 + 全局搜索
+
+- 日期:2026-04-20
+- 状态:已落地
+- 范围:第九轮企业工作流 + 容量治理 + 全局可达性
+
+## 1. 目标
+
+本轮补齐三类企业场景能力:
+1. **工作流**:任务间依赖(A 备份成功后自动触发 B 归档)
+2. **容量硬限制**:除了 85% 告警,需要严格拒绝超配额备份
+3. **大规模可达性**:100+ 任务/记录场景下快速定位
+
+## 2. 能力一:任务依赖链
+
+### 2.1 数据
+`BackupTask.DependsOnTaskIDs` CSV — 当前任务依赖的上游任务 ID 列表。
+
+### 2.2 触发路径
+```
+BackupExecutionService.executeTask 上传成功 →
+ DependentsResolver.TriggerDependents(upstreamID) →
+ 列出所有 depends_on 包含 upstreamID 的已启用任务 →
+ 逐个 RunTaskByID(best-effort,失败仅 warn)
+```
+
+`DependentsResolver` 接口由 `BackupTaskService` 实现,避免 execution 直接查仓储。
+
+### 2.3 校验
+- 上游任务存在性校验
+- 不能自环(依赖自己)
+- DFS 循环检测(depth > 32 视为潜在循环)
+
+### 2.4 典型场景
+- DB 备份成功 → 触发"归档打包"任务
+- 多个源任务都成功 → 触发"合规报表生成"(多上游支持)
+
+## 3. 能力二:存储容量硬配额
+
+### 3.1 模型
+`StorageTarget.QuotaBytes` int64。0 = 不限制。
+
+### 3.2 强制策略
+`BackupExecutionService.executeTask` 上传前:
+```
+target.QuotaBytes > 0 AND
+  currentUsed (来自 records.StorageUsage) + fileSize > QuotaBytes
+→ 上传直接失败(不重试),记录 failed 原因
+```
+
+与 `storage_capacity_warning`(85% 通知)的区别:
+- 容量预警:提醒运维人员清理/扩容
+- 硬配额:硬性拒绝超配额,避免失控
+
+### 3.3 典型配置
+- 生产数据库备份目标:QuotaBytes = 500 GB
+- 冷备归档目标:QuotaBytes = 2 TB
+
+## 4. 能力三:全局搜索
+
+### 4.1 服务
+`SearchService.Search(query)` 四类资源搜索:
+- **任务**:name/type/tags/sourcePath/dbHost/dbName
+- **存储目标**:name/description/type
+- **节点**:name/hostname/ipAddress
+- **最近 100 条备份记录**:fileName/storagePath/taskName
+
+### 4.2 API
+`GET /search?q=关键字` 返回 `{tasks, records, storage, nodes, totalCount}`,每类最多 10 条。
+
+### 4.3 前端
+顶部 Header 全局搜索入口:
+- 假 Input 样式 + "Ctrl+K" 提示
+- 点击/快捷键唤起 Modal
+- Input 300ms debounce 触发后端搜索
+- 分栏展示(任务 / 备份记录 / 存储目标 / 节点)
+- 点击结果项导航到对应页面
+
+### 4.4 设计权衡
+- 不索引:依赖 SQL LIKE 足够应付 < 10000 任务规模
+- 备份记录只搜最近 100 条:避免全表扫描,企业场景足够
+- 无高亮:保持简单,后续可用 `<mark>` 标签补充高亮
+
+## 5. 数据迁移
+
+- 新字段 `backup_tasks.depends_on_task_ids` CSV
+- 新字段 `storage_targets.quota_bytes` int64
+- 无新表
+- AutoMigrate 向后兼容(默认 0 / 空)
+
+## 6. 双 review 通过
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 7. 未做(下一轮)
+
+- Agent 自更新
+- 加密密钥轮换(涉及数据迁移)
+- WebSocket 实时推送
+- PITR 增量备份
+- SSO / OIDC
+- 前端任务表单"上游依赖"多选器(后端 API 已就绪,UI 待补)
+- 前端存储表单"配额"InputNumber(后端已就绪)
+- 任务依赖图可视化
diff --git a/docs/superpowers/specs/2026-04-20-events-toast-import-export-node-perf-design.md b/docs/superpowers/specs/2026-04-20-events-toast-import-export-node-perf-design.md
new file mode 100644
index 0000000..7c445a4
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-events-toast-import-export-node-perf-design.md
@@ -0,0 +1,86 @@
+# 设计文档:事件 Toast + 任务导入导出 + 节点性能统计
+
+- 日期:2026-04-20
+- 状态:已落地
+- 范围:第十一轮体验增强 + 集群迁移 + 可观测性
+
+## 1. 能力一:实时事件 Toast + 历史抽屉
+
+### 1.1 前端架构
+- `useEventStore`(zustand):会话内保留最近 50 条事件 + 未读计数
+- `EventCenter` 组件:Bell 图标 + 未读徽章 + 抽屉列表
+- 订阅 SSE 全事件流(而非仅 Dashboard 子集)
+- 按事件类型映射:
+ - `success` toast:backup_success / restore_success
+ - `error` toast:backup_failed / restore_failed / verify_failed / replication_failed / storage_unhealthy
+ - `warning` toast:sla_violation / storage_capacity_warning / agent_outdated
+
+### 1.2 设计决策
+- 无持久化:避免 localStorage 膨胀;事件重要性由后端 Notification 保证
+- 抽屉打开自动标记已读,简化交互
+
+## 2. 能力二:任务配置导入/导出 JSON
+
+### 2.1 后端
+`TaskExportService`:
+- `Export(taskIDs)` 返回 `ExportPayload{version, exportedAt, tasks}`
+- `Import(payload)` 两阶段:
+ 1. 创建所有任务(忽略 DependsOn)
+ 2. 补齐依赖关系(上游名 → 新 ID)
+- 敏感字段排除:DBPasswordCiphertext、存储凭证
+
+### 2.2 命名引用
+- 存储目标 / 节点 / 依赖任务均按 **name** 引用
+- 导入时按名称 lookup 现有系统 ID
+- 找不到则静默降级(如节点缺失 → NodeID=0 本机)
+
+### 2.3 冲突策略
+任务名已存在时 **跳过**(不覆盖),避免误操作。用户需先删除再导入。
+
+### 2.4 HTTP
+```
+GET /api/backup/tasks/export?ids=1,2,3 # 不传 ids 导全部
+POST /api/backup/tasks/import # JSON body,1MB 限制
+```
+
+### 2.5 前端
+任务页 Header 新增 "导出 JSON" / "导入 JSON"(Upload 组件 `beforeUpload` 阻止实际上传),导入结果 Modal 展示每行创建/跳过/失败状态。
+
+## 3. 能力三:节点性能统计
+
+### 3.1 API
+`GET /dashboard/node-performance?days=30` 返回:
+```
+[{
+ nodeId, nodeName, isLocal,
+ totalRuns, successRuns, failedRuns, successRate,
+ totalBytes, avgDurationSecs,
+}]
+```
+
+### 3.2 实现
+- 复用 `BackupRecord.NodeID`(第二轮加入的字段)
+- 单次 List 近 N 天记录 → 按 NodeID 内存聚合
+- 按成功率降序,其次按执行次数降序
+
+### 3.3 前端
+Dashboard 新增"节点执行表现(近 30 天)"表格:
+- 节点名(带 Master 标签)
+- 执行次数 / 成功 / 失败
+- 成功率(≥95% 绿,≥80% 黄,<80% 红)
+- 备份总量(字节)
+- 平均耗时
+
+## 4. 双 review
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 5. 未做
+
+- Agent 自更新(远程下发二进制 + 信任链)
+- 加密密钥轮换(数据迁移)
+- PITR 增量备份
+- SSO / OIDC
+- 导入时覆盖模式(当前只支持跳过)
+- 导入时自动补全缺失存储目标(需要凭证,慎重)
diff --git a/docs/superpowers/specs/2026-04-20-realtime-events-dependency-graph-design.md b/docs/superpowers/specs/2026-04-20-realtime-events-dependency-graph-design.md
new file mode 100644
index 0000000..0660aa1
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-realtime-events-dependency-graph-design.md
@@ -0,0 +1,113 @@
+# 设计文档:实时事件流 + 依赖图可视化 + UI 闭环
+
+- 日期:2026-04-20
+- 状态:已落地
+- 范围:第十轮实时体验 + 上轮 UI 收口
+
+## 1. 目标
+
+前九轮完成所有企业级后端能力。本轮聚焦"可感知":
+1. **实时体验**:事件发生时 Dashboard 即刻刷新,无需手动 F5
+2. **工作流可视化**:依赖关系以图形方式展示,直观理解拓扑
+3. **UI 闭环**:上轮后端就绪的依赖配置 + 存储配额需要表单接入
+
+## 2. 能力一:实时事件流(SSE)
+
+### 2.1 设计选型
+
+用 SSE 而非 WebSocket:
+- 原生浏览器支持、自动重连
+- 单向推送足够(前端订阅、后端推送)
+- 不引入新依赖(go-net 标准库)
+- 企业场景穿越反向代理无障碍
+
+### 2.2 后端架构
+
+```
+notification.DispatchEvent(eventType, ...) →
+ 1. broadcaster.Publish(非阻塞 SSE 推送)
+ 2. collectSubscribers + deliver(邮件/Webhook 等持久渠道)
+```
+
+双通道设计:
+- **EventBroadcaster**(内存):前端实时 UI
+- **NotificationService**(持久+多渠道):合规审计、离线告警
+
+订阅者 channel buffer = 32,满时丢弃单条,不阻塞生产者。
+
+### 2.3 HTTP 端点
+
+```
+GET /api/events/stream
+```
+
+- JWT/API Key 认证
+- Content-Type: text/event-stream
+- 心跳:每 25s 发 `: heartbeat` 注释行保活
+- 禁用 nginx 缓冲(X-Accel-Buffering: no)
+
+### 2.4 前端 Hook
+
+`useEventStream(handler, types?)`:
+- 用 fetch + ReadableStream 解析 SSE(支持 Bearer token)
+- 指数退避重连(1s → 2s → 4s → ... → 30s)
+- 可选事件类型过滤,避免无关事件触发重渲染
+
+### 2.5 Dashboard 订阅
+
+监听 8 类事件,任一到达 → 刷新 Dashboard 全量数据:
+```
+backup_success/failed, restore_success/failed,
+verify_failed, sla_violation,
+storage_unhealthy, storage_capacity_warning
+```
+
+## 3. 能力二:任务依赖图可视化
+
+### 3.1 实现
+
+`TaskDependencyGraph` 组件用 ECharts GraphChart:
+- **节点**:任务,按 `lastStatus` 着色(绿成功/红失败/蓝执行/灰空闲)
+- **边**:`dependsOnTaskIds` → 当前任务(上游 → 下游)
+- **布局**:force 物理仿真,支持拖拽/缩放
+- **过滤**:只显示有依赖关系的任务(孤立节点忽略减噪)
+
+### 3.2 集成
+
+任务页 `BackupTasksPage` 表格上方嵌入。无依赖时显示 Empty 引导。
+
+## 4. 能力三:UI 闭环
+
+### 4.1 任务表单 - 上游依赖选择器
+
+`BackupTaskFormDrawer` 新增 "任务依赖" 区块:
+- 多选 Select:系统内所有任务(排除自己)
+- 帮助文案说明循环依赖自动检测
+
+`BackupTasksPage` 传入 `allTasks`。
+
+### 4.2 存储表单 - 配额输入
+
+`StorageTargetFormDrawer` 新增 "容量配额(GB)":
+- InputNumber(GB 单位,0 = 不限制)
+- 内部存 bytes,显示 GB
+- 帮助文案区分软配额与 85% 预警
+
+## 5. 数据结构
+
+- 前端 Types:`backup-tasks.dependsOnTaskIds` + `storage-targets.quotaBytes`
+- 无数据库变更(后端字段已落地)
+
+## 6. 双 review
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 7. 未做
+
+- Agent 自更新
+- 加密密钥轮换
+- PITR 增量备份
+- SSO / OIDC
+- Dashboard 事件流 Toast 展示(当前仅静默刷新)
+- 事件历史面板(内存事件可查询)
diff --git a/docs/superpowers/specs/2026-04-20-window-template-version-design.md b/docs/superpowers/specs/2026-04-20-window-template-version-design.md
new file mode 100644
index 0000000..53bab4a
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-20-window-template-version-design.md
@@ -0,0 +1,129 @@
+# 设计文档:维护窗口 + 任务模板 + Agent 版本感知 + 集群概览
+
+- 日期:2026-04-20
+- 范围:第六轮企业级增强,聚焦集群规模化运维
+- 状态:已落地
+
+## 1. 目标
+
+前五轮已完成:集群路由、验证、SLA 监控、RBAC、API Key、事件总线、节点配额、备份复制、存储健康。
+
+本轮补齐集群规模化运维最后一公里:
+1. **维护窗口**:业务高峰期禁止备份调度
+2. **任务模板**:一次保存,N 次批量创建(100+ 主机刚需)
+3. **Agent 版本感知**:节点 Agent 落后 Master 主动告警
+4. **集群概览**:Dashboard 一眼看齐所有节点健康度
+
+## 2. 能力一:维护窗口
+
+### 2.1 模型
+- 新字段 `BackupTask.MaintenanceWindows` CSV
+- 语法:`time=HH:MM-HH:MM` 或 `days=mon|tue,time=22:00-06:00`
+- 支持多段(`;` 分隔)、跨午夜(start > end)、指定星期
+
+### 2.2 核心实现
+`backup/window.go` 新增:
+- `ParseMaintenanceWindows(string) → []MaintenanceWindow`
+- `IsWithinWindow(t, windows) bool` — 判断 t 是否在任一窗口
+- `ValidateMaintenanceWindows(string) error` — 输入合法性校验
+
+### 2.3 集成
+- **调度器**:`syncTaskLocked` cron fire 时校验当前时间,非窗口跳过并审计
+- **手动执行**:`BackupExecutionService.startTask` 同样校验(防止业务高峰误触发)
+- **前端**:任务表单新增"维护窗口"输入 + 帮助文案
+
+### 2.4 测试
+`backup/window_test.go` 覆盖:同日/跨夜/星期过滤/多段组合/无效输入
+
+## 3. 能力二:任务模板
+
+### 3.1 模型
+```go
+TaskTemplate {
+ ID, Name, Description, TaskType
+ Payload string // 序列化的 BackupTaskUpsertInput
+ CreatedBy
+ CreatedAt, UpdatedAt
+}
+```
+
+### 3.2 服务
+`TaskTemplateService`:
+- CRUD:`List / Get / Create / Update / Delete`
+- 批量应用:`Apply(id, input) → []Result`
+ - 每个 Variables 条目 name 必填,覆盖模板 Name
+ - sourcePath / sourcePaths / dbHost / dbName / tags / nodeId 若提供则覆盖
+ - best-effort:单个失败不影响其他,返回详细结果
+
+### 3.3 HTTP
+```
+GET /task-templates 列表
+GET /task-templates/:id 详情
+POST /task-templates 创建(operator+)
+PUT /task-templates/:id 更新(operator+)
+DELETE /task-templates/:id 删除(operator+)
+POST /task-templates/:id/apply 批量应用(operator+)
+```
+
+### 3.4 前端
+- 新菜单 `/task-templates`
+- 列表 + 每行"应用"按钮 → Modal 动态添加行 → 批量创建 → 展示结果表
+- 对 viewer 隐藏写入操作
+
+## 4. 能力三:Agent 版本感知
+
+### 4.1 实现
+`ClusterVersionMonitor`:
+- 每 30 分钟扫描所有远程节点
+- 比较 `node.AgentVer` vs `master.Version`(major.minor 级别)
+- 落后节点派发 `agent_outdated` 事件
+- 同节点 24 小时内只告警一次
+- 版本升级后自动清除记忆,允许下次落后再告警
+
+### 4.2 版本比较策略
+- 宽松策略:只比 `major.minor`,放过 patch 差异避免小版本发布噪音
+- `dev` 版本 / 空版本不告警
+- 解析失败保守不告警
+
+### 4.3 事件
+新增 `agent_outdated`,接入现有 Notification 总线
+
+## 5. 能力四:Dashboard 集群概览
+
+### 5.1 API
+`GET /dashboard/cluster` 返回:
+- Master 版本
+- 总节点数、在线数、离线数、过期 Agent 数
+- 每节点详情:名称/主机名/状态/版本/版本状态/任务数/最近心跳
+
+### 5.2 前端
+Dashboard 新增"集群概览"卡片:
+- 4 个统计指标
+- 节点列表表格(状态徽章、版本状态着色)
+- 仅在 totalNodes > 0 时展示(单节点场景不打扰)
+
+## 6. 事件总线扩展
+
+新事件:`agent_outdated`
+订阅方式与其他企业事件一致(Notification.EventTypes CSV)
+
+## 7. 数据迁移
+
+- 新表:`task_templates`
+- 新字段:`backup_tasks.maintenance_windows`
+- 全 AutoMigrate,向后兼容
+
+## 8. 双 review 通过
+
+- `go build ./...` ✅ `go vet ./...` ✅ `go test ./... -count=1` ✅
+- 新增测试:`backup/window_test.go` 6 条(同日/跨夜/星期/多段/无效/空)
+- `npx tsc --noEmit` ✅ `npm run build` ✅
+
+## 9. 未做(下一轮)
+
+- Agent 自更新(远程分发二进制 + 信任链)
+- 备份加密密钥轮换
+- WebSocket 实时 Dashboard
+- 报表 PDF/CSV 导出
+- PITR 增量备份
+- SSO / OIDC
diff --git a/server/internal/agent/agent.go b/server/internal/agent/agent.go
index 843b3a9..93bedf3 100644
--- a/server/internal/agent/agent.go
+++ b/server/internal/agent/agent.go
@@ -11,6 +11,8 @@ import (
"strings"
"sync"
"time"
+
+ "backupx/server/internal/backup"
)
// Agent 是 Agent 进程的主控制器。
@@ -131,6 +133,12 @@ func (a *Agent) pollAndHandleOnce(ctx context.Context) {
a.handleRunTask(ctx, cmd)
case "list_dir":
a.handleListDir(ctx, cmd)
+ case "restore_record":
+ a.handleRestoreRecord(ctx, cmd)
+ case "discover_db":
+ a.handleDiscoverDB(ctx, cmd)
+ case "delete_storage_object":
+ a.handleDeleteStorageObject(ctx, cmd)
default:
msg := fmt.Sprintf("unknown command type: %s", cmd.Type)
log.Printf("[agent] %s", msg)
@@ -158,6 +166,83 @@ func (a *Agent) handleRunTask(ctx context.Context, cmd *CommandPayload) {
})
}
+// handleRestoreRecord 处理 restore_record 命令
+func (a *Agent) handleRestoreRecord(ctx context.Context, cmd *CommandPayload) {
+ var payload struct {
+ RestoreRecordID uint `json:"restoreRecordId"`
+ }
+ if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
+ return
+ }
+ if payload.RestoreRecordID == 0 {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "restoreRecordId is required", nil)
+ return
+ }
+ if err := a.executor.ExecuteRestore(ctx, payload.RestoreRecordID); err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
+ return
+ }
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{
+ "restoreRecordId": payload.RestoreRecordID,
+ })
+}
+
+// handleDeleteStorageObject 处理 delete_storage_object 命令:在 Agent 侧删除指定存储对象。
+// 用于跨节点 local_disk 场景下的远程备份文件清理。
+func (a *Agent) handleDeleteStorageObject(ctx context.Context, cmd *CommandPayload) {
+ var payload struct {
+ TargetType string `json:"targetType"`
+ TargetConfig map[string]any `json:"targetConfig"`
+ StoragePath string `json:"storagePath"`
+ }
+ if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
+ return
+ }
+ if strings.TrimSpace(payload.StoragePath) == "" {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "storagePath is required", nil)
+ return
+ }
+ provider, err := a.executor.storageRegistry.Create(ctx, payload.TargetType, payload.TargetConfig)
+ if err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "create provider: "+err.Error(), nil)
+ return
+ }
+ if err := provider.Delete(ctx, payload.StoragePath); err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "delete object: "+err.Error(), nil)
+ return
+ }
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{"deleted": true})
+}
+
+// handleDiscoverDB 处理 discover_db 命令:在 Agent 本机执行 mysql/psql 列出数据库。
+func (a *Agent) handleDiscoverDB(ctx context.Context, cmd *CommandPayload) {
+ var payload struct {
+ Type string `json:"type"`
+ Host string `json:"host"`
+ Port int `json:"port"`
+ User string `json:"user"`
+ Password string `json:"password"`
+ }
+ if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
+ return
+ }
+ databases, err := backup.DiscoverDatabases(ctx, backup.NewOSCommandExecutor(), backup.DiscoverRequest{
+ Type: payload.Type,
+ Host: payload.Host,
+ Port: payload.Port,
+ User: payload.User,
+ Password: payload.Password,
+ })
+ if err != nil {
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
+ return
+ }
+ _ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{"databases": databases})
+}
+
// handleListDir 处理 list_dir 命令(阶段四实现)
func (a *Agent) handleListDir(ctx context.Context, cmd *CommandPayload) {
var payload struct {
diff --git a/server/internal/agent/client.go b/server/internal/agent/client.go
index 5b9da08..6b22c86 100644
--- a/server/internal/agent/client.go
+++ b/server/internal/agent/client.go
@@ -158,6 +158,52 @@ func (c *MasterClient) UpdateRecord(ctx context.Context, recordID uint, update R
return c.do(ctx, http.MethodPost, path, update, nil)
}
+// RestoreSpec 与 service.AgentRestoreSpec 对齐
+type RestoreSpec struct {
+ RestoreRecordID uint `json:"restoreRecordId"`
+ BackupRecordID uint `json:"backupRecordId"`
+ TaskID uint `json:"taskId"`
+ TaskName string `json:"taskName"`
+ Type string `json:"type"`
+ SourcePath string `json:"sourcePath,omitempty"`
+ SourcePaths []string `json:"sourcePaths,omitempty"`
+ DBHost string `json:"dbHost,omitempty"`
+ DBPort int `json:"dbPort,omitempty"`
+ DBUser string `json:"dbUser,omitempty"`
+ DBPassword string `json:"dbPassword,omitempty"`
+ DBName string `json:"dbName,omitempty"`
+ DBPath string `json:"dbPath,omitempty"`
+ ExtraConfig string `json:"extraConfig,omitempty"`
+ Compression string `json:"compression"`
+ Encrypt bool `json:"encrypt"`
+ Storage StorageTargetConfig `json:"storage"`
+ StoragePath string `json:"storagePath"`
+ FileName string `json:"fileName"`
+}
+
+// RestoreUpdate 与 service.AgentRestoreUpdate 对齐
+type RestoreUpdate struct {
+ Status string `json:"status,omitempty"`
+ ErrorMessage string `json:"errorMessage,omitempty"`
+ LogAppend string `json:"logAppend,omitempty"`
+}
+
+// GetRestoreSpec 拉取恢复规格
+func (c *MasterClient) GetRestoreSpec(ctx context.Context, restoreRecordID uint) (*RestoreSpec, error) {
+ var spec RestoreSpec
+ path := fmt.Sprintf("/api/agent/restores/%d/spec", restoreRecordID)
+ if err := c.do(ctx, http.MethodGet, path, nil, &spec); err != nil {
+ return nil, err
+ }
+ return &spec, nil
+}
+
+// UpdateRestore 上报恢复记录的状态/日志
+func (c *MasterClient) UpdateRestore(ctx context.Context, restoreRecordID uint, update RestoreUpdate) error {
+ path := fmt.Sprintf("/api/agent/restores/%d", restoreRecordID)
+ return c.do(ctx, http.MethodPost, path, update, nil)
+}
+
// do 是通用 HTTP 调用。所有 Agent API 都统一走 JSON + X-Agent-Token。
func (c *MasterClient) do(ctx context.Context, method, path string, body any, out any) error {
var reqBody io.Reader
diff --git a/server/internal/agent/executor.go b/server/internal/agent/executor.go
index 80cb33e..4386aa1 100644
--- a/server/internal/agent/executor.go
+++ b/server/internal/agent/executor.go
@@ -238,6 +238,180 @@ func (l *recordLogger) WriteLine(message string) {
_ = l.client.UpdateRecord(l.ctx, l.recordID, RecordUpdate{LogAppend: message + "\n"})
}
+// restoreLogger 把 runner 日志回传到 Master 恢复记录。
+type restoreLogger struct {
+ ctx context.Context
+ client *MasterClient
+ restoreID uint
+}
+
+func newRestoreLogger(ctx context.Context, client *MasterClient, restoreID uint) *restoreLogger {
+ return &restoreLogger{ctx: ctx, client: client, restoreID: restoreID}
+}
+
+func (l *restoreLogger) WriteLine(message string) {
+ _ = l.client.UpdateRestore(l.ctx, l.restoreID, RestoreUpdate{LogAppend: message + "\n"})
+}
+
+// DeleteStorageObject 在 Agent 本机上删除指定存储对象(供跨节点清理调用)。
+func (e *Executor) DeleteStorageObject(ctx context.Context, targetType string, targetConfig map[string]any, storagePath string) error {
+ provider, err := e.storageRegistry.Create(ctx, targetType, targetConfig)
+ if err != nil {
+ return fmt.Errorf("create provider: %w", err)
+ }
+ return provider.Delete(ctx, storagePath)
+}
+
+// ExecuteRestore 处理 restore_record 命令:拉规格 → 下载 → 解压 → 执行 runner.Restore → 上报结果。
+//
+// 与 ExecuteRunTask 对称,但方向相反:
+// - 下载:通过 spec.Storage 创建 provider → Download(spec.StoragePath)
+// - 解密:当前 Agent 不支持加密恢复(密钥未下发),spec.Encrypt=true 会直接失败
+// - 执行:backup.Registry.Runner(spec.Type).Restore
+// - 上报:通过 UpdateRestore(status/logAppend)
+func (e *Executor) ExecuteRestore(ctx context.Context, restoreRecordID uint) error {
+ spec, err := e.client.GetRestoreSpec(ctx, restoreRecordID)
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("拉取恢复规格失败: %v", err))
+ return err
+ }
+ if spec.Encrypt {
+ msg := "Agent 不支持加密恢复(加密密钥仅在 Master 端持有)"
+ e.reportRestoreFailure(ctx, restoreRecordID, msg)
+ return fmt.Errorf("%s", msg)
+ }
+ e.appendRestoreLog(ctx, restoreRecordID, fmt.Sprintf("[agent] 开始恢复 %s (type=%s)\n", spec.TaskName, spec.Type))
+
+ if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建临时目录失败: %v", err))
+ return err
+ }
+ tmpDir, err := os.MkdirTemp(e.tempDir, "restore-*")
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建恢复临时目录失败: %v", err))
+ return err
+ }
+ defer os.RemoveAll(tmpDir)
+
+ // 1) 创建 storage provider
+ var rawConfig map[string]any
+ if len(spec.Storage.Config) > 0 {
+ if err := jsonUnmarshalMap(spec.Storage.Config, &rawConfig); err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("解析存储配置失败: %v", err))
+ return err
+ }
+ }
+ provider, err := e.storageRegistry.Create(ctx, spec.Storage.Type, rawConfig)
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("创建存储客户端失败: %v", err))
+ return err
+ }
+
+ // 2) 下载
+ fileName := spec.FileName
+ if strings.TrimSpace(fileName) == "" {
+ fileName = filepath.Base(spec.StoragePath)
+ }
+ artifactPath := filepath.Join(tmpDir, filepath.Base(fileName))
+ e.appendRestoreLog(ctx, restoreRecordID, fmt.Sprintf("[agent] 下载备份文件 %s\n", spec.StoragePath))
+ reader, err := provider.Download(ctx, spec.StoragePath)
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("下载备份失败: %v", err))
+ return err
+ }
+ if err := writeReaderToLocal(artifactPath, reader); err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("写入备份文件失败: %v", err))
+ return err
+ }
+
+ // 3) 解压(Agent 不支持加密,遇到 .enc 会直接失败)
+ preparedPath := artifactPath
+ if strings.HasSuffix(strings.ToLower(preparedPath), ".enc") {
+ msg := "检测到加密后缀,Agent 不支持加密恢复"
+ e.reportRestoreFailure(ctx, restoreRecordID, msg)
+ return fmt.Errorf("%s", msg)
+ }
+ if strings.HasSuffix(strings.ToLower(preparedPath), ".gz") {
+ e.appendRestoreLog(ctx, restoreRecordID, "[agent] 解压 gzip 压缩\n")
+ decompressed, err := compress.GunzipFile(preparedPath)
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("解压失败: %v", err))
+ return err
+ }
+ preparedPath = decompressed
+ }
+
+ // 4) 运行 runner.Restore
+ taskSpec := buildRestoreBackupTaskSpec(spec, time.Now().UTC(), tmpDir)
+ runner, err := e.backupRegistry.Runner(taskSpec.Type)
+ if err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, fmt.Sprintf("不支持的备份类型: %v", err))
+ return err
+ }
+ logger := newRestoreLogger(ctx, e.client, restoreRecordID)
+ if err := runner.Restore(ctx, taskSpec, preparedPath, logger); err != nil {
+ e.reportRestoreFailure(ctx, restoreRecordID, err.Error())
+ return err
+ }
+
+ // 5) 上报成功
+ return e.client.UpdateRestore(ctx, restoreRecordID, RestoreUpdate{
+ Status: "success",
+ LogAppend: "[agent] 恢复执行完成\n",
+ })
+}
+
+func (e *Executor) appendRestoreLog(ctx context.Context, restoreID uint, line string) {
+ _ = e.client.UpdateRestore(ctx, restoreID, RestoreUpdate{LogAppend: line})
+}
+
+func (e *Executor) reportRestoreFailure(ctx context.Context, restoreID uint, msg string) {
+ _ = e.client.UpdateRestore(ctx, restoreID, RestoreUpdate{
+ Status: "failed",
+ ErrorMessage: msg,
+ LogAppend: fmt.Sprintf("[agent] 错误: %s\n", msg),
+ })
+}
+
+// buildRestoreBackupTaskSpec 把 RestoreSpec 转成 backup.TaskSpec。
+func buildRestoreBackupTaskSpec(spec *RestoreSpec, startedAt time.Time, tempDir string) backup.TaskSpec {
+ return backup.TaskSpec{
+ ID: spec.TaskID,
+ Name: spec.TaskName,
+ Type: spec.Type,
+ SourcePath: spec.SourcePath,
+ SourcePaths: spec.SourcePaths,
+ ExcludePatterns: nil,
+ Database: backup.DatabaseSpec{
+ Host: spec.DBHost,
+ Port: spec.DBPort,
+ User: spec.DBUser,
+ Password: spec.DBPassword,
+ Path: spec.DBPath,
+ Names: splitCommaOrNewline(spec.DBName),
+ },
+ Compression: spec.Compression,
+ Encrypt: spec.Encrypt,
+ StartedAt: startedAt,
+ TempDir: tempDir,
+ }
+}
+
+// writeReaderToLocal 把 reader 写到本地文件(Agent 侧工具函数)。
+func writeReaderToLocal(targetPath string, reader io.ReadCloser) error {
+ defer reader.Close()
+ if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil {
+ return err
+ }
+ file, err := os.Create(targetPath)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+ _, err = io.Copy(file, reader)
+ return err
+}
+
// 辅助函数
func computeFileSHA256(path string) (string, error) {
diff --git a/server/internal/app/app.go b/server/internal/app/app.go
index 960e85b..9b26d67 100644
--- a/server/internal/app/app.go
+++ b/server/internal/app/app.go
@@ -80,6 +80,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
storageTargetService.SetBackupRecordRepository(backupRecordRepo)
backupTaskService := service.NewBackupTaskService(backupTaskRepo, storageTargetRepo, configCipher)
backupTaskService.SetRecordsAndStorage(backupRecordRepo, storageRegistry)
+ // nodeRepo 在下方 Cluster 节点管理区块才实例化,这里延后注入
backupRunnerRegistry := backup.NewRegistry(backup.NewFileRunner(), backup.NewSQLiteRunner(), backup.NewMySQLRunner(nil), backup.NewPostgreSQLRunner(nil), backup.NewSAPHANARunner(nil))
logHub := backup.NewLogHub()
retentionService := backupretention.NewService(backupRecordRepo)
@@ -97,6 +98,9 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
backupTaskService.SetScheduler(schedulerService)
// 审计日志注入延迟到 auditService 创建后(见下方)
backupRecordService := service.NewBackupRecordService(backupRecordRepo, backupExecutionService, logHub)
+ // 恢复服务:使用独立 LogHub 避免恢复记录与备份记录 ID 命名空间冲突
+ restoreRecordRepo := repository.NewRestoreRecordRepository(db)
+ restoreLogHub := backup.NewLogHub()
dashboardService := service.NewDashboardService(backupTaskRepo, backupRecordRepo, storageTargetRepo)
settingsService := service.NewSettingsService(systemConfigRepo)
@@ -106,11 +110,13 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
authService.SetAuditService(auditService)
schedulerService.SetAuditRecorder(auditService)
- // Database discovery
+ // Database discovery(集群依赖在 agentService 创建后注入)
databaseDiscoveryService := service.NewDatabaseDiscoveryService(backup.NewOSCommandExecutor())
// Cluster: Node management
nodeRepo := repository.NewNodeRepository(db)
+ backupTaskService.SetNodeRepository(nodeRepo)
+ schedulerService.SetNodeRepository(nodeRepo)
nodeService := service.NewNodeService(nodeRepo, version)
nodeService.SetTaskRepository(backupTaskRepo)
if err := nodeService.EnsureLocalNode(ctx); err != nil {
@@ -122,6 +128,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
// Agent 协议服务:命令队列 + 任务下发 + 记录上报
agentCmdRepo := repository.NewAgentCommandRepository(db)
agentService := service.NewAgentService(nodeRepo, backupTaskRepo, backupRecordRepo, storageTargetRepo, agentCmdRepo, configCipher)
+ agentService.SetRestoreRepository(restoreRecordRepo)
agentService.StartCommandTimeoutMonitor(ctx, 30*time.Second, 10*time.Minute)
// 一键部署:install token service + 后台 GC
@@ -133,6 +140,91 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
backupExecutionService.SetClusterDependencies(nodeRepo, agentService)
// 启用远程目录浏览:NodeService 通过 AgentService 做同步 RPC
nodeService.SetAgentRPC(agentService)
+ // 启用远程数据库发现:远程节点任务配置时 DatabasePicker 拿到的是节点视角的 DB 列表
+ databaseDiscoveryService.SetClusterDependencies(nodeRepo, agentService)
+
+ // 恢复服务:集群感知(本地/远程路由),依赖 agentService 入队
+ restoreService := service.NewRestoreService(
+ restoreRecordRepo,
+ backupRecordRepo,
+ backupTaskRepo,
+ storageTargetRepo,
+ nodeRepo,
+ storageRegistry,
+ backupRunnerRegistry,
+ restoreLogHub,
+ configCipher,
+ agentService,
+ cfg.Backup.TempDir,
+ cfg.Backup.MaxConcurrent,
+ )
+
+ // 验证服务:定期校验备份可恢复性(企业合规刚需)
+ verificationRecordRepo := repository.NewVerificationRecordRepository(db)
+ verifyLogHub := backup.NewLogHub()
+ verificationService := service.NewVerificationService(
+ verificationRecordRepo,
+ backupRecordRepo,
+ backupTaskRepo,
+ storageTargetRepo,
+ nodeRepo,
+ storageRegistry,
+ verifyLogHub,
+ configCipher,
+ cfg.Backup.TempDir,
+ cfg.Backup.MaxConcurrent,
+ )
+ // 验证失败通知:通过 NotificationService 的事件总线派发 verify_failed
+ verificationService.SetNotifier(service.NewVerificationEventNotifier(notificationService))
+ // 恢复完成/失败事件派发(restore_success / restore_failed)
+ restoreService.SetEventDispatcher(notificationService)
+ // 调度器接入验证演练 cron
+ schedulerService.SetVerifyRunner(verificationService)
+
+ // 用户管理与 API Key 服务(企业级 RBAC)
+ userService := service.NewUserService(userRepo)
+ apiKeyRepo := repository.NewApiKeyRepository(db)
+ apiKeyService := service.NewApiKeyService(apiKeyRepo)
+
+ // SLA 后台扫描:每 15 分钟扫描违约任务,同任务 6 小时内不重复派发
+ dashboardService.StartSLAMonitor(ctx, notificationService, 15*time.Minute, 6*time.Hour)
+ // 存储目标健康扫描:每 5 分钟测试启用目标,掉线即告警
+ storageTargetService.StartHealthMonitor(ctx, notificationService, 5*time.Minute)
+
+ // 备份复制服务(3-2-1 规则核心)
+ replicationRecordRepo := repository.NewReplicationRecordRepository(db)
+ replicationService := service.NewReplicationService(
+ replicationRecordRepo, backupRecordRepo, storageTargetRepo,
+ nodeRepo, storageRegistry, configCipher,
+ cfg.Backup.TempDir, cfg.Backup.MaxConcurrent,
+ )
+ replicationService.SetEventDispatcher(notificationService)
+ backupExecutionService.SetReplicationTrigger(replicationService)
+ // 备份成功后触发下游依赖任务(任务依赖链工作流)
+ backupExecutionService.SetDependentsResolver(backupTaskService)
+
+ // 任务模板(批量创建)
+ taskTemplateRepo := repository.NewTaskTemplateRepository(db)
+ taskTemplateService := service.NewTaskTemplateService(taskTemplateRepo, backupTaskService)
+
+ // 任务配置导入/导出(JSON,集群迁移 & 灾备)
+ taskExportService := service.NewTaskExportService(backupTaskService, backupTaskRepo, storageTargetRepo, nodeRepo)
+
+ // 全局搜索(跨任务/存储/节点/最近记录)
+ searchService := service.NewSearchService(backupTaskRepo, backupRecordRepo, storageTargetRepo, nodeRepo)
+
+ // 实时事件广播器(SSE 推送给前端 Dashboard)
+ // 注入 notification 后,每次 DispatchEvent 同时 broadcast 到所有 SSE 订阅者
+ eventBroadcaster := service.NewEventBroadcaster()
+ notificationService.SetBroadcaster(eventBroadcaster)
+
+ // 集群版本监控:每 30 分钟扫描,节点 24 小时内只告警一次
+ clusterVersionMonitor := service.NewClusterVersionMonitor(nodeRepo, version)
+ clusterVersionMonitor.SetEventDispatcher(notificationService)
+ clusterVersionMonitor.Start(ctx, 30*time.Minute, 24*time.Hour)
+
+ // Dashboard 集群概览依赖注入
+ dashboardService.SetClusterDependencies(nodeRepo, version)
router := aphttp.NewRouter(aphttp.RouterDependencies{
Context: ctx,
@@ -145,6 +237,15 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
BackupTaskService: backupTaskService,
BackupExecutionService: backupExecutionService,
BackupRecordService: backupRecordService,
+ RestoreService: restoreService,
+ VerificationService: verificationService,
+ ReplicationService: replicationService,
+ TaskTemplateService: taskTemplateService,
+ TaskExportService: taskExportService,
+ SearchService: searchService,
+ EventBroadcaster: eventBroadcaster,
+ UserService: userService,
+ ApiKeyService: apiKeyService,
NotificationService: notificationService,
DashboardService: dashboardService,
SettingsService: settingsService,
@@ -157,6 +258,7 @@ func New(ctx context.Context, cfg config.Config, version string) (*Application,
SystemConfigRepo: systemConfigRepo,
InstallTokenService: installTokenService,
MasterExternalURL: "", // 如需覆盖 URL,可扩展 cfg.Server 增字段;目前留空依赖 X-Forwarded-* / Request.Host
+ DB: db,
})
httpServer := &stdhttp.Server{
diff --git a/server/internal/backup/discover.go b/server/internal/backup/discover.go
new file mode 100644
index 0000000..9d296b8
--- /dev/null
+++ b/server/internal/backup/discover.go
@@ -0,0 +1,119 @@
+package backup
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "strings"
+ "time"
+)
+
// DiscoverRequest carries the connection parameters for a database
// discovery probe.
//
// Type selects the client binary used for the probe: "mysql" or
// "postgresql" (matched case-insensitively, surrounding spaces ignored).
type DiscoverRequest struct {
	Type     string // database flavour: "mysql" or "postgresql"
	Host     string // server hostname or IP
	Port     int    // TCP port of the server
	User     string // login user
	Password string // login password; passed via env var, never on argv
}
+
+// DiscoverDatabases 通过本机 mysql/psql 客户端连接目标数据库并列出非系统库。
+// 5 秒命令超时。调用方负责传入 CommandExecutor(Master 用 OSCommandExecutor,
+// Agent 同理)。此函数不依赖 service / apperror,便于在 agent 包复用。
+func DiscoverDatabases(ctx context.Context, executor CommandExecutor, req DiscoverRequest) ([]string, error) {
+ switch strings.TrimSpace(strings.ToLower(req.Type)) {
+ case "mysql":
+ return discoverMySQLDatabases(ctx, executor, req)
+ case "postgresql":
+ return discoverPostgreSQLDatabases(ctx, executor, req)
+ default:
+ return nil, fmt.Errorf("unsupported database type: %s", req.Type)
+ }
+}
+
+func discoverMySQLDatabases(ctx context.Context, executor CommandExecutor, req DiscoverRequest) ([]string, error) {
+ mysqlPath, err := executor.LookPath("mysql")
+ if err != nil {
+ return nil, fmt.Errorf("系统未安装 mysql 客户端")
+ }
+ timeout, cancel := context.WithTimeout(ctx, 5*time.Second)
+ defer cancel()
+ var stdout, stderr bytes.Buffer
+ args := []string{
+ fmt.Sprintf("--host=%s", req.Host),
+ fmt.Sprintf("--port=%d", req.Port),
+ fmt.Sprintf("--user=%s", req.User),
+ "-e", "SHOW DATABASES",
+ "--skip-column-names",
+ }
+ env := []string{fmt.Sprintf("MYSQL_PWD=%s", req.Password)}
+ if err := executor.Run(timeout, mysqlPath, args, CommandOptions{
+ Stdout: &stdout,
+ Stderr: &stderr,
+ Env: env,
+ }); err != nil {
+ errMsg := strings.TrimSpace(stderr.String())
+ if errMsg == "" {
+ errMsg = err.Error()
+ }
+ return nil, fmt.Errorf("连接 MySQL 失败:%s", errMsg)
+ }
+ systemDBs := map[string]bool{
+ "information_schema": true,
+ "performance_schema": true,
+ "mysql": true,
+ "sys": true,
+ }
+ var databases []string
+ for _, line := range strings.Split(stdout.String(), "\n") {
+ db := strings.TrimSpace(line)
+ if db == "" || systemDBs[db] {
+ continue
+ }
+ databases = append(databases, db)
+ }
+ return databases, nil
+}
+
// discoverPostgreSQLDatabases lists non-template databases on the target
// server via the local psql client (5-second timeout). The password is
// passed through PGPASSWORD so it never appears on argv.
func discoverPostgreSQLDatabases(ctx context.Context, executor CommandExecutor, req DiscoverRequest) ([]string, error) {
	psqlPath, err := executor.LookPath("psql")
	if err != nil {
		return nil, fmt.Errorf("系统未安装 psql 客户端")
	}
	timeout, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	var stdout, stderr bytes.Buffer
	args := []string{
		"-h", req.Host,
		"-p", fmt.Sprintf("%d", req.Port),
		"-U", req.User,
		"-d", "postgres", // connect to the always-present maintenance DB
		"-t", "-A", // tuples-only, unaligned: one database name per line
		"-c", "SELECT datname FROM pg_database WHERE datistemplate = false ORDER BY datname",
	}
	env := []string{fmt.Sprintf("PGPASSWORD=%s", req.Password)}
	if err := executor.Run(timeout, psqlPath, args, CommandOptions{
		Stdout: &stdout,
		Stderr: &stderr,
		Env:    env,
	}); err != nil {
		// Prefer the client's stderr (usually the actionable message);
		// fall back to the Go error when stderr is empty.
		errMsg := strings.TrimSpace(stderr.String())
		if errMsg == "" {
			errMsg = err.Error()
		}
		return nil, fmt.Errorf("连接 PostgreSQL 失败:%s", errMsg)
	}
	// Hide the maintenance DB; template DBs are also filtered by prefix below.
	skipDBs := map[string]bool{
		"postgres": true,
	}
	var databases []string
	for _, line := range strings.Split(stdout.String(), "\n") {
		db := strings.TrimSpace(line)
		if db == "" || skipDBs[db] || strings.HasPrefix(db, "template") {
			continue
		}
		databases = append(databases, db)
	}
	return databases, nil
}
diff --git a/server/internal/backup/verify.go b/server/internal/backup/verify.go
new file mode 100644
index 0000000..09b0053
--- /dev/null
+++ b/server/internal/backup/verify.go
@@ -0,0 +1,179 @@
+package backup
+
+import (
+ "archive/tar"
+ "bufio"
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+)
+
+// VerifyReport 是 quick 模式的验证结果摘要。
+type VerifyReport struct {
+ TotalEntries int `json:"totalEntries,omitempty"`
+ FileBytes int64 `json:"fileBytes,omitempty"`
+ ChecksumOK bool `json:"checksumOk,omitempty"`
+ Detail string `json:"detail,omitempty"`
+}
+
+// VerifyTarArchive 遍历 tar 归档的每个 header + reader,不写盘。
+// 能检测归档截断、条目损坏、层级不对等常见问题。
+// expectedChecksum 非空时额外对整个文件校验 SHA-256(不做解压)。
+func VerifyTarArchive(artifactPath string, expectedChecksum string) (*VerifyReport, error) {
+ file, err := os.Open(artifactPath)
+ if err != nil {
+ return nil, fmt.Errorf("open tar artifact: %w", err)
+ }
+ defer file.Close()
+ report := &VerifyReport{}
+ h := sha256.New()
+ reader := io.TeeReader(file, h)
+ tr := tar.NewReader(reader)
+ for {
+ header, err := tr.Next()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return report, fmt.Errorf("read tar entry: %w", err)
+ }
+ report.TotalEntries++
+ // 读完条目数据以触发完整性校验(tar 内部 CRC 不严格,但断流会报错)
+ if header.Typeflag == tar.TypeReg || header.Typeflag == tar.TypeRegA {
+ n, copyErr := io.Copy(io.Discard, tr)
+ if copyErr != nil {
+ return report, fmt.Errorf("read entry %s: %w", header.Name, copyErr)
+ }
+ report.FileBytes += n
+ }
+ }
+ // 读完 tar 后继续把剩余字节喂给 hash(tar 结束后可能有零填充尾)
+ if _, err := io.Copy(io.Discard, reader); err != nil {
+ return report, fmt.Errorf("drain remainder: %w", err)
+ }
+ actual := hex.EncodeToString(h.Sum(nil))
+ if strings.TrimSpace(expectedChecksum) != "" {
+ report.ChecksumOK = strings.EqualFold(actual, expectedChecksum)
+ if !report.ChecksumOK {
+ return report, fmt.Errorf("checksum mismatch: expected %s, got %s", expectedChecksum, actual)
+ }
+ } else {
+ report.ChecksumOK = true
+ }
+ report.Detail = fmt.Sprintf("tar 包完整(%d 条目,有效字节 %d)", report.TotalEntries, report.FileBytes)
+ return report, nil
+}
+
+// VerifySQLiteFile 校验 SQLite 文件头魔数。
+// 官方格式:前 16 字节为 "SQLite format 3\000"。
+func VerifySQLiteFile(artifactPath string) (*VerifyReport, error) {
+ file, err := os.Open(artifactPath)
+ if err != nil {
+ return nil, fmt.Errorf("open sqlite artifact: %w", err)
+ }
+ defer file.Close()
+ header := make([]byte, 16)
+ if _, err := io.ReadFull(file, header); err != nil {
+ return nil, fmt.Errorf("read sqlite header: %w", err)
+ }
+ const magic = "SQLite format 3\x00"
+ if string(header) != magic {
+ return &VerifyReport{Detail: "非法的 SQLite 文件头"}, fmt.Errorf("invalid sqlite magic header")
+ }
+ info, _ := file.Stat()
+ var size int64
+ if info != nil {
+ size = info.Size()
+ }
+ return &VerifyReport{
+ FileBytes: size,
+ Detail: fmt.Sprintf("SQLite 文件头合法(总大小 %d 字节)", size),
+ }, nil
+}
+
// VerifyMySQLDump checks whether the file header looks like legitimate
// mysqldump output. The probe passes when the first 4096 bytes (the
// sample verifyDumpHeader reads) contain any of these markers:
//   - "-- MySQL dump"
//   - "-- Server version"
//   - "-- MariaDB dump"
func VerifyMySQLDump(artifactPath string) (*VerifyReport, error) {
	return verifyDumpHeader(artifactPath, []string{"-- MySQL dump", "-- Server version", "-- MariaDB dump"}, "MySQL/MariaDB")
}
+
// VerifyPostgreSQLDump checks the header of a PostgreSQL plain-text dump.
// Typical markers: "-- PostgreSQL database dump" or
// "-- Dumped from database version"; "SET statement_timeout" also counts,
// since pg_dump emits it near the top of plain dumps.
func VerifyPostgreSQLDump(artifactPath string) (*VerifyReport, error) {
	return verifyDumpHeader(artifactPath, []string{"-- PostgreSQL database dump", "-- Dumped from database version", "SET statement_timeout"}, "PostgreSQL")
}
+
+func verifyDumpHeader(artifactPath string, markers []string, label string) (*VerifyReport, error) {
+ file, err := os.Open(artifactPath)
+ if err != nil {
+ return nil, fmt.Errorf("open dump artifact: %w", err)
+ }
+ defer file.Close()
+ reader := bufio.NewReader(file)
+ buf := make([]byte, 4096)
+ n, _ := io.ReadFull(reader, buf)
+ sample := string(buf[:n])
+ matched := ""
+ for _, m := range markers {
+ if strings.Contains(sample, m) {
+ matched = m
+ break
+ }
+ }
+ if matched == "" {
+ return &VerifyReport{Detail: fmt.Sprintf("未在前 %d 字节中发现 %s dump 特征", n, label)}, fmt.Errorf("no %s dump marker in header", label)
+ }
+ info, _ := file.Stat()
+ var size int64
+ if info != nil {
+ size = info.Size()
+ }
+ return &VerifyReport{
+ FileBytes: size,
+ Detail: fmt.Sprintf("%s dump 头部识别标志: %q(文件 %d 字节)", label, matched, size),
+ }, nil
+}
+
+// VerifySAPHANAArchive 校验 SAP HANA 归档 tar 中是否包含 databackup/logbackup 标志文件。
+func VerifySAPHANAArchive(artifactPath string) (*VerifyReport, error) {
+ file, err := os.Open(artifactPath)
+ if err != nil {
+ return nil, fmt.Errorf("open hana archive: %w", err)
+ }
+ defer file.Close()
+ tr := tar.NewReader(file)
+ report := &VerifyReport{}
+ var foundDataBackup bool
+ for {
+ header, err := tr.Next()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return report, fmt.Errorf("read tar entry: %w", err)
+ }
+ report.TotalEntries++
+ name := strings.ToLower(header.Name)
+ if strings.Contains(name, "databackup") || strings.Contains(name, "logbackup") || strings.HasPrefix(name, "hana_") {
+ foundDataBackup = true
+ }
+ if header.Typeflag == tar.TypeReg || header.Typeflag == tar.TypeRegA {
+ n, copyErr := io.Copy(io.Discard, tr)
+ if copyErr != nil {
+ return report, fmt.Errorf("read entry %s: %w", header.Name, copyErr)
+ }
+ report.FileBytes += n
+ }
+ }
+ if !foundDataBackup {
+ return report, fmt.Errorf("HANA archive missing databackup/logbackup markers")
+ }
+ report.Detail = fmt.Sprintf("HANA 归档包含 %d 条目(%d 字节),已识别备份标志文件", report.TotalEntries, report.FileBytes)
+ return report, nil
+}
diff --git a/server/internal/backup/verify_test.go b/server/internal/backup/verify_test.go
new file mode 100644
index 0000000..c808e4d
--- /dev/null
+++ b/server/internal/backup/verify_test.go
@@ -0,0 +1,121 @@
+package backup
+
+import (
+ "archive/tar"
+ "bytes"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
// writeTestTar builds a minimal tar archive from the given name->content
// entries, writes it into a temp dir, and returns its path.
// NOTE(review): map iteration order is random, so entry order in the
// archive is unspecified — fine for these tests, which never assert order.
func writeTestTar(t *testing.T, entries map[string][]byte) string {
	t.Helper()
	path := filepath.Join(t.TempDir(), "test.tar")
	buf := new(bytes.Buffer)
	tw := tar.NewWriter(buf)
	for name, body := range entries {
		header := &tar.Header{Name: name, Mode: 0o644, Size: int64(len(body)), Typeflag: tar.TypeReg}
		if err := tw.WriteHeader(header); err != nil {
			t.Fatalf("write tar header: %v", err)
		}
		if _, err := tw.Write(body); err != nil {
			t.Fatalf("write tar body: %v", err)
		}
	}
	_ = tw.Close()
	if err := os.WriteFile(path, buf.Bytes(), 0o644); err != nil {
		t.Fatalf("write tar file: %v", err)
	}
	return path
}
+
+func TestVerifyTarArchive_Valid(t *testing.T) {
+ path := writeTestTar(t, map[string][]byte{
+ "readme.md": []byte("hello"),
+ "data.bin": []byte("world!!!"),
+ })
+ report, err := VerifyTarArchive(path, "")
+ if err != nil {
+ t.Fatalf("VerifyTarArchive returned error: %v", err)
+ }
+ if report.TotalEntries != 2 {
+ t.Fatalf("expected 2 entries, got %d", report.TotalEntries)
+ }
+ if report.FileBytes == 0 {
+ t.Fatalf("expected non-zero file bytes")
+ }
+ if !report.ChecksumOK {
+ t.Fatalf("checksumOK should be true when expected checksum empty")
+ }
+}
+
func TestVerifyTarArchive_Truncated(t *testing.T) {
	// Build a tar with one large entry, then cut the file mid-body so
	// io.Copy inside VerifyTarArchive hits UnexpectedEOF.
	path := filepath.Join(t.TempDir(), "big.tar")
	buf := new(bytes.Buffer)
	tw := tar.NewWriter(buf)
	body := bytes.Repeat([]byte("x"), 4096)
	_ = tw.WriteHeader(&tar.Header{Name: "big.bin", Mode: 0o644, Size: int64(len(body)), Typeflag: tar.TypeReg})
	_, _ = tw.Write(body)
	_ = tw.Close()
	data := buf.Bytes()
	// Keep the 512-byte header intact; truncate inside the body so
	// tar.Reader only sees the damage while copying entry data.
	truncated := data[:512+1024]
	if err := os.WriteFile(path, truncated, 0o644); err != nil {
		t.Fatalf("write truncated: %v", err)
	}
	if _, err := VerifyTarArchive(path, ""); err == nil {
		t.Fatalf("expected error on truncated tar, got nil")
	}
}
+
+func TestVerifySQLiteFile_Valid(t *testing.T) {
+ path := filepath.Join(t.TempDir(), "ok.db")
+ content := []byte("SQLite format 3\x00" + string(make([]byte, 100)))
+ if err := os.WriteFile(path, content, 0o644); err != nil {
+ t.Fatalf("WriteFile: %v", err)
+ }
+ report, err := VerifySQLiteFile(path)
+ if err != nil {
+ t.Fatalf("VerifySQLiteFile: %v", err)
+ }
+ if report.FileBytes == 0 {
+ t.Fatalf("expected non-zero size")
+ }
+}
+
+func TestVerifySQLiteFile_Invalid(t *testing.T) {
+ path := filepath.Join(t.TempDir(), "bad.db")
+ if err := os.WriteFile(path, []byte("not sqlite at all, some other text"), 0o644); err != nil {
+ t.Fatalf("WriteFile: %v", err)
+ }
+ if _, err := VerifySQLiteFile(path); err == nil {
+ t.Fatalf("expected error on non-sqlite file")
+ }
+}
+
+func TestVerifyMySQLDump(t *testing.T) {
+ path := filepath.Join(t.TempDir(), "dump.sql")
+ content := "-- MySQL dump 10.13 Distrib 8.0.33\n-- Host: localhost\nINSERT INTO foo VALUES (1);\n"
+ if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+ t.Fatalf("WriteFile: %v", err)
+ }
+ report, err := VerifyMySQLDump(path)
+ if err != nil {
+ t.Fatalf("VerifyMySQLDump: %v", err)
+ }
+ if report.Detail == "" {
+ t.Fatalf("expected Detail in report")
+ }
+}
+
+func TestVerifyPostgreSQLDump_Invalid(t *testing.T) {
+ path := filepath.Join(t.TempDir(), "notpg.sql")
+ if err := os.WriteFile(path, []byte("some random text without header markers"), 0o644); err != nil {
+ t.Fatalf("WriteFile: %v", err)
+ }
+ if _, err := VerifyPostgreSQLDump(path); err == nil {
+ t.Fatalf("expected error on non-pg dump")
+ }
+}
diff --git a/server/internal/backup/window.go b/server/internal/backup/window.go
new file mode 100644
index 0000000..c0011a7
--- /dev/null
+++ b/server/internal/backup/window.go
@@ -0,0 +1,180 @@
+package backup
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+)
+
// MaintenanceWindow describes one time window in which backups may run.
//
// Semantics:
//   - Days holds weekday indices "0..6" (0 = Sunday, 6 = Saturday); an
//     empty map means "every day".
//   - StartMinutes / EndMinutes are minutes since midnight, 0 <= v < 1440.
//   - Cross-midnight windows: Start > End spans midnight (e.g. 22:00-06:00).
//
// Multiple windows combine with OR semantics: execution is allowed as soon
// as "now" falls inside any one window.
type MaintenanceWindow struct {
	Days         map[int]bool // allowed weekdays; empty = any day
	StartMinutes int          // window start, minutes since midnight
	EndMinutes   int          // window end (exclusive), minutes since midnight
}
+
+// ParseMaintenanceWindows 解析用户配置(CSV 每项形如 "days=mon,tue|time=22:00-06:00")。
+// 简化语法:多个窗口以 ';' 分隔,每个窗口按 "[days=xxx;]time=HH:MM-HH:MM" 格式。
+// Days 缺省 = 全周;若不合法,跳过该段而非抛错(让调用方尽力工作)。
+// 示例:
+// "time=01:00-05:00" 每天 1 点到 5 点
+// "days=sat,sun;time=00:00-23:59" 仅周末全天
+// "time=22:00-06:00" 每天跨夜
+// "days=mon,tue,wed,thu,fri;time=22:00-06:00" 工作日跨夜
+func ParseMaintenanceWindows(value string) []MaintenanceWindow {
+ v := strings.TrimSpace(value)
+ if v == "" {
+ return nil
+ }
+ segments := strings.Split(v, ";")
+ var windows []MaintenanceWindow
+ for _, segment := range segments {
+ segment = strings.TrimSpace(segment)
+ if segment == "" {
+ continue
+ }
+ window, ok := parseSingleWindow(segment)
+ if !ok {
+ continue
+ }
+ windows = append(windows, window)
+ }
+ return windows
+}
+
+func parseSingleWindow(segment string) (MaintenanceWindow, bool) {
+ // "days=xxx,time=HH:MM-HH:MM" 或 "time=..."
+ fields := strings.Split(segment, ",")
+ days := map[int]bool{}
+ var timeExpr string
+ for _, field := range fields {
+ field = strings.TrimSpace(field)
+ if field == "" {
+ continue
+ }
+ if strings.HasPrefix(field, "days=") {
+ daysPart := strings.TrimPrefix(field, "days=")
+ for _, day := range strings.Split(daysPart, "|") {
+ if idx := parseDayToken(strings.TrimSpace(day)); idx >= 0 {
+ days[idx] = true
+ }
+ }
+ } else if strings.HasPrefix(field, "time=") {
+ timeExpr = strings.TrimPrefix(field, "time=")
+ }
+ }
+ start, end, ok := parseTimeRange(strings.TrimSpace(timeExpr))
+ if !ok {
+ return MaintenanceWindow{}, false
+ }
+ return MaintenanceWindow{Days: days, StartMinutes: start, EndMinutes: end}, true
+}
+
// dayTokens maps every accepted day spelling (short name, full name, or
// numeric string) to its time.Weekday-compatible index (0 = Sunday).
var dayTokens = map[string]int{
	"sun": 0, "sunday": 0, "0": 0,
	"mon": 1, "monday": 1, "1": 1,
	"tue": 2, "tuesday": 2, "2": 2,
	"wed": 3, "wednesday": 3, "3": 3,
	"thu": 4, "thursday": 4, "4": 4,
	"fri": 5, "friday": 5, "5": 5,
	"sat": 6, "saturday": 6, "6": 6,
}
+
+func parseDayToken(value string) int {
+ v := strings.ToLower(strings.TrimSpace(value))
+ if v == "" {
+ return -1
+ }
+ if idx, ok := dayTokens[v]; ok {
+ return idx
+ }
+ return -1
+}
+
+// parseTimeRange 解析 "HH:MM-HH:MM",返回起止分钟数。
+func parseTimeRange(value string) (int, int, bool) {
+ parts := strings.SplitN(value, "-", 2)
+ if len(parts) != 2 {
+ return 0, 0, false
+ }
+ start, ok := parseHHMM(parts[0])
+ if !ok {
+ return 0, 0, false
+ }
+ end, ok := parseHHMM(parts[1])
+ if !ok {
+ return 0, 0, false
+ }
+ return start, end, true
+}
+
// parseHHMM parses "HH:MM" into minutes since midnight; ok is false for
// anything that is not a valid 24-hour wall-clock time.
func parseHHMM(value string) (int, bool) {
	pieces := strings.Split(strings.TrimSpace(value), ":")
	if len(pieces) != 2 {
		return 0, false
	}
	hour, hourErr := strconv.Atoi(strings.TrimSpace(pieces[0]))
	minute, minuteErr := strconv.Atoi(strings.TrimSpace(pieces[1]))
	if hourErr != nil || minuteErr != nil {
		return 0, false
	}
	if hour < 0 || hour > 23 || minute < 0 || minute > 59 {
		return 0, false
	}
	return hour*60 + minute, true
}
+
+// IsWithinWindow 判断 t 是否落入任一窗口。windows 为空或 nil 时总是返回 true(不限制)。
+func IsWithinWindow(t time.Time, windows []MaintenanceWindow) bool {
+ if len(windows) == 0 {
+ return true
+ }
+ minutes := t.Hour()*60 + t.Minute()
+ weekday := int(t.Weekday())
+ for _, w := range windows {
+ if len(w.Days) > 0 && !w.Days[weekday] {
+ continue
+ }
+ if w.StartMinutes == w.EndMinutes {
+ continue
+ }
+ if w.StartMinutes < w.EndMinutes {
+ // 同日窗口
+ if minutes >= w.StartMinutes && minutes < w.EndMinutes {
+ return true
+ }
+ } else {
+ // 跨午夜:[start, 1440) ∪ [0, end)
+ if minutes >= w.StartMinutes || minutes < w.EndMinutes {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+// ValidateMaintenanceWindows 用户输入合法性校验(返回人可读的错误)。
+func ValidateMaintenanceWindows(value string) error {
+ v := strings.TrimSpace(value)
+ if v == "" {
+ return nil
+ }
+ segments := strings.Split(v, ";")
+ for _, segment := range segments {
+ segment = strings.TrimSpace(segment)
+ if segment == "" {
+ continue
+ }
+ if _, ok := parseSingleWindow(segment); !ok {
+ return fmt.Errorf("无效的维护窗口配置: %q(期望格式如 time=22:00-06:00 或 days=sat,sun,time=00:00-23:59)", segment)
+ }
+ }
+ return nil
+}
diff --git a/server/internal/backup/window_test.go b/server/internal/backup/window_test.go
new file mode 100644
index 0000000..38fc50e
--- /dev/null
+++ b/server/internal/backup/window_test.go
@@ -0,0 +1,110 @@
+package backup
+
+import (
+ "testing"
+ "time"
+)
+
// A single same-day window: a time inside it matches, one outside does not.
func TestParseAndCheck_SingleSameDayWindow(t *testing.T) {
	windows := ParseMaintenanceWindows("time=01:00-05:00")
	if len(windows) != 1 {
		t.Fatalf("expected 1 window, got %d", len(windows))
	}
	// Monday 03:00 UTC (no day filter configured).
	at := time.Date(2026, 4, 20, 3, 0, 0, 0, time.UTC)
	if !IsWithinWindow(at, windows) {
		t.Fatalf("expected 03:00 to be inside 01:00-05:00")
	}
	at = time.Date(2026, 4, 20, 6, 0, 0, 0, time.UTC)
	if IsWithinWindow(at, windows) {
		t.Fatalf("expected 06:00 to be outside 01:00-05:00")
	}
}
+
+func TestParseAndCheck_CrossMidnight(t *testing.T) {
+ windows := ParseMaintenanceWindows("time=22:00-06:00")
+ if len(windows) != 1 {
+ t.Fatalf("expected 1 window")
+ }
+ tests := []struct {
+ hour, minute int
+ inside bool
+ }{
+ {22, 30, true},
+ {23, 59, true},
+ {0, 0, true},
+ {3, 0, true},
+ {5, 59, true},
+ {6, 0, false},
+ {7, 0, false},
+ {21, 59, false},
+ }
+ base := time.Date(2026, 4, 20, 0, 0, 0, 0, time.UTC)
+ for _, tc := range tests {
+ at := base.Add(time.Duration(tc.hour)*time.Hour + time.Duration(tc.minute)*time.Minute)
+ if got := IsWithinWindow(at, windows); got != tc.inside {
+ t.Errorf("%02d:%02d expected inside=%v, got %v", tc.hour, tc.minute, tc.inside, got)
+ }
+ }
+}
+
// Day filter: a weekend-only window admits Saturday/Sunday and rejects Monday.
func TestParseAndCheck_DaysFilter(t *testing.T) {
	// Weekend, all day.
	windows := ParseMaintenanceWindows("days=sat|sun,time=00:00-23:59")
	if len(windows) != 1 {
		t.Fatalf("expected 1 window")
	}
	sat := time.Date(2026, 4, 18, 12, 0, 0, 0, time.UTC) // Saturday
	sun := time.Date(2026, 4, 19, 12, 0, 0, 0, time.UTC) // Sunday
	mon := time.Date(2026, 4, 20, 12, 0, 0, 0, time.UTC) // Monday
	if !IsWithinWindow(sat, windows) {
		t.Fatalf("saturday should be inside")
	}
	if !IsWithinWindow(sun, windows) {
		t.Fatalf("sunday should be inside")
	}
	if IsWithinWindow(mon, windows) {
		t.Fatalf("monday should be outside")
	}
}
+
// Multiple windows combine with OR semantics.
func TestParseAndCheck_Multiple(t *testing.T) {
	// Two segments: weekday cross-midnight + weekend all-day.
	windows := ParseMaintenanceWindows("days=mon|tue|wed|thu|fri,time=22:00-06:00;days=sat|sun,time=00:00-23:59")
	if len(windows) != 2 {
		t.Fatalf("expected 2 windows, got %d", len(windows))
	}
	monAfternoon := time.Date(2026, 4, 20, 15, 0, 0, 0, time.UTC)
	if IsWithinWindow(monAfternoon, windows) {
		t.Fatalf("mon 15:00 should be outside both windows")
	}
	monNight := time.Date(2026, 4, 20, 23, 0, 0, 0, time.UTC)
	if !IsWithinWindow(monNight, windows) {
		t.Fatalf("mon 23:00 should be inside weekday-night window")
	}
	sunNoon := time.Date(2026, 4, 19, 12, 0, 0, 0, time.UTC)
	if !IsWithinWindow(sunNoon, windows) {
		t.Fatalf("sun 12:00 should be inside weekend window")
	}
}
+
// Validation: empty input and well-formed segments pass; syntactically
// broken segments and out-of-range times are rejected.
func TestValidateMaintenanceWindows(t *testing.T) {
	if err := ValidateMaintenanceWindows(""); err != nil {
		t.Fatalf("empty should be valid, got %v", err)
	}
	if err := ValidateMaintenanceWindows("time=01:00-05:00"); err != nil {
		t.Fatalf("valid format rejected: %v", err)
	}
	if err := ValidateMaintenanceWindows("bad-input"); err == nil {
		t.Fatalf("invalid format should return error")
	}
	// 25:00 / 30:00 are not valid wall-clock times.
	if err := ValidateMaintenanceWindows("time=25:00-30:00"); err == nil {
		t.Fatalf("invalid hour should return error")
	}
}
+
// With no windows configured, scheduling is unrestricted at any time.
func TestIsWithinWindow_NoWindows(t *testing.T) {
	if !IsWithinWindow(time.Now(), nil) {
		t.Fatalf("no windows should always be inside")
	}
}
diff --git a/server/internal/database/database.go b/server/internal/database/database.go
index 85385ec..74a0543 100644
--- a/server/internal/database/database.go
+++ b/server/internal/database/database.go
@@ -23,7 +23,7 @@ func Open(cfg config.DatabaseConfig, logger *zap.Logger) (*gorm.DB, error) {
return nil, fmt.Errorf("open sqlite: %w", err)
}
- if err := db.AutoMigrate(&model.User{}, &model.SystemConfig{}, &model.StorageTarget{}, &model.OAuthSession{}, &model.BackupTask{}, &model.BackupRecord{}, &model.Notification{}, &model.Node{}, &model.BackupTaskStorageTarget{}, &model.AuditLog{}, &model.AgentCommand{}, &model.AgentInstallToken{}); err != nil {
+ if err := db.AutoMigrate(&model.User{}, &model.SystemConfig{}, &model.StorageTarget{}, &model.OAuthSession{}, &model.BackupTask{}, &model.BackupRecord{}, &model.Notification{}, &model.Node{}, &model.BackupTaskStorageTarget{}, &model.AuditLog{}, &model.AgentCommand{}, &model.AgentInstallToken{}, &model.RestoreRecord{}, &model.VerificationRecord{}, &model.ApiKey{}, &model.ReplicationRecord{}, &model.TaskTemplate{}); err != nil {
return nil, fmt.Errorf("migrate schema: %w", err)
}
diff --git a/server/internal/http/agent_handler.go b/server/internal/http/agent_handler.go
index 9395fce..8750c8f 100644
--- a/server/internal/http/agent_handler.go
+++ b/server/internal/http/agent_handler.go
@@ -14,12 +14,13 @@ import (
// AgentHandler 实现 Agent 调用 Master 的 HTTP API。
// 全部端点通过 X-Agent-Token 头做节点认证,不使用 JWT。
type AgentHandler struct {
- agentService *service.AgentService
- nodeService *service.NodeService
+ agentService *service.AgentService
+ nodeService *service.NodeService
+ restoreService *service.RestoreService
}
-func NewAgentHandler(agentService *service.AgentService, nodeService *service.NodeService) *AgentHandler {
- return &AgentHandler{agentService: agentService, nodeService: nodeService}
// NewAgentHandler wires the agent-facing HTTP endpoints. restoreService
// may be nil, in which case the restore endpoints answer 503 (see
// GetRestoreSpec / UpdateRestore).
func NewAgentHandler(agentService *service.AgentService, nodeService *service.NodeService, restoreService *service.RestoreService) *AgentHandler {
	return &AgentHandler{agentService: agentService, nodeService: nodeService, restoreService: restoreService}
}
// extractToken 从请求头或 JSON body 中提取 Agent Token。
@@ -155,6 +156,58 @@ func (h *AgentHandler) UpdateRecord(c *gin.Context) {
response.Success(c, gin.H{"status": "ok"})
}
+// GetRestoreSpec Agent 拉取恢复规格。
+func (h *AgentHandler) GetRestoreSpec(c *gin.Context) {
+ if h.restoreService == nil {
+ c.JSON(stdhttp.StatusServiceUnavailable, gin.H{"code": "RESTORE_SERVICE_DISABLED", "message": "restore service is not enabled"})
+ return
+ }
+ node, err := h.agentService.AuthenticatedNode(c.Request.Context(), extractToken(c))
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ id, err := strconv.ParseUint(c.Param("id"), 10, 32)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ spec, err := h.restoreService.GetAgentRestoreSpec(c.Request.Context(), node, uint(id))
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, spec)
+}
+
// UpdateRestore receives status/log updates for a restore record from an
// Agent. Answers 503 when the restore service is disabled; the node is
// authenticated via its Agent token before the update is applied.
func (h *AgentHandler) UpdateRestore(c *gin.Context) {
	if h.restoreService == nil {
		c.JSON(stdhttp.StatusServiceUnavailable, gin.H{"code": "RESTORE_SERVICE_DISABLED", "message": "restore service is not enabled"})
		return
	}
	node, err := h.agentService.AuthenticatedNode(c.Request.Context(), extractToken(c))
	if err != nil {
		response.Error(c, err)
		return
	}
	// 32-bit width: strconv's built-in overflow check guards the uint() cast below.
	id, err := strconv.ParseUint(c.Param("id"), 10, 32)
	if err != nil {
		response.Error(c, err)
		return
	}
	var input service.AgentRestoreUpdate
	if err := c.ShouldBindJSON(&input); err != nil {
		c.JSON(stdhttp.StatusBadRequest, gin.H{"code": "INVALID_INPUT", "message": err.Error()})
		return
	}
	if err := h.restoreService.UpdateAgentRestore(c.Request.Context(), node, uint(id), input); err != nil {
		response.Error(c, err)
		return
	}
	response.Success(c, gin.H{"status": "ok"})
}
+
// Self 返回当前 Agent token 所属节点的状态,供安装脚本末尾探活。
func (h *AgentHandler) Self(c *gin.Context) {
node, err := h.agentService.AuthenticatedNode(c.Request.Context(), extractToken(c))
diff --git a/server/internal/http/api_key_handler.go b/server/internal/http/api_key_handler.go
new file mode 100644
index 0000000..037fc6e
--- /dev/null
+++ b/server/internal/http/api_key_handler.go
@@ -0,0 +1,93 @@
+package http
+
+import (
+ "fmt"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// ApiKeyHandler manages API keys (admin-only endpoints).
+type ApiKeyHandler struct {
+ service *service.ApiKeyService
+ auditService *service.AuditService
+}
+
+// NewApiKeyHandler wires the API-key service and the audit logger.
+func NewApiKeyHandler(apiKeyService *service.ApiKeyService, auditService *service.AuditService) *ApiKeyHandler {
+ return &ApiKeyHandler{service: apiKeyService, auditService: auditService}
+}
+
+// List returns all API keys.
+func (h *ApiKeyHandler) List(c *gin.Context) {
+ items, err := h.service.List(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+// Create issues a new API key and records who created it in the audit log.
+func (h *ApiKeyHandler) Create(c *gin.Context) {
+ var input service.ApiKeyCreateInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("API_KEY_INVALID", "API Key 参数不合法", err))
+ return
+ }
+ // Creator is taken from the authenticated username set by AuthMiddleware;
+ // empty when the context key is missing or not a string.
+ creator := ""
+ if username, exists := c.Get(contextUsernameKey); exists {
+ if v, ok := username.(string); ok {
+ creator = v
+ }
+ }
+ result, err := h.service.Create(c.Request.Context(), creator, input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "api_key", "create", "api_key", fmt.Sprintf("%d", result.ApiKey.ID), result.ApiKey.Name,
+ fmt.Sprintf("创建 API Key: %s (角色: %s)", result.ApiKey.Name, result.ApiKey.Role))
+ response.Success(c, result)
+}
+
+// Revoke permanently revokes an API key by ID.
+func (h *ApiKeyHandler) Revoke(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ if err := h.service.Revoke(c.Request.Context(), id); err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "api_key", "revoke", "api_key", fmt.Sprintf("%d", id), "",
+ fmt.Sprintf("撤销 API Key (ID: %d)", id))
+ response.Success(c, gin.H{"revoked": true})
+}
+
+// Toggle enables/disables an API key. Body: {"disabled": bool}.
+func (h *ApiKeyHandler) Toggle(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input struct {
+ Disabled bool `json:"disabled"`
+ }
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("API_KEY_INVALID", "参数不合法", err))
+ return
+ }
+ if err := h.service.ToggleDisabled(c.Request.Context(), id, input.Disabled); err != nil {
+ response.Error(c, err)
+ return
+ }
+ // Audit action/label mirror the requested state.
+ action := "enable"
+ label := "启用"
+ if input.Disabled {
+ action = "disable"
+ label = "停用"
+ }
+ recordAudit(c, h.auditService, "api_key", action, "api_key", fmt.Sprintf("%d", id), "",
+ fmt.Sprintf("%s API Key (ID: %d)", label, id))
+ response.Success(c, gin.H{"disabled": input.Disabled})
+}
diff --git a/server/internal/http/audit_handler.go b/server/internal/http/audit_handler.go
index b2670ea..24676fb 100644
--- a/server/internal/http/audit_handler.go
+++ b/server/internal/http/audit_handler.go
@@ -1,11 +1,18 @@
package http
import (
+ "encoding/csv"
+ "fmt"
+ stdhttp "net/http"
"strconv"
"strings"
+ "time"
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/repository"
"backupx/server/internal/service"
"backupx/server/pkg/response"
+
"github.com/gin-gonic/gin"
)
@@ -17,24 +24,97 @@ func NewAuditHandler(auditService *service.AuditService) *AuditHandler {
return &AuditHandler{auditService: auditService}
}
+// List queries audit logs with multi-field filtering and pagination.
+// Supported params: category, action, username, targetId, keyword, dateFrom, dateTo, limit, offset.
+// Backward compatible: with only category + limit + offset the behavior matches the old version.
func (h *AuditHandler) List(c *gin.Context) {
- category := strings.TrimSpace(c.Query("category"))
- limit := 50
- offset := 0
- if v := strings.TrimSpace(c.Query("limit")); v != "" {
- if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 {
- limit = parsed
- }
+ opts, err := parseAuditFilter(c)
+ if err != nil {
+ response.Error(c, err)
+ return
}
- if v := strings.TrimSpace(c.Query("offset")); v != "" {
- if parsed, err := strconv.Atoi(v); err == nil && parsed >= 0 {
- offset = parsed
- }
- }
- result, err := h.auditService.List(c.Request.Context(), category, limit, offset)
+ result, err := h.auditService.ListAdvanced(c.Request.Context(), opts)
if err != nil {
response.Error(c, err)
return
}
response.Success(c, result)
}
+
+// Export streams a CSV with the same filter params, capped at 10000 rows.
+// The filename carries a timestamp so the browser does not overwrite cached downloads.
+func (h *AuditHandler) Export(c *gin.Context) {
+ opts, err := parseAuditFilter(c)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ // Export is unpaged: clear any limit/offset carried over from the filter.
+ opts.Limit = 0
+ opts.Offset = 0
+ items, err := h.auditService.ExportAll(c.Request.Context(), opts)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ filename := fmt.Sprintf("backupx-audit-%s.csv", time.Now().UTC().Format("20060102-150405"))
+ c.Header("Content-Type", "text/csv; charset=utf-8")
+ c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filename))
+ // UTF-8 BOM so Excel detects the encoding and renders CJK text correctly.
+ _, _ = c.Writer.Write([]byte{0xEF, 0xBB, 0xBF})
+ writer := csv.NewWriter(c.Writer)
+ _ = writer.Write([]string{"时间", "用户", "类别", "动作", "目标类型", "目标 ID", "目标名", "详情", "客户端 IP"})
+ for _, item := range items {
+ _ = writer.Write([]string{
+ item.CreatedAt.UTC().Format(time.RFC3339),
+ item.Username,
+ item.Category,
+ item.Action,
+ item.TargetType,
+ item.TargetID,
+ item.TargetName,
+ item.Detail,
+ item.ClientIP,
+ })
+ }
+ writer.Flush()
+ if err := writer.Error(); err != nil {
+ // NOTE(review): the body (BOM + header row) has already been written, so
+ // this status change is a no-op on the wire — consider logging instead.
+ c.Writer.WriteHeader(stdhttp.StatusInternalServerError)
+ }
+}
+
+// parseAuditFilter parses query parameters into repository list options.
+// Invalid limit/offset values are silently ignored; invalid dates are rejected.
+// NOTE(review): no default Limit is applied here (old List defaulted to 50) —
+// presumably ListAdvanced supplies one; verify in the service layer.
+func parseAuditFilter(c *gin.Context) (repository.AuditLogListOptions, error) {
+ opts := repository.AuditLogListOptions{
+ Category: strings.TrimSpace(c.Query("category")),
+ Action: strings.TrimSpace(c.Query("action")),
+ Username: strings.TrimSpace(c.Query("username")),
+ TargetID: strings.TrimSpace(c.Query("targetId")),
+ Keyword: strings.TrimSpace(c.Query("keyword")),
+ }
+ if v := strings.TrimSpace(c.Query("limit")); v != "" {
+ if n, err := strconv.Atoi(v); err == nil && n > 0 {
+ opts.Limit = n
+ }
+ }
+ if v := strings.TrimSpace(c.Query("offset")); v != "" {
+ if n, err := strconv.Atoi(v); err == nil && n >= 0 {
+ opts.Offset = n
+ }
+ }
+ if v := strings.TrimSpace(c.Query("dateFrom")); v != "" {
+ parsed, err := time.Parse(time.RFC3339, v)
+ if err != nil {
+ return opts, apperror.BadRequest("AUDIT_FILTER_INVALID", "dateFrom 必须为 RFC3339 时间格式", err)
+ }
+ opts.DateFrom = &parsed
+ }
+ if v := strings.TrimSpace(c.Query("dateTo")); v != "" {
+ parsed, err := time.Parse(time.RFC3339, v)
+ if err != nil {
+ return opts, apperror.BadRequest("AUDIT_FILTER_INVALID", "dateTo 必须为 RFC3339 时间格式", err)
+ }
+ opts.DateTo = &parsed
+ }
+ return opts, nil
+}
diff --git a/server/internal/http/backup_record_handler.go b/server/internal/http/backup_record_handler.go
index a312125..4230559 100644
--- a/server/internal/http/backup_record_handler.go
+++ b/server/internal/http/backup_record_handler.go
@@ -16,12 +16,13 @@ import (
)
type BackupRecordHandler struct {
- service *service.BackupRecordService
- auditService *service.AuditService
+ service *service.BackupRecordService
+ // restoreService handles asynchronous restores; may be nil when disabled.
+ restoreService *service.RestoreService
+ auditService *service.AuditService
}
-func NewBackupRecordHandler(recordService *service.BackupRecordService, auditService *service.AuditService) *BackupRecordHandler {
- return &BackupRecordHandler{service: recordService, auditService: auditService}
+// NewBackupRecordHandler wires record, restore and audit services.
+func NewBackupRecordHandler(recordService *service.BackupRecordService, restoreService *service.RestoreService, auditService *service.AuditService) *BackupRecordHandler {
+ return &BackupRecordHandler{service: recordService, restoreService: restoreService, auditService: auditService}
}
func (h *BackupRecordHandler) List(c *gin.Context) {
@@ -121,18 +122,29 @@ func (h *BackupRecordHandler) Download(c *gin.Context) {
_, _ = io.Copy(c.Writer, result.Reader)
}
+// Restore starts one asynchronous restore and returns the restore record
+// detail (restoreRecordId); the actual execution route is decided by
+// RestoreService based on task.NodeID (local Master or remote Agent).
func (h *BackupRecordHandler) Restore(c *gin.Context) {
id, ok := parseUintParam(c, "id")
if !ok {
return
}
- if err := h.service.Restore(c.Request.Context(), id); err != nil {
+ if h.restoreService == nil {
+ response.Error(c, apperror.Internal("RESTORE_SERVICE_DISABLED", "恢复服务未启用", nil))
+ return
+ }
+ // triggeredBy: stringified user subject from AuthMiddleware; empty when absent.
+ triggeredBy := ""
+ if subject, exists := c.Get(contextUserSubjectKey); exists {
+ triggeredBy = strings.TrimSpace(fmt.Sprintf("%v", subject))
+ }
+ detail, err := h.restoreService.Start(c.Request.Context(), id, triggeredBy)
+ if err != nil {
response.Error(c, err)
return
}
recordAudit(c, h.auditService, "backup_record", "restore", "backup_record", fmt.Sprintf("%d", id), "",
- fmt.Sprintf("恢复备份记录 (ID: %d)", id))
- response.Success(c, gin.H{"restored": true})
+ fmt.Sprintf("启动恢复 (备份记录 ID: %d, 恢复记录 ID: %d)", id, detail.ID))
+ response.Success(c, detail)
}
func (h *BackupRecordHandler) Delete(c *gin.Context) {
diff --git a/server/internal/http/backup_run_handler.go b/server/internal/http/backup_run_handler.go
index ce46598..0a0e318 100644
--- a/server/internal/http/backup_run_handler.go
+++ b/server/internal/http/backup_run_handler.go
@@ -3,6 +3,7 @@ package http
import (
"fmt"
+ "backupx/server/internal/apperror"
"backupx/server/internal/service"
"backupx/server/pkg/response"
"github.com/gin-gonic/gin"
@@ -30,3 +31,37 @@ func (h *BackupRunHandler) Run(c *gin.Context) {
recordAudit(c, h.auditService, "backup_task", "run", "backup_task", fmt.Sprintf("%d", id), "", "手动触发备份")
response.Success(c, record)
}
+
+// BatchRun triggers multiple backup tasks. Best-effort: one failure does not
+// stop the others; each per-task outcome is returned in the result list.
+// Body: {"ids": [1,2,3]}. Zero IDs are skipped silently.
+func (h *BackupRunHandler) BatchRun(c *gin.Context) {
+ var input struct {
+ IDs []uint `json:"ids" binding:"required,min=1"`
+ }
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("BACKUP_TASK_BATCH_INVALID", "批量执行参数不合法", err))
+ return
+ }
+ results := make([]service.BatchResult, 0, len(input.IDs))
+ succ := 0
+ for _, id := range input.IDs {
+ if id == 0 {
+ continue
+ }
+ _, err := h.service.RunTaskByID(c.Request.Context(), id)
+ item := service.BatchResult{ID: id, Success: err == nil}
+ if err != nil {
+ // Surface the human-readable message for known app errors.
+ if appErr, ok := err.(*apperror.AppError); ok {
+ item.Error = appErr.Message
+ } else {
+ item.Error = err.Error()
+ }
+ } else {
+ succ++
+ }
+ results = append(results, item)
+ }
+ recordAudit(c, h.auditService, "backup_task", "batch_run", "backup_task", "", "",
+ fmt.Sprintf("批量触发备份 %d/%d", succ, len(results)))
+ response.Success(c, results)
+}
diff --git a/server/internal/http/backup_task_handler.go b/server/internal/http/backup_task_handler.go
index 487a022..3254835 100644
--- a/server/internal/http/backup_task_handler.go
+++ b/server/internal/http/backup_task_handler.go
@@ -40,6 +40,16 @@ func (h *BackupTaskHandler) List(c *gin.Context) {
response.Success(c, items)
}
+// ListTags returns the unique set of tags used across all tasks, feeding the
+// frontend tag selector's suggestion list.
+func (h *BackupTaskHandler) ListTags(c *gin.Context) {
+ tags, err := h.service.ListTags(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, tags)
+}
+
func (h *BackupTaskHandler) Get(c *gin.Context) {
id, ok := parseUintParam(c, "id")
if !ok {
@@ -106,6 +116,55 @@ func (h *BackupTaskHandler) Delete(c *gin.Context) {
response.Success(c, gin.H{"deleted": true})
}
+// BatchToggle / BatchDelete perform bulk operations.
+// Body: {"ids": [1,2,3], "enabled": true} ("enabled" is used by toggle only).
+func (h *BackupTaskHandler) BatchToggle(c *gin.Context) {
+ var input struct {
+ IDs []uint `json:"ids" binding:"required,min=1"`
+ Enabled bool `json:"enabled"`
+ }
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("BACKUP_TASK_BATCH_INVALID", "批量操作参数不合法", err))
+ return
+ }
+ results := h.service.BatchToggle(c.Request.Context(), input.IDs, input.Enabled)
+ succ := 0
+ for _, r := range results {
+ if r.Success {
+ succ++
+ }
+ }
+ // Audit action/label track the requested direction (enable vs disable).
+ action := "batch_enable"
+ label := "启用"
+ if !input.Enabled {
+ action = "batch_disable"
+ label = "停用"
+ }
+ recordAudit(c, h.auditService, "backup_task", action, "backup_task", "", "",
+ fmt.Sprintf("批量%s %d/%d 个任务", label, succ, len(results)))
+ response.Success(c, results)
+}
+
+// BatchDelete removes multiple tasks; per-task outcomes are returned.
+func (h *BackupTaskHandler) BatchDelete(c *gin.Context) {
+ var input struct {
+ IDs []uint `json:"ids" binding:"required,min=1"`
+ }
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("BACKUP_TASK_BATCH_INVALID", "批量删除参数不合法", err))
+ return
+ }
+ results := h.service.BatchDeleteTasks(c.Request.Context(), input.IDs)
+ succ := 0
+ for _, r := range results {
+ if r.Success {
+ succ++
+ }
+ }
+ recordAudit(c, h.auditService, "backup_task", "batch_delete", "backup_task", "", "",
+ fmt.Sprintf("批量删除 %d/%d 个任务", succ, len(results)))
+ response.Success(c, results)
+}
+
+
func (h *BackupTaskHandler) Toggle(c *gin.Context) {
id, ok := parseUintParam(c, "id")
if !ok {
diff --git a/server/internal/http/context.go b/server/internal/http/context.go
index 4acbd2d..2eec687 100644
--- a/server/internal/http/context.go
+++ b/server/internal/http/context.go
@@ -1,3 +1,9 @@
package http
-const contextUserSubjectKey = "userSubject"
+const (
+ // contextUserSubjectKey holds the authenticated subject (JWT subject or API-key subject).
+ contextUserSubjectKey = "userSubject"
+ // contextUserRoleKey holds the caller's role for RBAC checks (RequireRole).
+ contextUserRoleKey = "userRole"
+ // contextUsernameKey holds the display username used for audit records.
+ contextUsernameKey = "username"
+ // contextAuthSubjectKey identifies the auth principal's origin (user | api_key) for audit tracing.
+ contextAuthSubjectKey = "authSubject"
+)
diff --git a/server/internal/http/dashboard_handler.go b/server/internal/http/dashboard_handler.go
index d95eed3..86ac362 100644
--- a/server/internal/http/dashboard_handler.go
+++ b/server/internal/http/dashboard_handler.go
@@ -27,6 +27,58 @@ func (h *DashboardHandler) Stats(c *gin.Context) {
response.Success(c, payload)
}
+// SLA returns the SLA compliance view over all enabled tasks, powering the
+// Dashboard enterprise-compliance card.
+func (h *DashboardHandler) SLA(c *gin.Context) {
+ payload, err := h.service.SLACompliance(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, payload)
+}
+
+// Cluster returns a cluster node overview (online/offline/outdated agents etc.)
+// for the Dashboard cluster card.
+func (h *DashboardHandler) Cluster(c *gin.Context) {
+ payload, err := h.service.ClusterOverview(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, payload)
+}
+
+// NodePerformance returns per-node execution performance (success rate / bytes
+// / average duration) over the last N days. "days" defaults to 30; invalid or
+// non-positive values are silently ignored.
+func (h *DashboardHandler) NodePerformance(c *gin.Context) {
+ days := 30
+ if v := strings.TrimSpace(c.Query("days")); v != "" {
+ if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 {
+ days = parsed
+ }
+ }
+ payload, err := h.service.NodePerformance(c.Request.Context(), days)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, payload)
+}
+
+// Breakdown returns statistics grouped by type/status/node/storage.
+// Same "days" semantics as NodePerformance.
+func (h *DashboardHandler) Breakdown(c *gin.Context) {
+ days := 30
+ if v := strings.TrimSpace(c.Query("days")); v != "" {
+ if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 {
+ days = parsed
+ }
+ }
+ payload, err := h.service.Breakdown(c.Request.Context(), days)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, payload)
+}
+
+
func (h *DashboardHandler) Timeline(c *gin.Context) {
days := 30
if value := strings.TrimSpace(c.Query("days")); value != "" {
diff --git a/server/internal/http/events_handler.go b/server/internal/http/events_handler.go
new file mode 100644
index 0000000..1dce922
--- /dev/null
+++ b/server/internal/http/events_handler.go
@@ -0,0 +1,81 @@
+package http
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// EventsHandler pushes real-time events over SSE.
+// The frontend subscribes to /api/events/stream via EventSource to receive
+// system events live, powering refresh-free Dashboard updates, desktop
+// toasts and real-time alerts.
+type EventsHandler struct {
+ broadcaster *service.EventBroadcaster
+}
+
+// NewEventsHandler wires the shared event broadcaster.
+func NewEventsHandler(broadcaster *service.EventBroadcaster) *EventsHandler {
+ return &EventsHandler{broadcaster: broadcaster}
+}
+
+// Stream holds an SSE long-lived connection. Mount after the JWT/API-Key
+// middleware. Heartbeat: one comment line every 25s so proxies do not drop
+// the idle connection.
+func (h *EventsHandler) Stream(c *gin.Context) {
+ if h.broadcaster == nil {
+ response.Error(c, apperror.Internal("EVENTS_DISABLED", "事件广播器未启用", nil))
+ return
+ }
+ // Check flush capability BEFORE committing SSE headers; otherwise the JSON
+ // error below would be sent with Content-Type: text/event-stream already set.
+ flusher, ok := c.Writer.(interface{ Flush() })
+ if !ok {
+ response.Error(c, apperror.Internal("EVENTS_STREAM_UNSUPPORTED", "当前连接不支持 SSE", nil))
+ return
+ }
+ c.Writer.Header().Set("Content-Type", "text/event-stream")
+ c.Writer.Header().Set("Cache-Control", "no-cache")
+ c.Writer.Header().Set("Connection", "keep-alive")
+ c.Writer.Header().Set("X-Accel-Buffering", "no") // disable nginx buffering
+ // Send a hello comment first so the client can confirm connectivity.
+ _, _ = fmt.Fprintf(c.Writer, ": connected %d\n\n", time.Now().Unix())
+ flusher.Flush()
+
+ ch, cancel := h.broadcaster.Subscribe(32)
+ defer cancel()
+
+ heartbeat := time.NewTicker(25 * time.Second)
+ defer heartbeat.Stop()
+
+ for {
+ select {
+ case <-c.Request.Context().Done():
+ // Client went away; subscription is released by the deferred cancel.
+ return
+ case <-heartbeat.C:
+ if _, err := fmt.Fprintf(c.Writer, ": heartbeat %d\n\n", time.Now().Unix()); err != nil {
+ return
+ }
+ flusher.Flush()
+ case envelope, ok := <-ch:
+ if !ok {
+ // Broadcaster closed the channel (e.g. shutdown).
+ return
+ }
+ if err := writeEventEnvelope(c.Writer, envelope); err != nil {
+ return
+ }
+ flusher.Flush()
+ }
+ }
+}
+
+// writeEventEnvelope serializes one envelope as an SSE "event:"/"data:" pair.
+func writeEventEnvelope(writer io.Writer, envelope service.EventEnvelope) error {
+ data, err := json.Marshal(envelope)
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(writer, "event: %s\ndata: %s\n\n", envelope.Type, data)
+ return err
+}
diff --git a/server/internal/http/health_handler.go b/server/internal/http/health_handler.go
new file mode 100644
index 0000000..430c483
--- /dev/null
+++ b/server/internal/http/health_handler.go
@@ -0,0 +1,75 @@
+package http
+
+import (
+ stdhttp "net/http"
+ "time"
+
+ "github.com/gin-gonic/gin"
+ "gorm.io/gorm"
+)
+
+// HealthHandler provides K8s/Swarm-style health-check endpoints.
+//
+// - /health : liveness probe. 200 as long as the process is alive (no dependency checks).
+// - /ready : readiness probe. Checks database connectivity; 503 when unreachable.
+//
+// Both are public endpoints (no auth middleware) for external orchestrators.
+// Output is kept minimal to avoid leaking internal structure.
+type HealthHandler struct {
+ db *gorm.DB
+ startedAt time.Time
+ version string
+}
+
+// NewHealthHandler records the start time (for uptime) and the build version.
+func NewHealthHandler(db *gorm.DB, version string) *HealthHandler {
+ return &HealthHandler{db: db, startedAt: time.Now().UTC(), version: version}
+}
+
+// Live serves liveness: returns 200 whenever the process can respond.
+func (h *HealthHandler) Live(c *gin.Context) {
+ c.JSON(stdhttp.StatusOK, gin.H{
+ "status": "live",
+ "version": h.version,
+ "uptime": int(time.Since(h.startedAt).Seconds()),
+ "timestamp": time.Now().UTC().Format(time.RFC3339),
+ })
+}
+
+// Ready serves readiness: returns 503 when a dependency (the database) is
+// unavailable, so orchestrators stop routing traffic during startup or a
+// transient DB outage.
+func (h *HealthHandler) Ready(c *gin.Context) {
+ checks := map[string]string{}
+ overallOK := true
+ if h.db != nil {
+ sqlDB, err := h.db.DB()
+ if err != nil {
+ checks["database"] = "error: " + err.Error()
+ overallOK = false
+ } else {
+ // Ping is bounded by the incoming request's context; the previous
+ // no-op cancel closure has been removed.
+ if err := sqlDB.PingContext(c.Request.Context()); err != nil {
+ checks["database"] = "ping failed: " + err.Error()
+ overallOK = false
+ } else {
+ checks["database"] = "ok"
+ }
+ }
+ } else {
+ // No DB wired in: report not-ready rather than lying about health.
+ checks["database"] = "not configured"
+ overallOK = false
+ }
+ status := stdhttp.StatusOK
+ state := "ready"
+ if !overallOK {
+ status = stdhttp.StatusServiceUnavailable
+ state = "not_ready"
+ }
+ c.JSON(status, gin.H{
+ "status": state,
+ "version": h.version,
+ "uptime": int(time.Since(h.startedAt).Seconds()),
+ "checks": checks,
+ "timestamp": time.Now().UTC().Format(time.RFC3339),
+ })
+}
diff --git a/server/internal/http/middleware.go b/server/internal/http/middleware.go
index 9df1d64..7e79a0b 100644
--- a/server/internal/http/middleware.go
+++ b/server/internal/http/middleware.go
@@ -1,6 +1,7 @@
package http
import (
+ "context"
stdhttp "net/http"
"strings"
@@ -26,28 +27,94 @@ func CORSMiddleware() gin.HandlerFunc {
}
}
-func AuthMiddleware(jwtManager *security.JWTManager) gin.HandlerFunc {
+// ApiKeyAuthenticator abstracts API-key verification so the middleware does
+// not depend on the service package directly.
+// Implemented by service.ApiKeyService. When not injected, AuthMiddleware still supports JWT.
+type ApiKeyAuthenticator interface {
+ Authenticate(ctx context.Context, rawKey string) (subject string, role string, err error)
+}
+
+// AuthMiddleware supports two authentication schemes:
+// - JWT (Authorization: Bearer <token>): interactive users
+// - API Key (Authorization: Bearer bax_xxx or X-Api-Key: bax_xxx): third-party scripts
+//
+// JWT writes userSubject / userRole / username into the context;
+// API Key writes authSubject / userRole (subject doubles as the username).
+func AuthMiddleware(jwtManager *security.JWTManager, apiKeyAuth ApiKeyAuthenticator) gin.HandlerFunc {
return func(c *gin.Context) {
- header := strings.TrimSpace(c.GetHeader("Authorization"))
- if !strings.HasPrefix(header, "Bearer ") {
+ rawToken := extractAuthToken(c)
+ if rawToken == "" {
response.Error(c, apperror.Unauthorized("AUTH_REQUIRED", "请先登录", nil))
c.Abort()
return
}
-
- tokenString := strings.TrimSpace(strings.TrimPrefix(header, "Bearer "))
- claims, err := jwtManager.Parse(tokenString)
+ // API-key path: only taken when an authenticator is injected AND the
+ // token carries the bax_ prefix; everything else falls through to JWT.
+ if apiKeyAuth != nil && strings.HasPrefix(rawToken, "bax_") {
+ subject, role, err := apiKeyAuth.Authenticate(c.Request.Context(), rawToken)
+ if err != nil {
+ response.Error(c, err)
+ c.Abort()
+ return
+ }
+ c.Set(contextAuthSubjectKey, subject)
+ c.Set(contextUserRoleKey, role)
+ c.Set(contextUserSubjectKey, subject)
+ c.Set(contextUsernameKey, subject)
+ c.Next()
+ return
+ }
+ claims, err := jwtManager.Parse(rawToken)
if err != nil {
response.Error(c, apperror.Unauthorized("AUTH_INVALID_TOKEN", "登录状态已失效,请重新登录", err))
c.Abort()
return
}
-
c.Set(contextUserSubjectKey, claims.Subject)
+ c.Set(contextUserRoleKey, claims.Role)
+ c.Set(contextUsernameKey, claims.Username)
+ c.Set(contextAuthSubjectKey, "user:"+claims.Subject)
c.Next()
}
}
+// extractAuthToken pulls the raw token from Authorization: Bearer or X-Api-Key.
+// Authorization takes precedence; returns "" when neither is present.
+func extractAuthToken(c *gin.Context) string {
+ header := strings.TrimSpace(c.GetHeader("Authorization"))
+ if strings.HasPrefix(header, "Bearer ") {
+ return strings.TrimSpace(strings.TrimPrefix(header, "Bearer "))
+ }
+ if key := strings.TrimSpace(c.GetHeader("X-Api-Key")); key != "" {
+ return key
+ }
+ return ""
+}
+
+// RequireRole only admits the given roles, otherwise returns 403.
+// Must be mounted after AuthMiddleware. Viewer read-only protection and the
+// admin area both rely on it. Role comparison is case-insensitive.
+func RequireRole(roles ...string) gin.HandlerFunc {
+ allowed := make(map[string]bool, len(roles))
+ for _, r := range roles {
+ allowed[strings.ToLower(r)] = true
+ }
+ return func(c *gin.Context) {
+ role, _ := c.Get(contextUserRoleKey)
+ roleStr := ""
+ if v, ok := role.(string); ok {
+ roleStr = strings.ToLower(v)
+ }
+ // Missing or non-string role falls through to "", which is rejected.
+ if !allowed[roleStr] {
+ response.Error(c, apperror.New(403, "AUTH_FORBIDDEN", "当前角色无权执行此操作", nil))
+ c.Abort()
+ return
+ }
+ c.Next()
+ }
+}
+
+// RequireNotViewer is shorthand for RequireRole(admin, operator), used on any
+// write/mutation endpoint to keep viewers out.
+func RequireNotViewer() gin.HandlerFunc {
+ return RequireRole("admin", "operator")
+}
+
+
func ClientKey(c *gin.Context) string {
ip := strings.TrimSpace(c.ClientIP())
if ip == "" {
diff --git a/server/internal/http/replication_handler.go b/server/internal/http/replication_handler.go
new file mode 100644
index 0000000..44ac3f1
--- /dev/null
+++ b/server/internal/http/replication_handler.go
@@ -0,0 +1,128 @@
+package http
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// ReplicationHandler manages replication records (list + manual trigger).
+type ReplicationHandler struct {
+ service *service.ReplicationService
+ auditService *service.AuditService
+}
+
+// NewReplicationHandler wires the replication service and audit logger.
+func NewReplicationHandler(replicationService *service.ReplicationService, auditService *service.AuditService) *ReplicationHandler {
+ return &ReplicationHandler{service: replicationService, auditService: auditService}
+}
+
+// TriggerByRecord manually replicates a backup record to a destination storage target.
+// Body: {"destTargetId": 12}
+func (h *ReplicationHandler) TriggerByRecord(c *gin.Context) {
+ recordID, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input struct {
+ DestTargetID uint `json:"destTargetId" binding:"required,min=1"`
+ }
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("REPLICATION_INVALID", "复制参数不合法", err))
+ return
+ }
+ // Attribute the run to the authenticated username; fall back to "manual".
+ triggeredBy := ""
+ if subject, exists := c.Get(contextUsernameKey); exists {
+ if v, ok := subject.(string); ok {
+ triggeredBy = v
+ }
+ }
+ if triggeredBy == "" {
+ triggeredBy = "manual"
+ }
+ result, err := h.service.Start(c.Request.Context(), recordID, input.DestTargetID, triggeredBy)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "replication", "manual_run", "backup_record", fmt.Sprintf("%d", recordID), "",
+ fmt.Sprintf("手动触发复制(备份记录 #%d → 存储 #%d, 复制记录 #%d)", recordID, input.DestTargetID, result.ID))
+ response.Success(c, result)
+}
+
+// List returns replication records matching the query filter.
+func (h *ReplicationHandler) List(c *gin.Context) {
+ filter, err := buildReplicationFilter(c)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ items, err := h.service.List(c.Request.Context(), filter)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+// Get returns one replication record by ID.
+func (h *ReplicationHandler) Get(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ item, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, item)
+}
+
+// buildReplicationFilter parses query params (taskId, backupRecordId,
+// destTargetId, status, dateFrom, dateTo) into a list filter. Numeric IDs are
+// parsed with a 32-bit width so strconv performs the overflow check.
+func buildReplicationFilter(c *gin.Context) (service.ReplicationRecordListInput, error) {
+ var filter service.ReplicationRecordListInput
+ if v := strings.TrimSpace(c.Query("taskId")); v != "" {
+ parsed, err := strconv.ParseUint(v, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("REPLICATION_FILTER_INVALID", "taskId 不合法", err)
+ }
+ id := uint(parsed)
+ filter.TaskID = &id
+ }
+ if v := strings.TrimSpace(c.Query("backupRecordId")); v != "" {
+ parsed, err := strconv.ParseUint(v, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("REPLICATION_FILTER_INVALID", "backupRecordId 不合法", err)
+ }
+ id := uint(parsed)
+ filter.BackupRecordID = &id
+ }
+ if v := strings.TrimSpace(c.Query("destTargetId")); v != "" {
+ parsed, err := strconv.ParseUint(v, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("REPLICATION_FILTER_INVALID", "destTargetId 不合法", err)
+ }
+ id := uint(parsed)
+ filter.DestTargetID = &id
+ }
+ filter.Status = strings.TrimSpace(c.Query("status"))
+ if v := strings.TrimSpace(c.Query("dateFrom")); v != "" {
+ parsed, err := time.Parse(time.RFC3339, v)
+ if err != nil {
+ return filter, apperror.BadRequest("REPLICATION_FILTER_INVALID", "dateFrom 必须为 RFC3339", err)
+ }
+ filter.DateFrom = &parsed
+ }
+ if v := strings.TrimSpace(c.Query("dateTo")); v != "" {
+ parsed, err := time.Parse(time.RFC3339, v)
+ if err != nil {
+ return filter, apperror.BadRequest("REPLICATION_FILTER_INVALID", "dateTo 必须为 RFC3339", err)
+ }
+ filter.DateTo = &parsed
+ }
+ return filter, nil
+}
diff --git a/server/internal/http/restore_record_handler.go b/server/internal/http/restore_record_handler.go
new file mode 100644
index 0000000..5d41c51
--- /dev/null
+++ b/server/internal/http/restore_record_handler.go
@@ -0,0 +1,162 @@
+package http
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/backup"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// RestoreRecordHandler serves restore-record list/detail/live-log endpoints.
+// Restore creation is proxied from BackupRecordHandler.Restore to RestoreService.Start.
+type RestoreRecordHandler struct {
+ service *service.RestoreService
+ auditService *service.AuditService
+}
+
+// NewRestoreRecordHandler wires the restore service and audit logger.
+func NewRestoreRecordHandler(restoreService *service.RestoreService, auditService *service.AuditService) *RestoreRecordHandler {
+ return &RestoreRecordHandler{service: restoreService, auditService: auditService}
+}
+
+// List returns restore records matching the query filter.
+func (h *RestoreRecordHandler) List(c *gin.Context) {
+ filter, err := buildRestoreFilter(c)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ items, err := h.service.List(c.Request.Context(), filter)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+// Get returns one restore record by ID.
+func (h *RestoreRecordHandler) Get(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ item, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, item)
+}
+
+// StreamLogs replays the recorded log events over SSE, then — while the
+// restore is still running — streams live events until completion or client
+// disconnect.
+// NOTE(review): events emitted between the Get snapshot and SubscribeLogs may
+// be missed; confirm the service de-duplicates or buffers across that window.
+func (h *RestoreRecordHandler) StreamLogs(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ detail, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ // Check flush capability BEFORE subscribing or committing SSE headers;
+ // otherwise the JSON error would be sent with a text/event-stream
+ // Content-Type already set.
+ flusher, ok := c.Writer.(interface{ Flush() })
+ if !ok {
+ response.Error(c, apperror.Internal("RESTORE_STREAM_UNSUPPORTED", "当前连接不支持日志流", nil))
+ return
+ }
+ events := detail.LogEvents
+ completed := detail.Status != "running"
+ channel, cancel, err := h.service.SubscribeLogs(c.Request.Context(), id, 64)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ defer cancel()
+ c.Writer.Header().Set("Content-Type", "text/event-stream")
+ c.Writer.Header().Set("Cache-Control", "no-cache")
+ c.Writer.Header().Set("Connection", "keep-alive")
+ // Replay the snapshot first so late subscribers see the full history.
+ for _, event := range events {
+ if err := writeRestoreSSEEvent(c.Writer, event); err != nil {
+ return
+ }
+ flusher.Flush()
+ }
+ if completed {
+ // Restore already finished; nothing further will arrive.
+ return
+ }
+ for {
+ select {
+ case <-c.Request.Context().Done():
+ return
+ case event, ok := <-channel:
+ if !ok {
+ return
+ }
+ if err := writeRestoreSSEEvent(c.Writer, event); err != nil {
+ return
+ }
+ flusher.Flush()
+ if event.Completed {
+ return
+ }
+ }
+ }
+}
+
+// buildRestoreFilter parses query params (taskId, backupRecordId, nodeId,
+// status, dateFrom, dateTo) into a list filter. Numeric IDs use a 32-bit
+// parse width so strconv performs the overflow check.
+func buildRestoreFilter(c *gin.Context) (service.RestoreRecordListInput, error) {
+ var filter service.RestoreRecordListInput
+ if taskIDValue := strings.TrimSpace(c.Query("taskId")); taskIDValue != "" {
+ parsed, err := strconv.ParseUint(taskIDValue, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("RESTORE_RECORD_FILTER_INVALID", "taskId 不合法", err)
+ }
+ v := uint(parsed)
+ filter.TaskID = &v
+ }
+ if backupValue := strings.TrimSpace(c.Query("backupRecordId")); backupValue != "" {
+ parsed, err := strconv.ParseUint(backupValue, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("RESTORE_RECORD_FILTER_INVALID", "backupRecordId 不合法", err)
+ }
+ v := uint(parsed)
+ filter.BackupRecordID = &v
+ }
+ if nodeValue := strings.TrimSpace(c.Query("nodeId")); nodeValue != "" {
+ parsed, err := strconv.ParseUint(nodeValue, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("RESTORE_RECORD_FILTER_INVALID", "nodeId 不合法", err)
+ }
+ v := uint(parsed)
+ filter.NodeID = &v
+ }
+ filter.Status = strings.TrimSpace(c.Query("status"))
+ if dateFrom := strings.TrimSpace(c.Query("dateFrom")); dateFrom != "" {
+ parsed, err := time.Parse(time.RFC3339, dateFrom)
+ if err != nil {
+ return filter, apperror.BadRequest("RESTORE_RECORD_FILTER_INVALID", "dateFrom 必须为 RFC3339 时间格式", err)
+ }
+ filter.DateFrom = &parsed
+ }
+ if dateTo := strings.TrimSpace(c.Query("dateTo")); dateTo != "" {
+ parsed, err := time.Parse(time.RFC3339, dateTo)
+ if err != nil {
+ return filter, apperror.BadRequest("RESTORE_RECORD_FILTER_INVALID", "dateTo 必须为 RFC3339 时间格式", err)
+ }
+ filter.DateTo = &parsed
+ }
+ return filter, nil
+}
+
+// writeRestoreSSEEvent serializes one log event as an SSE "event: log" frame.
+func writeRestoreSSEEvent(writer io.Writer, event backup.LogEvent) error {
+ payload, err := json.Marshal(event)
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(writer, "event: log\ndata: %s\n\n", payload)
+ return err
+}
diff --git a/server/internal/http/router.go b/server/internal/http/router.go
index 51e0bf3..241f61c 100644
--- a/server/internal/http/router.go
+++ b/server/internal/http/router.go
@@ -13,6 +13,7 @@ import (
"backupx/server/pkg/response"
"github.com/gin-gonic/gin"
"go.uber.org/zap"
+ "gorm.io/gorm"
)
type RouterDependencies struct {
@@ -28,6 +29,15 @@ type RouterDependencies struct {
BackupTaskService *service.BackupTaskService
BackupExecutionService *service.BackupExecutionService
BackupRecordService *service.BackupRecordService
+ RestoreService *service.RestoreService
+ VerificationService *service.VerificationService
+ ReplicationService *service.ReplicationService
+ TaskTemplateService *service.TaskTemplateService
+ TaskExportService *service.TaskExportService
+ SearchService *service.SearchService
+ EventBroadcaster *service.EventBroadcaster
+ UserService *service.UserService
+ ApiKeyService *service.ApiKeyService
NotificationService *service.NotificationService
DashboardService *service.DashboardService
SettingsService *service.SettingsService
@@ -40,6 +50,8 @@ type RouterDependencies struct {
SystemConfigRepo repository.SystemConfigRepository
InstallTokenService *service.InstallTokenService
MasterExternalURL string
+ // DB 注入给健康检查端点做 liveness/readiness 探测。
+ DB *gorm.DB
}
func NewRouter(deps RouterDependencies) *gin.Engine {
@@ -54,7 +66,19 @@ func NewRouter(deps RouterDependencies) *gin.Engine {
storageTargetHandler := NewStorageTargetHandler(deps.StorageTargetService, deps.AuditService)
backupTaskHandler := NewBackupTaskHandler(deps.BackupTaskService, deps.AuditService)
backupRunHandler := NewBackupRunHandler(deps.BackupExecutionService, deps.AuditService)
- backupRecordHandler := NewBackupRecordHandler(deps.BackupRecordService, deps.AuditService)
+ backupRecordHandler := NewBackupRecordHandler(deps.BackupRecordService, deps.RestoreService, deps.AuditService)
+ restoreRecordHandler := NewRestoreRecordHandler(deps.RestoreService, deps.AuditService)
+ verificationHandler := NewVerificationHandler(deps.VerificationService, deps.AuditService)
+ replicationHandler := NewReplicationHandler(deps.ReplicationService, deps.AuditService)
+ taskTemplateHandler := NewTaskTemplateHandler(deps.TaskTemplateService, deps.AuditService)
+ userHandler := NewUserHandler(deps.UserService, deps.AuditService)
+ apiKeyHandler := NewApiKeyHandler(deps.ApiKeyService, deps.AuditService)
+ // apiKeyAuth:给 AuthMiddleware 注入 API Key 验证能力。
+ // 为 nil 时中间件仅支持 JWT,不影响向后兼容。
+ var apiKeyAuth ApiKeyAuthenticator
+ if deps.ApiKeyService != nil {
+ apiKeyAuth = deps.ApiKeyService
+ }
notificationHandler := NewNotificationHandler(deps.NotificationService)
dashboardHandler := NewDashboardHandler(deps.DashboardService)
settingsHandler := NewSettingsHandler(deps.SettingsService, deps.AuditService)
@@ -67,109 +91,207 @@ func NewRouter(deps RouterDependencies) *gin.Engine {
auth.GET("/setup/status", authHandler.SetupStatus)
auth.POST("/setup", authHandler.Setup)
auth.POST("/login", authHandler.Login)
- auth.POST("/logout", AuthMiddleware(deps.JWTManager), authHandler.Logout)
- auth.GET("/profile", AuthMiddleware(deps.JWTManager), authHandler.Profile)
- auth.PUT("/password", AuthMiddleware(deps.JWTManager), authHandler.ChangePassword)
+ auth.POST("/logout", AuthMiddleware(deps.JWTManager, apiKeyAuth), authHandler.Logout)
+ auth.GET("/profile", AuthMiddleware(deps.JWTManager, apiKeyAuth), authHandler.Profile)
+ auth.PUT("/password", AuthMiddleware(deps.JWTManager, apiKeyAuth), authHandler.ChangePassword)
}
system := api.Group("/system")
- system.Use(AuthMiddleware(deps.JWTManager))
+ system.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
system.GET("/info", systemHandler.Info)
system.GET("/update-check", systemHandler.CheckUpdate)
storageTargets := api.Group("/storage-targets")
- storageTargets.Use(AuthMiddleware(deps.JWTManager))
+ storageTargets.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
// 静态路由必须在参数路由 /:id 之前注册,避免 Gin 路由冲突
storageTargets.GET("", storageTargetHandler.List)
- storageTargets.POST("", storageTargetHandler.Create)
- storageTargets.POST("/test", storageTargetHandler.TestConnection)
- storageTargets.POST("/google-drive/auth-url", storageTargetHandler.StartGoogleDriveOAuth)
- storageTargets.POST("/google-drive/complete", storageTargetHandler.CompleteGoogleDriveOAuth)
+ storageTargets.POST("", RequireNotViewer(), storageTargetHandler.Create)
+ storageTargets.POST("/test", RequireNotViewer(), storageTargetHandler.TestConnection)
+ storageTargets.POST("/google-drive/auth-url", RequireNotViewer(), storageTargetHandler.StartGoogleDriveOAuth)
+ storageTargets.POST("/google-drive/complete", RequireNotViewer(), storageTargetHandler.CompleteGoogleDriveOAuth)
storageTargets.GET("/google-drive/callback", storageTargetHandler.HandleGoogleDriveCallback)
rcloneHandler := NewRcloneHandler()
storageTargets.GET("/rclone/backends", rcloneHandler.ListBackends)
// 参数路由
storageTargets.GET("/:id", storageTargetHandler.Get)
- storageTargets.PUT("/:id", storageTargetHandler.Update)
- storageTargets.DELETE("/:id", storageTargetHandler.Delete)
- storageTargets.PUT("/:id/star", storageTargetHandler.ToggleStar)
- storageTargets.POST("/:id/test", storageTargetHandler.TestSavedConnection)
+ storageTargets.PUT("/:id", RequireNotViewer(), storageTargetHandler.Update)
+ storageTargets.DELETE("/:id", RequireNotViewer(), storageTargetHandler.Delete)
+ storageTargets.PUT("/:id/star", RequireNotViewer(), storageTargetHandler.ToggleStar)
+ storageTargets.POST("/:id/test", RequireNotViewer(), storageTargetHandler.TestSavedConnection)
storageTargets.GET("/:id/usage", storageTargetHandler.GetUsage)
storageTargets.GET("/:id/google-drive/profile", storageTargetHandler.GoogleDriveProfile)
backupTasks := api.Group("/backup/tasks")
- backupTasks.Use(AuthMiddleware(deps.JWTManager))
+ backupTasks.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
backupTasks.GET("", backupTaskHandler.List)
+ backupTasks.GET("/tags", backupTaskHandler.ListTags)
backupTasks.GET("/:id", backupTaskHandler.Get)
- backupTasks.POST("", backupTaskHandler.Create)
- backupTasks.PUT("/:id", backupTaskHandler.Update)
- backupTasks.DELETE("/:id", backupTaskHandler.Delete)
- backupTasks.PUT("/:id/toggle", backupTaskHandler.Toggle)
- backupTasks.POST("/:id/run", backupRunHandler.Run)
+ backupTasks.POST("", RequireNotViewer(), backupTaskHandler.Create)
+ backupTasks.PUT("/:id", RequireNotViewer(), backupTaskHandler.Update)
+ backupTasks.DELETE("/:id", RequireNotViewer(), backupTaskHandler.Delete)
+ backupTasks.PUT("/:id/toggle", RequireNotViewer(), backupTaskHandler.Toggle)
+ backupTasks.POST("/:id/run", RequireNotViewer(), backupRunHandler.Run)
+ backupTasks.POST("/batch/toggle", RequireNotViewer(), backupTaskHandler.BatchToggle)
+ backupTasks.POST("/batch/delete", RequireNotViewer(), backupTaskHandler.BatchDelete)
+ backupTasks.POST("/batch/run", RequireNotViewer(), backupRunHandler.BatchRun)
+ // 任务配置导入/导出(集群迁移 & 灾备)
+ if deps.TaskExportService != nil {
+ taskExportHandler := NewTaskExportHandler(deps.TaskExportService, deps.AuditService)
+ backupTasks.GET("/export", taskExportHandler.Export)
+ backupTasks.POST("/import", RequireNotViewer(), taskExportHandler.Import)
+ }
+ if deps.VerificationService != nil {
+ backupTasks.POST("/:id/verify", RequireNotViewer(), verificationHandler.TriggerByTask)
+ }
backupRecords := api.Group("/backup/records")
- backupRecords.Use(AuthMiddleware(deps.JWTManager))
+ backupRecords.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
backupRecords.GET("", backupRecordHandler.List)
backupRecords.GET("/:id", backupRecordHandler.Get)
backupRecords.GET("/:id/logs/stream", backupRecordHandler.StreamLogs)
backupRecords.GET("/:id/download", backupRecordHandler.Download)
- backupRecords.POST("/:id/restore", backupRecordHandler.Restore)
- backupRecords.POST("/batch-delete", backupRecordHandler.BatchDelete)
- backupRecords.DELETE("/:id", backupRecordHandler.Delete)
+ backupRecords.POST("/:id/restore", RequireNotViewer(), backupRecordHandler.Restore)
+ backupRecords.POST("/batch-delete", RequireNotViewer(), backupRecordHandler.BatchDelete)
+ backupRecords.DELETE("/:id", RequireNotViewer(), backupRecordHandler.Delete)
+
+ // 恢复记录独立命名空间:列表/详情/SSE 日志流。
+ // 创建恢复仍然走 POST /backup/records/:id/restore(以源备份记录为触发点)。
+ if deps.RestoreService != nil {
+ restoreRecords := api.Group("/restore/records")
+ restoreRecords.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ restoreRecords.GET("", restoreRecordHandler.List)
+ restoreRecords.GET("/:id", restoreRecordHandler.Get)
+ restoreRecords.GET("/:id/logs/stream", restoreRecordHandler.StreamLogs)
+ }
+
+ // 备份复制记录(3-2-1 规则)
+ if deps.ReplicationService != nil {
+ replicationRecords := api.Group("/replication/records")
+ replicationRecords.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ replicationRecords.GET("", replicationHandler.List)
+ replicationRecords.GET("/:id", replicationHandler.Get)
+ backupRecords.POST("/:id/replicate", RequireNotViewer(), replicationHandler.TriggerByRecord)
+ }
+
+ // 任务模板(批量创建)
+ if deps.TaskTemplateService != nil {
+ templates := api.Group("/task-templates")
+ templates.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ templates.GET("", taskTemplateHandler.List)
+ templates.GET("/:id", taskTemplateHandler.Get)
+ templates.POST("", RequireNotViewer(), taskTemplateHandler.Create)
+ templates.PUT("/:id", RequireNotViewer(), taskTemplateHandler.Update)
+ templates.DELETE("/:id", RequireNotViewer(), taskTemplateHandler.Delete)
+ templates.POST("/:id/apply", RequireNotViewer(), taskTemplateHandler.Apply)
+ }
+
+ // 备份验证/演练记录
+ if deps.VerificationService != nil {
+ verifyRecords := api.Group("/verify/records")
+ verifyRecords.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ verifyRecords.GET("", verificationHandler.List)
+ verifyRecords.GET("/:id", verificationHandler.Get)
+ verifyRecords.GET("/:id/logs/stream", verificationHandler.StreamLogs)
+ // 基于备份记录的验证入口:与 restore 对称
+ backupRecords.POST("/:id/verify", RequireNotViewer(), verificationHandler.TriggerByRecord)
+ }
dashboard := api.Group("/dashboard")
- dashboard.Use(AuthMiddleware(deps.JWTManager))
+ dashboard.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
dashboard.GET("/stats", dashboardHandler.Stats)
dashboard.GET("/timeline", dashboardHandler.Timeline)
+ dashboard.GET("/sla", dashboardHandler.SLA)
+ dashboard.GET("/cluster", dashboardHandler.Cluster)
+ dashboard.GET("/breakdown", dashboardHandler.Breakdown)
+ dashboard.GET("/node-performance", dashboardHandler.NodePerformance)
notifications := api.Group("/notifications")
- notifications.Use(AuthMiddleware(deps.JWTManager))
+ notifications.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
notifications.GET("", notificationHandler.List)
notifications.GET("/:id", notificationHandler.Get)
- notifications.POST("", notificationHandler.Create)
- notifications.PUT("/:id", notificationHandler.Update)
- notifications.DELETE("/:id", notificationHandler.Delete)
- notifications.POST("/test", notificationHandler.Test)
- notifications.POST("/:id/test", notificationHandler.TestSaved)
+ notifications.POST("", RequireNotViewer(), notificationHandler.Create)
+ notifications.PUT("/:id", RequireNotViewer(), notificationHandler.Update)
+ notifications.DELETE("/:id", RequireNotViewer(), notificationHandler.Delete)
+ notifications.POST("/test", RequireNotViewer(), notificationHandler.Test)
+ notifications.POST("/:id/test", RequireNotViewer(), notificationHandler.TestSaved)
settings := api.Group("/settings")
- settings.Use(AuthMiddleware(deps.JWTManager))
+ settings.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
settings.GET("", settingsHandler.Get)
- settings.PUT("", settingsHandler.Update)
+ settings.PUT("", RequireRole("admin"), settingsHandler.Update)
+
+ // 用户管理(admin 专属)
+ if deps.UserService != nil {
+ users := api.Group("/users")
+ users.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth), RequireRole("admin"))
+ users.GET("", userHandler.List)
+ users.POST("", userHandler.Create)
+ users.PUT("/:id", userHandler.Update)
+ users.DELETE("/:id", userHandler.Delete)
+ }
+
+ // API Key 管理(admin 专属)
+ if deps.ApiKeyService != nil {
+ apiKeys := api.Group("/api-keys")
+ apiKeys.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth), RequireRole("admin"))
+ apiKeys.GET("", apiKeyHandler.List)
+ apiKeys.POST("", apiKeyHandler.Create)
+ apiKeys.PUT("/:id/toggle", apiKeyHandler.Toggle)
+ apiKeys.DELETE("/:id", apiKeyHandler.Revoke)
+ }
auditLogs := api.Group("/audit-logs")
- auditLogs.Use(AuthMiddleware(deps.JWTManager))
+ auditLogs.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
auditLogs.GET("", auditHandler.List)
+ auditLogs.GET("/export", auditHandler.Export)
+
+ // 实时事件 SSE 流(Dashboard 自刷新、桌面告警)
+ if deps.EventBroadcaster != nil {
+ eventsHandler := NewEventsHandler(deps.EventBroadcaster)
+ events := api.Group("/events")
+ events.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ events.GET("/stream", eventsHandler.Stream)
+ }
+
+ // 全局搜索
+ if deps.SearchService != nil {
+ searchHandler := NewSearchHandler(deps.SearchService)
+ searchGroup := api.Group("/search")
+ searchGroup.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
+ searchGroup.GET("", searchHandler.Search)
+ }
if deps.DatabaseDiscoveryService != nil {
databaseHandler := NewDatabaseHandler(deps.DatabaseDiscoveryService)
database := api.Group("/database")
- database.Use(AuthMiddleware(deps.JWTManager))
+ database.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
database.POST("/discover", databaseHandler.Discover)
}
nodeHandler := NewNodeHandler(deps.NodeService, deps.AuditService, deps.InstallTokenService, deps.UserRepository, deps.MasterExternalURL)
nodes := api.Group("/nodes")
- nodes.Use(AuthMiddleware(deps.JWTManager))
+ nodes.Use(AuthMiddleware(deps.JWTManager, apiKeyAuth))
nodes.GET("", nodeHandler.List)
nodes.GET("/:id", nodeHandler.Get)
- nodes.POST("", nodeHandler.Create)
- nodes.PUT("/:id", nodeHandler.Update)
- nodes.DELETE("/:id", nodeHandler.Delete)
+ nodes.POST("", RequireRole("admin"), nodeHandler.Create)
+ nodes.PUT("/:id", RequireRole("admin"), nodeHandler.Update)
+ nodes.DELETE("/:id", RequireRole("admin"), nodeHandler.Delete)
nodes.GET("/:id/fs/list", nodeHandler.ListDirectory)
- nodes.POST("/batch", nodeHandler.BatchCreate)
- nodes.POST("/:id/install-tokens", nodeHandler.CreateInstallToken)
- nodes.POST("/:id/rotate-token", nodeHandler.RotateToken)
- nodes.GET("/:id/install-script-preview", nodeHandler.PreviewScript)
+ nodes.POST("/batch", RequireRole("admin"), nodeHandler.BatchCreate)
+ nodes.POST("/:id/install-tokens", RequireRole("admin"), nodeHandler.CreateInstallToken)
+ nodes.POST("/:id/rotate-token", RequireRole("admin"), nodeHandler.RotateToken)
+ nodes.GET("/:id/install-script-preview", RequireRole("admin"), nodeHandler.PreviewScript)
// Agent API(token 认证,无需 JWT)
if deps.AgentService != nil {
- agentHandler := NewAgentHandler(deps.AgentService, deps.NodeService)
+ agentHandler := NewAgentHandler(deps.AgentService, deps.NodeService, deps.RestoreService)
agent := api.Group("/agent")
agent.POST("/heartbeat", agentHandler.Heartbeat)
agent.POST("/commands/poll", agentHandler.Poll)
agent.POST("/commands/:id/result", agentHandler.SubmitCommandResult)
agent.GET("/tasks/:id", agentHandler.GetTaskSpec)
agent.POST("/records/:id", agentHandler.UpdateRecord)
+ agent.GET("/restores/:id/spec", agentHandler.GetRestoreSpec)
+ agent.POST("/restores/:id", agentHandler.UpdateRestore)
// Agent v1(安装脚本探活用),仅 Self 端点
v1Agent := api.Group("/v1/agent")
@@ -180,6 +302,15 @@ func NewRouter(deps RouterDependencies) *gin.Engine {
}
}
+ // 健康检查端点(公开、无认证、低开销)
+ // K8s/Swarm/Nomad 等编排系统使用这些端点做 liveness/readiness 探测。
+ healthHandler := NewHealthHandler(deps.DB, deps.Version)
+ engine.GET("/health", healthHandler.Live)
+ engine.GET("/ready", healthHandler.Ready)
+ // 在 /api 下也暴露一份,方便反向代理按 path 前缀统一路由
+ engine.GET("/api/health", healthHandler.Live)
+ engine.GET("/api/ready", healthHandler.Ready)
+
// 公开安装路由(不走 JWT 中间件)
if deps.InstallTokenService != nil {
gcCtx := deps.Context
diff --git a/server/internal/http/search_handler.go b/server/internal/http/search_handler.go
new file mode 100644
index 0000000..4d37029
--- /dev/null
+++ b/server/internal/http/search_handler.go
@@ -0,0 +1,28 @@
+package http
+
+import (
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// SearchHandler 全局搜索。
+type SearchHandler struct {
+ service *service.SearchService
+}
+
+func NewSearchHandler(s *service.SearchService) *SearchHandler {
+ return &SearchHandler{service: s}
+}
+
+// Search GET /search?q=关键字
+func (h *SearchHandler) Search(c *gin.Context) {
+ query := c.Query("q")
+ result, err := h.service.Search(c.Request.Context(), query)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, result)
+}
diff --git a/server/internal/http/task_export_handler.go b/server/internal/http/task_export_handler.go
new file mode 100644
index 0000000..c445895
--- /dev/null
+++ b/server/internal/http/task_export_handler.go
@@ -0,0 +1,101 @@
+package http
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ stdhttp "net/http"
+ "strconv"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// TaskExportHandler 提供任务配置 JSON 导入/导出。
+type TaskExportHandler struct {
+ service *service.TaskExportService
+ auditService *service.AuditService
+}
+
+func NewTaskExportHandler(s *service.TaskExportService, audit *service.AuditService) *TaskExportHandler {
+ return &TaskExportHandler{service: s, auditService: audit}
+}
+
+// Export GET /api/backup/tasks/export?ids=1,2,3
+// 无 ids 参数时导出全部任务。返回 application/json + Content-Disposition。
+func (h *TaskExportHandler) Export(c *gin.Context) {
+ var taskIDs []uint
+ if v := strings.TrimSpace(c.Query("ids")); v != "" {
+ for _, part := range strings.Split(v, ",") {
+ if id, err := strconv.ParseUint(strings.TrimSpace(part), 10, 32); err == nil {
+ taskIDs = append(taskIDs, uint(id))
+ }
+ }
+ }
+ payload, err := h.service.Export(c.Request.Context(), taskIDs)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ data, err := json.MarshalIndent(payload, "", " ")
+ if err != nil {
+ response.Error(c, apperror.Internal("TASK_EXPORT_MARSHAL_FAILED", "无法序列化导出内容", err))
+ return
+ }
+ filename := fmt.Sprintf("backupx-tasks-%s.json", time.Now().UTC().Format("20060102-150405"))
+ c.Header("Content-Type", "application/json; charset=utf-8")
+ c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filename))
+ _, _ = c.Writer.Write(data)
+ recordAudit(c, h.auditService, "backup_task", "export", "backup_task", "", "",
+ fmt.Sprintf("导出 %d 个任务的配置为 JSON", payload.TaskCount))
+}
+
+// Import POST /api/backup/tasks/import
+// Body: ExportPayload JSON。返回每个任务的创建/跳过结果。
+func (h *TaskExportHandler) Import(c *gin.Context) {
+ body, err := io.ReadAll(c.Request.Body)
+ if err != nil {
+ response.Error(c, apperror.BadRequest("TASK_IMPORT_INVALID", "无法读取请求体", err))
+ return
+ }
+ if len(body) == 0 {
+ response.Error(c, apperror.BadRequest("TASK_IMPORT_INVALID", "请求体为空", nil))
+ return
+ }
+ if len(body) > 1024*1024 { // 1MB 上限
+ c.Writer.WriteHeader(stdhttp.StatusRequestEntityTooLarge)
+ response.Error(c, apperror.BadRequest("TASK_IMPORT_TOO_LARGE", "导入文件过大(上限 1MB)", nil))
+ return
+ }
+ var payload service.ExportPayload
+ if err := json.Unmarshal(body, &payload); err != nil {
+ response.Error(c, apperror.BadRequest("TASK_IMPORT_INVALID", "JSON 格式不合法", err))
+ return
+ }
+ if len(payload.Tasks) == 0 {
+ response.Error(c, apperror.BadRequest("TASK_IMPORT_INVALID", "文件中未包含任何任务", nil))
+ return
+ }
+ results, err := h.service.Import(c.Request.Context(), payload)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ succ := 0
+ skipped := 0
+ for _, r := range results {
+ if r.Success && !r.Skipped {
+ succ++
+ } else if r.Skipped {
+ skipped++
+ }
+ }
+ recordAudit(c, h.auditService, "backup_task", "import", "backup_task", "", "",
+ fmt.Sprintf("从 JSON 导入任务:创建 %d / 跳过 %d / 失败 %d", succ, skipped, len(results)-succ-skipped))
+ response.Success(c, results)
+}
diff --git a/server/internal/http/task_template_handler.go b/server/internal/http/task_template_handler.go
new file mode 100644
index 0000000..08bdb16
--- /dev/null
+++ b/server/internal/http/task_template_handler.go
@@ -0,0 +1,125 @@
+package http
+
+import (
+ "fmt"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+type TaskTemplateHandler struct {
+ service *service.TaskTemplateService
+ auditService *service.AuditService
+}
+
+func NewTaskTemplateHandler(templateService *service.TaskTemplateService, auditService *service.AuditService) *TaskTemplateHandler {
+ return &TaskTemplateHandler{service: templateService, auditService: auditService}
+}
+
+func (h *TaskTemplateHandler) List(c *gin.Context) {
+ items, err := h.service.List(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+func (h *TaskTemplateHandler) Get(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ item, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, item)
+}
+
+func (h *TaskTemplateHandler) Create(c *gin.Context) {
+ var input service.TaskTemplateUpsertInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("TASK_TEMPLATE_INVALID", "模板参数不合法", err))
+ return
+ }
+ creator := ""
+ if v, ok := c.Get(contextUsernameKey); ok {
+ if s, ok := v.(string); ok {
+ creator = s
+ }
+ }
+ item, err := h.service.Create(c.Request.Context(), creator, input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "task_template", "create", "task_template", fmt.Sprintf("%d", item.ID), item.Name,
+ fmt.Sprintf("创建任务模板: %s (类型: %s)", item.Name, item.TaskType))
+ response.Success(c, item)
+}
+
+func (h *TaskTemplateHandler) Update(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input service.TaskTemplateUpsertInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("TASK_TEMPLATE_INVALID", "模板参数不合法", err))
+ return
+ }
+ item, err := h.service.Update(c.Request.Context(), id, input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "task_template", "update", "task_template", fmt.Sprintf("%d", item.ID), item.Name,
+ fmt.Sprintf("更新任务模板: %s", item.Name))
+ response.Success(c, item)
+}
+
+func (h *TaskTemplateHandler) Delete(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ if err := h.service.Delete(c.Request.Context(), id); err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "task_template", "delete", "task_template", fmt.Sprintf("%d", id), "",
+ fmt.Sprintf("删除任务模板 (ID: %d)", id))
+ response.Success(c, gin.H{"deleted": true})
+}
+
+// Apply 一键批量创建任务。Body: {variables: [{name, sourcePath, ...}, ...]}
+func (h *TaskTemplateHandler) Apply(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input service.TaskTemplateApplyInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("TASK_TEMPLATE_INVALID", "应用参数不合法", err))
+ return
+ }
+ results, err := h.service.Apply(c.Request.Context(), id, input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ successCount := 0
+ for _, r := range results {
+ if r.Success {
+ successCount++
+ }
+ }
+ recordAudit(c, h.auditService, "task_template", "apply", "task_template", fmt.Sprintf("%d", id), "",
+ fmt.Sprintf("应用模板批量创建任务(成功 %d/%d)", successCount, len(results)))
+ response.Success(c, results)
+}
diff --git a/server/internal/http/user_handler.go b/server/internal/http/user_handler.go
new file mode 100644
index 0000000..0dfc441
--- /dev/null
+++ b/server/internal/http/user_handler.go
@@ -0,0 +1,80 @@
+package http
+
+import (
+ "fmt"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// UserHandler 管理账号(仅 admin 可访问)。
+type UserHandler struct {
+ service *service.UserService
+ auditService *service.AuditService
+}
+
+func NewUserHandler(userService *service.UserService, auditService *service.AuditService) *UserHandler {
+ return &UserHandler{service: userService, auditService: auditService}
+}
+
+func (h *UserHandler) List(c *gin.Context) {
+ items, err := h.service.List(c.Request.Context())
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+func (h *UserHandler) Create(c *gin.Context) {
+ var input service.UserUpsertInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("USER_INVALID", "用户参数不合法", err))
+ return
+ }
+ item, err := h.service.Create(c.Request.Context(), input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "user", "create", "user", fmt.Sprintf("%d", item.ID), item.Username,
+ fmt.Sprintf("创建用户 %s (角色: %s)", item.Username, item.Role))
+ response.Success(c, item)
+}
+
+func (h *UserHandler) Update(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input service.UserUpsertInput
+ if err := c.ShouldBindJSON(&input); err != nil {
+ response.Error(c, apperror.BadRequest("USER_INVALID", "用户参数不合法", err))
+ return
+ }
+ item, err := h.service.Update(c.Request.Context(), id, input)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "user", "update", "user", fmt.Sprintf("%d", id), item.Username,
+ fmt.Sprintf("更新用户 %s (角色: %s, 停用: %v)", item.Username, item.Role, item.Disabled))
+ response.Success(c, item)
+}
+
+func (h *UserHandler) Delete(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ if err := h.service.Delete(c.Request.Context(), id); err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "user", "delete", "user", fmt.Sprintf("%d", id), "",
+ fmt.Sprintf("删除用户 (ID: %d)", id))
+ response.Success(c, gin.H{"deleted": true})
+}
diff --git a/server/internal/http/verification_handler.go b/server/internal/http/verification_handler.go
new file mode 100644
index 0000000..7297851
--- /dev/null
+++ b/server/internal/http/verification_handler.go
@@ -0,0 +1,207 @@
+package http
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/backup"
+ "backupx/server/internal/service"
+ "backupx/server/pkg/response"
+
+ "github.com/gin-gonic/gin"
+)
+
+// VerificationHandler 提供验证记录列表/详情/SSE,以及手动触发入口。
+type VerificationHandler struct {
+ service *service.VerificationService
+ auditService *service.AuditService
+}
+
+func NewVerificationHandler(verifyService *service.VerificationService, auditService *service.AuditService) *VerificationHandler {
+ return &VerificationHandler{service: verifyService, auditService: auditService}
+}
+
+// TriggerByTask 接收任务级手动触发。使用最新成功备份为源。
+func (h *VerificationHandler) TriggerByTask(c *gin.Context) {
+ taskID, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input struct {
+ Mode string `json:"mode"`
+ }
+ _ = c.ShouldBindJSON(&input)
+ triggeredBy := ""
+ if subject, exists := c.Get(contextUserSubjectKey); exists {
+ triggeredBy = strings.TrimSpace(fmt.Sprintf("%v", subject))
+ }
+ if triggeredBy == "" {
+ triggeredBy = "manual"
+ }
+ detail, err := h.service.StartByTask(c.Request.Context(), taskID, input.Mode, triggeredBy)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "backup_verify", "manual_run", "backup_task", fmt.Sprintf("%d", taskID), "",
+ fmt.Sprintf("手动触发验证(任务 ID: %d, 验证记录 ID: %d, 模式: %s)", taskID, detail.ID, detail.Mode))
+ response.Success(c, detail)
+}
+
+// TriggerByRecord 基于指定备份记录触发验证(允许验证历史备份)。
+func (h *VerificationHandler) TriggerByRecord(c *gin.Context) {
+ recordID, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ var input struct {
+ Mode string `json:"mode"`
+ }
+ _ = c.ShouldBindJSON(&input)
+ triggeredBy := ""
+ if subject, exists := c.Get(contextUserSubjectKey); exists {
+ triggeredBy = strings.TrimSpace(fmt.Sprintf("%v", subject))
+ }
+ if triggeredBy == "" {
+ triggeredBy = "manual"
+ }
+ detail, err := h.service.Start(c.Request.Context(), recordID, input.Mode, triggeredBy)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ recordAudit(c, h.auditService, "backup_verify", "manual_run", "backup_record", fmt.Sprintf("%d", recordID), "",
+ fmt.Sprintf("手动触发验证(备份记录 ID: %d, 验证记录 ID: %d, 模式: %s)", recordID, detail.ID, detail.Mode))
+ response.Success(c, detail)
+}
+
+func (h *VerificationHandler) List(c *gin.Context) {
+ filter, err := buildVerifyFilter(c)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ items, err := h.service.List(c.Request.Context(), filter)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, items)
+}
+
+func (h *VerificationHandler) Get(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ item, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ response.Success(c, item)
+}
+
+func (h *VerificationHandler) StreamLogs(c *gin.Context) {
+ id, ok := parseUintParam(c, "id")
+ if !ok {
+ return
+ }
+ detail, err := h.service.Get(c.Request.Context(), id)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ events := detail.LogEvents
+ completed := detail.Status != "running"
+ channel, cancel, err := h.service.SubscribeLogs(c.Request.Context(), id, 64)
+ if err != nil {
+ response.Error(c, err)
+ return
+ }
+ defer cancel()
+ c.Writer.Header().Set("Content-Type", "text/event-stream")
+ c.Writer.Header().Set("Cache-Control", "no-cache")
+ c.Writer.Header().Set("Connection", "keep-alive")
+ flusher, ok := c.Writer.(interface{ Flush() })
+ if !ok {
+ response.Error(c, apperror.Internal("VERIFY_STREAM_UNSUPPORTED", "当前连接不支持日志流", nil))
+ return
+ }
+ for _, event := range events {
+ if err := writeVerifySSEEvent(c.Writer, event); err != nil {
+ return
+ }
+ flusher.Flush()
+ }
+ if completed {
+ return
+ }
+ for {
+ select {
+ case <-c.Request.Context().Done():
+ return
+ case event, ok := <-channel:
+ if !ok {
+ return
+ }
+ if err := writeVerifySSEEvent(c.Writer, event); err != nil {
+ return
+ }
+ flusher.Flush()
+ if event.Completed {
+ return
+ }
+ }
+ }
+}
+
+func buildVerifyFilter(c *gin.Context) (service.VerificationRecordListInput, error) {
+ var filter service.VerificationRecordListInput
+ if value := strings.TrimSpace(c.Query("taskId")); value != "" {
+ parsed, err := strconv.ParseUint(value, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("VERIFY_RECORD_FILTER_INVALID", "taskId 不合法", err)
+ }
+ v := uint(parsed)
+ filter.TaskID = &v
+ }
+ if value := strings.TrimSpace(c.Query("backupRecordId")); value != "" {
+ parsed, err := strconv.ParseUint(value, 10, 32)
+ if err != nil {
+ return filter, apperror.BadRequest("VERIFY_RECORD_FILTER_INVALID", "backupRecordId 不合法", err)
+ }
+ v := uint(parsed)
+ filter.BackupRecordID = &v
+ }
+ filter.Status = strings.TrimSpace(c.Query("status"))
+ if dateFrom := strings.TrimSpace(c.Query("dateFrom")); dateFrom != "" {
+ parsed, err := time.Parse(time.RFC3339, dateFrom)
+ if err != nil {
+ return filter, apperror.BadRequest("VERIFY_RECORD_FILTER_INVALID", "dateFrom 必须为 RFC3339 时间格式", err)
+ }
+ filter.DateFrom = &parsed
+ }
+ if dateTo := strings.TrimSpace(c.Query("dateTo")); dateTo != "" {
+ parsed, err := time.Parse(time.RFC3339, dateTo)
+ if err != nil {
+ return filter, apperror.BadRequest("VERIFY_RECORD_FILTER_INVALID", "dateTo 必须为 RFC3339 时间格式", err)
+ }
+ filter.DateTo = &parsed
+ }
+ return filter, nil
+}
+
+func writeVerifySSEEvent(writer io.Writer, event backup.LogEvent) error {
+ payload, err := json.Marshal(event)
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(writer, "event: log\ndata: %s\n\n", payload)
+ return err
+}
diff --git a/server/internal/model/agent_command.go b/server/internal/model/agent_command.go
index 078d6b6..f20e45b 100644
--- a/server/internal/model/agent_command.go
+++ b/server/internal/model/agent_command.go
@@ -20,6 +20,19 @@ const (
// Payload: {"path": "/var/log"}
// Result: {"entries": [{"name":"...", "path":"...", "isDir":true, "size":0}]}
AgentCommandTypeListDir = "list_dir"
+ // AgentCommandTypeRestoreRecord 在 Agent 节点上恢复指定备份记录
+ // Payload: {"restoreRecordId": 789}
+ // Agent 拉 /api/agent/restores/:id/spec 获取完整规格后执行恢复
+ AgentCommandTypeRestoreRecord = "restore_record"
+ // AgentCommandTypeDiscoverDB 在 Agent 节点上发现数据库列表
+ // Payload: {"type": "mysql", "host": "...", "port": 3306, "user": "...", "password": "..."}
+ // Result: {"databases": ["db1", "db2"]}
+ AgentCommandTypeDiscoverDB = "discover_db"
+ // AgentCommandTypeDeleteStorageObject 在 Agent 节点上删除指定存储对象
+ // Payload: {"targetType": "local_disk", "targetConfig": {...}, "storagePath": "tasks/1/x.tar.gz"}
+ // 用于跨节点 local_disk 场景:Master 删记录时请求 Agent 清理其本地备份文件。
+ // Agent 需具备对应存储 provider 的执行能力。best-effort:失败仅影响 Agent 侧文件残留。
+ AgentCommandTypeDeleteStorageObject = "delete_storage_object"
)
// AgentCommand 代表 Master 发给某个 Agent 节点的待执行命令。
diff --git a/server/internal/model/api_key.go b/server/internal/model/api_key.go
new file mode 100644
index 0000000..781786d
--- /dev/null
+++ b/server/internal/model/api_key.go
@@ -0,0 +1,24 @@
+package model
+
+import "time"
+
+// ApiKey 用于 CI/CD、监控脚本等非交互式场景通过 HTTP API 访问 BackupX。
+// 明文 Key 仅在创建时返回一次,数据库存储 SHA-256 哈希。
+// 认证中间件:当 Authorization: Bearer 值以 "bax_" 前缀开头时走 API Key 验证。
+type ApiKey struct {
+ ID uint `gorm:"primaryKey" json:"id"`
+ Name string `gorm:"size:128;not null" json:"name"`
+ Role string `gorm:"size:32;not null;default:viewer" json:"role"`
+ KeyHash string `gorm:"column:key_hash;size:128;uniqueIndex;not null" json:"-"`
+ Prefix string `gorm:"size:32;not null" json:"prefix"`
+ CreatedBy string `gorm:"column:created_by;size:128" json:"createdBy"`
+ LastUsedAt *time.Time `gorm:"column:last_used_at" json:"lastUsedAt,omitempty"`
+ ExpiresAt *time.Time `gorm:"column:expires_at" json:"expiresAt,omitempty"`
+ Disabled bool `gorm:"not null;default:false" json:"disabled"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
+}
+
+func (ApiKey) TableName() string {
+ return "api_keys"
+}
diff --git a/server/internal/model/backup_record.go b/server/internal/model/backup_record.go
index 4d4c7a8..20fd493 100644
--- a/server/internal/model/backup_record.go
+++ b/server/internal/model/backup_record.go
@@ -14,6 +14,9 @@ type BackupRecord struct {
Task BackupTask `json:"task,omitempty"`
StorageTargetID uint `gorm:"column:storage_target_id;index;not null" json:"storageTargetId"`
StorageTarget StorageTarget `json:"storageTarget,omitempty"`
+ // NodeID 执行该次备份的节点(0 = 本机 Master)。用于集群中识别 local_disk 类型
+ // 存储的归属节点,避免 Master 端试图跨节点访问远程 Agent 的本地存储。
+ NodeID uint `gorm:"column:node_id;index;default:0" json:"nodeId"`
Status string `gorm:"size:20;index;not null" json:"status"`
FileName string `gorm:"column:file_name;size:255" json:"fileName"`
FileSize int64 `gorm:"column:file_size;not null;default:0" json:"fileSize"`
diff --git a/server/internal/model/backup_task.go b/server/internal/model/backup_task.go
index 0eb6202..daa5f14 100644
--- a/server/internal/model/backup_task.go
+++ b/server/internal/model/backup_task.go
@@ -46,6 +46,25 @@ type BackupTask struct {
MaxBackups int `gorm:"column:max_backups;not null;default:10" json:"maxBackups"`
LastRunAt *time.Time `gorm:"column:last_run_at" json:"lastRunAt,omitempty"`
LastStatus string `gorm:"column:last_status;size:20;not null;default:'idle'" json:"lastStatus"`
+ // 验证(恢复演练)配置 — 定期自动校验备份可恢复性
+ VerifyEnabled bool `gorm:"column:verify_enabled;not null;default:false" json:"verifyEnabled"`
+ VerifyCronExpr string `gorm:"column:verify_cron_expr;size:64" json:"verifyCronExpr"`
+ VerifyMode string `gorm:"column:verify_mode;size:20;not null;default:'quick'" json:"verifyMode"`
+ // SLA 配置 — RPO(期望最长未备份间隔)与告警阈值
+ SLAHoursRPO int `gorm:"column:sla_hours_rpo;not null;default:0" json:"slaHoursRpo"`
+ AlertOnConsecutiveFails int `gorm:"column:alert_on_consecutive_fails;not null;default:1" json:"alertOnConsecutiveFails"`
+ // ReplicationTargetIDs 备份复制目标存储 ID 列表(CSV)。
+ // 备份完成后,系统将自动把成果从任务主存储(StorageTargets 的第一个)复制到这些目标。
+ // 满足 3-2-1 规则:至少 2 份副本,且至少 1 份异地(不同 provider/region)。
+ ReplicationTargetIDs string `gorm:"column:replication_target_ids;size:500" json:"replicationTargetIds"`
+ // MaintenanceWindows 允许执行备份的时段(格式详见 backup/window.go)。
+ // 空 = 不限制。非空时调度器在非窗口跳过,手动执行返回友好错误。
+ MaintenanceWindows string `gorm:"column:maintenance_windows;size:500" json:"maintenanceWindows"`
+ // DependsOnTaskIDs 依赖的上游任务 ID 列表(CSV)。
+ // 语义:上游任务成功后自动触发本任务,形成工作流(如 DB 备份完成 → 归档压缩)。
+ // 调度器继续按本任务自己的 cron 触发,仅"自动触发"路径响应依赖完成事件。
+ // 循环依赖检查在 service 层完成,避免配置阶段即出错。
+ DependsOnTaskIDs string `gorm:"column:depends_on_task_ids;size:500" json:"dependsOnTaskIds"`
CreatedAt time.Time `json:"createdAt"`
UpdatedAt time.Time `json:"updatedAt"`
}
diff --git a/server/internal/model/node.go b/server/internal/model/node.go
index 3c81335..8552635 100644
--- a/server/internal/model/node.go
+++ b/server/internal/model/node.go
@@ -23,8 +23,14 @@ type Node struct {
LastSeen time.Time `gorm:"column:last_seen" json:"lastSeen"`
PrevToken string `gorm:"size:128;index" json:"-"`
PrevTokenExpires *time.Time `gorm:"column:prev_token_expires" json:"-"`
- CreatedAt time.Time `json:"createdAt"`
- UpdatedAt time.Time `json:"updatedAt"`
+ // MaxConcurrent 该节点允许的最大并发任务数(0=不限制,沿用全局 cfg.Backup.MaxConcurrent)。
+ // 用于大集群中限制单节点资源占用:例如小内存 Agent 节点可配 1,避免多个大备份同时跑挤爆。
+ MaxConcurrent int `gorm:"column:max_concurrent;not null;default:0" json:"maxConcurrent"`
+ // BandwidthLimit 该节点上传带宽上限(rclone 可识别格式:10M / 1G / 0=不限)。
+ // 对集群感知的上传场景有效(Master 本地与 Agent 运行时均会应用)。
+ BandwidthLimit string `gorm:"column:bandwidth_limit;size:32" json:"bandwidthLimit"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
}
func (Node) TableName() string {
diff --git a/server/internal/model/notification.go b/server/internal/model/notification.go
index da1828f..c590509 100644
--- a/server/internal/model/notification.go
+++ b/server/internal/model/notification.go
@@ -2,6 +2,26 @@ package model
import "time"
+// 通知事件类型(企业级事件总线)。
+// 任一 Notification 可订阅多个事件,EventTypes 字段存 CSV。
+// 空 EventTypes + OnSuccess/OnFailure=true 时沿用旧语义(仅备份成功/失败)。
+const (
+ NotificationEventBackupSuccess = "backup_success"
+ NotificationEventBackupFailed = "backup_failed"
+ NotificationEventRestoreSuccess = "restore_success"
+ NotificationEventRestoreFailed = "restore_failed"
+ NotificationEventVerifyFailed = "verify_failed"
+ NotificationEventSLAViolation = "sla_violation"
+ // NotificationEventStorageUnhealthy 存储目标连接失败(后台健康扫描触发)。
+ NotificationEventStorageUnhealthy = "storage_unhealthy"
+ // NotificationEventReplicationFailed 备份复制失败。
+ NotificationEventReplicationFailed = "replication_failed"
+ // NotificationEventAgentOutdated Agent 版本落后 Master,建议升级。
+ NotificationEventAgentOutdated = "agent_outdated"
+ // NotificationEventStorageCapacity 存储目标使用率超过预警阈值(85%)。
+ NotificationEventStorageCapacity = "storage_capacity_warning"
+)
+
type Notification struct {
ID uint `gorm:"primaryKey" json:"id"`
Type string `gorm:"size:20;index;not null" json:"type"`
@@ -10,8 +30,11 @@ type Notification struct {
Enabled bool `gorm:"not null;default:true" json:"enabled"`
OnSuccess bool `gorm:"column:on_success;not null;default:false" json:"onSuccess"`
OnFailure bool `gorm:"column:on_failure;not null;default:true" json:"onFailure"`
- CreatedAt time.Time `json:"createdAt"`
- UpdatedAt time.Time `json:"updatedAt"`
+ // EventTypes 逗号分隔,订阅的事件类型。
+ // 空 = 仅监听备份成功/失败(兼容旧配置);非空则严格按订阅触发。
+ EventTypes string `gorm:"column:event_types;size:500" json:"eventTypes"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
}
func (Notification) TableName() string {
diff --git a/server/internal/model/replication_record.go b/server/internal/model/replication_record.go
new file mode 100644
index 0000000..19de40a
--- /dev/null
+++ b/server/internal/model/replication_record.go
@@ -0,0 +1,44 @@
+package model
+
+import "time"
+
+// ReplicationRecord 记录一次备份复制的执行。
+// 触发方式:
+// - 自动:备份成功后,根据 task.ReplicationTargetIDs 自动派发
+// - 手动:从备份记录详情页手动触发
+//
+// 核心语义:把源存储上的备份对象 mirror 到目标存储,保留 StoragePath。
+// 3-2-1 规则核心:每份备份至少存在于两个独立存储目标,且至少一份异地。
+const (
+ ReplicationStatusRunning = "running"
+ ReplicationStatusSuccess = "success"
+ ReplicationStatusFailed = "failed"
+)
+
+type ReplicationRecord struct {
+ ID uint `gorm:"primaryKey" json:"id"`
+ BackupRecordID uint `gorm:"column:backup_record_id;index;not null" json:"backupRecordId"`
+ BackupRecord BackupRecord `json:"backupRecord,omitempty"`
+ TaskID uint `gorm:"column:task_id;index;not null" json:"taskId"`
+ // SourceTargetID 源存储目标(备份已存在于此)
+ SourceTargetID uint `gorm:"column:source_target_id;index;not null" json:"sourceTargetId"`
+ SourceTarget StorageTarget `gorm:"foreignKey:SourceTargetID;references:ID" json:"sourceTarget,omitempty"`
+ // DestTargetID 目标存储(复制过去)
+ DestTargetID uint `gorm:"column:dest_target_id;index;not null" json:"destTargetId"`
+ DestTarget StorageTarget `gorm:"foreignKey:DestTargetID;references:ID" json:"destTarget,omitempty"`
+ Status string `gorm:"size:20;index;not null" json:"status"`
+ StoragePath string `gorm:"column:storage_path;size:500" json:"storagePath"`
+ FileSize int64 `gorm:"column:file_size;not null;default:0" json:"fileSize"`
+ Checksum string `gorm:"column:checksum;size:64" json:"checksum"`
+ ErrorMessage string `gorm:"column:error_message;size:2000" json:"errorMessage"`
+ DurationSeconds int `gorm:"column:duration_seconds;not null;default:0" json:"durationSeconds"`
+ TriggeredBy string `gorm:"column:triggered_by;size:100" json:"triggeredBy"`
+ StartedAt time.Time `gorm:"column:started_at;index;not null" json:"startedAt"`
+ CompletedAt *time.Time `gorm:"column:completed_at;index" json:"completedAt,omitempty"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
+}
+
+func (ReplicationRecord) TableName() string {
+ return "replication_records"
+}
diff --git a/server/internal/model/restore_record.go b/server/internal/model/restore_record.go
new file mode 100644
index 0000000..beec941
--- /dev/null
+++ b/server/internal/model/restore_record.go
@@ -0,0 +1,33 @@
+package model
+
+import "time"
+
+// RestoreRecord 代表一次恢复执行,用于审计、实时日志与列表页。
+// 每次从 BackupRecord 触发恢复都会产生独立 RestoreRecord,与 BackupRecord 一对多。
+const (
+ RestoreRecordStatusRunning = "running"
+ RestoreRecordStatusSuccess = "success"
+ RestoreRecordStatusFailed = "failed"
+)
+
+type RestoreRecord struct {
+ ID uint `gorm:"primaryKey" json:"id"`
+ BackupRecordID uint `gorm:"column:backup_record_id;index;not null" json:"backupRecordId"`
+ BackupRecord BackupRecord `json:"backupRecord,omitempty"`
+ TaskID uint `gorm:"column:task_id;index;not null" json:"taskId"`
+ Task BackupTask `json:"task,omitempty"`
+ NodeID uint `gorm:"column:node_id;index;default:0" json:"nodeId"`
+ Status string `gorm:"size:20;index;not null" json:"status"`
+ ErrorMessage string `gorm:"column:error_message;size:2000" json:"errorMessage"`
+ LogContent string `gorm:"column:log_content;type:text" json:"logContent"`
+ DurationSeconds int `gorm:"column:duration_seconds;not null;default:0" json:"durationSeconds"`
+ StartedAt time.Time `gorm:"column:started_at;index;not null" json:"startedAt"`
+ CompletedAt *time.Time `gorm:"column:completed_at;index" json:"completedAt,omitempty"`
+ TriggeredBy string `gorm:"column:triggered_by;size:100" json:"triggeredBy"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
+}
+
+func (RestoreRecord) TableName() string {
+ return "restore_records"
+}
diff --git a/server/internal/model/storage_target.go b/server/internal/model/storage_target.go
index d1ce9a0..40dc6f5 100644
--- a/server/internal/model/storage_target.go
+++ b/server/internal/model/storage_target.go
@@ -14,8 +14,12 @@ type StorageTarget struct {
LastTestedAt *time.Time `gorm:"column:last_tested_at" json:"lastTestedAt,omitempty"`
LastTestStatus string `gorm:"column:last_test_status;size:32;not null;default:'unknown'" json:"lastTestStatus"`
LastTestMessage string `gorm:"column:last_test_message;size:512" json:"lastTestMessage"`
- CreatedAt time.Time `json:"createdAt"`
- UpdatedAt time.Time `json:"updatedAt"`
+ // QuotaBytes 软限额(字节)。0 = 不限制。
+ // 备份执行前检查:该目标上已累计字节数 + 本次文件大小 > QuotaBytes 时拒绝上传。
+ // 比容量预警(85% 通知)更严格,作为企业治理"防超用"的硬性闸门。
+ QuotaBytes int64 `gorm:"column:quota_bytes;not null;default:0" json:"quotaBytes"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
}
func (StorageTarget) TableName() string {
diff --git a/server/internal/model/task_template.go b/server/internal/model/task_template.go
new file mode 100644
index 0000000..9d27976
--- /dev/null
+++ b/server/internal/model/task_template.go
@@ -0,0 +1,27 @@
+package model
+
+import "time"
+
+// TaskTemplate 是批量创建任务的模板。
+// 用途:大规模场景(100+ 任务)下保存一份参数预设,
+// 再通过"应用模板"接口一次性创建多个任务(变量替换 Name/SourcePath 等)。
+//
+// 参数存 JSON(Payload),结构与 service.BackupTaskUpsertInput 基本一致,
+// 仅以下字段在应用时可被变量覆盖:
+// - name
+// - sourcePath / sourcePaths 中的 {{.Host}} / {{.Env}} 等占位符
+type TaskTemplate struct {
+ ID uint `gorm:"primaryKey" json:"id"`
+ Name string `gorm:"size:128;uniqueIndex;not null" json:"name"`
+ Description string `gorm:"size:500" json:"description"`
+ TaskType string `gorm:"column:task_type;size:20;not null" json:"taskType"`
+ // Payload JSON,存完整 BackupTaskUpsertInput 的序列化
+ Payload string `gorm:"type:text;not null" json:"payload"`
+ CreatedBy string `gorm:"column:created_by;size:128" json:"createdBy"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
+}
+
+func (TaskTemplate) TableName() string {
+ return "task_templates"
+}
diff --git a/server/internal/model/user.go b/server/internal/model/user.go
index 581ed09..69785a3 100644
--- a/server/internal/model/user.go
+++ b/server/internal/model/user.go
@@ -2,6 +2,25 @@ package model
import "time"
+// 用户角色常量。RBAC 策略:
+// - admin:系统全权(创建用户、管理 API Key、删除数据、改设置)
+// - operator:日常运维(创建/编辑/执行任务、触发恢复与验证、管理存储目标与通知)
+// - viewer:只读(查看仪表盘、任务、记录、日志,不能触发或改变状态)
+const (
+ UserRoleAdmin = "admin"
+ UserRoleOperator = "operator"
+ UserRoleViewer = "viewer"
+)
+
+// IsValidRole 校验角色字符串合法。
+func IsValidRole(role string) bool {
+ switch role {
+ case UserRoleAdmin, UserRoleOperator, UserRoleViewer:
+ return true
+ }
+ return false
+}
+
type User struct {
ID uint `gorm:"primaryKey" json:"id"`
Username string `gorm:"size:64;uniqueIndex;not null" json:"username"`
@@ -9,8 +28,10 @@ type User struct {
DisplayName string `gorm:"size:128;not null" json:"displayName"`
Email string `gorm:"size:255" json:"email"`
Role string `gorm:"size:32;not null;default:admin" json:"role"`
- CreatedAt time.Time `json:"createdAt"`
- UpdatedAt time.Time `json:"updatedAt"`
+ // Disabled 禁用账号(不删除保留审计)。禁用后无法登录。
+ Disabled bool `gorm:"not null;default:false" json:"disabled"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
}
func (User) TableName() string {
diff --git a/server/internal/model/verification_record.go b/server/internal/model/verification_record.go
new file mode 100644
index 0000000..b7c7a07
--- /dev/null
+++ b/server/internal/model/verification_record.go
@@ -0,0 +1,43 @@
+package model
+
+import "time"
+
+// VerificationRecord 记录一次备份验证(或演练)的执行。
+// 验证目标:从指定 BackupRecord 读取归档 → 在沙箱内执行只读校验
+// (解压/格式检查/完整性校验),不改动源数据。
+const (
+ VerificationRecordStatusRunning = "running"
+ VerificationRecordStatusSuccess = "success"
+ VerificationRecordStatusFailed = "failed"
+
+ // VerificationModeQuick 仅做格式与完整性校验(tar header、SHA-256、DB dump 头)。
+ // 耗时短,不占用目标系统资源,适合每日调度。
+ VerificationModeQuick = "quick"
+ // VerificationModeDeep 真正恢复到隔离沙箱(临时库或解压目录),验证可读。
+ // 耗时较长,适合每周/每月。当前版本保留接口不实现。
+ VerificationModeDeep = "deep"
+)
+
+type VerificationRecord struct {
+ ID uint `gorm:"primaryKey" json:"id"`
+ BackupRecordID uint `gorm:"column:backup_record_id;index;not null" json:"backupRecordId"`
+ BackupRecord BackupRecord `json:"backupRecord,omitempty"`
+ TaskID uint `gorm:"column:task_id;index;not null" json:"taskId"`
+ Task BackupTask `json:"task,omitempty"`
+ NodeID uint `gorm:"column:node_id;index;default:0" json:"nodeId"`
+ Mode string `gorm:"size:20;not null;default:'quick'" json:"mode"`
+ Status string `gorm:"size:20;index;not null" json:"status"`
+ Summary string `gorm:"size:500" json:"summary"`
+ ErrorMessage string `gorm:"column:error_message;size:2000" json:"errorMessage"`
+ LogContent string `gorm:"column:log_content;type:text" json:"logContent"`
+ DurationSeconds int `gorm:"column:duration_seconds;not null;default:0" json:"durationSeconds"`
+ StartedAt time.Time `gorm:"column:started_at;index;not null" json:"startedAt"`
+ CompletedAt *time.Time `gorm:"column:completed_at;index" json:"completedAt,omitempty"`
+ TriggeredBy string `gorm:"column:triggered_by;size:100" json:"triggeredBy"`
+ CreatedAt time.Time `json:"createdAt"`
+ UpdatedAt time.Time `json:"updatedAt"`
+}
+
+func (VerificationRecord) TableName() string {
+ return "verification_records"
+}
diff --git a/server/internal/repository/agent_command_repository.go b/server/internal/repository/agent_command_repository.go
index 0e07e51..aff682b 100644
--- a/server/internal/repository/agent_command_repository.go
+++ b/server/internal/repository/agent_command_repository.go
@@ -18,7 +18,14 @@ type AgentCommandRepository interface {
ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error)
Update(ctx context.Context, cmd *model.AgentCommand) error
// MarkStaleTimeout 把 dispatched 状态但超时未完成的命令标记为 timeout。
+ // 返回被标记的行数。不返回具体命令(供背景监控简单调用)。
MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error)
+ // ListStaleDispatched 列出 dispatched 但已超时、尚未被标记的命令。
+ // 调用方需要把它们逐一标记 timeout 并联动关联记录状态。
+ ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
+ // ListPendingByNode 列出某节点下的所有 pending/dispatched 命令。
+ // 用于删除节点或节点离线时的清理。
+ ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error)
}
type GormAgentCommandRepository struct {
@@ -99,3 +106,30 @@ func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, thres
}
return result.RowsAffected, nil
}
+
+// ListStaleDispatched 列出 dispatched 但 dispatched_at 早于 threshold 的命令。
+func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
+ var items []model.AgentCommand
+ if err := r.db.WithContext(ctx).
+ Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
+ Order("id asc").
+ Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
+// ListPendingByNode 列出某节点下所有待执行(pending 或 dispatched)命令。
+func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error) {
+ var items []model.AgentCommand
+ if err := r.db.WithContext(ctx).
+ Where("node_id = ? AND status IN ?", nodeID, []string{
+ model.AgentCommandStatusPending,
+ model.AgentCommandStatusDispatched,
+ }).
+ Order("id asc").
+ Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
diff --git a/server/internal/repository/api_key_repository.go b/server/internal/repository/api_key_repository.go
new file mode 100644
index 0000000..39c53f4
--- /dev/null
+++ b/server/internal/repository/api_key_repository.go
@@ -0,0 +1,78 @@
+package repository
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "backupx/server/internal/model"
+ "gorm.io/gorm"
+)
+
+type ApiKeyRepository interface {
+ Create(ctx context.Context, key *model.ApiKey) error
+ Update(ctx context.Context, key *model.ApiKey) error
+ Delete(ctx context.Context, id uint) error
+ FindByID(ctx context.Context, id uint) (*model.ApiKey, error)
+ FindByHash(ctx context.Context, hash string) (*model.ApiKey, error)
+ List(ctx context.Context) ([]model.ApiKey, error)
+ MarkUsed(ctx context.Context, id uint, at time.Time) error
+}
+
+type GormApiKeyRepository struct {
+ db *gorm.DB
+}
+
+func NewApiKeyRepository(db *gorm.DB) *GormApiKeyRepository {
+ return &GormApiKeyRepository{db: db}
+}
+
+func (r *GormApiKeyRepository) Create(ctx context.Context, key *model.ApiKey) error {
+ return r.db.WithContext(ctx).Create(key).Error
+}
+
+func (r *GormApiKeyRepository) Update(ctx context.Context, key *model.ApiKey) error {
+ return r.db.WithContext(ctx).Save(key).Error
+}
+
+func (r *GormApiKeyRepository) Delete(ctx context.Context, id uint) error {
+ return r.db.WithContext(ctx).Delete(&model.ApiKey{}, id).Error
+}
+
+func (r *GormApiKeyRepository) FindByID(ctx context.Context, id uint) (*model.ApiKey, error) {
+ var item model.ApiKey
+ if err := r.db.WithContext(ctx).First(&item, id).Error; err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ return nil, nil
+ }
+ return nil, err
+ }
+ return &item, nil
+}
+
+func (r *GormApiKeyRepository) FindByHash(ctx context.Context, hash string) (*model.ApiKey, error) {
+ var item model.ApiKey
+ if err := r.db.WithContext(ctx).Where("key_hash = ?", hash).First(&item).Error; err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ return nil, nil
+ }
+ return nil, err
+ }
+ return &item, nil
+}
+
+func (r *GormApiKeyRepository) List(ctx context.Context) ([]model.ApiKey, error) {
+ var items []model.ApiKey
+ if err := r.db.WithContext(ctx).Order("created_at desc").Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
+// MarkUsed 更新最近使用时间。写入失败不应阻断认证主流程,调用方需忽略错误。
+func (r *GormApiKeyRepository) MarkUsed(ctx context.Context, id uint, at time.Time) error {
+ return r.db.WithContext(ctx).
+ Model(&model.ApiKey{}).
+ Where("id = ?", id).
+ Update("last_used_at", at).Error
+}
diff --git a/server/internal/repository/audit_log_repository.go b/server/internal/repository/audit_log_repository.go
index df6d82d..9ffbb31 100644
--- a/server/internal/repository/audit_log_repository.go
+++ b/server/internal/repository/audit_log_repository.go
@@ -2,6 +2,7 @@ package repository
import (
"context"
+ "time"
"backupx/server/internal/model"
"gorm.io/gorm"
@@ -9,6 +10,12 @@ import (
type AuditLogListOptions struct {
Category string
+ Action string
+ Username string
+ TargetID string
+ Keyword string // 模糊匹配 detail / target_name
+ DateFrom *time.Time
+ DateTo *time.Time
Limit int
Offset int
}
@@ -21,6 +28,7 @@ type AuditLogListResult struct {
type AuditLogRepository interface {
Create(ctx context.Context, log *model.AuditLog) error
List(ctx context.Context, opts AuditLogListOptions) (*AuditLogListResult, error)
+ ListAll(ctx context.Context, opts AuditLogListOptions) ([]model.AuditLog, error)
}
type gormAuditLogRepository struct {
@@ -36,10 +44,7 @@ func (r *gormAuditLogRepository) Create(_ context.Context, log *model.AuditLog)
}
func (r *gormAuditLogRepository) List(_ context.Context, opts AuditLogListOptions) (*AuditLogListResult, error) {
- query := r.db.Model(&model.AuditLog{})
- if opts.Category != "" {
- query = query.Where("category = ?", opts.Category)
- }
+ query := r.buildQuery(opts)
var total int64
if err := query.Count(&total).Error; err != nil {
return nil, err
@@ -54,3 +59,42 @@ func (r *gormAuditLogRepository) List(_ context.Context, opts AuditLogListOption
}
return &AuditLogListResult{Items: items, Total: total}, nil
}
+
+// ListAll 导出专用:不分页返回所有匹配记录(上限 10k 防爆)。
+func (r *gormAuditLogRepository) ListAll(_ context.Context, opts AuditLogListOptions) ([]model.AuditLog, error) {
+ query := r.buildQuery(opts)
+ const maxExportRows = 10000
+ var items []model.AuditLog
+ if err := query.Order("created_at DESC").Limit(maxExportRows).Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
+// buildQuery 统一构造带筛选条件的查询。
+func (r *gormAuditLogRepository) buildQuery(opts AuditLogListOptions) *gorm.DB {
+ query := r.db.Model(&model.AuditLog{})
+ if opts.Category != "" {
+ query = query.Where("category = ?", opts.Category)
+ }
+ if opts.Action != "" {
+ query = query.Where("action = ?", opts.Action)
+ }
+ if opts.Username != "" {
+ query = query.Where("username = ?", opts.Username)
+ }
+ if opts.TargetID != "" {
+ query = query.Where("target_id = ?", opts.TargetID)
+ }
+ if opts.Keyword != "" {
+ pattern := "%" + opts.Keyword + "%"
+ query = query.Where("detail LIKE ? OR target_name LIKE ?", pattern, pattern)
+ }
+ if opts.DateFrom != nil {
+ query = query.Where("created_at >= ?", opts.DateFrom.UTC())
+ }
+ if opts.DateTo != nil {
+ query = query.Where("created_at <= ?", opts.DateTo.UTC())
+ }
+ return query
+}
diff --git a/server/internal/repository/backup_task_repository.go b/server/internal/repository/backup_task_repository.go
index 2f01d13..6111b50 100644
--- a/server/internal/repository/backup_task_repository.go
+++ b/server/internal/repository/backup_task_repository.go
@@ -18,11 +18,13 @@ type BackupTaskRepository interface {
FindByID(context.Context, uint) (*model.BackupTask, error)
FindByName(context.Context, string) (*model.BackupTask, error)
ListSchedulable(context.Context) ([]model.BackupTask, error)
+ ListVerifySchedulable(context.Context) ([]model.BackupTask, error)
Count(context.Context) (int64, error)
CountEnabled(context.Context) (int64, error)
CountByStorageTargetID(context.Context, uint) (int64, error)
CountByNodeID(context.Context, uint) (int64, error)
ListByNodeID(context.Context, uint) ([]model.BackupTask, error)
+ DistinctTags(context.Context) ([]string, error)
Create(context.Context, *model.BackupTask) error
Update(context.Context, *model.BackupTask) error
Delete(context.Context, uint) error
@@ -37,7 +39,7 @@ func NewBackupTaskRepository(db *gorm.DB) *GormBackupTaskRepository {
}
func (r *GormBackupTaskRepository) List(ctx context.Context, options BackupTaskListOptions) ([]model.BackupTask, error) {
- query := r.db.WithContext(ctx).Model(&model.BackupTask{}).Preload("StorageTarget").Preload("StorageTargets").Order("updated_at desc")
+ query := r.db.WithContext(ctx).Model(&model.BackupTask{}).Preload("StorageTarget").Preload("StorageTargets").Preload("Node").Order("updated_at desc")
if options.Type != "" {
query = query.Where("type = ?", options.Type)
}
@@ -53,7 +55,7 @@ func (r *GormBackupTaskRepository) List(ctx context.Context, options BackupTaskL
func (r *GormBackupTaskRepository) FindByID(ctx context.Context, id uint) (*model.BackupTask, error) {
var item model.BackupTask
- if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").First(&item, id).Error; err != nil {
+ if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").Preload("Node").First(&item, id).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, nil
}
@@ -75,12 +77,105 @@ func (r *GormBackupTaskRepository) FindByName(ctx context.Context, name string)
func (r *GormBackupTaskRepository) ListSchedulable(ctx context.Context) ([]model.BackupTask, error) {
var items []model.BackupTask
- if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").Where("enabled = ? AND cron_expr <> ''", true).Order("id asc").Find(&items).Error; err != nil {
+ if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").Preload("Node").Where("enabled = ? AND cron_expr <> ''", true).Order("id asc").Find(&items).Error; err != nil {
return nil, err
}
return items, nil
}
+// ListVerifySchedulable 列出所有启用且配置了验证 cron 的任务。
+// 与 ListSchedulable 的区别:即使任务本身没有备份 cron,只要配置了 verify_cron_expr
+// 也会被调度(验证是独立的定时动作)。
+func (r *GormBackupTaskRepository) ListVerifySchedulable(ctx context.Context) ([]model.BackupTask, error) {
+ var items []model.BackupTask
+ if err := r.db.WithContext(ctx).
+ Preload("StorageTarget").
+ Preload("StorageTargets").
+ Preload("Node").
+ Where("enabled = ? AND verify_enabled = ? AND verify_cron_expr <> ''", true, true).
+ Order("id asc").
+ Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
+// DistinctTags 返回系统中所有任务使用过的唯一标签(用于 UI 建议)。
+// tags 字段是逗号分隔字符串,此方法会扁平化后去重。
+func (r *GormBackupTaskRepository) DistinctTags(ctx context.Context) ([]string, error) {
+ var rows []struct {
+ Tags string
+ }
+ if err := r.db.WithContext(ctx).
+ Model(&model.BackupTask{}).
+ Select("tags").
+ Where("tags <> ''").
+ Scan(&rows).Error; err != nil {
+ return nil, err
+ }
+ seen := map[string]bool{}
+ result := []string{}
+ for _, row := range rows {
+ for _, raw := range splitTags(row.Tags) {
+ if !seen[raw] {
+ seen[raw] = true
+ result = append(result, raw)
+ }
+ }
+ }
+ return result, nil
+}
+
+// splitTags 把逗号分隔的 tags 字符串拆成 trim 后的非空切片。
+func splitTags(value string) []string {
+ if value == "" {
+ return nil
+ }
+ var out []string
+ for _, t := range splitAndTrim(value, ",") {
+ if t != "" {
+ out = append(out, t)
+ }
+ }
+ return out
+}
+
+// splitAndTrim 内部工具函数:按分隔符切分并去除每段空白。
+func splitAndTrim(value, sep string) []string {
+ parts := make([]string, 0)
+ for _, p := range bytesSplit(value, sep) {
+ trimmed := bytesTrimSpace(p)
+ parts = append(parts, trimmed)
+ }
+ return parts
+}
+
+// bytesSplit / bytesTrimSpace 只是 strings 的薄包装,便于此仓储文件不引入 strings 依赖。
+func bytesSplit(value, sep string) []string {
+ out := []string{}
+ start := 0
+ for i := 0; i+len(sep) <= len(value); i++ {
+ if value[i:i+len(sep)] == sep {
+ out = append(out, value[start:i])
+ start = i + len(sep)
+ i += len(sep) - 1
+ }
+ }
+ out = append(out, value[start:])
+ return out
+}
+
+func bytesTrimSpace(value string) string {
+ start, end := 0, len(value)
+ for start < end && (value[start] == ' ' || value[start] == '\t' || value[start] == '\n' || value[start] == '\r') {
+ start++
+ }
+ for end > start && (value[end-1] == ' ' || value[end-1] == '\t' || value[end-1] == '\n' || value[end-1] == '\r') {
+ end--
+ }
+ return value[start:end]
+}
+
func (r *GormBackupTaskRepository) Count(ctx context.Context) (int64, error) {
var count int64
if err := r.db.WithContext(ctx).Model(&model.BackupTask{}).Count(&count).Error; err != nil {
@@ -117,7 +212,7 @@ func (r *GormBackupTaskRepository) CountByNodeID(ctx context.Context, nodeID uin
// ListByNodeID 列出绑定到指定节点的任务。用于 Agent 拉取本节点待执行任务。
func (r *GormBackupTaskRepository) ListByNodeID(ctx context.Context, nodeID uint) ([]model.BackupTask, error) {
var items []model.BackupTask
- if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").Where("node_id = ?", nodeID).Order("id asc").Find(&items).Error; err != nil {
+ if err := r.db.WithContext(ctx).Preload("StorageTarget").Preload("StorageTargets").Preload("Node").Where("node_id = ?", nodeID).Order("id asc").Find(&items).Error; err != nil {
return nil, err
}
return items, nil
diff --git a/server/internal/repository/replication_record_repository.go b/server/internal/repository/replication_record_repository.go
new file mode 100644
index 0000000..78a57ae
--- /dev/null
+++ b/server/internal/repository/replication_record_repository.go
@@ -0,0 +1,106 @@
+package repository
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "backupx/server/internal/model"
+ "gorm.io/gorm"
+)
+
+type ReplicationRecordListOptions struct {
+ TaskID *uint
+ BackupRecordID *uint
+ DestTargetID *uint
+ Status string
+ DateFrom *time.Time
+ DateTo *time.Time
+ Limit int
+ Offset int
+}
+
+type ReplicationRecordRepository interface {
+ Create(ctx context.Context, record *model.ReplicationRecord) error
+ Update(ctx context.Context, record *model.ReplicationRecord) error
+ FindByID(ctx context.Context, id uint) (*model.ReplicationRecord, error)
+ List(ctx context.Context, opts ReplicationRecordListOptions) ([]model.ReplicationRecord, error)
+ Count(ctx context.Context) (int64, error)
+}
+
+type GormReplicationRecordRepository struct {
+ db *gorm.DB
+}
+
+func NewReplicationRecordRepository(db *gorm.DB) *GormReplicationRecordRepository {
+ return &GormReplicationRecordRepository{db: db}
+}
+
+func (r *GormReplicationRecordRepository) Create(ctx context.Context, item *model.ReplicationRecord) error {
+ return r.db.WithContext(ctx).Create(item).Error
+}
+
+func (r *GormReplicationRecordRepository) Update(ctx context.Context, item *model.ReplicationRecord) error {
+ return r.db.WithContext(ctx).Save(item).Error
+}
+
+func (r *GormReplicationRecordRepository) FindByID(ctx context.Context, id uint) (*model.ReplicationRecord, error) {
+ var item model.ReplicationRecord
+ if err := r.db.WithContext(ctx).
+ Preload("BackupRecord").
+ Preload("SourceTarget").
+ Preload("DestTarget").
+ First(&item, id).Error; err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ return nil, nil
+ }
+ return nil, err
+ }
+ return &item, nil
+}
+
+func (r *GormReplicationRecordRepository) List(ctx context.Context, opts ReplicationRecordListOptions) ([]model.ReplicationRecord, error) {
+ query := r.db.WithContext(ctx).
+ Model(&model.ReplicationRecord{}).
+ Preload("BackupRecord").
+ Preload("SourceTarget").
+ Preload("DestTarget").
+ Order("started_at desc")
+ if opts.TaskID != nil {
+ query = query.Where("task_id = ?", *opts.TaskID)
+ }
+ if opts.BackupRecordID != nil {
+ query = query.Where("backup_record_id = ?", *opts.BackupRecordID)
+ }
+ if opts.DestTargetID != nil {
+ query = query.Where("dest_target_id = ?", *opts.DestTargetID)
+ }
+ if opts.Status != "" {
+ query = query.Where("status = ?", opts.Status)
+ }
+ if opts.DateFrom != nil {
+ query = query.Where("started_at >= ?", opts.DateFrom.UTC())
+ }
+ if opts.DateTo != nil {
+ query = query.Where("started_at <= ?", opts.DateTo.UTC())
+ }
+ if opts.Limit > 0 {
+ query = query.Limit(opts.Limit)
+ }
+ if opts.Offset > 0 {
+ query = query.Offset(opts.Offset)
+ }
+ var items []model.ReplicationRecord
+ if err := query.Find(&items).Error; err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
+func (r *GormReplicationRecordRepository) Count(ctx context.Context) (int64, error) {
+ var count int64
+ if err := r.db.WithContext(ctx).Model(&model.ReplicationRecord{}).Count(&count).Error; err != nil {
+ return 0, err
+ }
+ return count, nil
+}
diff --git a/server/internal/repository/restore_record_repository.go b/server/internal/repository/restore_record_repository.go
new file mode 100644
index 0000000..c1f213b
--- /dev/null
+++ b/server/internal/repository/restore_record_repository.go
@@ -0,0 +1,111 @@
+package repository
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "backupx/server/internal/model"
+ "gorm.io/gorm"
+)
+
+// RestoreRecordListOptions holds the optional filters applied by List.
+// Nil pointers / zero values disable the corresponding filter.
+type RestoreRecordListOptions struct {
+	TaskID         *uint      // owning backup task
+	BackupRecordID *uint      // source backup record being restored
+	NodeID         *uint      // node the restore was routed to
+	Status         string     // exact status match; "" means any
+	DateFrom       *time.Time // inclusive lower bound on started_at (compared in UTC)
+	DateTo         *time.Time // inclusive upper bound on started_at (compared in UTC)
+	Limit          int        // page size; <= 0 returns all rows
+	Offset         int        // page offset; <= 0 starts at the first row
+}
+
+// RestoreRecordRepository is the persistence contract for restore records.
+// FindByID returns (nil, nil) when the row does not exist.
+type RestoreRecordRepository interface {
+	Create(ctx context.Context, item *model.RestoreRecord) error
+	Update(ctx context.Context, item *model.RestoreRecord) error
+	Delete(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*model.RestoreRecord, error)
+	List(ctx context.Context, options RestoreRecordListOptions) ([]model.RestoreRecord, error)
+	Count(ctx context.Context) (int64, error)
+}
+
+// GormRestoreRecordRepository is the GORM-backed implementation of
+// RestoreRecordRepository.
+type GormRestoreRecordRepository struct {
+	db *gorm.DB
+}
+
+// NewRestoreRecordRepository wires the repository to the given DB handle.
+func NewRestoreRecordRepository(db *gorm.DB) *GormRestoreRecordRepository {
+	return &GormRestoreRecordRepository{db: db}
+}
+
+// Create inserts a new restore record; GORM fills item.ID on success.
+func (r *GormRestoreRecordRepository) Create(ctx context.Context, item *model.RestoreRecord) error {
+	return r.db.WithContext(ctx).Create(item).Error
+}
+
+// Update persists all fields of item (gorm Save performs a full-row update).
+func (r *GormRestoreRecordRepository) Update(ctx context.Context, item *model.RestoreRecord) error {
+	return r.db.WithContext(ctx).Save(item).Error
+}
+
+// Delete removes the restore record by primary key.
+func (r *GormRestoreRecordRepository) Delete(ctx context.Context, id uint) error {
+	return r.db.WithContext(ctx).Delete(&model.RestoreRecord{}, id).Error
+}
+
+// FindByID loads one restore record by primary key with its Task and
+// BackupRecord associations preloaded. Returns (nil, nil) when no row exists.
+func (r *GormRestoreRecordRepository) FindByID(ctx context.Context, id uint) (*model.RestoreRecord, error) {
+	var item model.RestoreRecord
+	if err := r.db.WithContext(ctx).
+		Preload("Task").
+		Preload("BackupRecord").
+		First(&item, id).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			// Not-found is represented as (nil, nil), not as an error.
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &item, nil
+}
+
+// List returns restore records newest-first (ordered by started_at),
+// applying only the filters set in options. See RestoreRecordListOptions
+// for the semantics of each field.
+func (r *GormRestoreRecordRepository) List(ctx context.Context, options RestoreRecordListOptions) ([]model.RestoreRecord, error) {
+	query := r.db.WithContext(ctx).
+		Model(&model.RestoreRecord{}).
+		Preload("Task").
+		Preload("BackupRecord").
+		Order("started_at desc")
+	if options.TaskID != nil {
+		query = query.Where("task_id = ?", *options.TaskID)
+	}
+	if options.BackupRecordID != nil {
+		query = query.Where("backup_record_id = ?", *options.BackupRecordID)
+	}
+	if options.NodeID != nil {
+		query = query.Where("node_id = ?", *options.NodeID)
+	}
+	if options.Status != "" {
+		query = query.Where("status = ?", options.Status)
+	}
+	// Date bounds are normalized to UTC before comparison against started_at.
+	if options.DateFrom != nil {
+		query = query.Where("started_at >= ?", options.DateFrom.UTC())
+	}
+	if options.DateTo != nil {
+		query = query.Where("started_at <= ?", options.DateTo.UTC())
+	}
+	if options.Limit > 0 {
+		query = query.Limit(options.Limit)
+	}
+	if options.Offset > 0 {
+		query = query.Offset(options.Offset)
+	}
+	var items []model.RestoreRecord
+	if err := query.Find(&items).Error; err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
+// Count returns the total number of restore records, unfiltered.
+func (r *GormRestoreRecordRepository) Count(ctx context.Context) (int64, error) {
+	var count int64
+	if err := r.db.WithContext(ctx).Model(&model.RestoreRecord{}).Count(&count).Error; err != nil {
+		return 0, err
+	}
+	return count, nil
+}
diff --git a/server/internal/repository/restore_record_repository_test.go b/server/internal/repository/restore_record_repository_test.go
new file mode 100644
index 0000000..f51b237
--- /dev/null
+++ b/server/internal/repository/restore_record_repository_test.go
@@ -0,0 +1,126 @@
+package repository
+
+import (
+ "context"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "backupx/server/internal/config"
+ "backupx/server/internal/database"
+ "backupx/server/internal/logger"
+ "backupx/server/internal/model"
+)
+
+// newRestoreRecordTestRepository opens a fresh SQLite database in a temp dir
+// and seeds the foreign-key chain a restore record depends on
+// (storage target → backup task → successful backup record).
+// Returns the repository under test and the seeded backup record's ID.
+func newRestoreRecordTestRepository(t *testing.T) (*GormRestoreRecordRepository, uint) {
+	t.Helper()
+	log, err := logger.New(config.LogConfig{Level: "error"})
+	if err != nil {
+		t.Fatalf("logger.New returned error: %v", err)
+	}
+	db, err := database.Open(config.DatabaseConfig{Path: filepath.Join(t.TempDir(), "backupx.db")}, log)
+	if err != nil {
+		t.Fatalf("database.Open returned error: %v", err)
+	}
+	storageTarget := &model.StorageTarget{Name: "local", Type: "local_disk", Enabled: true, ConfigCiphertext: "{}", ConfigVersion: 1, LastTestStatus: "unknown"}
+	if err := db.Create(storageTarget).Error; err != nil {
+		t.Fatalf("seed storage target error: %v", err)
+	}
+	task := &model.BackupTask{Name: "website", Type: "file", Enabled: true, SourcePath: "/srv/www", StorageTargetID: storageTarget.ID, RetentionDays: 30, Compression: "gzip", MaxBackups: 10, LastStatus: "idle"}
+	if err := db.Create(task).Error; err != nil {
+		t.Fatalf("seed backup task error: %v", err)
+	}
+	now := time.Now().UTC()
+	completedAt := now.Add(time.Minute)
+	backupRecord := &model.BackupRecord{TaskID: task.ID, StorageTargetID: storageTarget.ID, Status: model.BackupRecordStatusSuccess, FileName: "website.tar.gz", FileSize: 1024, StoragePath: "tasks/1/website.tar.gz", StartedAt: now, CompletedAt: &completedAt}
+	if err := db.Create(backupRecord).Error; err != nil {
+		t.Fatalf("seed backup record error: %v", err)
+	}
+	return NewRestoreRecordRepository(db), backupRecord.ID
+}
+
+// TestRestoreRecordRepositoryCRUD walks the full lifecycle: create → find
+// (verifying the BackupRecord preload) → update status → list with status
+// and backup-record filters → count → delete → find-after-delete (nil).
+func TestRestoreRecordRepositoryCRUD(t *testing.T) {
+	ctx := context.Background()
+	repo, backupRecordID := newRestoreRecordTestRepository(t)
+
+	startedAt := time.Now().UTC()
+	restore := &model.RestoreRecord{
+		BackupRecordID: backupRecordID,
+		TaskID:         1,
+		NodeID:         0,
+		Status:         model.RestoreRecordStatusRunning,
+		StartedAt:      startedAt,
+		TriggeredBy:    "admin",
+	}
+	if err := repo.Create(ctx, restore); err != nil {
+		t.Fatalf("Create returned error: %v", err)
+	}
+	if restore.ID == 0 {
+		t.Fatalf("expected generated restore ID, got 0")
+	}
+
+	found, err := repo.FindByID(ctx, restore.ID)
+	if err != nil {
+		t.Fatalf("FindByID returned error: %v", err)
+	}
+	if found == nil || found.TriggeredBy != "admin" || found.Status != model.RestoreRecordStatusRunning {
+		t.Fatalf("unexpected restore record: %#v", found)
+	}
+	if found.BackupRecord.ID != backupRecordID {
+		t.Fatalf("expected BackupRecord preload, got %#v", found.BackupRecord)
+	}
+
+	// Flip the record to success so the status filters below have one of each.
+	completedAt := startedAt.Add(30 * time.Second)
+	found.Status = model.RestoreRecordStatusSuccess
+	found.DurationSeconds = 30
+	found.CompletedAt = &completedAt
+	if err := repo.Update(ctx, found); err != nil {
+		t.Fatalf("Update returned error: %v", err)
+	}
+
+	runningFilter := model.RestoreRecordStatusRunning
+	list, err := repo.List(ctx, RestoreRecordListOptions{Status: runningFilter})
+	if err != nil {
+		t.Fatalf("List returned error: %v", err)
+	}
+	if len(list) != 0 {
+		t.Fatalf("expected no running restores after update, got %d", len(list))
+	}
+
+	successFilter := model.RestoreRecordStatusSuccess
+	successList, err := repo.List(ctx, RestoreRecordListOptions{Status: successFilter})
+	if err != nil {
+		t.Fatalf("List success returned error: %v", err)
+	}
+	if len(successList) != 1 {
+		t.Fatalf("expected 1 success restore, got %d", len(successList))
+	}
+
+	brID := backupRecordID
+	byBackup, err := repo.List(ctx, RestoreRecordListOptions{BackupRecordID: &brID})
+	if err != nil {
+		t.Fatalf("List byBackup returned error: %v", err)
+	}
+	if len(byBackup) != 1 {
+		t.Fatalf("expected 1 restore for backup record, got %d", len(byBackup))
+	}
+
+	total, err := repo.Count(ctx)
+	if err != nil {
+		t.Fatalf("Count returned error: %v", err)
+	}
+	if total != 1 {
+		t.Fatalf("expected 1 total, got %d", total)
+	}
+
+	if err := repo.Delete(ctx, restore.ID); err != nil {
+		t.Fatalf("Delete returned error: %v", err)
+	}
+	// FindByID encodes not-found as (nil, nil), so both checks are required.
+	afterDel, err := repo.FindByID(ctx, restore.ID)
+	if err != nil {
+		t.Fatalf("FindByID after delete returned error: %v", err)
+	}
+	if afterDel != nil {
+		t.Fatalf("expected nil after delete, got %#v", afterDel)
+	}
+}
diff --git a/server/internal/repository/task_template_repository.go b/server/internal/repository/task_template_repository.go
new file mode 100644
index 0000000..56db0ee
--- /dev/null
+++ b/server/internal/repository/task_template_repository.go
@@ -0,0 +1,68 @@
+package repository
+
+import (
+ "context"
+ "errors"
+
+ "backupx/server/internal/model"
+ "gorm.io/gorm"
+)
+
+// TaskTemplateRepository is the persistence contract for reusable task
+// templates (used for bulk task creation with variable overrides).
+// Find-style methods return (nil, nil) when no row matches.
+type TaskTemplateRepository interface {
+	Create(ctx context.Context, template *model.TaskTemplate) error
+	Update(ctx context.Context, template *model.TaskTemplate) error
+	Delete(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*model.TaskTemplate, error)
+	FindByName(ctx context.Context, name string) (*model.TaskTemplate, error)
+	List(ctx context.Context) ([]model.TaskTemplate, error)
+}
+
+// GormTaskTemplateRepository is the GORM-backed implementation of
+// TaskTemplateRepository.
+type GormTaskTemplateRepository struct {
+	db *gorm.DB
+}
+
+// NewTaskTemplateRepository wires the repository to the given DB handle.
+func NewTaskTemplateRepository(db *gorm.DB) *GormTaskTemplateRepository {
+	return &GormTaskTemplateRepository{db: db}
+}
+
+// Create inserts a new template; GORM fills t.ID on success.
+func (r *GormTaskTemplateRepository) Create(ctx context.Context, t *model.TaskTemplate) error {
+	return r.db.WithContext(ctx).Create(t).Error
+}
+
+// Update persists all fields of t (gorm Save performs a full-row update).
+func (r *GormTaskTemplateRepository) Update(ctx context.Context, t *model.TaskTemplate) error {
+	return r.db.WithContext(ctx).Save(t).Error
+}
+
+// Delete removes the template by primary key.
+func (r *GormTaskTemplateRepository) Delete(ctx context.Context, id uint) error {
+	return r.db.WithContext(ctx).Delete(&model.TaskTemplate{}, id).Error
+}
+
+// FindByID loads one template by primary key; (nil, nil) when absent.
+func (r *GormTaskTemplateRepository) FindByID(ctx context.Context, id uint) (*model.TaskTemplate, error) {
+	var item model.TaskTemplate
+	if err := r.db.WithContext(ctx).First(&item, id).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &item, nil
+}
+
+// FindByName loads one template by exact name; (nil, nil) when absent.
+// Useful for uniqueness checks before Create.
+func (r *GormTaskTemplateRepository) FindByName(ctx context.Context, name string) (*model.TaskTemplate, error) {
+	var item model.TaskTemplate
+	if err := r.db.WithContext(ctx).Where("name = ?", name).First(&item).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &item, nil
+}
+
+// List returns all templates ordered by name (ascending).
+func (r *GormTaskTemplateRepository) List(ctx context.Context) ([]model.TaskTemplate, error) {
+	var items []model.TaskTemplate
+	if err := r.db.WithContext(ctx).Order("name asc").Find(&items).Error; err != nil {
+		return nil, err
+	}
+	return items, nil
+}
diff --git a/server/internal/repository/user_repository.go b/server/internal/repository/user_repository.go
index 32f858d..8eba4b8 100644
--- a/server/internal/repository/user_repository.go
+++ b/server/internal/repository/user_repository.go
@@ -10,8 +10,11 @@ import (
type UserRepository interface {
Count(context.Context) (int64, error)
+ CountByRole(context.Context, string) (int64, error)
Create(context.Context, *model.User) error
Update(context.Context, *model.User) error
+ Delete(context.Context, uint) error
+ List(context.Context) ([]model.User, error)
FindByUsername(context.Context, string) (*model.User, error)
FindByID(context.Context, uint) (*model.User, error)
}
@@ -32,6 +35,31 @@ func (r *GormUserRepository) Count(ctx context.Context) (int64, error) {
return count, nil
}
+// CountByRole counts enabled (non-disabled) users holding the given role.
+// Used to prevent deleting or disabling the last remaining admin.
+func (r *GormUserRepository) CountByRole(ctx context.Context, role string) (int64, error) {
+	var count int64
+	if err := r.db.WithContext(ctx).Model(&model.User{}).
+		Where("role = ? AND disabled = ?", role, false).
+		Count(&count).Error; err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
+// List returns all users ordered by creation time (ascending).
+func (r *GormUserRepository) List(ctx context.Context) ([]model.User, error) {
+	var items []model.User
+	if err := r.db.WithContext(ctx).Order("created_at asc").Find(&items).Error; err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
+// Delete removes the user row permanently. Callers are expected to run the
+// last-admin check (via CountByRole) in the service layer before calling this.
+func (r *GormUserRepository) Delete(ctx context.Context, id uint) error {
+	return r.db.WithContext(ctx).Delete(&model.User{}, id).Error
+}
+
func (r *GormUserRepository) Create(ctx context.Context, user *model.User) error {
return r.db.WithContext(ctx).Create(user).Error
}
diff --git a/server/internal/repository/verification_record_repository.go b/server/internal/repository/verification_record_repository.go
new file mode 100644
index 0000000..a025d97
--- /dev/null
+++ b/server/internal/repository/verification_record_repository.go
@@ -0,0 +1,121 @@
+package repository
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "backupx/server/internal/model"
+ "gorm.io/gorm"
+)
+
+// VerificationRecordListOptions holds the optional filters applied by List.
+// Nil pointers / zero values disable the corresponding filter.
+type VerificationRecordListOptions struct {
+	TaskID         *uint      // owning backup task
+	BackupRecordID *uint      // backup record that was verified
+	Status         string     // exact status match; "" means any
+	DateFrom       *time.Time // inclusive lower bound on started_at (compared in UTC)
+	DateTo         *time.Time // inclusive upper bound on started_at (compared in UTC)
+	Limit          int        // page size; <= 0 returns all rows
+	Offset         int        // page offset; <= 0 starts at the first row
+}
+
+// VerificationRecordRepository is the persistence contract for backup
+// verification (restore-drill) records. Find-style methods return
+// (nil, nil) when no row matches.
+type VerificationRecordRepository interface {
+	Create(ctx context.Context, item *model.VerificationRecord) error
+	Update(ctx context.Context, item *model.VerificationRecord) error
+	Delete(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*model.VerificationRecord, error)
+	List(ctx context.Context, options VerificationRecordListOptions) ([]model.VerificationRecord, error)
+	FindLatestByTask(ctx context.Context, taskID uint) (*model.VerificationRecord, error)
+	Count(ctx context.Context) (int64, error)
+}
+
+// GormVerificationRecordRepository is the GORM-backed implementation of
+// VerificationRecordRepository.
+type GormVerificationRecordRepository struct {
+	db *gorm.DB
+}
+
+// NewVerificationRecordRepository wires the repository to the given DB handle.
+func NewVerificationRecordRepository(db *gorm.DB) *GormVerificationRecordRepository {
+	return &GormVerificationRecordRepository{db: db}
+}
+
+// Create inserts a new verification record; GORM fills item.ID on success.
+func (r *GormVerificationRecordRepository) Create(ctx context.Context, item *model.VerificationRecord) error {
+	return r.db.WithContext(ctx).Create(item).Error
+}
+
+// Update persists all fields of item (gorm Save performs a full-row update).
+func (r *GormVerificationRecordRepository) Update(ctx context.Context, item *model.VerificationRecord) error {
+	return r.db.WithContext(ctx).Save(item).Error
+}
+
+// Delete removes the verification record by primary key.
+func (r *GormVerificationRecordRepository) Delete(ctx context.Context, id uint) error {
+	return r.db.WithContext(ctx).Delete(&model.VerificationRecord{}, id).Error
+}
+
+// FindByID loads one verification record by primary key with its Task and
+// BackupRecord associations preloaded. Returns (nil, nil) when no row exists.
+func (r *GormVerificationRecordRepository) FindByID(ctx context.Context, id uint) (*model.VerificationRecord, error) {
+	var item model.VerificationRecord
+	if err := r.db.WithContext(ctx).
+		Preload("Task").
+		Preload("BackupRecord").
+		First(&item, id).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			// Not-found is represented as (nil, nil), not as an error.
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &item, nil
+}
+
+// List returns verification records newest-first (ordered by started_at),
+// applying only the filters set in options.
+func (r *GormVerificationRecordRepository) List(ctx context.Context, options VerificationRecordListOptions) ([]model.VerificationRecord, error) {
+	query := r.db.WithContext(ctx).
+		Model(&model.VerificationRecord{}).
+		Preload("Task").
+		Preload("BackupRecord").
+		Order("started_at desc")
+	if options.TaskID != nil {
+		query = query.Where("task_id = ?", *options.TaskID)
+	}
+	if options.BackupRecordID != nil {
+		query = query.Where("backup_record_id = ?", *options.BackupRecordID)
+	}
+	if options.Status != "" {
+		query = query.Where("status = ?", options.Status)
+	}
+	// Date bounds are normalized to UTC before comparison against started_at.
+	if options.DateFrom != nil {
+		query = query.Where("started_at >= ?", options.DateFrom.UTC())
+	}
+	if options.DateTo != nil {
+		query = query.Where("started_at <= ?", options.DateTo.UTC())
+	}
+	if options.Limit > 0 {
+		query = query.Limit(options.Limit)
+	}
+	if options.Offset > 0 {
+		query = query.Offset(options.Offset)
+	}
+	var items []model.VerificationRecord
+	if err := query.Find(&items).Error; err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
+// FindLatestByTask returns the most recent verification record for a task
+// (by started_at), without preloads. Returns (nil, nil) when the task has
+// never been verified.
+func (r *GormVerificationRecordRepository) FindLatestByTask(ctx context.Context, taskID uint) (*model.VerificationRecord, error) {
+	var item model.VerificationRecord
+	if err := r.db.WithContext(ctx).
+		Where("task_id = ?", taskID).
+		Order("started_at desc").
+		First(&item).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &item, nil
+}
+
+// Count returns the total number of verification records, unfiltered.
+func (r *GormVerificationRecordRepository) Count(ctx context.Context) (int64, error) {
+	var count int64
+	if err := r.db.WithContext(ctx).Model(&model.VerificationRecord{}).Count(&count).Error; err != nil {
+		return 0, err
+	}
+	return count, nil
+}
diff --git a/server/internal/scheduler/service.go b/server/internal/scheduler/service.go
index 65ff9e9..13d67dc 100644
--- a/server/internal/scheduler/service.go
+++ b/server/internal/scheduler/service.go
@@ -6,6 +6,7 @@ import (
"sync"
"time"
+ "backupx/server/internal/backup"
"backupx/server/internal/model"
"backupx/server/internal/repository"
servicepkg "backupx/server/internal/service"
@@ -17,28 +18,59 @@ type TaskRunner interface {
RunTaskByID(context.Context, uint) (*servicepkg.BackupRecordDetail, error)
}
+// VerifyRunner 供调度器触发验证演练。
+// 使用最新成功备份作为源;taskID 对应的任务须配置 VerifyEnabled=true。
+type VerifyRunner interface {
+ StartByTask(ctx context.Context, taskID uint, mode, triggeredBy string) (*servicepkg.VerificationRecordDetail, error)
+}
+
// AuditRecorder 记录审计日志(可选依赖)
type AuditRecorder interface {
Record(servicepkg.AuditEntry)
}
type Service struct {
- mu sync.Mutex
- cron *cron.Cron
- tasks repository.BackupTaskRepository
- runner TaskRunner
- logger *zap.Logger
- audit AuditRecorder
- entries map[uint]cron.EntryID
+ mu sync.Mutex
+ cron *cron.Cron
+ tasks repository.BackupTaskRepository
+ nodes repository.NodeRepository
+ runner TaskRunner
+ verifyRunner VerifyRunner
+ logger *zap.Logger
+ audit AuditRecorder
+ entries map[uint]cron.EntryID // 备份 cron 条目
+ verifyEntries map[uint]cron.EntryID // 验证 cron 条目
}
func NewService(tasks repository.BackupTaskRepository, runner TaskRunner, logger *zap.Logger) *Service {
parser := cron.NewParser(cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)
- return &Service{cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.UTC)), tasks: tasks, runner: runner, logger: logger, entries: make(map[uint]cron.EntryID)}
+ return &Service{
+ cron: cron.New(cron.WithParser(parser), cron.WithLocation(time.UTC)),
+ tasks: tasks,
+ runner: runner,
+ logger: logger,
+ entries: make(map[uint]cron.EntryID),
+ verifyEntries: make(map[uint]cron.EntryID),
+ }
+}
+
+// SetVerifyRunner 注入验证调度器。可选注入:未注入时不处理验证 cron。
+func (s *Service) SetVerifyRunner(runner VerifyRunner) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.verifyRunner = runner
}
func (s *Service) SetAuditRecorder(audit AuditRecorder) { s.audit = audit }
+// SetNodeRepository 注入节点仓储用于调度前的健康检查。
+// 可选注入:未注入时调度器无条件触发任务(单节点场景)。
+func (s *Service) SetNodeRepository(nodes repository.NodeRepository) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.nodes = nodes
+}
+
func (s *Service) Start(ctx context.Context) error {
if err := s.Reload(ctx); err != nil {
return err
@@ -62,25 +94,43 @@ func (s *Service) Reload(ctx context.Context) error {
if err != nil {
return err
}
+ // 验证调度单独扫描(启用验证的任务可能未启用备份 cron,反之亦然)
+ verifyItems, err := s.tasks.ListVerifySchedulable(ctx)
+ if err != nil {
+ return err
+ }
s.mu.Lock()
defer s.mu.Unlock()
for taskID, entryID := range s.entries {
s.cron.Remove(entryID)
delete(s.entries, taskID)
}
+ for taskID, entryID := range s.verifyEntries {
+ s.cron.Remove(entryID)
+ delete(s.verifyEntries, taskID)
+ }
for _, item := range items {
item := item
if err := s.syncTaskLocked(&item); err != nil {
return err
}
}
+ for _, item := range verifyItems {
+ item := item
+ if err := s.syncVerifyTaskLocked(&item); err != nil {
+ return err
+ }
+ }
return nil
}
func (s *Service) SyncTask(_ context.Context, task *model.BackupTask) error {
s.mu.Lock()
defer s.mu.Unlock()
- return s.syncTaskLocked(task)
+ if err := s.syncTaskLocked(task); err != nil {
+ return err
+ }
+ return s.syncVerifyTaskLocked(task)
}
func (s *Service) RemoveTask(_ context.Context, taskID uint) error {
@@ -90,6 +140,10 @@ func (s *Service) RemoveTask(_ context.Context, taskID uint) error {
s.cron.Remove(entryID)
delete(s.entries, taskID)
}
+ if entryID, ok := s.verifyEntries[taskID]; ok {
+ s.cron.Remove(entryID)
+ delete(s.verifyEntries, taskID)
+ }
return nil
}
@@ -106,13 +160,56 @@ func (s *Service) syncTaskLocked(task *model.BackupTask) error {
}
taskID := task.ID
taskName := task.Name
- entryID, err := s.cron.AddFunc(task.CronExpr, func() {
+ taskNodeID := task.NodeID
+ cronExpr := task.CronExpr
+ maintenanceWindows := task.MaintenanceWindows
+ entryID, err := s.cron.AddFunc(cronExpr, func() {
+ // 集群感知:若任务绑定了离线的远程节点,跳过本轮触发避免堆积 failed 记录
+ if taskNodeID > 0 && s.nodes != nil {
+ node, err := s.nodes.FindByID(context.Background(), taskNodeID)
+ if err == nil && node != nil && !node.IsLocal && node.Status != model.NodeStatusOnline {
+ if s.logger != nil {
+ s.logger.Warn("skip scheduled run: target node offline",
+ zap.Uint("task_id", taskID), zap.String("task_name", taskName),
+ zap.Uint("node_id", taskNodeID), zap.String("node_name", node.Name))
+ }
+ if s.audit != nil {
+ s.audit.Record(servicepkg.AuditEntry{
+ Username: "system", Category: "backup_task", Action: "scheduled_skip",
+ TargetType: "backup_task", TargetID: fmt.Sprintf("%d", taskID),
+ TargetName: taskName,
+ Detail: fmt.Sprintf("跳过调度触发:节点 %s 离线 (task: %s, cron: %s)", node.Name, taskName, cronExpr),
+ })
+ }
+ return
+ }
+ }
+ // 维护窗口校验:非窗口时间跳过。Windows 为空则不限制。
+ if maintenanceWindows != "" {
+ windows := backup.ParseMaintenanceWindows(maintenanceWindows)
+ if len(windows) > 0 && !backup.IsWithinWindow(time.Now(), windows) {
+ if s.logger != nil {
+ s.logger.Info("skip scheduled run: outside maintenance window",
+ zap.Uint("task_id", taskID), zap.String("task_name", taskName),
+ zap.String("windows", maintenanceWindows))
+ }
+ if s.audit != nil {
+ s.audit.Record(servicepkg.AuditEntry{
+ Username: "system", Category: "backup_task", Action: "scheduled_skip",
+ TargetType: "backup_task", TargetID: fmt.Sprintf("%d", taskID),
+ TargetName: taskName,
+ Detail: fmt.Sprintf("跳过调度触发:非维护窗口 (task: %s, windows: %s)", taskName, maintenanceWindows),
+ })
+ }
+ return
+ }
+ }
// 自动调度任务记录审计日志
if s.audit != nil {
s.audit.Record(servicepkg.AuditEntry{
Username: "system", Category: "backup_task", Action: "scheduled_run",
TargetType: "backup_task", TargetID: fmt.Sprintf("%d", taskID),
- TargetName: taskName, Detail: fmt.Sprintf("定时调度触发备份任务: %s (cron: %s)", taskName, task.CronExpr),
+ TargetName: taskName, Detail: fmt.Sprintf("定时调度触发备份任务: %s (cron: %s)", taskName, cronExpr),
})
}
if _, runErr := s.runner.RunTaskByID(context.Background(), taskID); runErr != nil && s.logger != nil {
@@ -125,3 +222,43 @@ func (s *Service) syncTaskLocked(task *model.BackupTask) error {
s.entries[task.ID] = entryID
return nil
}
+
+// syncVerifyTaskLocked (re)registers the verification-drill cron entry for a
+// task: when the schedule fires, the verify runner picks the latest
+// successful backup and triggers a verification pass.
+// Caller must hold s.mu (hence the Locked suffix). Any existing verify entry
+// for the task is removed first; a new one is added only when a verify
+// runner has been injected AND the task is enabled with verification
+// configured. Without a verifyRunner this is a no-op (single-node,
+// no-verification deployments).
+func (s *Service) syncVerifyTaskLocked(task *model.BackupTask) error {
+	if task == nil {
+		return fmt.Errorf("task is required")
+	}
+	if entryID, ok := s.verifyEntries[task.ID]; ok {
+		s.cron.Remove(entryID)
+		delete(s.verifyEntries, task.ID)
+	}
+	if s.verifyRunner == nil {
+		return nil
+	}
+	if !task.Enabled || !task.VerifyEnabled || task.VerifyCronExpr == "" {
+		return nil
+	}
+	// Copy the fields the closure needs so it does not retain the *task
+	// pointer (which may be mutated/reloaded after this call returns).
+	taskID := task.ID
+	taskName := task.Name
+	mode := task.VerifyMode
+	verifyCron := task.VerifyCronExpr
+	entryID, err := s.cron.AddFunc(verifyCron, func() {
+		if s.audit != nil {
+			s.audit.Record(servicepkg.AuditEntry{
+				Username: "system", Category: "backup_verify", Action: "scheduled_run",
+				TargetType: "backup_task", TargetID: fmt.Sprintf("%d", taskID),
+				TargetName: taskName, Detail: fmt.Sprintf("定时验证演练: %s (cron: %s, mode: %s)", taskName, verifyCron, mode),
+			})
+		}
+		// Failures are logged, not propagated: cron callbacks have no caller.
+		if _, runErr := s.verifyRunner.StartByTask(context.Background(), taskID, mode, "system"); runErr != nil && s.logger != nil {
+			s.logger.Warn("scheduled verify run failed", zap.Uint("task_id", taskID), zap.Error(runErr))
+		}
+	})
+	if err != nil {
+		return err
+	}
+	s.verifyEntries[task.ID] = entryID
+	return nil
+}
diff --git a/server/internal/scheduler/service_test.go b/server/internal/scheduler/service_test.go
index 66b1b4e..36251a9 100644
--- a/server/internal/scheduler/service_test.go
+++ b/server/internal/scheduler/service_test.go
@@ -26,6 +26,12 @@ func (r *fakeTaskRepository) FindByName(context.Context, string) (*model.BackupT
func (r *fakeTaskRepository) ListSchedulable(context.Context) ([]model.BackupTask, error) {
return r.items, nil
}
+func (r *fakeTaskRepository) ListVerifySchedulable(context.Context) ([]model.BackupTask, error) {
+ return nil, nil
+}
+func (r *fakeTaskRepository) DistinctTags(context.Context) ([]string, error) {
+ return nil, nil
+}
func (r *fakeTaskRepository) Count(context.Context) (int64, error) { return 0, nil }
func (r *fakeTaskRepository) CountEnabled(context.Context) (int64, error) { return 0, nil }
func (r *fakeTaskRepository) CountByStorageTargetID(context.Context, uint) (int64, error) {
diff --git a/server/internal/service/agent_service.go b/server/internal/service/agent_service.go
index 4b5510f..a751391 100644
--- a/server/internal/service/agent_service.go
+++ b/server/internal/service/agent_service.go
@@ -22,6 +22,7 @@ type AgentService struct {
recordRepo repository.BackupRecordRepository
storageRepo repository.StorageTargetRepository
cmdRepo repository.AgentCommandRepository
+ restoreRepo repository.RestoreRecordRepository
cipher *codec.ConfigCipher
}
@@ -43,6 +44,12 @@ func NewAgentService(
}
}
+// SetRestoreRepository 注入恢复记录仓储,用于命令超时时联动 restore_record 状态。
+// 可选注入:未注入时恢复命令超时仅标记命令 timeout,记录需另行查验。
+func (s *AgentService) SetRestoreRepository(repo repository.RestoreRecordRepository) {
+ s.restoreRepo = repo
+}
+
// AuthenticatedNode 通过 token 解析并返回节点。失败返回 401。
func (s *AgentService) AuthenticatedNode(ctx context.Context, token string) (*model.Node, error) {
if strings.TrimSpace(token) == "" {
@@ -325,6 +332,8 @@ func (s *AgentService) WaitForCommandResult(ctx context.Context, cmdID uint, tim
}
// StartCommandTimeoutMonitor 启动后台定时任务,把超时命令标记为 timeout。
+// 对于 run_task / restore_record 命令,同时把关联的 BackupRecord / RestoreRecord
+// 标记为 failed,避免 Agent 离线/崩溃时记录永远卡在 running。
func (s *AgentService) StartCommandTimeoutMonitor(ctx context.Context, interval time.Duration, timeout time.Duration) {
if interval <= 0 {
interval = 30 * time.Second
@@ -341,12 +350,76 @@ func (s *AgentService) StartCommandTimeoutMonitor(ctx context.Context, interval
return
case <-ticker.C:
threshold := time.Now().UTC().Add(-timeout)
- _, _ = s.cmdRepo.MarkStaleTimeout(ctx, threshold)
+ s.processStaleCommands(ctx, threshold)
}
}
}()
}
+// processStaleCommands scans dispatched agent commands whose dispatch time is
+// older than threshold and finalizes them. Flow: fetch timed-out candidates →
+// fail the linked backup/restore record for each → mark the command itself
+// as timeout. A failure on one command does not stop processing the rest
+// (all repo errors here are intentionally ignored — best-effort sweep).
+func (s *AgentService) processStaleCommands(ctx context.Context, threshold time.Time) {
+	commands, err := s.cmdRepo.ListStaleDispatched(ctx, threshold)
+	if err != nil || len(commands) == 0 {
+		return
+	}
+	for i := range commands {
+		cmd := commands[i]
+		// Fail the linked record BEFORE flipping the command, so a crash
+		// between the two steps leaves the command eligible for a re-sweep.
+		s.failLinkedRecord(ctx, &cmd)
+		now := time.Now().UTC()
+		cmd.Status = model.AgentCommandStatusTimeout
+		cmd.ErrorMessage = "agent did not report result before timeout"
+		cmd.CompletedAt = &now
+		_ = s.cmdRepo.Update(ctx, &cmd)
+	}
+}
+
+// failLinkedRecord marks the backup/restore record referenced by a timed-out
+// command as failed, based on the command type and its JSON payload.
+// Only records still in the running state are touched, so results already
+// reported by the agent are never overwritten. Unknown command types and
+// malformed payloads are silently skipped.
+func (s *AgentService) failLinkedRecord(ctx context.Context, cmd *model.AgentCommand) {
+	const failureMessage = "Agent 未在超时前回传状态(节点可能已离线或崩溃)"
+	switch cmd.Type {
+	case model.AgentCommandTypeRunTask:
+		var payload struct {
+			RecordID uint `json:"recordId"`
+		}
+		if err := json.Unmarshal([]byte(cmd.Payload), &payload); err != nil || payload.RecordID == 0 {
+			return
+		}
+		record, err := s.recordRepo.FindByID(ctx, payload.RecordID)
+		if err != nil || record == nil || record.Status != model.BackupRecordStatusRunning {
+			return
+		}
+		completedAt := time.Now().UTC()
+		record.Status = model.BackupRecordStatusFailed
+		record.ErrorMessage = failureMessage
+		record.CompletedAt = &completedAt
+		record.DurationSeconds = int(completedAt.Sub(record.StartedAt).Seconds())
+		_ = s.recordRepo.Update(ctx, record)
+	case model.AgentCommandTypeRestoreRecord:
+		// restoreRepo is optionally injected (SetRestoreRepository); without
+		// it we can only time out the command, not the restore record.
+		if s.restoreRepo == nil {
+			return
+		}
+		var payload struct {
+			RestoreRecordID uint `json:"restoreRecordId"`
+		}
+		if err := json.Unmarshal([]byte(cmd.Payload), &payload); err != nil || payload.RestoreRecordID == 0 {
+			return
+		}
+		restore, err := s.restoreRepo.FindByID(ctx, payload.RestoreRecordID)
+		if err != nil || restore == nil || restore.Status != model.RestoreRecordStatusRunning {
+			return
+		}
+		completedAt := time.Now().UTC()
+		restore.Status = model.RestoreRecordStatusFailed
+		restore.ErrorMessage = failureMessage
+		restore.CompletedAt = &completedAt
+		restore.DurationSeconds = int(completedAt.Sub(restore.StartedAt).Seconds())
+		_ = s.restoreRepo.Update(ctx, restore)
+	}
+}
+
// AgentSelfStatus 是 /api/v1/agent/self 端点返回给 Agent 的轻量状态摘要。
type AgentSelfStatus struct {
ID uint `json:"id"`
diff --git a/server/internal/service/api_key_service.go b/server/internal/service/api_key_service.go
new file mode 100644
index 0000000..8bd8faa
--- /dev/null
+++ b/server/internal/service/api_key_service.go
@@ -0,0 +1,205 @@
+package service
+
+import (
+ "context"
+ "crypto/hmac"
+ "crypto/rand"
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+)
+
+// ApiKeyPrefix is the plaintext prefix of every API key, letting middleware
+// quickly distinguish API keys from other credentials.
+const ApiKeyPrefix = "bax_"
+
+// ApiKeyService manages the API-key lifecycle.
+// On creation a 24-byte random key is generated, returned in plaintext
+// exactly once, and only its HMAC-SHA256 fingerprint (see hashApiKey) is
+// stored. Authentication recomputes the fingerprint of the presented key
+// and looks it up, so plaintext keys are never persisted.
+type ApiKeyService struct {
+	repo repository.ApiKeyRepository
+}
+
+// NewApiKeyService wires the service to its key repository.
+func NewApiKeyService(repo repository.ApiKeyRepository) *ApiKeyService {
+	return &ApiKeyService{repo: repo}
+}
+
+// ApiKeyCreateInput is the request payload for creating an API key.
+type ApiKeyCreateInput struct {
+	Name     string `json:"name" binding:"required,min=1,max=128"`
+	Role     string `json:"role" binding:"required,oneof=admin operator viewer"`
+	TTLHours int    `json:"ttlHours"` // 0 表示永不过期
+}
+
+// ApiKeyCreateResult is returned to the caller exactly once:
+// PlainKey is never shown again, so the UI must tell the user to save it.
+type ApiKeyCreateResult struct {
+	ApiKey   ApiKeySummary `json:"apiKey"`
+	PlainKey string        `json:"plainKey"`
+}
+
+// ApiKeySummary is the list-item representation (no plaintext material).
+type ApiKeySummary struct {
+	ID         uint       `json:"id"`
+	Name       string     `json:"name"`
+	Role       string     `json:"role"`
+	Prefix     string     `json:"prefix"`
+	CreatedBy  string     `json:"createdBy"`
+	LastUsedAt *time.Time `json:"lastUsedAt,omitempty"`
+	ExpiresAt  *time.Time `json:"expiresAt,omitempty"`
+	Disabled   bool       `json:"disabled"`
+	CreatedAt  time.Time  `json:"createdAt"`
+}
+
+// Create validates the input, generates a fresh key, stores its fingerprint
+// plus display prefix, and returns the summary together with the one-time
+// plaintext key. TTLHours > 0 sets an absolute UTC expiry; 0 never expires.
+func (s *ApiKeyService) Create(ctx context.Context, createdBy string, input ApiKeyCreateInput) (*ApiKeyCreateResult, error) {
+	name := strings.TrimSpace(input.Name)
+	if name == "" {
+		return nil, apperror.BadRequest("API_KEY_INVALID", "名称不能为空", nil)
+	}
+	if !model.IsValidRole(input.Role) {
+		return nil, apperror.BadRequest("API_KEY_INVALID", "非法的角色", nil)
+	}
+	rawToken, err := generateApiKeyPlain()
+	if err != nil {
+		return nil, apperror.Internal("API_KEY_GEN_FAILED", "无法生成 API Key", err)
+	}
+	hash := hashApiKey(rawToken)
+	// The stored prefix keeps only the first 12 characters ("bax_" + 8 hex)
+	// so the UI can identify a key without leaking usable key material.
+	prefix := rawToken
+	if len(prefix) > 12 {
+		prefix = prefix[:12]
+	}
+	key := &model.ApiKey{
+		Name:      name,
+		Role:      input.Role,
+		KeyHash:   hash,
+		Prefix:    prefix,
+		CreatedBy: strings.TrimSpace(createdBy),
+	}
+	if input.TTLHours > 0 {
+		expires := time.Now().UTC().Add(time.Duration(input.TTLHours) * time.Hour)
+		key.ExpiresAt = &expires
+	}
+	if err := s.repo.Create(ctx, key); err != nil {
+		return nil, apperror.Internal("API_KEY_CREATE_FAILED", "无法创建 API Key", err)
+	}
+	return &ApiKeyCreateResult{ApiKey: toApiKeySummary(key), PlainKey: rawToken}, nil
+}
+
+// List returns summaries of all API keys (no plaintext or hash material).
+func (s *ApiKeyService) List(ctx context.Context) ([]ApiKeySummary, error) {
+	items, err := s.repo.List(ctx)
+	if err != nil {
+		return nil, apperror.Internal("API_KEY_LIST_FAILED", "无法获取 API Key 列表", err)
+	}
+	result := make([]ApiKeySummary, 0, len(items))
+	for i := range items {
+		result = append(result, toApiKeySummary(&items[i]))
+	}
+	return result, nil
+}
+
+// Revoke permanently deletes an API key (hard delete, keeping the table
+// compact). Returns a 404-coded apperror when the key does not exist.
+// Use ToggleDisabled instead when the record should be kept for auditing.
+func (s *ApiKeyService) Revoke(ctx context.Context, id uint) error {
+	key, err := s.repo.FindByID(ctx, id)
+	if err != nil {
+		return apperror.Internal("API_KEY_GET_FAILED", "无法获取 API Key", err)
+	}
+	if key == nil {
+		return apperror.New(404, "API_KEY_NOT_FOUND", "API Key 不存在", nil)
+	}
+	if err := s.repo.Delete(ctx, id); err != nil {
+		return apperror.Internal("API_KEY_DELETE_FAILED", "无法删除 API Key", err)
+	}
+	return nil
+}
+
+// ToggleDisabled enables or disables an API key while keeping the row for
+// auditing (unlike Revoke, which hard-deletes). Returns a 404-coded
+// apperror when the key does not exist.
+func (s *ApiKeyService) ToggleDisabled(ctx context.Context, id uint, disabled bool) error {
+	key, err := s.repo.FindByID(ctx, id)
+	if err != nil {
+		return apperror.Internal("API_KEY_GET_FAILED", "无法获取 API Key", err)
+	}
+	if key == nil {
+		return apperror.New(404, "API_KEY_NOT_FOUND", "API Key 不存在", nil)
+	}
+	key.Disabled = disabled
+	// Wrap the repo error like every other method in this service so the
+	// HTTP layer maps it to a stable error code instead of a bare 500.
+	if err := s.repo.Update(ctx, key); err != nil {
+		return apperror.Internal("API_KEY_UPDATE_FAILED", "无法更新 API Key", err)
+	}
+	return nil
+}
+
+// Authenticate implements the http.ApiKeyAuthenticator interface.
+// It returns (subject, role, error); the subject has the form
+// "api_key:<id>:<name>" (see the fmt.Sprintf below) and is used in audit
+// records. Rejects keys with a bad prefix, unknown fingerprint, disabled
+// flag, or past expiry — each with a distinct apperror code.
+func (s *ApiKeyService) Authenticate(ctx context.Context, rawKey string) (string, string, error) {
+	rawKey = strings.TrimSpace(rawKey)
+	if !strings.HasPrefix(rawKey, ApiKeyPrefix) {
+		return "", "", apperror.Unauthorized("AUTH_INVALID_TOKEN", "无效的 API Key 格式", nil)
+	}
+	hash := hashApiKey(rawKey)
+	key, err := s.repo.FindByHash(ctx, hash)
+	if err != nil {
+		return "", "", apperror.Internal("API_KEY_LOOKUP_FAILED", "无法验证 API Key", err)
+	}
+	if key == nil {
+		return "", "", apperror.Unauthorized("AUTH_INVALID_TOKEN", "API Key 无效", nil)
+	}
+	if key.Disabled {
+		return "", "", apperror.Unauthorized("AUTH_KEY_DISABLED", "API Key 已被停用", nil)
+	}
+	if key.ExpiresAt != nil && time.Now().UTC().After(*key.ExpiresAt) {
+		return "", "", apperror.Unauthorized("AUTH_KEY_EXPIRED", "API Key 已过期", nil)
+	}
+	// Best-effort bump of last_used_at; failures are deliberately ignored
+	// so a metadata write never blocks an otherwise valid request.
+	_ = s.repo.MarkUsed(ctx, key.ID, time.Now().UTC())
+	subject := fmt.Sprintf("api_key:%d:%s", key.ID, key.Name)
+	return subject, key.Role, nil
+}
+
+// toApiKeySummary converts a model row into its API representation,
+// deliberately omitting KeyHash.
+func toApiKeySummary(key *model.ApiKey) ApiKeySummary {
+	return ApiKeySummary{
+		ID:         key.ID,
+		Name:       key.Name,
+		Role:       key.Role,
+		Prefix:     key.Prefix,
+		CreatedBy:  key.CreatedBy,
+		LastUsedAt: key.LastUsedAt,
+		ExpiresAt:  key.ExpiresAt,
+		Disabled:   key.Disabled,
+		CreatedAt:  key.CreatedAt,
+	}
+}
+
+// generateApiKeyPlain generates a key of the form bax_<48 hex chars>:
+// 24 random bytes (192 bits of entropy) hex-encoded behind the prefix.
+func generateApiKeyPlain() (string, error) {
+	buf := make([]byte, 24)
+	if _, err := rand.Read(buf); err != nil {
+		return "", err
+	}
+	return ApiKeyPrefix + hex.EncodeToString(buf), nil
+}
+
+// apiKeyHashPepper is the application-level pepper for HMAC-SHA256 (a fixed
+// constant compiled into the binary).
+//
+// Why this is considered adequate:
+//   - the key plaintext is already a 192-bit random value; the pepper adds
+//     application-level entropy on top
+//   - if only the database leaks, key_hash values cannot be verified offline
+//     without also obtaining the binary (which holds the pepper)
+//   - HMAC-SHA256 is the standard RFC 2104 construction for token fingerprints
+//
+// Why not bcrypt/argon2: API keys are high-entropy system-generated tokens,
+// not user passwords — brute-forcing 2^192 values is infeasible, and a slow
+// hash would add ~100ms to every API call (SSE streams, CI scripts).
+// Fast hash over a high-entropy secret matches industry practice
+// (e.g. GitHub PATs, Stripe keys).
+//
+// Deployment note: if the key database is ever shared across instances, the
+// pepper could be made overridable via an environment variable (future work).
+var apiKeyHashPepper = []byte("backupx-api-key-hmac-pepper-v1")
+
+// hashApiKey computes the HMAC-SHA256 fingerprint of an API key token for
+// database storage/lookup. Never use this for user passwords — those go
+// through bcrypt in security/password.go.
+func hashApiKey(rawToken string) string {
+	mac := hmac.New(sha256.New, apiKeyHashPepper)
+	mac.Write([]byte(rawToken))
+	return hex.EncodeToString(mac.Sum(nil))
+}
diff --git a/server/internal/service/api_key_service_test.go b/server/internal/service/api_key_service_test.go
new file mode 100644
index 0000000..c22dd89
--- /dev/null
+++ b/server/internal/service/api_key_service_test.go
@@ -0,0 +1,113 @@
+package service
+
+import (
+ "context"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "backupx/server/internal/config"
+ "backupx/server/internal/database"
+ "backupx/server/internal/logger"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+)
+
+func newApiKeyTestService(t *testing.T) *ApiKeyService {
+ t.Helper()
+ log, err := logger.New(config.LogConfig{Level: "error"})
+ if err != nil {
+ t.Fatalf("logger.New: %v", err)
+ }
+ db, err := database.Open(config.DatabaseConfig{Path: filepath.Join(t.TempDir(), "backupx.db")}, log)
+ if err != nil {
+ t.Fatalf("database.Open: %v", err)
+ }
+ return NewApiKeyService(repository.NewApiKeyRepository(db))
+}
+
+func TestApiKeyService_CreateAndAuthenticate(t *testing.T) {
+ svc := newApiKeyTestService(t)
+ ctx := context.Background()
+
+ result, err := svc.Create(ctx, "tester", ApiKeyCreateInput{
+ Name: "ci",
+ Role: model.UserRoleOperator,
+ TTLHours: 0,
+ })
+ if err != nil {
+ t.Fatalf("Create: %v", err)
+ }
+ if !strings.HasPrefix(result.PlainKey, ApiKeyPrefix) {
+ t.Fatalf("expected plain key with prefix %s, got %s", ApiKeyPrefix, result.PlainKey)
+ }
+ if result.ApiKey.Role != model.UserRoleOperator {
+ t.Fatalf("role not preserved")
+ }
+
+ subject, role, err := svc.Authenticate(ctx, result.PlainKey)
+ if err != nil {
+ t.Fatalf("Authenticate: %v", err)
+ }
+ if role != model.UserRoleOperator {
+ t.Fatalf("expected operator role, got %s", role)
+ }
+ if !strings.HasPrefix(subject, "api_key:") {
+ t.Fatalf("expected subject to start with api_key:, got %s", subject)
+ }
+}
+
+func TestApiKeyService_AuthenticateRejectsInvalid(t *testing.T) {
+ svc := newApiKeyTestService(t)
+ ctx := context.Background()
+
+ // 格式错误(无 bax_ 前缀)
+ if _, _, err := svc.Authenticate(ctx, "invalid-without-prefix"); err == nil {
+ t.Fatalf("expected error for missing prefix")
+ }
+ // 格式正确但不存在
+ if _, _, err := svc.Authenticate(ctx, "bax_"+strings.Repeat("0", 48)); err == nil {
+ t.Fatalf("expected error for unknown key")
+ }
+}
+
+func TestApiKeyService_AuthenticateRejectsExpired(t *testing.T) {
+ svc := newApiKeyTestService(t)
+ ctx := context.Background()
+
+ result, err := svc.Create(ctx, "tester", ApiKeyCreateInput{
+ Name: "ci-expired",
+ Role: model.UserRoleViewer,
+ TTLHours: 1,
+ })
+ if err != nil {
+ t.Fatalf("Create: %v", err)
+ }
+ // 手动把 expiresAt 设到过去
+ key, _ := svc.repo.FindByID(ctx, result.ApiKey.ID)
+ past := time.Now().UTC().Add(-time.Hour)
+ key.ExpiresAt = &past
+ if err := svc.repo.Update(ctx, key); err != nil {
+ t.Fatalf("Update: %v", err)
+ }
+ if _, _, err := svc.Authenticate(ctx, result.PlainKey); err == nil {
+ t.Fatalf("expected error for expired key")
+ }
+}
+
+func TestApiKeyService_AuthenticateRejectsDisabled(t *testing.T) {
+ svc := newApiKeyTestService(t)
+ ctx := context.Background()
+
+ result, err := svc.Create(ctx, "tester", ApiKeyCreateInput{Name: "disabled", Role: "admin"})
+ if err != nil {
+ t.Fatalf("Create: %v", err)
+ }
+ if err := svc.ToggleDisabled(ctx, result.ApiKey.ID, true); err != nil {
+ t.Fatalf("ToggleDisabled: %v", err)
+ }
+ if _, _, err := svc.Authenticate(ctx, result.PlainKey); err == nil {
+ t.Fatalf("expected error for disabled key")
+ }
+}
diff --git a/server/internal/service/audit_service.go b/server/internal/service/audit_service.go
index 472295f..94f115a 100644
--- a/server/internal/service/audit_service.go
+++ b/server/internal/service/audit_service.go
@@ -66,3 +66,21 @@ func (s *AuditService) List(ctx context.Context, category string, limit, offset
}
return result, nil
}
+
+// ListAdvanced 多字段筛选分页查询(合规审计常用)。
+func (s *AuditService) ListAdvanced(ctx context.Context, opts repository.AuditLogListOptions) (*repository.AuditLogListResult, error) {
+ result, err := s.repo.List(ctx, opts)
+ if err != nil {
+ return nil, apperror.Internal("AUDIT_LOG_LIST_FAILED", fmt.Sprintf("无法获取审计日志: %v", err), err)
+ }
+ return result, nil
+}
+
+// ExportAll 返回指定筛选条件下的全部审计日志(最多 10000 条),用于 CSV 导出。
+func (s *AuditService) ExportAll(ctx context.Context, opts repository.AuditLogListOptions) ([]model.AuditLog, error) {
+ items, err := s.repo.ListAll(ctx, opts)
+ if err != nil {
+ return nil, apperror.Internal("AUDIT_LOG_EXPORT_FAILED", fmt.Sprintf("无法导出审计日志: %v", err), err)
+ }
+ return items, nil
+}
diff --git a/server/internal/service/auth_service.go b/server/internal/service/auth_service.go
index 9813529..fd2633a 100644
--- a/server/internal/service/auth_service.go
+++ b/server/internal/service/auth_service.go
@@ -136,6 +136,16 @@ func (s *AuthService) Login(ctx context.Context, input LoginInput, clientKey str
}
return nil, apperror.Unauthorized("AUTH_INVALID_CREDENTIALS", "用户名或密码错误", nil)
}
+ if user.Disabled {
+ if s.auditService != nil {
+ s.auditService.Record(AuditEntry{
+ UserID: user.ID, Username: user.Username,
+ Category: "auth", Action: "login_rejected",
+ Detail: "账号已被停用", ClientIP: clientKey,
+ })
+ }
+ return nil, apperror.Unauthorized("AUTH_USER_DISABLED", "账号已被管理员停用", nil)
+ }
if err := security.ComparePassword(user.PasswordHash, input.Password); err != nil {
if s.auditService != nil {
s.auditService.Record(AuditEntry{
diff --git a/server/internal/service/auth_service_test.go b/server/internal/service/auth_service_test.go
index 03b5c6e..940072b 100644
--- a/server/internal/service/auth_service_test.go
+++ b/server/internal/service/auth_service_test.go
@@ -51,6 +51,34 @@ func (r *fakeUserRepository) Update(_ context.Context, user *model.User) error {
return nil
}
+func (r *fakeUserRepository) CountByRole(_ context.Context, role string) (int64, error) {
+ var n int64
+ for _, u := range r.users {
+ if u.Role == role && !u.Disabled {
+ n++
+ }
+ }
+ return n, nil
+}
+
+func (r *fakeUserRepository) List(_ context.Context) ([]model.User, error) {
+ result := make([]model.User, 0, len(r.users))
+ for _, u := range r.users {
+ result = append(result, *u)
+ }
+ return result, nil
+}
+
+func (r *fakeUserRepository) Delete(_ context.Context, id uint) error {
+ for i, u := range r.users {
+ if u.ID == id {
+ r.users = append(r.users[:i], r.users[i+1:]...)
+ return nil
+ }
+ }
+ return nil
+}
+
type fakeSystemConfigRepository struct{}
func (r *fakeSystemConfigRepository) GetByKey(context.Context, string) (*model.SystemConfig, error) {
diff --git a/server/internal/service/backup_execution_service.go b/server/internal/service/backup_execution_service.go
index b1e5925..f0d7f04 100644
--- a/server/internal/service/backup_execution_service.go
+++ b/server/internal/service/backup_execution_service.go
@@ -81,14 +81,40 @@ type BackupExecutionService struct {
logHub *backup.LogHub
retention *backupretention.Service
cipher *codec.ConfigCipher
- notifier BackupResultNotifier
- agentDispatcher AgentDispatcher
+ notifier BackupResultNotifier
+ agentDispatcher AgentDispatcher
+ replicationHook ReplicationTrigger
+ dependentsResolver DependentsResolver
async func(func())
now func() time.Time
tempDir string
semaphore chan struct{}
- retries int // rclone 底层重试次数
- bandwidthLimit string // rclone 带宽限制
+ // nodeSemaphores 节点级并发限制(按 NodeID 映射)。
+ // 没命中的 NodeID 走全局 semaphore,节点配置 MaxConcurrent>0 时按该节点独立排队。
+ nodeSemaphores sync.Map
+ retries int // rclone 底层重试次数
+ bandwidthLimit string // rclone 带宽限制
+}
+
+// ReplicationTrigger 抽象备份成功后的副本派发(实现者:ReplicationService)。
+type ReplicationTrigger interface {
+ TriggerAutoReplication(ctx context.Context, task *model.BackupTask, record *model.BackupRecord)
+}
+
+// SetReplicationTrigger 注入备份复制触发器。可选注入:未注入时不自动复制。
+func (s *BackupExecutionService) SetReplicationTrigger(trigger ReplicationTrigger) {
+ s.replicationHook = trigger
+}
+
+// DependentsResolver 根据 upstream 任务 ID 返回应触发的下游任务 ID。
+// 由 BackupTaskService 实现。抽象接口避免执行服务直接查仓储。
+type DependentsResolver interface {
+ TriggerDependents(ctx context.Context, upstreamID uint) ([]uint, error)
+}
+
+// SetDependentsResolver 注入下游依赖解析器。
+func (s *BackupExecutionService) SetDependentsResolver(r DependentsResolver) {
+ s.dependentsResolver = r
}
// AgentDispatcher 抽象把任务下发给 Agent 的能力,由 AgentService 实现。
@@ -157,7 +183,18 @@ func (s *BackupExecutionService) RunTaskByIDSync(ctx context.Context, id uint) (
}
func (s *BackupExecutionService) DownloadRecord(ctx context.Context, recordID uint) (*DownloadedArtifact, error) {
- record, provider, err := s.loadRecordProvider(ctx, recordID)
+ record, err := s.records.FindByID(ctx, recordID)
+ if err != nil {
+ return nil, apperror.Internal("BACKUP_RECORD_GET_FAILED", "无法获取备份记录详情", err)
+ }
+ if record == nil {
+ return nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
+ }
+ // 集群场景保护:local_disk 类型的存储文件只在执行节点本地可见,Master 不能跨节点访问
+ if err := s.validateClusterAccessible(ctx, record); err != nil {
+ return nil, err
+ }
+ provider, err := s.resolveProvider(ctx, record.StorageTargetID)
if err != nil {
return nil, err
}
@@ -219,11 +256,22 @@ func (s *BackupExecutionService) RestoreRecord(ctx context.Context, recordID uin
}
func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint) error {
- record, provider, err := s.loadRecordProvider(ctx, recordID)
+ record, err := s.records.FindByID(ctx, recordID)
if err != nil {
+ return apperror.Internal("BACKUP_RECORD_GET_FAILED", "无法获取备份记录详情", err)
+ }
+ if record == nil {
+ return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", recordID))
+ }
+	// 集群场景保护:跨节点 local_disk 文件 Master 无法远程删除;拒绝操作,避免记录被删而远端文件残留造成的实际存储泄漏
+ if err := s.validateClusterAccessible(ctx, record); err != nil {
return err
}
if strings.TrimSpace(record.StoragePath) != "" {
+ provider, err := s.resolveProvider(ctx, record.StorageTargetID)
+ if err != nil {
+ return err
+ }
if err := provider.Delete(ctx, record.StoragePath); err != nil {
return apperror.Internal("BACKUP_RECORD_DELETE_FAILED", "无法删除备份文件", err)
}
@@ -234,6 +282,35 @@ func (s *BackupExecutionService) DeleteRecord(ctx context.Context, recordID uint
return nil
}
+// validateClusterAccessible 在跨节点 + local_disk 场景下拒绝 Master 端直接访问。
+// 场景说明:远程 Agent 把备份写到其本机磁盘(local_disk basePath)时,Master 的
+// provider 指向的是 Master 本机的同名路径,访问会静默取错文件或 404。明确拒绝
+// 让用户知情,避免假成功。
+func (s *BackupExecutionService) validateClusterAccessible(ctx context.Context, record *model.BackupRecord) error {
+ if record == nil || record.NodeID == 0 {
+ return nil
+ }
+ // 检查是否为远程节点
+ if s.nodeRepo == nil {
+ return nil
+ }
+ node, err := s.nodeRepo.FindByID(ctx, record.NodeID)
+ if err != nil || node == nil || node.IsLocal {
+ return nil
+ }
+ // 检查存储类型是否为 local_disk(跨节点不可达)
+ target, err := s.targets.FindByID(ctx, record.StorageTargetID)
+ if err != nil || target == nil {
+ return nil
+ }
+ if strings.EqualFold(target.Type, "local_disk") {
+ return apperror.BadRequest("BACKUP_RECORD_CROSS_NODE_LOCAL_DISK",
+ fmt.Sprintf("该备份位于节点 %s 的本地磁盘(local_disk),Master 无法跨节点访问。请登录该节点或改用云存储后再操作。", node.Name),
+ nil)
+ }
+ return nil
+}
+
func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async bool) (*BackupRecordDetail, error) {
task, err := s.tasks.FindByID(ctx, id)
if err != nil {
@@ -242,13 +319,22 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
if task == nil {
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "备份任务不存在", fmt.Errorf("backup task %d not found", id))
}
+ // 维护窗口校验:手动执行同样尊重窗口,避免业务高峰期误触发。
+ if strings.TrimSpace(task.MaintenanceWindows) != "" {
+ windows := backup.ParseMaintenanceWindows(task.MaintenanceWindows)
+ if len(windows) > 0 && !backup.IsWithinWindow(s.now(), windows) {
+ return nil, apperror.BadRequest("BACKUP_TASK_OUTSIDE_WINDOW",
+ fmt.Sprintf("当前时间不在任务「%s」的维护窗口内(%s),已拒绝执行。", task.Name, task.MaintenanceWindows),
+ nil)
+ }
+ }
startedAt := s.now()
// 取第一个存储目标 ID 做兼容
primaryTargetID := task.StorageTargetID
if tids := collectTargetIDs(task); len(tids) > 0 {
primaryTargetID = tids[0]
}
- record := &model.BackupRecord{TaskID: task.ID, StorageTargetID: primaryTargetID, Status: "running", StartedAt: startedAt}
+ record := &model.BackupRecord{TaskID: task.ID, StorageTargetID: primaryTargetID, NodeID: task.NodeID, Status: "running", StartedAt: startedAt}
if err := s.records.Create(ctx, record); err != nil {
return nil, apperror.Internal("BACKUP_RECORD_CREATE_FAILED", "无法创建备份记录", err)
}
@@ -259,7 +345,14 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
}
// 多节点路由:task.NodeID 指向远程节点时,把执行任务入队给 Agent;
// NodeID=0 或本机节点时由 Master 直接执行。
- if s.isRemoteNode(ctx, task.NodeID) {
+ if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
+ // 节点离线 → 立即把刚创建的 running 记录标记 failed,返回明确错误
+ if remoteNode.Status != model.NodeStatusOnline {
+ offlineMsg := fmt.Sprintf("节点 %s 当前离线,无法执行备份任务", remoteNode.Name)
+ _ = s.finalizeRecord(ctx, task, record.ID, startedAt, model.BackupRecordStatusFailed,
+ offlineMsg, "", "", 0, "", "")
+ return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
+ }
if _, enqueueErr := s.agentDispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRunTask, map[string]any{
"taskId": task.ID,
"recordId": record.ID,
@@ -282,20 +375,84 @@ func (s *BackupExecutionService) startTask(ctx context.Context, id uint, async b
return s.getRecordDetail(ctx, record.ID)
}
+// shouldNotify 按任务的告警策略决定是否发送本次通知。
+// 成功结果:始终发送(方便用户确认备份状态)。
+// 失败结果:仅当"最近 N 条记录(含本次)均为 failed"时发送,N = AlertOnConsecutiveFails。
+// 该策略降低单次偶发失败的告警噪音,企业运维场景下更友好。
+func (s *BackupExecutionService) shouldNotify(ctx context.Context, task *model.BackupTask, status string) bool {
+ if task == nil {
+ return true
+ }
+ threshold := task.AlertOnConsecutiveFails
+ if threshold <= 1 {
+ return true
+ }
+ if status != model.BackupRecordStatusFailed {
+ return true
+ }
+ items, err := s.records.ListByTask(ctx, task.ID)
+ if err != nil || len(items) < threshold {
+ return true
+ }
+ // ListByTask 默认按 id desc 返回:取前 threshold 条
+ count := threshold
+ if len(items) < count {
+ count = len(items)
+ }
+ for i := 0; i < count; i++ {
+ if items[i].Status != model.BackupRecordStatusFailed {
+ return false
+ }
+ }
+ return true
+}
+
+// acquireNodeSemaphore 返回节点级并发通道。懒初始化:第一次为某节点排队时创建。
+// 如果节点未配置 MaxConcurrent 或 nodeRepo 未注入,返回 nil(调用方走全局 semaphore)。
+// 节点容量仅在首次创建时采用,后续变更需重启服务才生效(避免运行时 resize 通道的复杂度)。
+func (s *BackupExecutionService) acquireNodeSemaphore(ctx context.Context, nodeID uint) chan struct{} {
+ if nodeID == 0 || s.nodeRepo == nil {
+ return nil
+ }
+ if v, ok := s.nodeSemaphores.Load(nodeID); ok {
+ return v.(chan struct{})
+ }
+ node, err := s.nodeRepo.FindByID(ctx, nodeID)
+ if err != nil || node == nil || node.MaxConcurrent <= 0 {
+ return nil
+ }
+ created := make(chan struct{}, node.MaxConcurrent)
+ actual, _ := s.nodeSemaphores.LoadOrStore(nodeID, created)
+ return actual.(chan struct{})
+}
+
// isRemoteNode 判断 NodeID 是否指向一个有效的远程(非本机)节点。
// 当未注入集群依赖、nodeID 为 0、或节点为本机时,均返回 false(走本地执行)。
func (s *BackupExecutionService) isRemoteNode(ctx context.Context, nodeID uint) bool {
+ return s.resolveRemoteNode(ctx, nodeID) != nil
+}
+
+// resolveRemoteNode 返回 NodeID 对应的远程节点指针,或 nil 表示本机执行。
+// 相比 isRemoteNode,它让调用方能读取节点状态(在线/离线)做进一步判断。
+func (s *BackupExecutionService) resolveRemoteNode(ctx context.Context, nodeID uint) *model.Node {
if s.nodeRepo == nil || s.agentDispatcher == nil || nodeID == 0 {
- return false
+ return nil
}
node, err := s.nodeRepo.FindByID(ctx, nodeID)
- if err != nil || node == nil {
- return false
+ if err != nil || node == nil || node.IsLocal {
+ return nil
}
- return !node.IsLocal
+ return node
}
func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.BackupTask, recordID uint, startedAt time.Time) {
+ // 节点级并发限流:当任务绑定节点且节点配置了 MaxConcurrent>0,
+ // 该节点上所有任务共享一个节点专属 semaphore,互相排队
+ nodeSem := s.acquireNodeSemaphore(ctx, task.NodeID)
+ if nodeSem != nil {
+ nodeSem <- struct{}{}
+ defer func() { <-nodeSem }()
+ }
s.semaphore <- struct{}{}
defer func() { <-s.semaphore }()
@@ -320,8 +477,12 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
}
}
}
- if err := s.notifier.NotifyBackupResult(ctx, BackupExecutionNotification{Task: task, Record: &model.BackupRecord{ID: recordID, TaskID: task.ID, Status: status, FileName: fileName, FileSize: fileSize, StoragePath: storagePath, ErrorMessage: errMessage, StartedAt: startedAt}, Error: buildOptionalError(errMessage)}); err != nil {
- logger.Warnf("发送备份通知失败:%v", err)
+ if s.shouldNotify(ctx, task, status) {
+ if err := s.notifier.NotifyBackupResult(ctx, BackupExecutionNotification{Task: task, Record: &model.BackupRecord{ID: recordID, TaskID: task.ID, Status: status, FileName: fileName, FileSize: fileSize, StoragePath: storagePath, ErrorMessage: errMessage, StartedAt: startedAt}, Error: buildOptionalError(errMessage)}); err != nil {
+ logger.Warnf("发送备份通知失败:%v", err)
+ }
+ } else {
+ logger.Infof("连续失败次数未达通知阈值(%d),跳过本次告警", task.AlertOnConsecutiveFails)
}
s.logHub.Complete(recordID, status)
}
@@ -404,6 +565,24 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
logger.Warnf("存储目标 %s 创建客户端失败:%v", targetName, resolveErr)
return
}
+ // 软限额校验:QuotaBytes > 0 时,已累计 + 本次 > 配额 → 拒绝上传
+ if target != nil && target.QuotaBytes > 0 {
+ currentUsed := int64(0)
+ if items, err := s.records.StorageUsage(ctx); err == nil {
+ for _, it := range items {
+ if it.StorageTargetID == targetID {
+ currentUsed = it.TotalSize
+ break
+ }
+ }
+ }
+ if currentUsed+fileSize > target.QuotaBytes {
+ quotaMsg := fmt.Sprintf("超出存储目标 %s 的配额(%d + %d > %d)", targetName, currentUsed, fileSize, target.QuotaBytes)
+ uploadResults[index] = StorageUploadResultItem{StorageTargetID: targetID, StorageTargetName: targetName, Status: "failed", Error: quotaMsg}
+ logger.Warnf("%s", quotaMsg)
+ return
+ }
+ }
logger.Infof("开始上传备份到存储目标:%s", targetName)
// 上传级重试:最多 3 次,指数退避(10s, 30s, 90s)
maxAttempts := 3
@@ -489,6 +668,47 @@ func (s *BackupExecutionService) executeTask(ctx context.Context, task *model.Ba
logger.Warnf("部分存储目标上传失败:%s", strings.Join(failedMessages, "; "))
}
logger.Infof("备份执行完成")
+	// 备份成功后的两个后置动作:先触发下游依赖任务(best-effort,失败仅 warn);
+	// 随后在下方按 3-2-1 规则自动派发复制(任务配置 ReplicationTargetIDs 且本次有任意目标成功时生效)
+ if s.dependentsResolver != nil {
+ go func(upstreamID uint, upstreamName string) {
+ dependents, err := s.dependentsResolver.TriggerDependents(context.Background(), upstreamID)
+ if err != nil {
+ return
+ }
+ for _, depID := range dependents {
+ _, runErr := s.RunTaskByID(context.Background(), depID)
+ if runErr != nil {
+ logger.Warnf("触发下游任务 #%d 失败(上游: %s): %v", depID, upstreamName, runErr)
+ } else {
+ logger.Infof("已触发下游任务 #%d(上游: %s)", depID, upstreamName)
+ }
+ }
+ }(task.ID, task.Name)
+ }
+ if s.replicationHook != nil && strings.TrimSpace(task.ReplicationTargetIDs) != "" {
+ record := &model.BackupRecord{
+ ID: recordID,
+ TaskID: task.ID,
+ StorageTargetID: task.StorageTargetID,
+ NodeID: task.NodeID,
+ Status: "success",
+ FileName: fileName,
+ FileSize: fileSize,
+ Checksum: checksum,
+ StoragePath: storagePath,
+ StartedAt: startedAt,
+ }
+ // 取第一个成功的上传作为源 target,避免从失败目标拉取
+ for _, r := range uploadResults {
+ if r.Status == "success" {
+ record.StorageTargetID = r.StorageTargetID
+ break
+ }
+ }
+ logger.Infof("触发自动复制(3-2-1 规则):%s", task.ReplicationTargetIDs)
+ s.replicationHook.TriggerAutoReplication(context.Background(), task, record)
+ }
} else {
errMessage = strings.Join(failedMessages, "; ")
logger.Errorf("所有存储目标上传均失败")
diff --git a/server/internal/service/backup_task_service.go b/server/internal/service/backup_task_service.go
index 5554b8a..4a9924e 100644
--- a/server/internal/service/backup_task_service.go
+++ b/server/internal/service/backup_task_service.go
@@ -5,10 +5,12 @@ import (
"encoding/json"
"fmt"
"net/http"
+ "strconv"
"strings"
"time"
"backupx/server/internal/apperror"
+ "backupx/server/internal/backup"
"backupx/server/internal/model"
"backupx/server/internal/repository"
"backupx/server/internal/storage"
@@ -33,12 +35,27 @@ type BackupTaskUpsertInput struct {
DBPath string `json:"dbPath" binding:"max=500"`
StorageTargetID uint `json:"storageTargetId"` // deprecated: 向后兼容
StorageTargetIDs []uint `json:"storageTargetIds"` // 新增:多存储目标
+ NodeID uint `json:"nodeId"` // 执行节点(0 = 本机 Master)
+ Tags string `json:"tags" binding:"max=500"` // 逗号分隔标签
RetentionDays int `json:"retentionDays"`
Compression string `json:"compression" binding:"omitempty,oneof=gzip none"`
Encrypt bool `json:"encrypt"`
MaxBackups int `json:"maxBackups"`
// ExtraConfig 类型特有扩展配置(如 SAP HANA 的 backupLevel/backupChannels)
ExtraConfig map[string]any `json:"extraConfig"`
+ // 验证(恢复演练)配置
+ VerifyEnabled bool `json:"verifyEnabled"`
+ VerifyCronExpr string `json:"verifyCronExpr" binding:"max=64"`
+ VerifyMode string `json:"verifyMode" binding:"omitempty,oneof=quick deep"`
+ // SLA 配置
+ SLAHoursRPO int `json:"slaHoursRpo"`
+ AlertOnConsecutiveFails int `json:"alertOnConsecutiveFails"`
+ // 备份复制目标存储 ID 列表(3-2-1 规则)
+ ReplicationTargetIDs []uint `json:"replicationTargetIds"`
+ // 维护窗口(CSV,详见 backup/window.go)
+ MaintenanceWindows string `json:"maintenanceWindows" binding:"max=500"`
+ // 依赖的上游任务 ID(上游成功后自动触发本任务)
+ DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
}
type BackupTaskToggleInput struct {
@@ -55,12 +72,25 @@ type BackupTaskSummary struct {
StorageTargetName string `json:"storageTargetName"` // deprecated: 取第一个
StorageTargetIDs []uint `json:"storageTargetIds"`
StorageTargetNames []string `json:"storageTargetNames"`
+ NodeID uint `json:"nodeId"`
+ NodeName string `json:"nodeName,omitempty"`
+ Tags string `json:"tags"`
RetentionDays int `json:"retentionDays"`
Compression string `json:"compression"`
Encrypt bool `json:"encrypt"`
MaxBackups int `json:"maxBackups"`
LastRunAt *time.Time `json:"lastRunAt,omitempty"`
LastStatus string `json:"lastStatus"`
+ // 验证与 SLA 元信息
+ VerifyEnabled bool `json:"verifyEnabled"`
+ VerifyCronExpr string `json:"verifyCronExpr"`
+ VerifyMode string `json:"verifyMode"`
+ SLAHoursRPO int `json:"slaHoursRpo"`
+ AlertOnConsecutiveFails int `json:"alertOnConsecutiveFails"`
+ // 备份复制目标(3-2-1)
+ ReplicationTargetIDs []uint `json:"replicationTargetIds"`
+ MaintenanceWindows string `json:"maintenanceWindows"`
+ DependsOnTaskIDs []uint `json:"dependsOnTaskIds"`
UpdatedAt time.Time `json:"updatedAt"`
}
@@ -88,6 +118,7 @@ type BackupTaskService struct {
tasks repository.BackupTaskRepository
targets repository.StorageTargetRepository
records repository.BackupRecordRepository
+ nodes repository.NodeRepository
storageRegistry *storage.Registry
cipher *codec.ConfigCipher
scheduler BackupTaskScheduler
@@ -107,6 +138,11 @@ func (s *BackupTaskService) SetRecordsAndStorage(records repository.BackupRecord
s.storageRegistry = registry
}
+// SetNodeRepository 注入节点仓库用于校验任务绑定的 NodeID 合法。
+func (s *BackupTaskService) SetNodeRepository(nodes repository.NodeRepository) {
+ s.nodes = nodes
+}
+
func (s *BackupTaskService) SetScheduler(scheduler BackupTaskScheduler) {
s.scheduler = scheduler
}
@@ -123,6 +159,129 @@ func (s *BackupTaskService) List(ctx context.Context) ([]BackupTaskSummary, erro
return result, nil
}
+// ListTags 返回全系统所有任务使用过的唯一标签。
+func (s *BackupTaskService) ListTags(ctx context.Context) ([]string, error) {
+ tags, err := s.tasks.DistinctTags(ctx)
+ if err != nil {
+ return nil, apperror.Internal("BACKUP_TASK_TAG_LIST_FAILED", "无法获取任务标签", err)
+ }
+ return tags, nil
+}
+
+// BatchResult 单条批量操作结果。best-effort:失败不中断其他。
+type BatchResult struct {
+ ID uint `json:"id"`
+ Name string `json:"name,omitempty"`
+ Success bool `json:"success"`
+ Error string `json:"error,omitempty"`
+}
+
+// BatchToggle 批量启停任务。
+func (s *BackupTaskService) BatchToggle(ctx context.Context, ids []uint, enabled bool) []BatchResult {
+ results := make([]BatchResult, 0, len(ids))
+ for _, id := range ids {
+ if id == 0 {
+ continue
+ }
+ summary, err := s.Toggle(ctx, id, enabled)
+ item := BatchResult{ID: id, Success: err == nil}
+ if err != nil {
+ item.Error = appErrorMessage(err)
+ } else if summary != nil {
+ item.Name = summary.Name
+ }
+ results = append(results, item)
+ }
+ return results
+}
+
+// BatchDeleteTasks 批量删除任务。
+func (s *BackupTaskService) BatchDeleteTasks(ctx context.Context, ids []uint) []BatchResult {
+ results := make([]BatchResult, 0, len(ids))
+ for _, id := range ids {
+ if id == 0 {
+ continue
+ }
+ result, err := s.Delete(ctx, id)
+ item := BatchResult{ID: id, Success: err == nil}
+ if err != nil {
+ item.Error = appErrorMessage(err)
+ } else if result != nil {
+ item.Name = result.TaskName
+ }
+ results = append(results, item)
+ }
+ return results
+}
+
+// hasCyclicDependency DFS 查找是否存在从 candidate 上游链回到 taskID 的路径。
+// 保守实现:遍历 depth 超过 32 视为潜在循环并返回 true。
+func (s *BackupTaskService) hasCyclicDependency(ctx context.Context, taskID uint, candidates []uint) bool {
+ visited := map[uint]bool{}
+ var dfs func(id uint, depth int) bool
+ dfs = func(id uint, depth int) bool {
+ if depth > 32 {
+ return true
+ }
+ if id == taskID {
+ return true
+ }
+ if visited[id] {
+ return false
+ }
+ visited[id] = true
+ upstream, err := s.tasks.FindByID(ctx, id)
+ if err != nil || upstream == nil {
+ return false
+ }
+ for _, up := range parseUintCSV(upstream.DependsOnTaskIDs) {
+ if dfs(up, depth+1) {
+ return true
+ }
+ }
+ return false
+ }
+ for _, c := range candidates {
+ if dfs(c, 0) {
+ return true
+ }
+ }
+ return false
+}
+
+// TriggerDependents 上游任务成功后找出所有 depends_on 中含有 upstreamID 的下游任务。
+// 供 BackupExecutionService 调用,避免后者直接触达 backup_task_repository。
+func (s *BackupTaskService) TriggerDependents(ctx context.Context, upstreamID uint) ([]uint, error) {
+ items, err := s.tasks.List(ctx, repository.BackupTaskListOptions{})
+ if err != nil {
+ return nil, err
+ }
+ var triggers []uint
+ for _, item := range items {
+ if !item.Enabled {
+ continue
+ }
+ for _, dep := range parseUintCSV(item.DependsOnTaskIDs) {
+ if dep == upstreamID {
+ triggers = append(triggers, item.ID)
+ break
+ }
+ }
+ }
+ return triggers, nil
+}
+
+// appErrorMessage 提取 apperror 的可读消息,回退到 error.Error()。
+func appErrorMessage(err error) string {
+ if err == nil {
+ return ""
+ }
+ if appErr, ok := err.(*apperror.AppError); ok {
+ return appErr.Message
+ }
+ return err.Error()
+}
+
func (s *BackupTaskService) Get(ctx context.Context, id uint) (*BackupTaskDetail, error) {
item, err := s.tasks.FindByID(ctx, id)
if err != nil {
@@ -326,6 +485,15 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
return apperror.BadRequest("BACKUP_STORAGE_TARGET_INVALID", fmt.Sprintf("关联的存储目标 %d 不存在", tid), nil)
}
}
+ if input.NodeID > 0 && s.nodes != nil {
+ node, err := s.nodes.FindByID(ctx, input.NodeID)
+ if err != nil {
+ return apperror.Internal("BACKUP_TASK_NODE_LOOKUP_FAILED", "无法校验执行节点", err)
+ }
+ if node == nil {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", "所选执行节点不存在", nil)
+ }
+ }
if input.RetentionDays < 0 {
return apperror.BadRequest("BACKUP_TASK_INVALID", "保留天数不能小于 0", nil)
}
@@ -338,6 +506,44 @@ func (s *BackupTaskService) validateInput(ctx context.Context, existing *model.B
if strings.TrimSpace(input.CronExpr) != "" && len(strings.Fields(strings.TrimSpace(input.CronExpr))) < 5 {
return apperror.BadRequest("BACKUP_TASK_INVALID", "Cron 表达式格式不正确", nil)
}
+ if input.VerifyEnabled {
+ if strings.TrimSpace(input.VerifyCronExpr) == "" {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", "启用验证演练时必须填写验证 Cron 表达式", nil)
+ }
+ if len(strings.Fields(strings.TrimSpace(input.VerifyCronExpr))) < 5 {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", "验证 Cron 表达式格式不正确", nil)
+ }
+ }
+ if strings.TrimSpace(input.MaintenanceWindows) != "" {
+ if err := backup.ValidateMaintenanceWindows(input.MaintenanceWindows); err != nil {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", err.Error(), err)
+ }
+ }
+ // 依赖检查:每个上游任务必须存在 + 不能依赖自己 + 无循环
+ if len(input.DependsOnTaskIDs) > 0 {
+ currentID := uint(0)
+ if existing != nil {
+ currentID = existing.ID
+ }
+ for _, dep := range input.DependsOnTaskIDs {
+ if dep == 0 {
+ continue
+ }
+ if dep == currentID {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", "不能把任务自己设为上游依赖", nil)
+ }
+ upstream, err := s.tasks.FindByID(ctx, dep)
+ if err != nil {
+ return apperror.Internal("BACKUP_TASK_DEP_LOOKUP_FAILED", "无法校验上游任务", err)
+ }
+ if upstream == nil {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", fmt.Sprintf("上游任务 %d 不存在", dep), nil)
+ }
+ }
+ if currentID > 0 && s.hasCyclicDependency(ctx, currentID, input.DependsOnTaskIDs) {
+ return apperror.BadRequest("BACKUP_TASK_INVALID", "依赖关系会形成循环", nil)
+ }
+ }
passwordRequired := existing == nil || existing.DBPasswordCiphertext == ""
return validateTaskTypeSpecificFields(input, passwordRequired)
}
@@ -441,11 +647,21 @@ func (s *BackupTaskService) buildTask(existing *model.BackupTask, input BackupTa
ExtraConfig: extraConfigJSON,
StorageTargetID: primaryTargetID,
StorageTargets: storageTargets,
+ NodeID: input.NodeID,
+ Tags: strings.TrimSpace(input.Tags),
RetentionDays: input.RetentionDays,
Compression: compression,
Encrypt: input.Encrypt,
MaxBackups: maxBackups,
LastStatus: "idle",
+ VerifyEnabled: input.VerifyEnabled,
+ VerifyCronExpr: strings.TrimSpace(input.VerifyCronExpr),
+ VerifyMode: normalizeVerifyMode(input.VerifyMode),
+ SLAHoursRPO: maxInt(0, input.SLAHoursRPO),
+ AlertOnConsecutiveFails: alertThreshold(input.AlertOnConsecutiveFails),
+ ReplicationTargetIDs: encodeUintCSV(input.ReplicationTargetIDs),
+ MaintenanceWindows: strings.TrimSpace(input.MaintenanceWindows),
+ DependsOnTaskIDs: encodeUintCSV(input.DependsOnTaskIDs),
}
if existing != nil {
item.LastRunAt = existing.LastRunAt
@@ -520,16 +736,69 @@ func toBackupTaskSummary(item *model.BackupTask) BackupTaskSummary {
StorageTargetName: primaryName,
StorageTargetIDs: targetIDs,
StorageTargetNames: targetNames,
+ NodeID: item.NodeID,
+ NodeName: item.Node.Name,
+ Tags: item.Tags,
RetentionDays: item.RetentionDays,
Compression: item.Compression,
Encrypt: item.Encrypt,
MaxBackups: item.MaxBackups,
LastRunAt: item.LastRunAt,
LastStatus: item.LastStatus,
+ VerifyEnabled: item.VerifyEnabled,
+ VerifyCronExpr: item.VerifyCronExpr,
+ VerifyMode: item.VerifyMode,
+ SLAHoursRPO: item.SLAHoursRPO,
+ AlertOnConsecutiveFails: item.AlertOnConsecutiveFails,
+ ReplicationTargetIDs: parseUintCSV(item.ReplicationTargetIDs),
+ MaintenanceWindows: item.MaintenanceWindows,
+ DependsOnTaskIDs: parseUintCSV(item.DependsOnTaskIDs),
UpdatedAt: item.UpdatedAt,
}
}
+// encodeUintCSV 把 uint 切片编码为 CSV 字符串(去重保序)。
+func encodeUintCSV(ids []uint) string {
+ if len(ids) == 0 {
+ return ""
+ }
+ seen := map[uint]bool{}
+ parts := make([]string, 0, len(ids))
+ for _, id := range ids {
+ if id == 0 || seen[id] {
+ continue
+ }
+ seen[id] = true
+ parts = append(parts, strconv.FormatUint(uint64(id), 10))
+ }
+ return strings.Join(parts, ",")
+}
+
+// normalizeVerifyMode 规范化验证模式,未知值默认 quick。
+func normalizeVerifyMode(value string) string {
+ switch strings.ToLower(strings.TrimSpace(value)) {
+ case "deep":
+ return model.VerificationModeDeep
+ default:
+ return model.VerificationModeQuick
+ }
+}
+
+// alertThreshold 连续失败告警阈值下限为 1。
+func alertThreshold(value int) int {
+ if value <= 0 {
+ return 1
+ }
+ return value
+}
+
+func maxInt(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}
+
func encodeExcludePatterns(value []string) (string, error) {
if len(value) == 0 {
return "[]", nil
diff --git a/server/internal/service/cluster_version.go b/server/internal/service/cluster_version.go
new file mode 100644
index 0000000..b84183d
--- /dev/null
+++ b/server/internal/service/cluster_version.go
@@ -0,0 +1,171 @@
+package service
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "sync"
+ "time"
+
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+)
+
// ClusterVersionMonitor checks agent version compatibility across the
// cluster against the master's version.
// It produces two kinds of alerts:
//  1. Agent behind master (major or minor mismatch) -> upgrade recommended
//  2. Agent version empty/abnormal -> agent did not report correctly
//
// Cadence: the original note says scans piggyback on the 15s node online
// monitor, with at most one alert per node per 24h.
// NOTE(review): Start() below defaults to a 30-minute scan interval —
// confirm which cadence is authoritative.
type ClusterVersionMonitor struct {
	nodeRepo        repository.NodeRepository
	eventDispatcher EventDispatcher // optional; nil disables event emission
	masterVersion   string
	mu              sync.Mutex         // guards notified
	notified        map[uint]time.Time // nodeID -> last alert time (dedupe window)
}
+
+func NewClusterVersionMonitor(nodeRepo repository.NodeRepository, masterVersion string) *ClusterVersionMonitor {
+ return &ClusterVersionMonitor{
+ nodeRepo: nodeRepo,
+ masterVersion: masterVersion,
+ notified: map[uint]time.Time{},
+ }
+}
+
// SetEventDispatcher injects the dispatcher used to emit agent_outdated
// events. While it is nil, scan() skips event dispatch (but still records
// the node in the dedupe map).
func (m *ClusterVersionMonitor) SetEventDispatcher(dispatcher EventDispatcher) {
	m.eventDispatcher = dispatcher
}
+
+// Start 启动后台扫描。ctx 取消时退出。
+// scanInterval 建议 30 分钟;resetInterval 建议 24 小时。
+func (m *ClusterVersionMonitor) Start(ctx context.Context, scanInterval, resetInterval time.Duration) {
+ if scanInterval <= 0 {
+ scanInterval = 30 * time.Minute
+ }
+ if resetInterval <= 0 {
+ resetInterval = 24 * time.Hour
+ }
+ // 启动立即跑一次,让控制台尽快看到
+ go func() {
+ m.scan(ctx, resetInterval)
+ ticker := time.NewTicker(scanInterval)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ m.scan(ctx, resetInterval)
+ }
+ }
+ }()
+}
+
// scan performs one pass over all cluster nodes, emitting an
// agent_outdated event for each remote node whose agent version lags the
// master version — at most once per node per resetInterval. Nodes removed
// from the cluster, or whose agents have caught up, are dropped from the
// dedupe map. A failed node listing silently skips the cycle (best effort).
func (m *ClusterVersionMonitor) scan(ctx context.Context, resetInterval time.Duration) {
	nodes, err := m.nodeRepo.List(ctx)
	if err != nil {
		return
	}
	now := time.Now().UTC()
	m.mu.Lock()
	defer m.mu.Unlock()
	// Drop dedupe entries for nodes no longer in the cluster.
	activeIDs := map[uint]bool{}
	for _, n := range nodes {
		activeIDs[n.ID] = true
	}
	for id := range m.notified {
		if !activeIDs[id] {
			delete(m.notified, id)
		}
	}

	for _, node := range nodes {
		// Only monitor remote nodes; the local master is trivially current.
		if node.IsLocal {
			continue
		}
		// Empty version means the agent never reported — skip (no alert here).
		if strings.TrimSpace(node.AgentVer) == "" {
			continue
		}
		if isVersionOutdated(node.AgentVer, m.masterVersion) {
			// Dedupe: suppress repeat alerts within resetInterval.
			if last, seen := m.notified[node.ID]; seen && now.Sub(last) < resetInterval {
				continue
			}
			if m.eventDispatcher != nil {
				title := "BackupX Agent 版本落后"
				body := fmt.Sprintf("节点:%s\nAgent 版本:%s\nMaster 版本:%s\n建议升级 Agent 以获得完整兼容性。",
					node.Name, node.AgentVer, m.masterVersion)
				fields := map[string]any{
					"nodeId":        node.ID,
					"nodeName":      node.Name,
					"agentVersion":  node.AgentVer,
					"masterVersion": m.masterVersion,
				}
				_ = m.eventDispatcher.DispatchEvent(ctx, model.NotificationEventAgentOutdated, title, body, fields)
			}
			m.notified[node.ID] = now
		} else {
			delete(m.notified, node.ID) // node caught up: stop alerting
		}
	}
}
+
+// isVersionOutdated 简单比较 major.minor。
+//
+// 规则:
+// - master 或 agent 为 "dev" / 空 → 返回 false(不告警)
+// - 都是形如 x.y[.z] 时,agent 的 major.minor < master 视为落后
+// - 解析失败也返回 false(保守策略)
+//
+// 该策略放宽 patch 级差异,避免小版本发布造成集群大量告警。
+func isVersionOutdated(agent, master string) bool {
+ a := strings.TrimPrefix(strings.TrimSpace(agent), "v")
+ m := strings.TrimPrefix(strings.TrimSpace(master), "v")
+ if a == "" || m == "" || a == "dev" || m == "dev" {
+ return false
+ }
+ aMajor, aMinor, ok := splitMajorMinor(a)
+ if !ok {
+ return false
+ }
+ mMajor, mMinor, ok := splitMajorMinor(m)
+ if !ok {
+ return false
+ }
+ if aMajor < mMajor {
+ return true
+ }
+ if aMajor == mMajor && aMinor < mMinor {
+ return true
+ }
+ return false
+}
+
+func splitMajorMinor(v string) (int, int, bool) {
+ parts := strings.Split(v, ".")
+ if len(parts) < 2 {
+ return 0, 0, false
+ }
+ major, ok := atoi(parts[0])
+ if !ok {
+ return 0, 0, false
+ }
+ minor, ok := atoi(parts[1])
+ if !ok {
+ return 0, 0, false
+ }
+ return major, minor, true
+}
+
// atoi parses a non-negative, digits-only decimal string.
// ok is false for empty input, any non-digit rune, or (detected) overflow.
//
// Fixes vs. the original: "" previously returned (0, true) — an empty
// version component parsed as a valid 0 — and a very long digit run could
// silently wrap around. Both now fail.
func atoi(s string) (int, bool) {
	if s == "" {
		return 0, false
	}
	n := 0
	for _, r := range s {
		if r < '0' || r > '9' {
			return 0, false
		}
		n = n*10 + int(r-'0')
		if n < 0 {
			// Wrapped negative: overflow. (Extreme wrap-to-positive is not
			// caught, but version components are never that long.)
			return 0, false
		}
	}
	return n, true
}
diff --git a/server/internal/service/dashboard_service.go b/server/internal/service/dashboard_service.go
index 72ce5a5..2508882 100644
--- a/server/internal/service/dashboard_service.go
+++ b/server/internal/service/dashboard_service.go
@@ -2,9 +2,12 @@ package service
import (
"context"
+ "fmt"
+ "sync"
"time"
"backupx/server/internal/apperror"
+ "backupx/server/internal/model"
"backupx/server/internal/repository"
)
@@ -26,13 +29,24 @@ type DashboardStats struct {
}
type DashboardService struct {
- tasks repository.BackupTaskRepository
- records repository.BackupRecordRepository
- targets repository.StorageTargetRepository
+ tasks repository.BackupTaskRepository
+ records repository.BackupRecordRepository
+ targets repository.StorageTargetRepository
+ nodes repository.NodeRepository
+ masterVersion string
+ // slaMonitor 内部跟踪已告警的违约任务,避免每次扫描重复派发事件
+ slaNotified map[uint]time.Time
+ slaMu sync.Mutex
}
func NewDashboardService(tasks repository.BackupTaskRepository, records repository.BackupRecordRepository, targets repository.StorageTargetRepository) *DashboardService {
- return &DashboardService{tasks: tasks, records: records, targets: targets}
+ return &DashboardService{tasks: tasks, records: records, targets: targets, slaNotified: map[uint]time.Time{}}
+}
+
// SetClusterDependencies injects the node repository and master version,
// enabling the cluster overview and node-performance views. Optional:
// without it, ClusterOverview returns an empty result and NodePerformance
// returns an empty slice.
func (s *DashboardService) SetClusterDependencies(nodes repository.NodeRepository, masterVersion string) {
	s.nodes = nodes
	s.masterVersion = masterVersion
}
func (s *DashboardService) Stats(ctx context.Context) (*DashboardStats, error) {
@@ -107,3 +121,505 @@ func (s *DashboardService) Timeline(ctx context.Context, days int) ([]repository
}
return items, nil
}
+
// SLAViolation describes one task's SLA breach.
// Rule: the task has SLAHoursRPO > 0 and the time since its most recent
// successful backup exceeds SLAHoursRPO. A task that never succeeded
// (LastSuccessAt == nil) also counts as violating, measured from CreatedAt.
type SLAViolation struct {
	TaskID                uint       `json:"taskId"`
	TaskName              string     `json:"taskName"`
	NodeID                uint       `json:"nodeId"`
	NodeName              string     `json:"nodeName,omitempty"`
	SLAHoursRPO           int        `json:"slaHoursRpo"`
	LastSuccessAt         *time.Time `json:"lastSuccessAt,omitempty"`
	HoursSinceLastSuccess float64    `json:"hoursSinceLastSuccess"`
	NeverSucceeded        bool       `json:"neverSucceeded"`
}

// SLAComplianceReport is the Dashboard's SLA compliance overview.
type SLAComplianceReport struct {
	TotalTasksWithSLA int            `json:"totalTasksWithSla"`
	Compliant         int            `json:"compliant"`
	Violated          int            `json:"violated"`
	CoverageRate      float64        `json:"coverageRate"` // Compliant / TotalTasksWithSLA; 0 when no SLA tasks
	Violations        []SLAViolation `json:"violations"`
}
+
// SLACompliance computes SLA compliance for all enabled tasks.
// Only tasks with Enabled=true and SLAHoursRPO>0 are considered; a task is
// violating when the hours since its last successful backup (or since
// CreatedAt, if it never succeeded) exceed its RPO target.
func (s *DashboardService) SLACompliance(ctx context.Context) (*SLAComplianceReport, error) {
	items, err := s.tasks.List(ctx, repository.BackupTaskListOptions{})
	if err != nil {
		return nil, apperror.Internal("DASHBOARD_SLA_FAILED", "无法获取任务列表", err)
	}
	now := time.Now().UTC()
	report := &SLAComplianceReport{Violations: []SLAViolation{}}
	for i := range items {
		task := items[i]
		if !task.Enabled || task.SLAHoursRPO <= 0 {
			continue
		}
		report.TotalTasksWithSLA++
		// Use the newest successful record as lastSuccessAt.
		// NOTE(review): assumes successes[0] is the most recent — confirm
		// ListSuccessfulByTask orders descending by completion time.
		successes, err := s.records.ListSuccessfulByTask(ctx, task.ID)
		if err != nil {
			return nil, apperror.Internal("DASHBOARD_SLA_FAILED", "无法获取任务成功记录", err)
		}
		var lastSuccessAt *time.Time
		if len(successes) > 0 && successes[0].CompletedAt != nil {
			lastSuccessAt = successes[0].CompletedAt
		}
		hoursSince := 0.0
		neverSucceeded := lastSuccessAt == nil
		if neverSucceeded {
			hoursSince = now.Sub(task.CreatedAt).Hours()
		} else {
			hoursSince = now.Sub(*lastSuccessAt).Hours()
		}
		if hoursSince > float64(task.SLAHoursRPO) {
			report.Violated++
			report.Violations = append(report.Violations, SLAViolation{
				TaskID:                task.ID,
				TaskName:              task.Name,
				NodeID:                task.NodeID,
				NodeName:              task.Node.Name,
				SLAHoursRPO:           task.SLAHoursRPO,
				LastSuccessAt:         lastSuccessAt,
				HoursSinceLastSuccess: roundHours(hoursSince),
				NeverSucceeded:        neverSucceeded,
			})
		} else {
			report.Compliant++
		}
	}
	if report.TotalTasksWithSLA > 0 {
		report.CoverageRate = float64(report.Compliant) / float64(report.TotalTasksWithSLA)
	}
	return report, nil
}
+
// roundHours rounds an hour count to two decimal places using
// add-half-then-truncate.
//
// Fix vs. the original: a negative input (possible only under clock skew,
// e.g. CreatedAt in the future) previously rounded toward +inf; it is now
// rounded symmetrically away from zero.
func roundHours(value float64) float64 {
	scaled := value * 100
	if scaled >= 0 {
		return float64(int(scaled+0.5)) / 100
	}
	return float64(int(scaled-0.5)) / 100
}
+
// ClusterNodeSummary is a per-node brief for the Dashboard.
type ClusterNodeSummary struct {
	ID           uint      `json:"id"`
	Name         string    `json:"name"`
	Hostname     string    `json:"hostname"`
	Status       string    `json:"status"`
	IsLocal      bool      `json:"isLocal"`
	AgentVersion string    `json:"agentVersion"`
	VersionStatus string   `json:"versionStatus"` // current | outdated | unknown
	LastSeen     time.Time `json:"lastSeen"`
	TaskCount    int64     `json:"taskCount"` // tasks bound to this node
}

// ClusterOverview is the Dashboard cluster-overview card payload.
type ClusterOverview struct {
	MasterVersion  string               `json:"masterVersion"`
	TotalNodes     int                  `json:"totalNodes"`
	OnlineNodes    int                  `json:"onlineNodes"`
	OfflineNodes   int                  `json:"offlineNodes"`
	OutdatedAgents int                  `json:"outdatedAgents"`
	Nodes          []ClusterNodeSummary `json:"nodes"`
}
+
// ClusterOverview returns a cluster node status overview. When cluster
// dependencies were never injected (SetClusterDependencies not called) it
// returns an empty object rather than an error.
func (s *DashboardService) ClusterOverview(ctx context.Context) (*ClusterOverview, error) {
	if s.nodes == nil {
		return &ClusterOverview{MasterVersion: s.masterVersion, Nodes: []ClusterNodeSummary{}}, nil
	}
	nodes, err := s.nodes.List(ctx)
	if err != nil {
		return nil, apperror.Internal("DASHBOARD_CLUSTER_FAILED", "无法获取节点列表", err)
	}
	out := &ClusterOverview{
		MasterVersion: s.masterVersion,
		TotalNodes:    len(nodes),
		Nodes:         make([]ClusterNodeSummary, 0, len(nodes)),
	}
	for i := range nodes {
		node := nodes[i]
		var taskCount int64
		if s.tasks != nil {
			// Count errors are swallowed: the summary still renders with 0.
			if c, err := s.tasks.CountByNodeID(ctx, node.ID); err == nil {
				taskCount = c
			}
		}
		versionStatus := resolveVersionStatus(node, s.masterVersion)
		summary := ClusterNodeSummary{
			ID:           node.ID,
			Name:         node.Name,
			Hostname:     node.Hostname,
			Status:       node.Status,
			IsLocal:      node.IsLocal,
			AgentVersion: node.AgentVer,
			VersionStatus: versionStatus,
			LastSeen:     node.LastSeen,
			TaskCount:    taskCount,
		}
		out.Nodes = append(out.Nodes, summary)
		switch node.Status {
		case model.NodeStatusOnline:
			out.OnlineNodes++
		case model.NodeStatusOffline:
			out.OfflineNodes++
		}
		if versionStatus == "outdated" {
			out.OutdatedAgents++
		}
	}
	return out, nil
}
+
// BreakdownItem is one grouped-statistics entry.
type BreakdownItem struct {
	Key       string `json:"key"`
	Label     string `json:"label"`
	Count     int64  `json:"count"`
	TotalSize int64  `json:"totalSize,omitempty"` // bytes; only populated for storage breakdown
}

// BreakdownStats bundles all grouped-statistics dimensions.
type BreakdownStats struct {
	ByType    []BreakdownItem `json:"byType"`
	ByStatus  []BreakdownItem `json:"byStatus"`
	ByNode    []BreakdownItem `json:"byNode"`
	ByStorage []BreakdownItem `json:"byStorage"`
}
+
// Breakdown returns multi-dimensional grouped statistics.
// Backup records are limited to the last N days (default 30), matching the
// common enterprise "recent distribution" view.
// NOTE(review): only ByStatus applies the `since` window; ByType and ByNode
// aggregate over ALL tasks, and ByStorage over all records — confirm intent.
func (s *DashboardService) Breakdown(ctx context.Context, days int) (*BreakdownStats, error) {
	if days <= 0 {
		days = 30
	}
	since := time.Now().UTC().AddDate(0, 0, -days)
	// Group by type: aggregated from the task dimension.
	tasks, err := s.tasks.List(ctx, repository.BackupTaskListOptions{})
	if err != nil {
		return nil, apperror.Internal("DASHBOARD_BREAKDOWN_FAILED", "无法统计任务分组", err)
	}
	typeCounts := map[string]int64{}
	nodeCounts := map[uint]int64{}
	nodeNames := map[uint]string{0: "本机 Master"}
	for _, task := range tasks {
		typeCounts[task.Type]++
		nodeCounts[task.NodeID]++
		if task.Node.Name != "" {
			nodeNames[task.NodeID] = task.Node.Name
		}
	}
	result := &BreakdownStats{
		ByType:    makeBreakdown(typeCounts, typeLabel),
		ByNode:    makeBreakdownByUint(nodeCounts, nodeNames, "节点 #"),
		ByStatus:  []BreakdownItem{},
		ByStorage: []BreakdownItem{},
	}
	// By status (records from the last `days` days). Errors leave the
	// default empty slice (best effort).
	statusCounts, err := s.countRecordsByStatus(ctx, since)
	if err == nil {
		result.ByStatus = statusCounts
	}
	// By storage target (includes byte totals).
	if s.records != nil {
		storageItems, _ := s.records.StorageUsage(ctx)
		if s.targets != nil {
			targetNames := map[uint]string{}
			if targetList, err := s.targets.List(ctx); err == nil {
				for _, t := range targetList {
					targetNames[t.ID] = t.Name
				}
			}
			for _, item := range storageItems {
				name := targetNames[item.StorageTargetID]
				if name == "" {
					// Target was deleted or lookup failed: synthesize a label.
					name = fmt.Sprintf("存储 #%d", item.StorageTargetID)
				}
				result.ByStorage = append(result.ByStorage, BreakdownItem{
					Key:       fmt.Sprintf("%d", item.StorageTargetID),
					Label:     name,
					TotalSize: item.TotalSize,
				})
			}
		}
	}
	return result, nil
}
+
+// countRecordsByStatus 最近 since 起的记录按状态分组。
+func (s *DashboardService) countRecordsByStatus(ctx context.Context, since time.Time) ([]BreakdownItem, error) {
+ running, _ := s.records.List(ctx, repository.BackupRecordListOptions{Status: "running", DateFrom: &since})
+ success, _ := s.records.List(ctx, repository.BackupRecordListOptions{Status: "success", DateFrom: &since})
+ failed, _ := s.records.List(ctx, repository.BackupRecordListOptions{Status: "failed", DateFrom: &since})
+ return []BreakdownItem{
+ {Key: "success", Label: "成功", Count: int64(len(success))},
+ {Key: "failed", Label: "失败", Count: int64(len(failed))},
+ {Key: "running", Label: "执行中", Count: int64(len(running))},
+ }, nil
+}
+
+// makeBreakdown 把 map[string]int64 转为排序好的 BreakdownItem 列表。
+func makeBreakdown(counts map[string]int64, labelFn func(string) string) []BreakdownItem {
+ items := make([]BreakdownItem, 0, len(counts))
+ for k, v := range counts {
+ label := k
+ if labelFn != nil {
+ label = labelFn(k)
+ }
+ items = append(items, BreakdownItem{Key: k, Label: label, Count: v})
+ }
+ // 按 Count 降序
+ for i := 0; i < len(items); i++ {
+ for j := i + 1; j < len(items); j++ {
+ if items[j].Count > items[i].Count {
+ items[i], items[j] = items[j], items[i]
+ }
+ }
+ }
+ return items
+}
+
+func makeBreakdownByUint(counts map[uint]int64, names map[uint]string, fallback string) []BreakdownItem {
+ items := make([]BreakdownItem, 0, len(counts))
+ for k, v := range counts {
+ label := names[k]
+ if label == "" {
+ label = fmt.Sprintf("%s%d", fallback, k)
+ }
+ items = append(items, BreakdownItem{Key: fmt.Sprintf("%d", k), Label: label, Count: v})
+ }
+ for i := 0; i < len(items); i++ {
+ for j := i + 1; j < len(items); j++ {
+ if items[j].Count > items[i].Count {
+ items[i], items[j] = items[j], items[i]
+ }
+ }
+ }
+ return items
+}
+
// typeLabel maps a backup task type key to its display label; unknown keys
// are returned unchanged.
func typeLabel(key string) string {
	labels := map[string]string{
		"file":       "文件",
		"mysql":      "MySQL",
		"postgresql": "PostgreSQL",
		"sqlite":     "SQLite",
		"saphana":    "SAP HANA",
	}
	if label, ok := labels[key]; ok {
		return label
	}
	return key
}
+
// NodePerformance holds one node's execution metrics over the last N days.
// Purpose: lets the Dashboard ops view quickly answer "which node is
// loaded / failing / slow".
type NodePerformance struct {
	NodeID          uint    `json:"nodeId"`
	NodeName        string  `json:"nodeName"`
	IsLocal         bool    `json:"isLocal"`
	TotalRuns       int     `json:"totalRuns"`
	SuccessRuns     int     `json:"successRuns"`
	FailedRuns      int     `json:"failedRuns"`
	SuccessRate     float64 `json:"successRate"`     // SuccessRuns / TotalRuns; 0 when no runs
	TotalBytes      int64   `json:"totalBytes"`      // bytes across successful runs only
	AvgDurationSecs float64 `json:"avgDurationSecs"` // seconds per successful run
}
+
// NodePerformance aggregates each node's execution metrics over the last
// `days` days (default 30), sorted by success rate descending (ties broken
// by total runs). Returns empty when the node repository was never
// injected.
func (s *DashboardService) NodePerformance(ctx context.Context, days int) ([]NodePerformance, error) {
	if s.nodes == nil || s.records == nil {
		return []NodePerformance{}, nil
	}
	if days <= 0 {
		days = 30
	}
	since := time.Now().UTC().AddDate(0, 0, -days)
	nodes, err := s.nodes.List(ctx)
	if err != nil {
		return nil, apperror.Internal("DASHBOARD_NODE_PERF_FAILED", "无法获取节点列表", err)
	}
	// Fetch all records from the last N days and aggregate by the
	// record's own NodeID field (added in a later schema round — the task
	// join is not needed).
	items, err := s.records.List(ctx, repository.BackupRecordListOptions{DateFrom: &since})
	if err != nil {
		return nil, apperror.Internal("DASHBOARD_NODE_PERF_FAILED", "无法获取备份记录", err)
	}
	bucket := map[uint]*nodeAgg{}
	for i := range items {
		r := items[i]
		a, ok := bucket[r.NodeID]
		if !ok {
			a = &nodeAgg{}
			bucket[r.NodeID] = a
		}
		a.total++
		switch r.Status {
		case model.BackupRecordStatusSuccess:
			// Bytes and duration are only accumulated for successes.
			a.success++
			a.bytes += r.FileSize
			a.durSecs += int64(r.DurationSeconds)
		case model.BackupRecordStatusFailed:
			a.failed++
		}
	}
	out := make([]NodePerformance, 0, len(nodes)+1)
	// Ensure the local master (id=0) appears even with no records.
	seenLocal := false
	for _, n := range nodes {
		a := bucket[n.ID]
		if a == nil {
			a = &nodeAgg{}
		}
		perf := buildNodePerformance(n.ID, n.Name, n.IsLocal, a)
		out = append(out, perf)
		if n.ID == 0 || n.IsLocal {
			seenLocal = true
		}
	}
	// If bucket still holds id=0 (master not registered as a node) or
	// records bound to a deleted node, append a synthetic local entry.
	if a, ok := bucket[0]; ok && !seenLocal {
		out = append(out, buildNodePerformance(0, "本机 Master", true, a))
	}
	// Sort: success rate descending, then total runs descending.
	for i := 0; i < len(out); i++ {
		for j := i + 1; j < len(out); j++ {
			if out[j].SuccessRate > out[i].SuccessRate ||
				(out[j].SuccessRate == out[i].SuccessRate && out[j].TotalRuns > out[i].TotalRuns) {
				out[i], out[j] = out[j], out[i]
			}
		}
	}
	return out, nil
}
+
// nodeAgg is the per-node intermediate accumulator for performance stats.
type nodeAgg struct {
	total, success, failed int
	bytes                  int64 // bytes from successful runs
	durSecs                int64 // summed duration (seconds) of successful runs
}
+
+func buildNodePerformance(nodeID uint, nodeName string, isLocal bool, a *nodeAgg) NodePerformance {
+ rate := 0.0
+ if a.total > 0 {
+ rate = float64(a.success) / float64(a.total)
+ }
+ avgDur := 0.0
+ if a.success > 0 {
+ avgDur = float64(a.durSecs) / float64(a.success)
+ }
+ return NodePerformance{
+ NodeID: nodeID,
+ NodeName: nodeName,
+ IsLocal: isLocal,
+ TotalRuns: a.total,
+ SuccessRuns: a.success,
+ FailedRuns: a.failed,
+ SuccessRate: rate,
+ TotalBytes: a.bytes,
+ AvgDurationSecs: avgDur,
+ }
+}
+
+// resolveVersionStatus 判断单个节点的版本健康度标签。
+func resolveVersionStatus(node model.Node, masterVersion string) string {
+ if node.IsLocal {
+ return "current"
+ }
+ if node.AgentVer == "" {
+ return "unknown"
+ }
+ if isClusterVersionOutdated(node.AgentVer, masterVersion) {
+ return "outdated"
+ }
+ return "current"
+}
+
// isClusterVersionOutdated compares versions with the same semantics as
// cluster_version.go by delegating to isVersionOutdated. Kept as a
// separate name to avoid cross-file coupling in this package's tests.
func isClusterVersionOutdated(agent, master string) bool {
	return isVersionOutdated(agent, master)
}
+
+// StartSLAMonitor 后台定时扫描 SLA 违约并通过 event dispatcher 派发 sla_violation 事件。
+// 防骚扰:同一任务在 resetInterval 内只派发一次(避免每分钟轰炸)。
+// - scanInterval:扫描频率(建议 15m)
+// - resetInterval:同任务再次告警的最短间隔(建议 6h)
+//
+// ctx 被取消时退出。dispatcher 为 nil 时退化为仅扫描不告警(保持兼容)。
+func (s *DashboardService) StartSLAMonitor(ctx context.Context, dispatcher EventDispatcher, scanInterval, resetInterval time.Duration) {
+ if scanInterval <= 0 {
+ scanInterval = 15 * time.Minute
+ }
+ if resetInterval <= 0 {
+ resetInterval = 6 * time.Hour
+ }
+ ticker := time.NewTicker(scanInterval)
+ go func() {
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ s.scanAndDispatchSLA(ctx, dispatcher, resetInterval)
+ }
+ }
+ }()
+}
+
// scanAndDispatchSLA runs one SLA violation scan and dispatches events as
// needed. Memory of past alerts is pruned to tasks that are still
// violating, so a task that recovers and later violates again alerts
// immediately. Scan errors are silently skipped (best effort).
func (s *DashboardService) scanAndDispatchSLA(ctx context.Context, dispatcher EventDispatcher, resetInterval time.Duration) {
	report, err := s.SLACompliance(ctx)
	if err != nil || report == nil {
		return
	}
	now := time.Now().UTC()
	s.slaMu.Lock()
	defer s.slaMu.Unlock()
	// Keep dedupe entries only for tasks still in violation.
	active := map[uint]time.Time{}
	violatingIDs := map[uint]bool{}
	for _, v := range report.Violations {
		violatingIDs[v.TaskID] = true
	}
	for taskID, when := range s.slaNotified {
		if violatingIDs[taskID] {
			active[taskID] = when
		}
	}
	s.slaNotified = active

	for _, v := range report.Violations {
		// Anti-spam: skip tasks alerted within the reset window.
		last, seen := s.slaNotified[v.TaskID]
		if seen && now.Sub(last) < resetInterval {
			continue
		}
		if dispatcher != nil {
			title := "BackupX SLA 违约"
			statusText := fmt.Sprintf("%.1f 小时", v.HoursSinceLastSuccess)
			if v.NeverSucceeded {
				statusText = "从未成功"
			}
			body := fmt.Sprintf("任务:%s\nRPO 目标:%d 小时\n距最近成功:%s", v.TaskName, v.SLAHoursRPO, statusText)
			fields := map[string]any{
				"taskId":                v.TaskID,
				"taskName":              v.TaskName,
				"nodeId":                v.NodeID,
				"nodeName":              v.NodeName,
				"slaHoursRpo":           v.SLAHoursRPO,
				"hoursSinceLastSuccess": v.HoursSinceLastSuccess,
				"neverSucceeded":        v.NeverSucceeded,
			}
			_ = dispatcher.DispatchEvent(ctx, model.NotificationEventSLAViolation, title, body, fields)
		}
		// Recorded even when dispatcher is nil, preserving dedupe timing.
		s.slaNotified[v.TaskID] = now
	}
}
diff --git a/server/internal/service/database_discovery_service.go b/server/internal/service/database_discovery_service.go
index cb32b4e..6ec4250 100644
--- a/server/internal/service/database_discovery_service.go
+++ b/server/internal/service/database_discovery_service.go
@@ -1,14 +1,16 @@
package service
import (
- "bytes"
"context"
+ "encoding/json"
"fmt"
"strings"
"time"
"backupx/server/internal/apperror"
"backupx/server/internal/backup"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
)
type DatabaseDiscoverInput struct {
@@ -17,6 +19,9 @@ type DatabaseDiscoverInput struct {
Port int `json:"port" binding:"required,min=1"`
User string `json:"user" binding:"required"`
Password string `json:"password" binding:"required"`
+ // NodeID 执行发现的节点。0 或本机 → Master 本地执行;
+ // 远程节点 → 通过 Agent RPC 下发 discover_db 命令,目标主机在该节点视角解析。
+ NodeID uint `json:"nodeId"`
}
type DatabaseDiscoverResult struct {
@@ -25,117 +30,103 @@ type DatabaseDiscoverResult struct {
type DatabaseDiscoveryService struct {
executor backup.CommandExecutor
+ nodeRepo repository.NodeRepository
+ agentRPC DatabaseDiscoveryAgentRPC
+}
+
+// DatabaseDiscoveryAgentRPC 封装 AgentService 的同步 RPC 能力以避免循环依赖。
+type DatabaseDiscoveryAgentRPC interface {
+ EnqueueCommand(ctx context.Context, nodeID uint, cmdType string, payload any) (uint, error)
+ WaitForCommandResult(ctx context.Context, cmdID uint, timeout time.Duration) (*model.AgentCommand, error)
}
func NewDatabaseDiscoveryService(executor backup.CommandExecutor) *DatabaseDiscoveryService {
return &DatabaseDiscoveryService{executor: executor}
}
+// SetClusterDependencies 注入集群依赖,启用远程节点发现。
+// 可选注入:未注入时仅支持在 Master 本地发现。
+func (s *DatabaseDiscoveryService) SetClusterDependencies(nodeRepo repository.NodeRepository, rpc DatabaseDiscoveryAgentRPC) {
+ s.nodeRepo = nodeRepo
+ s.agentRPC = rpc
+}
+
func (s *DatabaseDiscoveryService) Discover(ctx context.Context, input DatabaseDiscoverInput) (*DatabaseDiscoverResult, error) {
- switch strings.TrimSpace(strings.ToLower(input.Type)) {
- case "mysql":
- return s.discoverMySQL(ctx, input)
- case "postgresql":
- return s.discoverPostgreSQL(ctx, input)
- default:
+ dbType := strings.TrimSpace(strings.ToLower(input.Type))
+ if dbType != "mysql" && dbType != "postgresql" {
return nil, apperror.BadRequest("DATABASE_DISCOVER_INVALID_TYPE", "不支持的数据库类型", nil)
}
-}
-
-func (s *DatabaseDiscoveryService) discoverMySQL(ctx context.Context, input DatabaseDiscoverInput) (*DatabaseDiscoverResult, error) {
- mysqlPath, err := s.executor.LookPath("mysql")
+ // 远程节点路由
+ if s.shouldRouteToAgent(ctx, input.NodeID) {
+ return s.discoverViaAgent(ctx, input)
+ }
+ // 本地执行
+ databases, err := backup.DiscoverDatabases(ctx, s.executor, backup.DiscoverRequest{
+ Type: dbType,
+ Host: input.Host,
+ Port: input.Port,
+ User: input.User,
+ Password: input.Password,
+ })
if err != nil {
- return nil, apperror.BadRequest("DATABASE_DISCOVER_MYSQL_NOT_FOUND", "系统未安装 mysql 客户端", err)
+ // 统一映射为 BadRequest,便于前端显示
+ return nil, apperror.BadRequest("DATABASE_DISCOVER_FAILED", sanitizeMessage(err.Error()), err)
}
-
- timeout, cancel := context.WithTimeout(ctx, 5*time.Second)
- defer cancel()
-
- var stdout, stderr bytes.Buffer
- args := []string{
- fmt.Sprintf("--host=%s", input.Host),
- fmt.Sprintf("--port=%d", input.Port),
- fmt.Sprintf("--user=%s", input.User),
- "-e", "SHOW DATABASES",
- "--skip-column-names",
- }
- env := []string{fmt.Sprintf("MYSQL_PWD=%s", input.Password)}
-
- if err := s.executor.Run(timeout, mysqlPath, args, backup.CommandOptions{
- Stdout: &stdout,
- Stderr: &stderr,
- Env: env,
- }); err != nil {
- errMsg := strings.TrimSpace(stderr.String())
- if errMsg == "" {
- errMsg = err.Error()
- }
- return nil, apperror.BadRequest("DATABASE_DISCOVER_MYSQL_FAILED", fmt.Sprintf("连接 MySQL 失败:%s", sanitizeMessage(errMsg)), err)
- }
-
- systemDBs := map[string]bool{
- "information_schema": true,
- "performance_schema": true,
- "mysql": true,
- "sys": true,
- }
-
- var databases []string
- for _, line := range strings.Split(stdout.String(), "\n") {
- db := strings.TrimSpace(line)
- if db == "" || systemDBs[db] {
- continue
- }
- databases = append(databases, db)
- }
-
return &DatabaseDiscoverResult{Databases: databases}, nil
}
-func (s *DatabaseDiscoveryService) discoverPostgreSQL(ctx context.Context, input DatabaseDiscoverInput) (*DatabaseDiscoverResult, error) {
- psqlPath, err := s.executor.LookPath("psql")
- if err != nil {
- return nil, apperror.BadRequest("DATABASE_DISCOVER_PSQL_NOT_FOUND", "系统未安装 psql 客户端", err)
+// shouldRouteToAgent 判断是否应路由到远程 Agent 执行发现。
+// NodeID=0、未注入集群依赖、或节点为本机时返回 false。
+func (s *DatabaseDiscoveryService) shouldRouteToAgent(ctx context.Context, nodeID uint) bool {
+ if nodeID == 0 || s.nodeRepo == nil || s.agentRPC == nil {
+ return false
}
-
- timeout, cancel := context.WithTimeout(ctx, 5*time.Second)
- defer cancel()
-
- var stdout, stderr bytes.Buffer
- args := []string{
- "-h", input.Host,
- "-p", fmt.Sprintf("%d", input.Port),
- "-U", input.User,
- "-d", "postgres",
- "-t", "-A",
- "-c", "SELECT datname FROM pg_database WHERE datistemplate = false ORDER BY datname",
+ node, err := s.nodeRepo.FindByID(ctx, nodeID)
+ if err != nil || node == nil || node.IsLocal {
+ return false
}
- env := []string{fmt.Sprintf("PGPASSWORD=%s", input.Password)}
-
- if err := s.executor.Run(timeout, psqlPath, args, backup.CommandOptions{
- Stdout: &stdout,
- Stderr: &stderr,
- Env: env,
- }); err != nil {
- errMsg := strings.TrimSpace(stderr.String())
- if errMsg == "" {
- errMsg = err.Error()
- }
- return nil, apperror.BadRequest("DATABASE_DISCOVER_PSQL_FAILED", fmt.Sprintf("连接 PostgreSQL 失败:%s", sanitizeMessage(errMsg)), err)
- }
-
- skipDBs := map[string]bool{
- "postgres": true,
- }
-
- var databases []string
- for _, line := range strings.Split(stdout.String(), "\n") {
- db := strings.TrimSpace(line)
- if db == "" || skipDBs[db] || strings.HasPrefix(db, "template") {
- continue
- }
- databases = append(databases, db)
- }
-
- return &DatabaseDiscoverResult{Databases: databases}, nil
+ return true
+}
+
// discoverViaAgent sends a discover_db command to the node's agent and
// waits synchronously for the result. The agent must be online; the
// command times out after 15s. The raw credentials are forwarded in the
// command payload for the agent to connect with.
func (s *DatabaseDiscoveryService) discoverViaAgent(ctx context.Context, input DatabaseDiscoverInput) (*DatabaseDiscoverResult, error) {
	node, err := s.nodeRepo.FindByID(ctx, input.NodeID)
	if err != nil {
		return nil, apperror.Internal("DATABASE_DISCOVER_NODE_LOOKUP_FAILED", "无法读取节点", err)
	}
	if node == nil {
		return nil, apperror.BadRequest("DATABASE_DISCOVER_NODE_NOT_FOUND", "指定的节点不存在", nil)
	}
	if node.Status != model.NodeStatusOnline {
		return nil, apperror.BadRequest("NODE_OFFLINE", fmt.Sprintf("节点 %s 当前离线,无法执行数据库发现", node.Name), nil)
	}
	cmdID, err := s.agentRPC.EnqueueCommand(ctx, node.ID, model.AgentCommandTypeDiscoverDB, map[string]any{
		"type":     strings.ToLower(input.Type),
		"host":     input.Host,
		"port":     input.Port,
		"user":     input.User,
		"password": input.Password,
	})
	if err != nil {
		return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发数据库发现命令", err)
	}
	cmd, err := s.agentRPC.WaitForCommandResult(ctx, cmdID, 15*time.Second)
	if err != nil {
		return nil, err
	}
	if cmd.Status != model.AgentCommandStatusSucceeded {
		// Surface the agent's error message, falling back to the status.
		msg := strings.TrimSpace(cmd.ErrorMessage)
		if msg == "" {
			msg = fmt.Sprintf("命令状态: %s", cmd.Status)
		}
		return nil, apperror.BadRequest("DATABASE_DISCOVER_FAILED", sanitizeMessage(msg), nil)
	}
	// Agent returns JSON of shape {"databases": [...]}.
	var result struct {
		Databases []string `json:"databases"`
	}
	if err := json.Unmarshal([]byte(cmd.Result), &result); err != nil {
		return nil, apperror.Internal("AGENT_RESULT_INVALID", "Agent 返回结果格式错误", err)
	}
	return &DatabaseDiscoverResult{Databases: result.Databases}, nil
}
diff --git a/server/internal/service/event_broadcaster.go b/server/internal/service/event_broadcaster.go
new file mode 100644
index 0000000..0063d0e
--- /dev/null
+++ b/server/internal/service/event_broadcaster.go
@@ -0,0 +1,96 @@
+package service
+
+import (
+ "context"
+ "sync"
+ "time"
+)
+
// EventBroadcaster is the real-time subscription hub of the event bus.
// It does not replace Notification (persistent subscriptions,
// multi-channel); it is the low-latency path for pushing live UI updates
// to the frontend.
//
// Architecture:
//   - Notification bus: persistence / multi-channel (email/webhook/telegram) / audit
//   - EventBroadcaster: in-memory pub-sub feeding browser SSE (dashboard
//     auto-refresh, desktop toasts)
//
// Design decisions:
//   - Non-blocking publish: a full subscriber channel drops the event
//     rather than blocking the producer
//   - No persistence: reconnecting subscribers get no replay (event
//     durability is the Notification bus's job)
//   - Lightweight: an RWMutex-guarded map of buffered channels
//     (the original comment said sync.Map — the implementation is not)
type EventBroadcaster struct {
	mu          sync.RWMutex
	subscribers map[int]chan EventEnvelope // subscription id -> delivery channel
	nextID      int                        // monotonically increasing subscription id
}
+
// EventEnvelope is the event package delivered to subscribers.
// Type reuses the Notification event constants (model.NotificationEvent*).
type EventEnvelope struct {
	Type      string         `json:"type"`
	Title     string         `json:"title"`
	Body      string         `json:"body"`
	Fields    map[string]any `json:"fields,omitempty"`
	Timestamp time.Time      `json:"timestamp"` // UTC, set at publish time
}
+
+func NewEventBroadcaster() *EventBroadcaster {
+ return &EventBroadcaster{subscribers: map[int]chan EventEnvelope{}}
+}
+
+// Subscribe 订阅事件流。buffer 建议 32,避免慢消费者阻塞。
+// 返回 channel 和 cancel 函数,调用方需在退出时 cancel。
+func (b *EventBroadcaster) Subscribe(buffer int) (<-chan EventEnvelope, func()) {
+ if buffer <= 0 {
+ buffer = 32
+ }
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ b.nextID++
+ id := b.nextID
+ ch := make(chan EventEnvelope, buffer)
+ b.subscribers[id] = ch
+ cancel := func() {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ if sub, ok := b.subscribers[id]; ok {
+ delete(b.subscribers, id)
+ close(sub)
+ }
+ }
+ return ch, cancel
+}
+
// Publish fans the event out to every subscriber without blocking: when a
// subscriber's channel is full, that subscriber's copy is dropped and the
// rest are unaffected. Always returns nil. Sending while holding the read
// lock also guarantees no channel is closed concurrently (the cancel func
// returned by Subscribe takes the write lock before closing).
// Implements EventDispatcher, so it can plug directly into the
// NotificationService dispatch chain.
func (b *EventBroadcaster) Publish(ctx context.Context, eventType, title, body string, fields map[string]any) error {
	envelope := EventEnvelope{
		Type:      eventType,
		Title:     title,
		Body:      body,
		Fields:    fields,
		Timestamp: time.Now().UTC(),
	}
	b.mu.RLock()
	defer b.mu.RUnlock()
	for _, sub := range b.subscribers {
		select {
		case sub <- envelope:
		default:
			// Slow consumer: drop this event, never block other subscribers.
		}
	}
	return nil
}
+
// DispatchEvent implements the EventDispatcher interface (same signature
// as NotificationService) by delegating to Publish, letting the
// broadcaster join the existing event dispatch chain without changes.
func (b *EventBroadcaster) DispatchEvent(ctx context.Context, eventType, title, body string, fields map[string]any) error {
	return b.Publish(ctx, eventType, title, body, fields)
}
+
+// SubscriberCount 当前活跃订阅者数,供 metrics / 健康检查使用。
+func (b *EventBroadcaster) SubscriberCount() int {
+ b.mu.RLock()
+ defer b.mu.RUnlock()
+ return len(b.subscribers)
+}
diff --git a/server/internal/service/node_service.go b/server/internal/service/node_service.go
index 12317b7..1264221 100644
--- a/server/internal/service/node_service.go
+++ b/server/internal/service/node_service.go
@@ -22,17 +22,19 @@ import (
// NodeSummary is the API response for node listings.
type NodeSummary struct {
- ID uint `json:"id"`
- Name string `json:"name"`
- Hostname string `json:"hostname"`
- IPAddress string `json:"ipAddress"`
- Status string `json:"status"`
- IsLocal bool `json:"isLocal"`
- OS string `json:"os"`
- Arch string `json:"arch"`
- AgentVersion string `json:"agentVersion"`
- LastSeen time.Time `json:"lastSeen"`
- CreatedAt time.Time `json:"createdAt"`
+ ID uint `json:"id"`
+ Name string `json:"name"`
+ Hostname string `json:"hostname"`
+ IPAddress string `json:"ipAddress"`
+ Status string `json:"status"`
+ IsLocal bool `json:"isLocal"`
+ OS string `json:"os"`
+ Arch string `json:"arch"`
+ AgentVersion string `json:"agentVersion"`
+ LastSeen time.Time `json:"lastSeen"`
+ MaxConcurrent int `json:"maxConcurrent"`
+ BandwidthLimit string `json:"bandwidthLimit"`
+ CreatedAt time.Time `json:"createdAt"`
}
// NodeCreateInput is the input for creating a new remote node.
@@ -42,7 +44,9 @@ type NodeCreateInput struct {
// NodeUpdateInput 是编辑节点的输入。
type NodeUpdateInput struct {
- Name string `json:"name" binding:"required"`
+ Name string `json:"name" binding:"required"`
+ MaxConcurrent int `json:"maxConcurrent"`
+ BandwidthLimit string `json:"bandwidthLimit" binding:"max=32"`
}
// NodeService manages the cluster nodes.
@@ -116,17 +120,19 @@ func (s *NodeService) List(ctx context.Context) ([]NodeSummary, error) {
result := make([]NodeSummary, len(nodes))
for i, n := range nodes {
result[i] = NodeSummary{
- ID: n.ID,
- Name: n.Name,
- Hostname: n.Hostname,
- IPAddress: n.IPAddress,
- Status: n.Status,
- IsLocal: n.IsLocal,
- OS: n.OS,
- Arch: n.Arch,
- AgentVersion: n.AgentVer,
- LastSeen: n.LastSeen,
- CreatedAt: n.CreatedAt,
+ ID: n.ID,
+ Name: n.Name,
+ Hostname: n.Hostname,
+ IPAddress: n.IPAddress,
+ Status: n.Status,
+ IsLocal: n.IsLocal,
+ OS: n.OS,
+ Arch: n.Arch,
+ AgentVersion: n.AgentVer,
+ LastSeen: n.LastSeen,
+ MaxConcurrent: n.MaxConcurrent,
+ BandwidthLimit: n.BandwidthLimit,
+ CreatedAt: n.CreatedAt,
}
}
return result, nil
@@ -141,17 +147,19 @@ func (s *NodeService) Get(ctx context.Context, id uint) (*NodeSummary, error) {
return nil, apperror.New(http.StatusNotFound, "NODE_NOT_FOUND", "节点不存在", nil)
}
return &NodeSummary{
- ID: node.ID,
- Name: node.Name,
- Hostname: node.Hostname,
- IPAddress: node.IPAddress,
- Status: node.Status,
- IsLocal: node.IsLocal,
- OS: node.OS,
- Arch: node.Arch,
- AgentVersion: node.AgentVer,
- LastSeen: node.LastSeen,
- CreatedAt: node.CreatedAt,
+ ID: node.ID,
+ Name: node.Name,
+ Hostname: node.Hostname,
+ IPAddress: node.IPAddress,
+ Status: node.Status,
+ IsLocal: node.IsLocal,
+ OS: node.OS,
+ Arch: node.Arch,
+ AgentVersion: node.AgentVer,
+ LastSeen: node.LastSeen,
+ MaxConcurrent: node.MaxConcurrent,
+ BandwidthLimit: node.BandwidthLimit,
+ CreatedAt: node.CreatedAt,
}, nil
}
@@ -307,6 +315,11 @@ func (s *NodeService) Update(ctx context.Context, id uint, input NodeUpdateInput
return nil, apperror.New(http.StatusNotFound, "NODE_NOT_FOUND", "节点不存在", nil)
}
node.Name = strings.TrimSpace(input.Name)
+ if input.MaxConcurrent < 0 {
+ return nil, apperror.BadRequest("NODE_INVALID", "并发上限不能为负数", nil)
+ }
+ node.MaxConcurrent = input.MaxConcurrent
+ node.BandwidthLimit = strings.TrimSpace(input.BandwidthLimit)
if err := s.repo.Update(ctx, node); err != nil {
return nil, err
}
diff --git a/server/internal/service/notification_service.go b/server/internal/service/notification_service.go
index 91ab31c..be241bc 100644
--- a/server/internal/service/notification_service.go
+++ b/server/internal/service/notification_service.go
@@ -16,22 +16,27 @@ import (
)
type NotificationUpsertInput struct {
- Name string `json:"name" binding:"required,min=1,max=100"`
- Type string `json:"type" binding:"required,oneof=email webhook telegram"`
- Enabled bool `json:"enabled"`
- OnSuccess bool `json:"onSuccess"`
- OnFailure bool `json:"onFailure"`
- Config map[string]any `json:"config" binding:"required"`
+ Name string `json:"name" binding:"required,min=1,max=100"`
+ Type string `json:"type" binding:"required,oneof=email webhook telegram"`
+ Enabled bool `json:"enabled"`
+ OnSuccess bool `json:"onSuccess"`
+ OnFailure bool `json:"onFailure"`
+ // EventTypes 订阅的扩展事件列表。与 OnSuccess/OnFailure 并存:
+ // - 两者均空时,订阅"备份成功/失败"对应原有语义(兼容)。
+ // - EventTypes 显式指定时优先按清单匹配。
+ EventTypes []string `json:"eventTypes"`
+ Config map[string]any `json:"config" binding:"required"`
}
type NotificationSummary struct {
- ID uint `json:"id"`
- Name string `json:"name"`
- Type string `json:"type"`
- Enabled bool `json:"enabled"`
- OnSuccess bool `json:"onSuccess"`
- OnFailure bool `json:"onFailure"`
- UpdatedAt time.Time `json:"updatedAt"`
+ ID uint `json:"id"`
+ Name string `json:"name"`
+ Type string `json:"type"`
+ Enabled bool `json:"enabled"`
+ OnSuccess bool `json:"onSuccess"`
+ OnFailure bool `json:"onFailure"`
+ EventTypes []string `json:"eventTypes"`
+ UpdatedAt time.Time `json:"updatedAt"`
}
type NotificationDetail struct {
@@ -44,6 +49,13 @@ type NotificationService struct {
notifications repository.NotificationRepository
registry *notify.Registry
cipher *codec.ConfigCipher
+ // broadcaster 可选:用于同步把事件推送给 SSE 订阅者(Dashboard 实时刷新)
+ broadcaster *EventBroadcaster
+}
+
+// SetBroadcaster 注入事件广播器,每次 DispatchEvent 同时走 SSE 实时通道。
+func (s *NotificationService) SetBroadcaster(b *EventBroadcaster) {
+ s.broadcaster = b
}
func NewNotificationService(notifications repository.NotificationRepository, registry *notify.Registry, cipher *codec.ConfigCipher) *NotificationService {
@@ -156,11 +168,88 @@ func (s *NotificationService) TestSaved(ctx context.Context, id uint) error {
func (s *NotificationService) NotifyBackupResult(ctx context.Context, event BackupExecutionNotification) error {
success := event.Error == nil && event.Record != nil && event.Record.Status == "success"
- items, err := s.notifications.ListEnabledForEvent(ctx, success)
+ eventType := model.NotificationEventBackupFailed
+ if success {
+ eventType = model.NotificationEventBackupSuccess
+ }
+ items, err := s.collectSubscribers(ctx, eventType, success)
if err != nil {
return err
}
message := buildNotificationMessage(event)
+ message.Fields["eventType"] = eventType
+ return s.deliver(ctx, items, message)
+}
+
+// DispatchEvent is the generic dispatch entry point for any enterprise event.
+// - title / body / fields build the notification content
+// - eventType maps to a model.NotificationEvent* constant and drives subscription matching
+//
+// Subscription matching rules:
+// 1) notification.EventTypes non-empty: it must contain eventType
+// 2) notification.EventTypes empty: fall back to the OnSuccess/OnFailure switches (backup_* events only)
+func (s *NotificationService) DispatchEvent(ctx context.Context, eventType string, title string, body string, fields map[string]any) error {
+	// Synchronously broadcast to SSE subscribers (real-time push to the frontend Dashboard).
+	// Non-blocking: even if the broadcaster is absent or subscribers are saturated, the persistent Notification channel is unaffected.
+	if s.broadcaster != nil {
+		_ = s.broadcaster.Publish(ctx, eventType, title, body, fields)
+	}
+	// The fallback boolean serves the legacy semantics (backup_success / backup_failed).
+	fallbackSuccess := eventType == model.NotificationEventBackupSuccess
+	items, err := s.collectSubscribers(ctx, eventType, fallbackSuccess)
+	if err != nil {
+		return err
+	}
+	if fields == nil {
+		fields = map[string]any{}
+	}
+	fields["eventType"] = eventType
+	fields["timestamp"] = time.Now().UTC().Format(time.RFC3339)
+	message := notify.Message{Title: title, Body: body, Fields: fields}
+	return s.deliver(ctx, items, message)
+}
+
+// collectSubscribers gathers enabled subscribers for the given event type.
+// It lists all enabled notifications and then filters by event type (avoids adding a new repository method).
+func (s *NotificationService) collectSubscribers(ctx context.Context, eventType string, fallbackSuccess bool) ([]model.Notification, error) {
+	all, err := s.notifications.List(ctx)
+	if err != nil {
+		return nil, err
+	}
+	matched := make([]model.Notification, 0, len(all))
+	for _, item := range all {
+		if !item.Enabled {
+			continue
+		}
+		events := decodeEventTypes(item.EventTypes)
+		if len(events) > 0 {
+			if !containsString(events, eventType) {
+				continue
+			}
+		} else {
+			// Legacy compatibility: only backup_success / backup_failed honor OnSuccess/OnFailure
+			switch eventType {
+			case model.NotificationEventBackupSuccess:
+				if !item.OnSuccess {
+					continue
+				}
+			case model.NotificationEventBackupFailed:
+				if !item.OnFailure {
+					continue
+				}
+			default:
+				// Other event types are only pushed when explicitly subscribed
+				continue
+			}
+			// Keep the fallbackSuccess parameter referenced to preserve historical behavior
+			_ = fallbackSuccess
+		}
+		matched = append(matched, item)
+	}
+	return matched, nil
+}
+
+func (s *NotificationService) deliver(ctx context.Context, items []model.Notification, message notify.Message) error {
var joined error
for _, item := range items {
configMap := map[string]any{}
@@ -175,6 +264,15 @@ func (s *NotificationService) NotifyBackupResult(ctx context.Context, event Back
return joined
}
+func containsString(items []string, target string) bool { // linear membership test over a small slice
+	for _, item := range items {
+		if item == target {
+			return true
+		}
+	}
+	return false
+}
+
func (s *NotificationService) validateInput(ctx context.Context, currentID uint, input NotificationUpsertInput) error {
existing, err := s.notifications.FindByName(ctx, strings.TrimSpace(input.Name))
if err != nil {
@@ -202,10 +300,49 @@ func (s *NotificationService) buildNotification(existing *model.Notification, in
if err != nil {
return nil, apperror.Internal("NOTIFICATION_ENCRYPT_FAILED", "无法保存通知配置", err)
}
- item := &model.Notification{Name: strings.TrimSpace(input.Name), Type: strings.TrimSpace(input.Type), ConfigCiphertext: ciphertext, Enabled: input.Enabled, OnSuccess: input.OnSuccess, OnFailure: input.OnFailure}
+ item := &model.Notification{
+ Name: strings.TrimSpace(input.Name),
+ Type: strings.TrimSpace(input.Type),
+ ConfigCiphertext: ciphertext,
+ Enabled: input.Enabled,
+ OnSuccess: input.OnSuccess,
+ OnFailure: input.OnFailure,
+ EventTypes: encodeEventTypes(input.EventTypes),
+ }
return item, nil
}
+// encodeEventTypes normalizes the event slice into a comma-separated string (dedup + trim).
+func encodeEventTypes(events []string) string {
+	seen := map[string]bool{}
+	out := make([]string, 0, len(events))
+	for _, e := range events {
+		trimmed := strings.TrimSpace(e)
+		if trimmed == "" || seen[trimmed] {
+			continue
+		}
+		seen[trimmed] = true
+		out = append(out, trimmed)
+	}
+	return strings.Join(out, ",")
+}
+
+// decodeEventTypes parses the stored comma-separated string back into a slice.
+func decodeEventTypes(value string) []string {
+	if strings.TrimSpace(value) == "" {
+		return nil
+	}
+	parts := strings.Split(value, ",")
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		trimmed := strings.TrimSpace(p)
+		if trimmed != "" {
+			out = append(out, trimmed)
+		}
+	}
+	return out
+}
+
func (s *NotificationService) toDetail(item *model.Notification) (*NotificationDetail, error) {
configMap := map[string]any{}
if err := s.cipher.DecryptJSON(item.ConfigCiphertext, &configMap); err != nil {
@@ -216,7 +353,16 @@ func (s *NotificationService) toDetail(item *model.Notification) (*NotificationD
}
func toNotificationSummary(item *model.Notification) NotificationSummary {
- return NotificationSummary{ID: item.ID, Name: item.Name, Type: item.Type, Enabled: item.Enabled, OnSuccess: item.OnSuccess, OnFailure: item.OnFailure, UpdatedAt: item.UpdatedAt}
+ return NotificationSummary{
+ ID: item.ID,
+ Name: item.Name,
+ Type: item.Type,
+ Enabled: item.Enabled,
+ OnSuccess: item.OnSuccess,
+ OnFailure: item.OnFailure,
+ EventTypes: decodeEventTypes(item.EventTypes),
+ UpdatedAt: item.UpdatedAt,
+ }
}
func buildNotificationMessage(event BackupExecutionNotification) notify.Message {
diff --git a/server/internal/service/replication_service.go b/server/internal/service/replication_service.go
new file mode 100644
index 0000000..6fce207
--- /dev/null
+++ b/server/internal/service/replication_service.go
@@ -0,0 +1,375 @@
+package service
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+ "backupx/server/internal/storage"
+ "backupx/server/internal/storage/codec"
+)
+
+// ReplicationService implements backup replication (the core of the 3-2-1 rule).
+// Semantics: mirror the source backup object from the source storage target to the dest target, keeping StoragePath.
+//
+// Trigger paths:
+// 1. Automatic: BackupExecutionService calls TriggerAutoReplication after a successful backup
+// 2. Manual: the frontend triggers Start from the BackupRecord detail page
+//
+// Execution model: asynchronous + node-agnostic (replication runs as a local download → upload on the Master).
+// Cross-node local_disk is unsupported (same protection as Download/Delete).
+type ReplicationService struct {
+	replications    repository.ReplicationRecordRepository
+	records         repository.BackupRecordRepository
+	targets         repository.StorageTargetRepository
+	nodeRepo        repository.NodeRepository
+	storageRegistry *storage.Registry
+	cipher          *codec.ConfigCipher
+	eventDispatcher EventDispatcher
+	tempDir         string
+	semaphore       chan struct{}
+	async           func(func())
+	now             func() time.Time
+}
+
+func NewReplicationService( // constructor; tempDir/maxConcurrent get safe defaults below
+	replications repository.ReplicationRecordRepository,
+	records repository.BackupRecordRepository,
+	targets repository.StorageTargetRepository,
+	nodeRepo repository.NodeRepository,
+	storageRegistry *storage.Registry,
+	cipher *codec.ConfigCipher,
+	tempDir string,
+	maxConcurrent int,
+) *ReplicationService {
+	if tempDir == "" { // default scratch directory when unset
+		tempDir = "/tmp/backupx-replicate"
+	}
+	if maxConcurrent <= 0 { // guard: default to 2 concurrent replications
+		maxConcurrent = 2
+	}
+	return &ReplicationService{
+		replications:    replications,
+		records:         records,
+		targets:         targets,
+		nodeRepo:        nodeRepo,
+		storageRegistry: storageRegistry,
+		cipher:          cipher,
+		tempDir:         tempDir,
+		semaphore:       make(chan struct{}, maxConcurrent),
+		async:           func(job func()) { go job() },
+		now:             func() time.Time { return time.Now().UTC() },
+	}
+}
+
+func (s *ReplicationService) SetEventDispatcher(dispatcher EventDispatcher) { // optional: enables replication_failed event dispatch
+	s.eventDispatcher = dispatcher
+}
+
+// ReplicationRecordSummary is the API list item for replication records.
+type ReplicationRecordSummary struct {
+	ID               uint       `json:"id"`
+	BackupRecordID   uint       `json:"backupRecordId"`
+	TaskID           uint       `json:"taskId"`
+	SourceTargetID   uint       `json:"sourceTargetId"`
+	SourceTargetName string     `json:"sourceTargetName"`
+	DestTargetID     uint       `json:"destTargetId"`
+	DestTargetName   string     `json:"destTargetName"`
+	Status           string     `json:"status"`
+	StoragePath      string     `json:"storagePath"`
+	FileSize         int64      `json:"fileSize"`
+	Checksum         string     `json:"checksum"`
+	ErrorMessage     string     `json:"errorMessage"`
+	DurationSeconds  int        `json:"durationSeconds"`
+	TriggeredBy      string     `json:"triggeredBy"`
+	StartedAt        time.Time  `json:"startedAt"`
+	CompletedAt      *time.Time `json:"completedAt,omitempty"`
+}
+
+type ReplicationRecordListInput struct { // filter & pagination options for List
+	TaskID         *uint
+	BackupRecordID *uint
+	DestTargetID   *uint
+	Status         string
+	DateFrom       *time.Time
+	DateTo         *time.Time
+	Limit          int
+	Offset         int
+}
+
+// TriggerAutoReplication is the backup-success hook: it auto-dispatches replication per task.ReplicationTargetIDs.
+// Best-effort: a failure for one target does not affect the others.
+func (s *ReplicationService) TriggerAutoReplication(ctx context.Context, task *model.BackupTask, record *model.BackupRecord) {
+	if task == nil || record == nil {
+		return
+	}
+	destIDs := parseUintCSV(task.ReplicationTargetIDs)
+	if len(destIDs) == 0 {
+		return
+	}
+	// Cross-node local_disk protection: the Master cannot access files on a remote node's local disk
+	if err := s.validateClusterAccessible(ctx, record); err != nil {
+		return
+	}
+	for _, destID := range destIDs {
+		if destID == record.StorageTargetID {
+			continue // source equals destination, skip
+		}
+		_, _ = s.Start(ctx, record.ID, destID, "system")
+	}
+}
+
+// Start begins one replication: synchronously create the ReplicationRecord, then execute asynchronously.
+func (s *ReplicationService) Start(ctx context.Context, backupRecordID, destTargetID uint, triggeredBy string) (*ReplicationRecordSummary, error) {
+	record, err := s.records.FindByID(ctx, backupRecordID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_RECORD_GET_FAILED", "无法获取备份记录", err)
+	}
+	if record == nil {
+		return nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", nil)
+	}
+	if record.Status != model.BackupRecordStatusSuccess {
+		return nil, apperror.BadRequest("REPLICATION_SOURCE_INVALID", "只能复制成功的备份记录", nil)
+	}
+	if destTargetID == 0 || destTargetID == record.StorageTargetID {
+		return nil, apperror.BadRequest("REPLICATION_DEST_INVALID", "目标存储无效或与源相同", nil)
+	}
+	if err := s.validateClusterAccessible(ctx, record); err != nil {
+		return nil, err
+	}
+	dest, err := s.targets.FindByID(ctx, destTargetID)
+	if err != nil || dest == nil {
+		return nil, apperror.BadRequest("REPLICATION_DEST_INVALID", "目标存储不存在", err)
+	}
+	if !dest.Enabled {
+		return nil, apperror.BadRequest("REPLICATION_DEST_DISABLED", "目标存储已禁用", nil)
+	}
+	startedAt := s.now()
+	rep := &model.ReplicationRecord{
+		BackupRecordID: backupRecordID,
+		TaskID:         record.TaskID,
+		SourceTargetID: record.StorageTargetID,
+		DestTargetID:   destTargetID,
+		Status:         model.ReplicationStatusRunning,
+		StoragePath:    record.StoragePath,
+		TriggeredBy:    strings.TrimSpace(triggeredBy),
+		StartedAt:      startedAt,
+	}
+	if err := s.replications.Create(ctx, rep); err != nil {
+		return nil, apperror.Internal("REPLICATION_CREATE_FAILED", "无法创建复制记录", err)
+	}
+	s.async(func() {
+		s.executeReplication(context.Background(), rep.ID) // detached ctx: the job outlives the HTTP request
+	})
+	summary := s.toSummary(rep, "", dest.Name)
+	return &summary, nil
+}
+
+// executeReplication does the actual work: download the source object to a local temp file → upload to the dest storage.
+func (s *ReplicationService) executeReplication(ctx context.Context, repID uint) {
+	s.semaphore <- struct{}{}
+	defer func() { <-s.semaphore }()
+
+	rep, err := s.replications.FindByID(ctx, repID)
+	if err != nil || rep == nil {
+		return
+	}
+	status := model.ReplicationStatusFailed // pessimistic default; flipped to success at the end
+	errMessage := ""
+	fileSize := int64(0)
+
+	defer func() { // always persist the terminal state, and notify on failure
+		completedAt := s.now()
+		rep.Status = status
+		rep.FileSize = fileSize
+		rep.ErrorMessage = strings.TrimSpace(errMessage)
+		rep.DurationSeconds = int(completedAt.Sub(rep.StartedAt).Seconds())
+		rep.CompletedAt = &completedAt
+		_ = s.replications.Update(ctx, rep)
+		if status == model.ReplicationStatusFailed {
+			s.dispatchFailed(ctx, rep, errMessage)
+		}
+	}()
+
+	sourceProvider, err := s.resolveProvider(ctx, rep.SourceTargetID)
+	if err != nil {
+		errMessage = err.Error()
+		return
+	}
+	destProvider, err := s.resolveProvider(ctx, rep.DestTargetID)
+	if err != nil {
+		errMessage = err.Error()
+		return
+	}
+	if err := os.MkdirAll(s.tempDir, 0o755); err != nil {
+		errMessage = err.Error()
+		return
+	}
+	tempDir, err := os.MkdirTemp(s.tempDir, "replicate-*")
+	if err != nil {
+		errMessage = err.Error()
+		return
+	}
+	defer os.RemoveAll(tempDir)
+
+	reader, err := sourceProvider.Download(ctx, rep.StoragePath)
+	if err != nil {
+		errMessage = fmt.Sprintf("下载源对象失败: %v", err)
+		return
+	}
+	localPath := filepath.Join(tempDir, filepath.Base(rep.StoragePath))
+	if err := writeReaderToFile(localPath, reader); err != nil {
+		errMessage = fmt.Sprintf("写入临时文件失败: %v", err)
+		return
+	}
+	info, err := os.Stat(localPath)
+	if err != nil {
+		errMessage = err.Error()
+		return
+	}
+	fileSize = info.Size()
+	file, err := os.Open(localPath)
+	if err != nil {
+		errMessage = err.Error()
+		return
+	}
+	defer file.Close()
+	meta := map[string]string{
+		"replicationId": strconv.FormatUint(uint64(rep.ID), 10),
+		"sourceRecord":  strconv.FormatUint(uint64(rep.BackupRecordID), 10),
+	}
+	if err := destProvider.Upload(ctx, rep.StoragePath, file, fileSize, meta); err != nil {
+		errMessage = fmt.Sprintf("上传到目标存储失败: %v", err)
+		return
+	}
+	rep.Checksum = "" // optional: callers may recompute SHA-256 on demand
+	status = model.ReplicationStatusSuccess
+}
+
+func (s *ReplicationService) resolveProvider(ctx context.Context, targetID uint) (storage.StorageProvider, error) { // decrypt target config → build a storage provider
+	target, err := s.targets.FindByID(ctx, targetID)
+	if err != nil {
+		return nil, apperror.Internal("STORAGE_TARGET_GET_FAILED", "无法获取存储目标", err)
+	}
+	if target == nil {
+		return nil, apperror.BadRequest("STORAGE_TARGET_INVALID", "存储目标不存在", nil)
+	}
+	configMap := map[string]any{}
+	if err := s.cipher.DecryptJSON(target.ConfigCiphertext, &configMap); err != nil {
+		return nil, apperror.Internal("STORAGE_TARGET_DECRYPT_FAILED", "无法解密存储配置", err)
+	}
+	return s.storageRegistry.Create(ctx, target.Type, configMap)
+}
+
+// validateClusterAccessible rejects cross-node local_disk sources (the Master cannot pull them)
+func (s *ReplicationService) validateClusterAccessible(ctx context.Context, record *model.BackupRecord) error {
+	if record == nil || record.NodeID == 0 || s.nodeRepo == nil {
+		return nil
+	}
+	node, err := s.nodeRepo.FindByID(ctx, record.NodeID)
+	if err != nil || node == nil || node.IsLocal {
+		return nil
+	}
+	target, err := s.targets.FindByID(ctx, record.StorageTargetID)
+	if err != nil || target == nil {
+		return nil
+	}
+	if strings.EqualFold(target.Type, "local_disk") {
+		return apperror.BadRequest("REPLICATION_CROSS_NODE_LOCAL_DISK",
+			fmt.Sprintf("备份位于节点 %s 的本地磁盘(local_disk),Master 无法跨节点复制。请改用云存储作为主备份。", node.Name),
+			nil)
+	}
+	return nil
+}
+
+func (s *ReplicationService) dispatchFailed(ctx context.Context, rep *model.ReplicationRecord, message string) { // emit replication_failed event; no-op without a dispatcher
+	if s.eventDispatcher == nil || rep == nil {
+		return
+	}
+	title := "BackupX 备份复制失败"
+	body := fmt.Sprintf("备份记录:#%d\n源 → 目标:#%d → #%d\n错误:%s", rep.BackupRecordID, rep.SourceTargetID, rep.DestTargetID, message)
+	fields := map[string]any{
+		"replicationId":  rep.ID,
+		"backupRecordId": rep.BackupRecordID,
+		"taskId":         rep.TaskID,
+		"sourceTargetId": rep.SourceTargetID,
+		"destTargetId":   rep.DestTargetID,
+		"error":          message,
+	}
+	_ = s.eventDispatcher.DispatchEvent(ctx, model.NotificationEventReplicationFailed, title, body, fields)
+}
+
+// List returns replication record summaries matching the given filter/pagination options.
+func (s *ReplicationService) List(ctx context.Context, input ReplicationRecordListInput) ([]ReplicationRecordSummary, error) {
+	items, err := s.replications.List(ctx, repository.ReplicationRecordListOptions{
+		TaskID: input.TaskID, BackupRecordID: input.BackupRecordID, DestTargetID: input.DestTargetID,
+		Status: strings.TrimSpace(input.Status), DateFrom: input.DateFrom, DateTo: input.DateTo,
+		Limit: input.Limit, Offset: input.Offset,
+	})
+	if err != nil {
+		return nil, apperror.Internal("REPLICATION_LIST_FAILED", "无法获取复制记录", err)
+	}
+	result := make([]ReplicationRecordSummary, 0, len(items))
+	for i := range items {
+		item := items[i]
+		result = append(result, s.toSummary(&item, item.SourceTarget.Name, item.DestTarget.Name))
+	}
+	return result, nil
+}
+
+func (s *ReplicationService) Get(ctx context.Context, id uint) (*ReplicationRecordSummary, error) { // fetch a single replication record; 404 when absent
+	item, err := s.replications.FindByID(ctx, id)
+	if err != nil {
+		return nil, apperror.Internal("REPLICATION_GET_FAILED", "无法获取复制记录", err)
+	}
+	if item == nil {
+		return nil, apperror.New(404, "REPLICATION_NOT_FOUND", "复制记录不存在", nil)
+	}
+	summary := s.toSummary(item, item.SourceTarget.Name, item.DestTarget.Name)
+	return &summary, nil
+}
+
+func (s *ReplicationService) toSummary(rep *model.ReplicationRecord, sourceName, destName string) ReplicationRecordSummary { // model → API DTO mapping
+	return ReplicationRecordSummary{
+		ID: rep.ID, BackupRecordID: rep.BackupRecordID, TaskID: rep.TaskID,
+		SourceTargetID: rep.SourceTargetID, SourceTargetName: sourceName,
+		DestTargetID: rep.DestTargetID, DestTargetName: destName,
+		Status: rep.Status, StoragePath: rep.StoragePath, FileSize: rep.FileSize,
+		Checksum: rep.Checksum, ErrorMessage: rep.ErrorMessage, DurationSeconds: rep.DurationSeconds,
+		TriggeredBy: rep.TriggeredBy, StartedAt: rep.StartedAt, CompletedAt: rep.CompletedAt,
+	}
+}
+
+// parseUintCSV parses a comma-separated list of uints, skipping invalid entries.
+func parseUintCSV(value string) []uint {
+	if strings.TrimSpace(value) == "" {
+		return nil
+	}
+	parts := strings.Split(value, ",")
+	out := make([]uint, 0, len(parts))
+	seen := map[uint]bool{}
+	for _, p := range parts {
+		trimmed := strings.TrimSpace(p)
+		if trimmed == "" {
+			continue
+		}
+		parsed, err := strconv.ParseUint(trimmed, 10, 32)
+		if err != nil {
+			continue
+		}
+		id := uint(parsed)
+		if seen[id] {
+			continue
+		}
+		seen[id] = true
+		out = append(out, id)
+	}
+	return out
+}
diff --git a/server/internal/service/restore_service.go b/server/internal/service/restore_service.go
new file mode 100644
index 0000000..55135f2
--- /dev/null
+++ b/server/internal/service/restore_service.go
@@ -0,0 +1,715 @@
+package service
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/backup"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+ "backupx/server/internal/storage"
+ "backupx/server/internal/storage/codec"
+ "backupx/server/pkg/compress"
+ backupcrypto "backupx/server/pkg/crypto"
+)
+
+// RestoreService manages the restore-record lifecycle and routes execution across the cluster.
+//
+// Execution model:
+// - task.NodeID == 0 or the local node: Master executes locally and asynchronously (runner.Restore); logs are pushed to the frontend via LogHub
+// - task.NodeID points at a remote node: an AgentCommand("restore_record") is enqueued; the Agent pulls the spec, executes locally,
+//   reports logs/status back over HTTP, and the Master re-broadcasts to LogHub
+type RestoreService struct {
+	restores        repository.RestoreRecordRepository
+	records         repository.BackupRecordRepository
+	tasks           repository.BackupTaskRepository
+	targets         repository.StorageTargetRepository
+	nodeRepo        repository.NodeRepository
+	storageRegistry *storage.Registry
+	runnerRegistry  *backup.Registry
+	logHub          *backup.LogHub
+	cipher          *codec.ConfigCipher
+	dispatcher      AgentDispatcher
+	eventDispatcher EventDispatcher
+	tempDir         string
+	semaphore       chan struct{}
+	async           func(func())
+	now             func() time.Time
+}
+
+// NewRestoreService constructs the restore service. maxConcurrent bounds local concurrent restores.
+func NewRestoreService(
+	restores repository.RestoreRecordRepository,
+	records repository.BackupRecordRepository,
+	tasks repository.BackupTaskRepository,
+	targets repository.StorageTargetRepository,
+	nodeRepo repository.NodeRepository,
+	storageRegistry *storage.Registry,
+	runnerRegistry *backup.Registry,
+	logHub *backup.LogHub,
+	cipher *codec.ConfigCipher,
+	dispatcher AgentDispatcher,
+	tempDir string,
+	maxConcurrent int,
+) *RestoreService {
+	if tempDir == "" { // default scratch directory when unset
+		tempDir = "/tmp/backupx-restore"
+	}
+	if maxConcurrent <= 0 { // guard: default to 2 concurrent restores
+		maxConcurrent = 2
+	}
+	return &RestoreService{
+		restores:        restores,
+		records:         records,
+		tasks:           tasks,
+		targets:         targets,
+		nodeRepo:        nodeRepo,
+		storageRegistry: storageRegistry,
+		runnerRegistry:  runnerRegistry,
+		logHub:          logHub,
+		cipher:          cipher,
+		dispatcher:      dispatcher,
+		tempDir:         tempDir,
+		semaphore:       make(chan struct{}, maxConcurrent),
+		async:           func(job func()) { go job() },
+		now:             func() time.Time { return time.Now().UTC() },
+	}
+}
+
+// SetEventDispatcher injects the event-dispatch channel, used for webhook dispatch on restore success/failure.
+func (s *RestoreService) SetEventDispatcher(dispatcher EventDispatcher) {
+	s.eventDispatcher = dispatcher
+}
+
+// RestoreRecordSummary is the API list item for restore records.
+type RestoreRecordSummary struct {
+	ID              uint       `json:"id"`
+	BackupRecordID  uint       `json:"backupRecordId"`
+	TaskID          uint       `json:"taskId"`
+	TaskName        string     `json:"taskName"`
+	NodeID          uint       `json:"nodeId"`
+	NodeName        string     `json:"nodeName,omitempty"`
+	Status          string     `json:"status"`
+	ErrorMessage    string     `json:"errorMessage"`
+	DurationSeconds int        `json:"durationSeconds"`
+	StartedAt       time.Time  `json:"startedAt"`
+	CompletedAt     *time.Time `json:"completedAt,omitempty"`
+	TriggeredBy     string     `json:"triggeredBy"`
+	BackupFileName  string     `json:"backupFileName,omitempty"`
+}
+
+// RestoreRecordDetail is the detail view (summary plus logs).
+type RestoreRecordDetail struct {
+	RestoreRecordSummary
+	LogContent string            `json:"logContent"`
+	LogEvents  []backup.LogEvent `json:"logEvents,omitempty"`
+}
+
+// Start triggers one restore and returns the newly created RestoreRecord detail.
+// If the task is bound to a remote node: enqueue an AgentCommand and return immediately (status running).
+// If local: execute asynchronously and return immediately.
+func (s *RestoreService) Start(ctx context.Context, backupRecordID uint, triggeredBy string) (*RestoreRecordDetail, error) {
+	record, err := s.records.FindByID(ctx, backupRecordID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_RECORD_GET_FAILED", "无法获取备份记录", err)
+	}
+	if record == nil {
+		return nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", fmt.Errorf("backup record %d not found", backupRecordID))
+	}
+	if record.Status != model.BackupRecordStatusSuccess {
+		return nil, apperror.BadRequest("RESTORE_SOURCE_INVALID", "只能恢复状态为成功的备份记录", nil)
+	}
+	task, err := s.tasks.FindByID(ctx, record.TaskID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_TASK_GET_FAILED", "无法获取关联备份任务", err)
+	}
+	if task == nil {
+		return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "关联的备份任务不存在", fmt.Errorf("backup task %d not found", record.TaskID))
+	}
+
+	startedAt := s.now()
+	restore := &model.RestoreRecord{
+		BackupRecordID: backupRecordID,
+		TaskID:         record.TaskID,
+		NodeID:         task.NodeID,
+		Status:         model.RestoreRecordStatusRunning,
+		StartedAt:      startedAt,
+		TriggeredBy:    strings.TrimSpace(triggeredBy),
+	}
+	if err := s.restores.Create(ctx, restore); err != nil {
+		return nil, apperror.Internal("RESTORE_RECORD_CREATE_FAILED", "无法创建恢复记录", err)
+	}
+
+	// Remote-node routing
+	if remoteNode := s.resolveRemoteNode(ctx, task.NodeID); remoteNode != nil {
+		if s.dispatcher == nil {
+			return nil, apperror.Internal("RESTORE_DISPATCH_UNAVAILABLE", "Agent 下发通道未就绪", nil)
+		}
+		// Node offline → mark failed right away so the record never sticks in running
+		if remoteNode.Status != model.NodeStatusOnline {
+			offlineMsg := fmt.Sprintf("节点 %s 当前离线,无法执行恢复", remoteNode.Name)
+			_ = s.finalize(ctx, restore.ID, model.RestoreRecordStatusFailed, offlineMsg)
+			s.logHub.Append(restore.ID, "error", offlineMsg)
+			s.logHub.Complete(restore.ID, model.RestoreRecordStatusFailed)
+			return nil, apperror.BadRequest("NODE_OFFLINE", offlineMsg, nil)
+		}
+		if _, dispatchErr := s.dispatcher.EnqueueCommand(ctx, task.NodeID, model.AgentCommandTypeRestoreRecord, map[string]any{
+			"restoreRecordId": restore.ID,
+		}); dispatchErr != nil {
+			_ = s.finalize(ctx, restore.ID, model.RestoreRecordStatusFailed,
+				"下发恢复任务到远程节点失败: "+dispatchErr.Error())
+			return nil, apperror.Internal("AGENT_COMMAND_ENQUEUE_FAILED", "无法下发恢复任务到远程节点", dispatchErr)
+		}
+		s.logHub.Append(restore.ID, "info", fmt.Sprintf("已下发恢复任务到节点 %s(#%d),等待 Agent 执行", remoteNode.Name, task.NodeID))
+		return s.getDetail(ctx, restore.ID)
+	}
+
+	// Local node: execute asynchronously
+	run := func() {
+		s.executeLocally(context.Background(), restore.ID, task, record)
+	}
+	s.async(run)
+	return s.getDetail(ctx, restore.ID)
+}
+
+// isRemoteNode reports whether NodeID points at a valid remote node.
+func (s *RestoreService) isRemoteNode(ctx context.Context, nodeID uint) bool {
+	return s.resolveRemoteNode(ctx, nodeID) != nil
+}
+
+// resolveRemoteNode returns the remote node pointer (including Status), used for the offline check.
+func (s *RestoreService) resolveRemoteNode(ctx context.Context, nodeID uint) *model.Node {
+	if s.nodeRepo == nil || s.dispatcher == nil || nodeID == 0 {
+		return nil
+	}
+	node, err := s.nodeRepo.FindByID(ctx, nodeID)
+	if err != nil || node == nil || node.IsLocal {
+		return nil
+	}
+	return node
+}
+
+// executeLocally performs the restore on the Master itself.
+func (s *RestoreService) executeLocally(ctx context.Context, restoreID uint, task *model.BackupTask, backupRecord *model.BackupRecord) {
+	s.semaphore <- struct{}{}
+	defer func() { <-s.semaphore }()
+
+	logger := backup.NewExecutionLogger(restoreID, s.logHub)
+	status := model.RestoreRecordStatusFailed // pessimistic default; flipped to success at the end
+	errMessage := ""
+
+	defer func() { // always persist the terminal state, close the log stream, and dispatch the event
+		finalizeErr := s.finalizeWithLog(ctx, restoreID, status, errMessage, logger.String())
+		if finalizeErr != nil {
+			logger.Errorf("写回恢复记录失败:%v", finalizeErr)
+		}
+		s.logHub.Complete(restoreID, status)
+		s.dispatchRestoreEvent(ctx, restoreID, status, errMessage, task)
+	}()
+
+	logger.Infof("开始在本地执行恢复(备份记录 #%d)", backupRecord.ID)
+	provider, providerErr := s.resolveProvider(ctx, backupRecord.StorageTargetID)
+	if providerErr != nil {
+		errMessage = providerErr.Error()
+		logger.Errorf("创建存储客户端失败:%v", providerErr)
+		return
+	}
+
+	if err := os.MkdirAll(s.tempDir, 0o755); err != nil {
+		errMessage = err.Error()
+		logger.Errorf("创建恢复临时父目录失败:%v", err)
+		return
+	}
+	tempDir, tempErr := os.MkdirTemp(s.tempDir, "restore-*")
+	if tempErr != nil {
+		errMessage = tempErr.Error()
+		logger.Errorf("创建恢复临时目录失败:%v", tempErr)
+		return
+	}
+	defer os.RemoveAll(tempDir)
+
+	fileName := backupRecord.FileName
+	if strings.TrimSpace(fileName) == "" { // fall back to the storage path's base name
+		fileName = filepath.Base(backupRecord.StoragePath)
+	}
+	artifactPath := filepath.Join(tempDir, filepath.Base(fileName))
+	logger.Infof("开始下载备份文件:%s", backupRecord.StoragePath)
+	reader, downloadErr := provider.Download(ctx, backupRecord.StoragePath)
+	if downloadErr != nil {
+		errMessage = downloadErr.Error()
+		logger.Errorf("下载备份文件失败:%v", downloadErr)
+		return
+	}
+	if writeErr := writeReaderToFile(artifactPath, reader); writeErr != nil {
+		errMessage = writeErr.Error()
+		logger.Errorf("写入恢复文件失败:%v", writeErr)
+		return
+	}
+	preparedPath, prepareErr := s.prepareArtifact(artifactPath, logger)
+	if prepareErr != nil {
+		errMessage = prepareErr.Error()
+		logger.Errorf("准备恢复文件失败:%v", prepareErr)
+		return
+	}
+
+	spec, specErr := s.buildTaskSpec(task, backupRecord.StartedAt)
+	if specErr != nil {
+		errMessage = specErr.Error()
+		logger.Errorf("构建恢复规格失败:%v", specErr)
+		return
+	}
+	runner, runnerErr := s.runnerRegistry.Runner(spec.Type)
+	if runnerErr != nil {
+		errMessage = runnerErr.Error()
+		logger.Errorf("不支持的备份类型:%v", runnerErr)
+		return
+	}
+	logger.Infof("开始执行 %s 恢复", spec.Type)
+	if restoreErr := runner.Restore(ctx, spec, preparedPath, logger); restoreErr != nil {
+		errMessage = restoreErr.Error()
+		logger.Errorf("恢复执行失败:%v", restoreErr)
+		return
+	}
+	status = model.RestoreRecordStatusSuccess
+	logger.Infof("恢复执行成功")
+}
+
+// dispatchRestoreEvent dispatches restore_success or restore_failed to the event bus based on the terminal status.
+// Silently ignored when eventDispatcher is not injected, keeping backward compatibility.
+func (s *RestoreService) dispatchRestoreEvent(ctx context.Context, restoreID uint, status, errMessage string, task *model.BackupTask) {
+	if s.eventDispatcher == nil {
+		return
+	}
+	var eventType, title string
+	switch status {
+	case model.RestoreRecordStatusSuccess:
+		eventType = model.NotificationEventRestoreSuccess
+		title = "BackupX 恢复成功"
+	case model.RestoreRecordStatusFailed:
+		eventType = model.NotificationEventRestoreFailed
+		title = "BackupX 恢复失败"
+	default:
+		return
+	}
+	taskName := "未知任务"
+	if task != nil {
+		taskName = task.Name
+	}
+	body := fmt.Sprintf("任务:%s\n恢复记录:#%d\n状态:%s", taskName, restoreID, status)
+	if errMessage != "" {
+		body += "\n错误:" + errMessage
+	}
+	fields := map[string]any{
+		"restoreId": restoreID,
+		"taskName":  taskName,
+		"status":    status,
+		"error":     errMessage,
+	}
+	if task != nil {
+		fields["taskId"] = task.ID
+	}
+	_ = s.eventDispatcher.DispatchEvent(ctx, eventType, title, body, fields)
+}
+
+// resolveProvider looks up a storage target, decrypts its configuration and
+// builds the matching storage provider (mirrors BackupExecutionService).
+func (s *RestoreService) resolveProvider(ctx context.Context, targetID uint) (storage.StorageProvider, error) {
+	target, err := s.targets.FindByID(ctx, targetID)
+	switch {
+	case err != nil:
+		return nil, apperror.Internal("BACKUP_STORAGE_TARGET_GET_FAILED", "无法获取存储目标详情", err)
+	case target == nil:
+		return nil, apperror.BadRequest("BACKUP_STORAGE_TARGET_INVALID", "关联的存储目标不存在", nil)
+	}
+	config := map[string]any{}
+	if decErr := s.cipher.DecryptJSON(target.ConfigCiphertext, &config); decErr != nil {
+		return nil, apperror.Internal("BACKUP_STORAGE_TARGET_DECRYPT_FAILED", "无法解密存储目标配置", decErr)
+	}
+	return s.storageRegistry.Create(ctx, target.Type, config)
+}
+
+// prepareArtifact turns the downloaded artifact into a plain archive by
+// stripping, in order, the ".enc" (decrypt) and ".gz" (gunzip) layers that
+// the backup pipeline may have applied. Returns the prepared file's path.
+func (s *RestoreService) prepareArtifact(artifactPath string, logger *backup.ExecutionLogger) (string, error) {
+	path := artifactPath
+	if strings.HasSuffix(strings.ToLower(path), ".enc") {
+		logger.Infof("检测到加密后缀,开始解密")
+		plain, err := backupcrypto.DecryptFile(s.cipher.Key(), path)
+		if err != nil {
+			return "", err
+		}
+		path = plain
+	}
+	if strings.HasSuffix(strings.ToLower(path), ".gz") {
+		logger.Infof("检测到 gzip 压缩,开始解压")
+		raw, err := compress.GunzipFile(path)
+		if err != nil {
+			return "", err
+		}
+		path = raw
+	}
+	return path, nil
+}
+
+// buildTaskSpec replicates the core logic of
+// BackupExecutionService.buildTaskSpec: decode the task's JSON-encoded
+// columns (exclude patterns, source paths, extra config), decrypt the DB
+// password, and assemble the runner TaskSpec used to execute the restore.
+// Returns apperror.Internal when any stored field fails to decode/decrypt.
+func (s *RestoreService) buildTaskSpec(task *model.BackupTask, startedAt time.Time) (backup.TaskSpec, error) {
+	excludePatterns := []string{}
+	if strings.TrimSpace(task.ExcludePatterns) != "" {
+		if err := json.Unmarshal([]byte(task.ExcludePatterns), &excludePatterns); err != nil {
+			return backup.TaskSpec{}, apperror.Internal("BACKUP_TASK_DECODE_FAILED", "无法解析排除规则", err)
+		}
+	}
+	password := ""
+	if strings.TrimSpace(task.DBPasswordCiphertext) != "" {
+		plain, err := s.cipher.Decrypt(task.DBPasswordCiphertext)
+		if err != nil {
+			return backup.TaskSpec{}, apperror.Internal("BACKUP_TASK_DECRYPT_FAILED", "无法解密数据库密码", err)
+		}
+		password = string(plain)
+	}
+	sourcePaths := []string{}
+	if strings.TrimSpace(task.SourcePaths) != "" {
+		if err := json.Unmarshal([]byte(task.SourcePaths), &sourcePaths); err != nil {
+			return backup.TaskSpec{}, apperror.Internal("BACKUP_TASK_DECODE_FAILED", "无法解析源路径配置", err)
+		}
+	}
+	dbSpec := backup.DatabaseSpec{
+		Host:     task.DBHost,
+		Port:     task.DBPort,
+		User:     task.DBUser,
+		Password: password,
+		Names:    []string{task.DBName},
+		Path:     task.DBPath,
+	}
+	// ExtraConfig may carry HANA-specific overrides; apply them on top of dbSpec.
+	if strings.TrimSpace(task.ExtraConfig) != "" {
+		extra := map[string]any{}
+		if err := json.Unmarshal([]byte(task.ExtraConfig), &extra); err != nil {
+			return backup.TaskSpec{}, apperror.Internal("BACKUP_TASK_DECODE_FAILED", "无法解析扩展配置", err)
+		}
+		applyHANAExtraConfig(&dbSpec, extra)
+	}
+	return backup.TaskSpec{
+		ID:              task.ID,
+		Name:            task.Name,
+		Type:            task.Type,
+		SourcePath:      task.SourcePath,
+		SourcePaths:     sourcePaths,
+		ExcludePatterns: excludePatterns,
+		StorageTargetID: task.StorageTargetID,
+		Compression:     task.Compression,
+		Encrypt:         task.Encrypt,
+		RetentionDays:   task.RetentionDays,
+		MaxBackups:      task.MaxBackups,
+		StartedAt:       startedAt,
+		TempDir:         s.tempDir,
+		Database:        dbSpec,
+	}, nil
+}
+
+// finalize marks the restore record terminal (status + error) without
+// touching its log content; used on dispatch/setup failure paths where no
+// execution log exists yet.
+func (s *RestoreService) finalize(ctx context.Context, restoreID uint, status, errMessage string) error {
+	return s.finalizeWithLog(ctx, restoreID, status, errMessage, "")
+}
+
+// finalizeWithLog persists the terminal state of a restore record: status,
+// trimmed error message, optional (trimmed) log content, completion time and
+// the derived duration in seconds.
+func (s *RestoreService) finalizeWithLog(ctx context.Context, restoreID uint, status, errMessage, logContent string) error {
+	record, err := s.restores.FindByID(ctx, restoreID)
+	if err != nil {
+		return err
+	}
+	if record == nil {
+		return fmt.Errorf("restore record %d not found", restoreID)
+	}
+	finishedAt := s.now()
+	record.Status = status
+	record.ErrorMessage = strings.TrimSpace(errMessage)
+	if trimmedLog := strings.TrimSpace(logContent); trimmedLog != "" {
+		record.LogContent = trimmedLog
+	}
+	record.DurationSeconds = int(finishedAt.Sub(record.StartedAt).Seconds())
+	record.CompletedAt = &finishedAt
+	return s.restores.Update(ctx, record)
+}
+
+// Get returns the detail view of a single restore record
+// (404-style apperror when the record does not exist).
+func (s *RestoreService) Get(ctx context.Context, restoreID uint) (*RestoreRecordDetail, error) {
+	return s.getDetail(ctx, restoreID)
+}
+
+// List returns restore record summaries matching the filter, enriching each
+// row with its node name. Node names are cached per call so repeated rows on
+// the same node only hit the repository once.
+func (s *RestoreService) List(ctx context.Context, input RestoreRecordListInput) ([]RestoreRecordSummary, error) {
+	records, err := s.restores.List(ctx, repository.RestoreRecordListOptions{
+		TaskID:         input.TaskID,
+		BackupRecordID: input.BackupRecordID,
+		NodeID:         input.NodeID,
+		Status:         strings.TrimSpace(input.Status),
+		DateFrom:       input.DateFrom,
+		DateTo:         input.DateTo,
+		Limit:          input.Limit,
+		Offset:         input.Offset,
+	})
+	if err != nil {
+		return nil, apperror.Internal("RESTORE_RECORD_LIST_FAILED", "无法获取恢复记录列表", err)
+	}
+	nameCache := map[uint]string{}
+	summaries := make([]RestoreRecordSummary, 0, len(records))
+	for i := range records {
+		record := &records[i]
+		nodeName := ""
+		if record.NodeID > 0 && s.nodeRepo != nil {
+			if cached, ok := nameCache[record.NodeID]; ok {
+				nodeName = cached
+			} else if node, lookupErr := s.nodeRepo.FindByID(ctx, record.NodeID); lookupErr == nil && node != nil {
+				nodeName = node.Name
+				nameCache[record.NodeID] = node.Name
+			}
+		}
+		summaries = append(summaries, toRestoreRecordSummary(record, nodeName))
+	}
+	return summaries, nil
+}
+
+// SubscribeLogs attaches a live log subscriber to a restore record. It
+// verifies the record exists first, then returns the event channel together
+// with a cancel func the caller must invoke to release the subscription.
+func (s *RestoreService) SubscribeLogs(ctx context.Context, restoreID uint, buffer int) (<-chan backup.LogEvent, func(), error) {
+	record, err := s.restores.FindByID(ctx, restoreID)
+	switch {
+	case err != nil:
+		return nil, nil, apperror.Internal("RESTORE_RECORD_GET_FAILED", "无法获取恢复记录详情", err)
+	case record == nil:
+		return nil, nil, apperror.New(404, "RESTORE_RECORD_NOT_FOUND", "恢复记录不存在", nil)
+	}
+	events, cancel := s.logHub.Subscribe(restoreID, buffer)
+	return events, cancel, nil
+}
+
+// RestoreRecordListInput carries the optional filters and paging parameters
+// for RestoreService.List. Nil pointer fields mean "no filter"; Status is
+// trimmed before use; Limit/Offset are forwarded to the repository as-is.
+type RestoreRecordListInput struct {
+	TaskID         *uint
+	BackupRecordID *uint
+	NodeID         *uint
+	Status         string
+	DateFrom       *time.Time
+	DateTo         *time.Time
+	Limit          int
+	Offset         int
+}
+
+// --- Agent-side API ---
+
+// AgentRestoreSpec is the complete specification handed to a remote Agent to
+// execute a restore: task identity, type-specific source/database settings,
+// the decrypted storage target configuration and the artifact location.
+// Note: DBPassword and Storage.Config are sent decrypted — this payload is
+// for the authenticated node channel only.
+type AgentRestoreSpec struct {
+	RestoreRecordID uint                     `json:"restoreRecordId"`
+	BackupRecordID  uint                     `json:"backupRecordId"`
+	TaskID          uint                     `json:"taskId"`
+	TaskName        string                   `json:"taskName"`
+	Type            string                   `json:"type"`
+	SourcePath      string                   `json:"sourcePath,omitempty"`
+	SourcePaths     []string                 `json:"sourcePaths,omitempty"`
+	DBHost          string                   `json:"dbHost,omitempty"`
+	DBPort          int                      `json:"dbPort,omitempty"`
+	DBUser          string                   `json:"dbUser,omitempty"`
+	DBPassword      string                   `json:"dbPassword,omitempty"`
+	DBName          string                   `json:"dbName,omitempty"`
+	DBPath          string                   `json:"dbPath,omitempty"`
+	ExtraConfig     string                   `json:"extraConfig,omitempty"`
+	Compression     string                   `json:"compression"`
+	Encrypt         bool                     `json:"encrypt"`
+	Storage         AgentStorageTargetConfig `json:"storage"`
+	StoragePath     string                   `json:"storagePath"`
+	FileName        string                   `json:"fileName"`
+}
+
+// AgentRestoreUpdate is an incremental progress report sent back by the
+// Agent: an optional new status, an error message for failures, and log
+// lines to append (newline separated).
+type AgentRestoreUpdate struct {
+	Status       string `json:"status,omitempty"`
+	ErrorMessage string `json:"errorMessage,omitempty"`
+	LogAppend    string `json:"logAppend,omitempty"`
+}
+
+// GetAgentRestoreSpec lets an Agent pull the full restore specification.
+// The restore record must belong to the calling node; the related backup
+// record, task and storage target must all still exist. The DB password and
+// the storage target configuration are decrypted before being handed out.
+func (s *RestoreService) GetAgentRestoreSpec(ctx context.Context, node *model.Node, restoreID uint) (*AgentRestoreSpec, error) {
+	restore, err := s.restores.FindByID(ctx, restoreID)
+	if err != nil {
+		return nil, err
+	}
+	if restore == nil {
+		return nil, apperror.New(404, "RESTORE_RECORD_NOT_FOUND", "恢复记录不存在", nil)
+	}
+	if restore.NodeID != node.ID {
+		return nil, apperror.Unauthorized("RESTORE_RECORD_FORBIDDEN", "恢复记录不属于当前节点", nil)
+	}
+	backupRecord, err := s.records.FindByID(ctx, restore.BackupRecordID)
+	if err != nil {
+		return nil, err
+	}
+	if backupRecord == nil {
+		return nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "源备份记录不存在", nil)
+	}
+	task, err := s.tasks.FindByID(ctx, restore.TaskID)
+	if err != nil {
+		return nil, err
+	}
+	if task == nil {
+		return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "备份任务不存在", nil)
+	}
+	// Decrypt the database password for the agent-side runner.
+	dbPassword := ""
+	if strings.TrimSpace(task.DBPasswordCiphertext) != "" {
+		plain, decErr := s.cipher.Decrypt(task.DBPasswordCiphertext)
+		if decErr != nil {
+			return nil, fmt.Errorf("decrypt db password: %w", decErr)
+		}
+		dbPassword = string(plain)
+	}
+	// Decrypt the storage target that was used when the backup was taken.
+	target, err := s.targets.FindByID(ctx, backupRecord.StorageTargetID)
+	if err != nil {
+		return nil, err
+	}
+	if target == nil {
+		return nil, apperror.BadRequest("BACKUP_STORAGE_TARGET_INVALID", "存储目标不存在", nil)
+	}
+	configRaw, err := s.cipher.Decrypt(target.ConfigCiphertext)
+	if err != nil {
+		return nil, fmt.Errorf("decrypt storage config: %w", err)
+	}
+	// Unpack the JSON-encoded sourcePaths list (best effort — ignore errors).
+	sourcePaths := []string{}
+	if strings.TrimSpace(task.SourcePaths) != "" {
+		_ = json.Unmarshal([]byte(task.SourcePaths), &sourcePaths)
+	}
+	return &AgentRestoreSpec{
+		RestoreRecordID: restore.ID,
+		BackupRecordID:  backupRecord.ID,
+		TaskID:          task.ID,
+		TaskName:        task.Name,
+		Type:            task.Type,
+		SourcePath:      task.SourcePath,
+		SourcePaths:     sourcePaths,
+		DBHost:          task.DBHost,
+		DBPort:          task.DBPort,
+		DBUser:          task.DBUser,
+		DBPassword:      dbPassword,
+		DBName:          task.DBName,
+		DBPath:          task.DBPath,
+		ExtraConfig:     task.ExtraConfig,
+		Compression:     task.Compression,
+		Encrypt:         task.Encrypt,
+		Storage: AgentStorageTargetConfig{
+			ID:     target.ID,
+			Type:   target.Type,
+			Name:   target.Name,
+			Config: json.RawMessage(configRaw),
+		},
+		StoragePath: backupRecord.StoragePath,
+		FileName:    backupRecord.FileName,
+	}, nil
+}
+
+// UpdateAgentRestore applies a status/log update reported back by an Agent.
+// Log lines are fanned out to the LogHub (live SSE subscribers) and appended
+// to the persisted LogContent; a terminal status additionally stamps
+// CompletedAt/DurationSeconds and closes the LogHub stream. The record must
+// belong to the calling node.
+func (s *RestoreService) UpdateAgentRestore(ctx context.Context, node *model.Node, restoreID uint, update AgentRestoreUpdate) error {
+	restore, err := s.restores.FindByID(ctx, restoreID)
+	if err != nil {
+		return err
+	}
+	if restore == nil {
+		return apperror.New(404, "RESTORE_RECORD_NOT_FOUND", "恢复记录不存在", nil)
+	}
+	if restore.NodeID != node.ID {
+		return apperror.Unauthorized("RESTORE_RECORD_FORBIDDEN", "恢复记录不属于当前节点", nil)
+	}
+	// Append log lines to the LogHub (for live subscribers) and to the DB column.
+	if strings.TrimSpace(update.LogAppend) != "" {
+		for _, line := range strings.Split(update.LogAppend, "\n") {
+			trimmed := strings.TrimRight(line, "\r")
+			if strings.TrimSpace(trimmed) == "" {
+				continue
+			}
+			s.logHub.Append(restoreID, "info", trimmed)
+		}
+		if strings.TrimSpace(restore.LogContent) == "" {
+			restore.LogContent = update.LogAppend
+		} else {
+			if !strings.HasSuffix(restore.LogContent, "\n") {
+				restore.LogContent += "\n"
+			}
+			restore.LogContent += update.LogAppend
+		}
+	}
+	if update.Status != "" {
+		restore.Status = update.Status
+		// Terminal states also stamp completion time, duration and error.
+		if update.Status == model.RestoreRecordStatusSuccess || update.Status == model.RestoreRecordStatusFailed {
+			completedAt := s.now()
+			restore.CompletedAt = &completedAt
+			restore.DurationSeconds = int(completedAt.Sub(restore.StartedAt).Seconds())
+			if strings.TrimSpace(update.ErrorMessage) != "" {
+				restore.ErrorMessage = strings.TrimSpace(update.ErrorMessage)
+			}
+		}
+	}
+	if err := s.restores.Update(ctx, restore); err != nil {
+		return err
+	}
+	// Close the live log stream only after the record was persisted.
+	if update.Status == model.RestoreRecordStatusSuccess || update.Status == model.RestoreRecordStatusFailed {
+		s.logHub.Complete(restoreID, update.Status)
+	}
+	return nil
+}
+
+// --- internal helpers ---
+
+// getDetail loads a restore record and converts it into the detail view.
+// For a still-running restore, the in-memory log hub snapshot replaces the
+// persisted log content so callers see live progress.
+func (s *RestoreService) getDetail(ctx context.Context, restoreID uint) (*RestoreRecordDetail, error) {
+	record, err := s.restores.FindByID(ctx, restoreID)
+	if err != nil {
+		return nil, apperror.Internal("RESTORE_RECORD_GET_FAILED", "无法获取恢复记录详情", err)
+	}
+	if record == nil {
+		return nil, apperror.New(404, "RESTORE_RECORD_NOT_FOUND", "恢复记录不存在", nil)
+	}
+	nodeName := ""
+	if record.NodeID > 0 && s.nodeRepo != nil {
+		if node, lookupErr := s.nodeRepo.FindByID(ctx, record.NodeID); lookupErr == nil && node != nil {
+			nodeName = node.Name
+		}
+	}
+	detail := &RestoreRecordDetail{
+		RestoreRecordSummary: toRestoreRecordSummary(record, nodeName),
+		LogContent:           record.LogContent,
+	}
+	if record.Status == model.RestoreRecordStatusRunning && s.logHub != nil {
+		snapshot := s.logHub.Snapshot(record.ID)
+		detail.LogEvents = snapshot
+		if len(snapshot) > 0 {
+			messages := make([]string, len(snapshot))
+			for i, event := range snapshot {
+				messages[i] = event.Message
+			}
+			detail.LogContent = strings.Join(messages, "\n")
+		}
+	}
+	return detail, nil
+}
+
+// toRestoreRecordSummary maps a restore record (with preloaded Task and
+// BackupRecord associations) to its API summary representation. The backup
+// file name is only copied when it is non-blank.
+func toRestoreRecordSummary(item *model.RestoreRecord, nodeName string) RestoreRecordSummary {
+	out := RestoreRecordSummary{
+		ID:              item.ID,
+		BackupRecordID:  item.BackupRecordID,
+		TaskID:          item.TaskID,
+		TaskName:        item.Task.Name,
+		NodeID:          item.NodeID,
+		NodeName:        nodeName,
+		Status:          item.Status,
+		ErrorMessage:    item.ErrorMessage,
+		DurationSeconds: item.DurationSeconds,
+		StartedAt:       item.StartedAt,
+		CompletedAt:     item.CompletedAt,
+		TriggeredBy:     item.TriggeredBy,
+	}
+	if fileName := item.BackupRecord.FileName; strings.TrimSpace(fileName) != "" {
+		out.BackupFileName = fileName
+	}
+	return out
+}
diff --git a/server/internal/service/restore_service_test.go b/server/internal/service/restore_service_test.go
new file mode 100644
index 0000000..4184377
--- /dev/null
+++ b/server/internal/service/restore_service_test.go
@@ -0,0 +1,252 @@
+package service
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+ "path/filepath"
+ "sync"
+ "testing"
+ "time"
+
+ "backupx/server/internal/backup"
+ "backupx/server/internal/config"
+ "backupx/server/internal/database"
+ "backupx/server/internal/logger"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+ "backupx/server/internal/storage"
+ "backupx/server/internal/storage/codec"
+ storageRclone "backupx/server/internal/storage/rclone"
+)
+
+// fakeDispatcher records EnqueueCommand calls so tests can assert that
+// remote-node restores are routed to the agent command queue.
+type fakeDispatcher struct {
+	mu    sync.Mutex
+	calls []dispatcherCall
+}
+
+// dispatcherCall is one captured EnqueueCommand invocation; Payload is the
+// JSON round-trip of the original payload for easy map-based assertions.
+type dispatcherCall struct {
+	NodeID  uint
+	CmdType string
+	Payload map[string]any
+}
+
+// EnqueueCommand captures the call and returns a fake command ID (the number
+// of calls recorded so far). The payload is normalized into a generic map
+// via a JSON round-trip; errors are deliberately ignored in this test fake.
+func (f *fakeDispatcher) EnqueueCommand(_ context.Context, nodeID uint, cmdType string, payload any) (uint, error) {
+	encoded, _ := json.Marshal(payload)
+	decoded := map[string]any{}
+	_ = json.Unmarshal(encoded, &decoded)
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	f.calls = append(f.calls, dispatcherCall{NodeID: nodeID, CmdType: cmdType, Payload: decoded})
+	return uint(len(f.calls)), nil
+}
+
+// snapshot returns a race-free copy of the captured calls.
+func (f *fakeDispatcher) snapshot() []dispatcherCall {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	return append([]dispatcherCall(nil), f.calls...)
+}
+
+// restoreTestHarness bundles everything a restore test needs: the service
+// under test, the backup execution service used to seed source records, the
+// repositories, the fake command dispatcher and the temp directories.
+type restoreTestHarness struct {
+	service    *RestoreService
+	execution  *BackupExecutionService
+	records    repository.BackupRecordRepository
+	restores   repository.RestoreRecordRepository
+	tasks      repository.BackupTaskRepository
+	nodes      repository.NodeRepository
+	dispatcher *fakeDispatcher
+	sourceDir  string
+	storageDir string
+}
+
+// newRestoreTestHarness builds a fully wired RestoreService on top of a
+// temp-dir SQLite database with one local-disk storage target (ID 1) and one
+// file-type backup task (ID 1). When remoteNode is true, an extra online
+// remote node is seeded and the task is bound to it, so Start must route the
+// restore through the command dispatcher instead of running it inline.
+func newRestoreTestHarness(t *testing.T, remoteNode bool) *restoreTestHarness {
+	t.Helper()
+	baseDir := t.TempDir()
+	sourceDir := filepath.Join(baseDir, "source")
+	storageDir := filepath.Join(baseDir, "storage")
+	if err := os.MkdirAll(sourceDir, 0o755); err != nil {
+		t.Fatalf("mkdir source: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(sourceDir, "index.html"), []byte("hello-restore"), 0o644); err != nil {
+		t.Fatalf("write source file: %v", err)
+	}
+	log, err := logger.New(config.LogConfig{Level: "error"})
+	if err != nil {
+		t.Fatalf("logger.New: %v", err)
+	}
+	db, err := database.Open(config.DatabaseConfig{Path: filepath.Join(baseDir, "backupx.db")}, log)
+	if err != nil {
+		t.Fatalf("database.Open: %v", err)
+	}
+	cipher := codec.NewConfigCipher("restore-secret")
+	targets := repository.NewStorageTargetRepository(db)
+	tasks := repository.NewBackupTaskRepository(db)
+	records := repository.NewBackupRecordRepository(db)
+	restores := repository.NewRestoreRecordRepository(db)
+	nodes := repository.NewNodeRepository(db)
+	targetCipher, err := cipher.EncryptJSON(map[string]any{"basePath": storageDir})
+	if err != nil {
+		t.Fatalf("EncryptJSON: %v", err)
+	}
+	if err := targets.Create(context.Background(), &model.StorageTarget{Name: "local", Type: string(storage.ProviderTypeLocalDisk), Enabled: true, ConfigCiphertext: targetCipher, ConfigVersion: 1, LastTestStatus: "unknown"}); err != nil {
+		t.Fatalf("create target: %v", err)
+	}
+
+	// Seed the local node (always present) plus an optional remote node.
+	localNode := &model.Node{Name: "local", Token: "local-token", Status: model.NodeStatusOnline, IsLocal: true, LastSeen: time.Now().UTC()}
+	if err := db.Create(localNode).Error; err != nil {
+		t.Fatalf("seed local node: %v", err)
+	}
+	taskNodeID := uint(0)
+	if remoteNode {
+		remote := &model.Node{Name: "edge-1", Token: "remote-token", Status: model.NodeStatusOnline, IsLocal: false, LastSeen: time.Now().UTC()}
+		if err := db.Create(remote).Error; err != nil {
+			t.Fatalf("seed remote node: %v", err)
+		}
+		taskNodeID = remote.ID
+	}
+
+	task := &model.BackupTask{Name: "restore-test", Type: "file", Enabled: true, SourcePath: sourceDir, StorageTargetID: 1, NodeID: taskNodeID, RetentionDays: 30, Compression: "gzip", MaxBackups: 10, LastStatus: "idle"}
+	if err := tasks.Create(context.Background(), task); err != nil {
+		t.Fatalf("create task: %v", err)
+	}
+
+	logHub := backup.NewLogHub()
+	runnerRegistry := backup.NewRegistry(backup.NewFileRunner(), backup.NewMySQLRunner(nil), backup.NewSQLiteRunner(), backup.NewPostgreSQLRunner(nil))
+	storageRegistry := storage.NewRegistry(storageRclone.NewLocalDiskFactory())
+
+	execution := NewBackupExecutionService(tasks, records, targets, storageRegistry, runnerRegistry, logHub, nil, cipher, nil, baseDir, 2, 10, "")
+	dispatcher := &fakeDispatcher{}
+	restoreLogHub := backup.NewLogHub()
+	restoreService := NewRestoreService(restores, records, tasks, targets, nodes, storageRegistry, runnerRegistry, restoreLogHub, cipher, dispatcher, baseDir, 2)
+
+	return &restoreTestHarness{
+		service:    restoreService,
+		execution:  execution,
+		records:    records,
+		restores:   restores,
+		tasks:      tasks,
+		nodes:      nodes,
+		dispatcher: dispatcher,
+		sourceDir:  sourceDir,
+		storageDir: storageDir,
+	}
+}
+
+// TestRestoreServiceStart_LocalNodeExecutesInline covers the happy path on
+// the local node: run a backup, wipe the source dir, then verify the restore
+// runs inline, succeeds, rewrites the original file, records the trigger
+// user, and never touches the agent command dispatcher.
+func TestRestoreServiceStart_LocalNodeExecutesInline(t *testing.T) {
+	h := newRestoreTestHarness(t, false)
+	ctx := context.Background()
+
+	// Run one backup first to produce the source backup record.
+	backupDetail, err := h.execution.RunTaskByIDSync(ctx, 1)
+	if err != nil {
+		t.Fatalf("RunTaskByIDSync: %v", err)
+	}
+	if backupDetail.Status != "success" {
+		t.Fatalf("expected backup success, got %s", backupDetail.Status)
+	}
+
+	// Wipe the source dir; the restore is expected to bring it back.
+	if err := os.RemoveAll(h.sourceDir); err != nil {
+		t.Fatalf("remove source: %v", err)
+	}
+
+	// Replace async with an awaitable wrapper so the test can block on completion.
+	done := make(chan struct{})
+	h.service.async = func(job func()) {
+		go func() {
+			job()
+			close(done)
+		}()
+	}
+	detail, err := h.service.Start(ctx, backupDetail.ID, "tester")
+	if err != nil {
+		t.Fatalf("Start: %v", err)
+	}
+	if detail.Status != model.RestoreRecordStatusRunning {
+		t.Fatalf("expected initial status running, got %s", detail.Status)
+	}
+	select {
+	case <-done:
+	case <-time.After(15 * time.Second):
+		t.Fatalf("restore did not complete in time")
+	}
+
+	final, err := h.service.Get(ctx, detail.ID)
+	if err != nil {
+		t.Fatalf("Get final: %v", err)
+	}
+	if final.Status != model.RestoreRecordStatusSuccess {
+		t.Fatalf("expected success, got %s (err=%s)", final.Status, final.ErrorMessage)
+	}
+	if final.TriggeredBy != "tester" {
+		t.Fatalf("expected triggeredBy=tester, got %q", final.TriggeredBy)
+	}
+	content, err := os.ReadFile(filepath.Join(h.sourceDir, "index.html"))
+	if err != nil {
+		t.Fatalf("read restored file: %v", err)
+	}
+	if string(content) != "hello-restore" {
+		t.Fatalf("unexpected restored content: %s", string(content))
+	}
+	if len(h.dispatcher.snapshot()) != 0 {
+		t.Fatalf("expected no dispatcher calls for local node, got %d", len(h.dispatcher.snapshot()))
+	}
+}
+
+// TestRestoreServiceStart_RemoteNodeEnqueuesCommand verifies that a restore
+// for a task bound to a remote node is routed through the command dispatcher:
+// exactly one restore_record command carrying the restore record ID.
+func TestRestoreServiceStart_RemoteNodeEnqueuesCommand(t *testing.T) {
+	h := newRestoreTestHarness(t, true)
+	ctx := context.Background()
+
+	// Run a backup locally first just to produce the source record (backup
+	// routing does not affect RestoreService). BackupExecutionService's
+	// isRemoteNode also consults nodeRepo, but SetClusterDependencies was not
+	// injected, so the backup executes locally — keeping the test focused.
+	backupDetail, err := h.execution.RunTaskByIDSync(ctx, 1)
+	if err != nil {
+		t.Fatalf("RunTaskByIDSync: %v", err)
+	}
+
+	detail, err := h.service.Start(ctx, backupDetail.ID, "tester-remote")
+	if err != nil {
+		t.Fatalf("Start: %v", err)
+	}
+	if detail.Status != model.RestoreRecordStatusRunning {
+		t.Fatalf("expected running, got %s", detail.Status)
+	}
+	calls := h.dispatcher.snapshot()
+	if len(calls) != 1 {
+		t.Fatalf("expected exactly 1 dispatcher call, got %d", len(calls))
+	}
+	if calls[0].CmdType != model.AgentCommandTypeRestoreRecord {
+		t.Fatalf("expected cmdType %s, got %s", model.AgentCommandTypeRestoreRecord, calls[0].CmdType)
+	}
+	if rid, ok := calls[0].Payload["restoreRecordId"].(float64); !ok || uint(rid) != detail.ID {
+		t.Fatalf("expected restoreRecordId=%d in payload, got %#v", detail.ID, calls[0].Payload)
+	}
+}
+
+// TestRestoreServiceStart_FailsOnNonSuccessBackup verifies that a restore
+// cannot be started from a backup record that did not finish successfully.
+func TestRestoreServiceStart_FailsOnNonSuccessBackup(t *testing.T) {
+	h := newRestoreTestHarness(t, false)
+	ctx := context.Background()
+
+	// Hand-craft a backup record in failed state.
+	startedAt := time.Now().UTC()
+	failed := &model.BackupRecord{
+		TaskID:          1,
+		StorageTargetID: 1,
+		Status:          model.BackupRecordStatusFailed,
+		FileName:        "never.tar.gz",
+		StoragePath:     "tasks/1/never.tar.gz",
+		StartedAt:       startedAt,
+	}
+	if err := h.records.Create(ctx, failed); err != nil {
+		t.Fatalf("create failed record: %v", err)
+	}
+
+	if _, err := h.service.Start(ctx, failed.ID, "tester"); err == nil {
+		t.Fatalf("expected error when restoring from failed backup, got nil")
+	}
+}
diff --git a/server/internal/service/search_service.go b/server/internal/service/search_service.go
new file mode 100644
index 0000000..d5e3893
--- /dev/null
+++ b/server/internal/service/search_service.go
@@ -0,0 +1,195 @@
+package service
+
+import (
+ "context"
+ "strings"
+
+ "backupx/server/internal/repository"
+)
+
+// SearchService provides global search across tasks, storage targets, nodes
+// and recent backup records.
+// Design trade-offs:
+//   - only the latest 100 backup records are scanned, avoiding a full table scan
+//   - Name / Description / Tags / file-name fields use case-insensitive Contains
+//   - results are grouped by kind so the frontend can render them in columns
+type SearchService struct {
+	tasks   repository.BackupTaskRepository
+	records repository.BackupRecordRepository
+	targets repository.StorageTargetRepository
+	nodes   repository.NodeRepository
+}
+
+// NewSearchService wires the repositories the global search reads from.
+// Any repository may be nil; the corresponding category is then skipped.
+func NewSearchService(
+	tasks repository.BackupTaskRepository,
+	records repository.BackupRecordRepository,
+	targets repository.StorageTargetRepository,
+	nodes repository.NodeRepository,
+) *SearchService {
+	svc := &SearchService{}
+	svc.tasks = tasks
+	svc.records = records
+	svc.targets = targets
+	svc.nodes = nodes
+	return svc
+}
+
+// SearchResultItem is one unified search hit. The frontend builds the jump
+// link from URL and shows the matched field value in Highlight.
+type SearchResultItem struct {
+	Kind      string `json:"kind"` // task | record | storage | node
+	ID        uint   `json:"id"`
+	Title     string `json:"title"`
+	Subtitle  string `json:"subtitle,omitempty"`
+	Highlight string `json:"highlight,omitempty"`
+	URL       string `json:"url"`
+}
+
+// SearchResult is the aggregate answer of a global search: hits grouped by
+// kind plus the total hit count across all groups.
+type SearchResult struct {
+	Query      string             `json:"query"`
+	Tasks      []SearchResultItem `json:"tasks"`
+	Records    []SearchResultItem `json:"records"`
+	Storage    []SearchResultItem `json:"storage"`
+	Nodes      []SearchResultItem `json:"nodes"`
+	TotalCount int                `json:"totalCount"`
+}
+
+// Search runs the global search. An empty (or whitespace-only) query returns
+// an empty, non-nil result. Each category is capped at 10 hits to keep the
+// page short; repository errors silently skip that category.
+func (s *SearchService) Search(ctx context.Context, query string) (*SearchResult, error) {
+	q := strings.TrimSpace(query)
+	result := &SearchResult{Query: q, Tasks: []SearchResultItem{}, Records: []SearchResultItem{}, Storage: []SearchResultItem{}, Nodes: []SearchResultItem{}}
+	if q == "" {
+		return result, nil
+	}
+	lowerQ := strings.ToLower(q)
+
+	// Tasks: match on name / type / tags / source path / DB host / DB name.
+	if s.tasks != nil {
+		if items, err := s.tasks.List(ctx, repository.BackupTaskListOptions{}); err == nil {
+			for _, item := range items {
+				if !matchesAny(lowerQ, item.Name, item.Type, item.Tags, item.SourcePath, item.DBHost, item.DBName) {
+					continue
+				}
+				hl := firstMatch(lowerQ, item.Name, item.Tags)
+				result.Tasks = append(result.Tasks, SearchResultItem{
+					Kind:      "task",
+					ID:        item.ID,
+					Title:     item.Name,
+					Subtitle:  item.Type,
+					Highlight: hl,
+					URL:       "/backup/tasks",
+				})
+				if len(result.Tasks) >= 10 {
+					break
+				}
+			}
+		}
+	}
+
+	// Storage targets: match on name / description / type.
+	if s.targets != nil {
+		if items, err := s.targets.List(ctx); err == nil {
+			for _, item := range items {
+				if !matchesAny(lowerQ, item.Name, item.Description, item.Type) {
+					continue
+				}
+				hl := firstMatch(lowerQ, item.Name, item.Type)
+				result.Storage = append(result.Storage, SearchResultItem{
+					Kind:      "storage",
+					ID:        item.ID,
+					Title:     item.Name,
+					Subtitle:  item.Type,
+					Highlight: hl,
+					URL:       "/storage-targets",
+				})
+				if len(result.Storage) >= 10 {
+					break
+				}
+			}
+		}
+	}
+
+	// Nodes: match on name / hostname / IP address.
+	if s.nodes != nil {
+		if items, err := s.nodes.List(ctx); err == nil {
+			for _, item := range items {
+				if !matchesAny(lowerQ, item.Name, item.Hostname, item.IPAddress) {
+					continue
+				}
+				hl := firstMatch(lowerQ, item.Name, item.Hostname, item.IPAddress)
+				result.Nodes = append(result.Nodes, SearchResultItem{
+					Kind:      "node",
+					ID:        item.ID,
+					Title:     item.Name,
+					Subtitle:  item.Hostname,
+					Highlight: hl,
+					URL:       "/nodes",
+				})
+				if len(result.Nodes) >= 10 {
+					break
+				}
+			}
+		}
+	}
+
+	// Latest 100 backup records: match on file name / storage path / task name.
+	if s.records != nil {
+		if items, err := s.records.ListRecent(ctx, 100); err == nil {
+			for _, item := range items {
+				if !matchesAny(lowerQ, item.FileName, item.StoragePath, item.Task.Name) {
+					continue
+				}
+				hl := firstMatch(lowerQ, item.FileName, item.StoragePath)
+				result.Records = append(result.Records, SearchResultItem{
+					Kind:      "record",
+					ID:        item.ID,
+					Title:     item.FileName,
+					Subtitle:  item.Task.Name,
+					Highlight: hl,
+					URL:       "/backup/records?recordId=" + itoaUint(item.ID),
+				})
+				if len(result.Records) >= 10 {
+					break
+				}
+			}
+		}
+	}
+
+	result.TotalCount = len(result.Tasks) + len(result.Records) + len(result.Storage) + len(result.Nodes)
+	return result, nil
+}
+
+// matchesAny reports whether any non-empty field contains lowerQ, ignoring
+// case. The caller must pass lowerQ already lower-cased.
+func matchesAny(lowerQ string, fields ...string) bool {
+	for _, field := range fields {
+		if field != "" && strings.Contains(strings.ToLower(field), lowerQ) {
+			return true
+		}
+	}
+	return false
+}
+
+// firstMatch returns the first non-empty field containing lowerQ
+// (case-insensitive), or "" when nothing matches. Used for Highlight.
+func firstMatch(lowerQ string, fields ...string) string {
+	for _, field := range fields {
+		if field != "" && strings.Contains(strings.ToLower(field), lowerQ) {
+			return field
+		}
+	}
+	return ""
+}
+
+// itoaUint renders an unsigned integer as its decimal string without pulling
+// in strconv. Digits are appended least-significant first and the buffer is
+// reversed in place — O(d) total, replacing the previous per-digit
+// prepend (append([]byte{...}, buf...)) which reallocated and copied the
+// whole buffer on every digit (O(d^2)).
+func itoaUint(v uint) string {
+	if v == 0 {
+		return "0"
+	}
+	buf := make([]byte, 0, 20)
+	for n := v; n > 0; n /= 10 {
+		buf = append(buf, byte('0'+n%10))
+	}
+	for i, j := 0, len(buf)-1; i < j; i, j = i+1, j-1 {
+		buf[i], buf[j] = buf[j], buf[i]
+	}
+	return string(buf)
+}
diff --git a/server/internal/service/storage_target_service.go b/server/internal/service/storage_target_service.go
index 0896e79..bc8b552 100644
--- a/server/internal/service/storage_target_service.go
+++ b/server/internal/service/storage_target_service.go
@@ -5,6 +5,7 @@ import (
"fmt"
"net/http"
"strings"
+ "sync"
"time"
"backupx/server/internal/apperror"
@@ -25,6 +26,8 @@ type StorageTargetUpsertInput struct {
Description string `json:"description" binding:"max=255"`
Enabled bool `json:"enabled"`
Config map[string]any `json:"config" binding:"required"`
+ // QuotaBytes 软限额(字节),0 = 不限制。
+ QuotaBytes int64 `json:"quotaBytes"`
}
type StorageTargetTestInput struct {
@@ -58,6 +61,7 @@ type StorageTargetSummary struct {
LastTestedAt *time.Time `json:"lastTestedAt"`
LastTestStatus string `json:"lastTestStatus"`
LastTestMessage string `json:"lastTestMessage"`
+ QuotaBytes int64 `json:"quotaBytes"`
UpdatedAt time.Time `json:"updatedAt"`
}
@@ -258,6 +262,179 @@ func (s *StorageTargetService) TestConnection(ctx context.Context, input Storage
return nil
}
+// StartHealthMonitor launches the background storage-target health scan.
+// It periodically runs TestConnection against every enabled target (off the
+// caller's goroutine) and dispatches a storage_unhealthy event on the
+// success→failure transition; a capacity scan runs in the same tick. A 5m
+// interval is recommended; with a nil dispatcher only LastTestStatus is
+// updated and no alerts are sent. The goroutine exits when ctx is done.
+func (s *StorageTargetService) StartHealthMonitor(ctx context.Context, dispatcher EventDispatcher, interval time.Duration) {
+	if interval <= 0 {
+		interval = 5 * time.Minute
+	}
+	ticker := time.NewTicker(interval)
+	// notified remembers targets already alerted so rounds do not repeat alerts.
+	notified := map[uint]bool{}
+	capacityNotified := map[uint]bool{}
+	var mu sync.Mutex
+	go func() {
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				s.runHealthCheckOnce(ctx, dispatcher, &mu, notified)
+				s.runCapacityCheckOnce(ctx, dispatcher, &mu, capacityNotified)
+			}
+		}
+	}()
+}
+
+// StorageCapacityWarningThreshold is the usage ratio (85%) at or above which
+// a storage_capacity_warning event is dispatched. It is a constant rather
+// than a config knob: 85% is the common industry warning line for enterprise
+// ops and does not need per-user tuning.
+const StorageCapacityWarningThreshold = 0.85
+
+// runCapacityCheckOnce scans every enabled storage target whose provider
+// implements storage.StorageAbout, and emits a storage_capacity_warning
+// event the first time usage reaches the threshold. When usage falls back
+// below the threshold (cleanup/expansion), the per-target memory is cleared
+// so a later breach alerts again. A nil dispatcher disables the whole scan.
+func (s *StorageTargetService) runCapacityCheckOnce(ctx context.Context, dispatcher EventDispatcher, mu *sync.Mutex, notified map[uint]bool) {
+	if dispatcher == nil {
+		return
+	}
+	items, err := s.targets.List(ctx)
+	if err != nil {
+		return
+	}
+	for i := range items {
+		target := items[i]
+		if !target.Enabled {
+			continue
+		}
+		config := map[string]any{}
+		if decErr := s.cipher.DecryptJSON(target.ConfigCiphertext, &config); decErr != nil {
+			continue
+		}
+		provider, createErr := s.registry.Create(ctx, storage.ParseProviderType(target.Type), config)
+		if createErr != nil {
+			continue
+		}
+		// Backends without capacity reporting (e.g. S3 / FTP) are skipped.
+		about, supported := provider.(storage.StorageAbout)
+		if !supported {
+			continue
+		}
+		info, aboutErr := about.About(ctx)
+		if aboutErr != nil || info == nil || info.Total == nil || info.Used == nil || *info.Total == 0 {
+			continue
+		}
+		usage := float64(*info.Used) / float64(*info.Total)
+		// Decide under the lock; dispatch outside of it.
+		mu.Lock()
+		shouldWarn := false
+		if usage >= StorageCapacityWarningThreshold {
+			if !notified[target.ID] {
+				notified[target.ID] = true
+				shouldWarn = true
+			}
+		} else {
+			// Usage dropped back below the line: allow the next breach to alert.
+			delete(notified, target.ID)
+		}
+		mu.Unlock()
+		if shouldWarn {
+			s.dispatchCapacityWarning(ctx, dispatcher, &target, info, usage)
+		}
+	}
+}
+
+func (s *StorageTargetService) dispatchCapacityWarning(ctx context.Context, dispatcher EventDispatcher, target *model.StorageTarget, info *storage.StorageUsageInfo, usage float64) {
+ title := "BackupX 存储容量预警"
+ usedGB := float64(*info.Used) / (1 << 30)
+ totalGB := float64(*info.Total) / (1 << 30)
+ body := fmt.Sprintf("存储目标:%s (类型: %s)\n使用率:%.1f%%\n已用:%.2f GB / 总量:%.2f GB\n建议清理旧备份或扩容。",
+ target.Name, target.Type, usage*100, usedGB, totalGB)
+ fields := map[string]any{
+ "storageTargetId": target.ID,
+ "storageTargetName": target.Name,
+ "storageType": target.Type,
+ "usageRate": usage,
+ "usedBytes": *info.Used,
+ "totalBytes": *info.Total,
+ }
+ _ = dispatcher.DispatchEvent(ctx, model.NotificationEventStorageCapacity, title, body, fields)
+}
+
+// runHealthCheckOnce executes one connection test against every enabled
+// target and dispatches events as needed. The healthy→failed edge triggers
+// an alert; the failed→healthy edge clears the notified memory so the next
+// outage can alert again. Targets whose config fails to decrypt are skipped.
+func (s *StorageTargetService) runHealthCheckOnce(ctx context.Context, dispatcher EventDispatcher, mu *sync.Mutex, notified map[uint]bool) {
+	targets, err := s.targets.List(ctx)
+	if err != nil {
+		return
+	}
+	for i := range targets {
+		target := targets[i]
+		if !target.Enabled {
+			continue
+		}
+		previousStatus := target.LastTestStatus
+		configMap := map[string]any{}
+		if err := s.cipher.DecryptJSON(target.ConfigCiphertext, &configMap); err != nil {
+			continue
+		}
+		provider, err := s.registry.Create(ctx, storage.ParseProviderType(target.Type), configMap)
+		now := time.Now().UTC()
+		if err != nil {
+			// Provider construction failed: record the failure and maybe alert.
+			s.applyHealthResult(ctx, &target, now, false, err.Error())
+			s.notifyUnhealthyTransition(ctx, dispatcher, mu, notified, &target, previousStatus, err.Error())
+			continue
+		}
+		testErr := provider.TestConnection(ctx)
+		if testErr != nil {
+			s.applyHealthResult(ctx, &target, now, false, testErr.Error())
+			s.notifyUnhealthyTransition(ctx, dispatcher, mu, notified, &target, previousStatus, testErr.Error())
+			continue
+		}
+		s.applyHealthResult(ctx, &target, now, true, "连接成功")
+		// Back to healthy: clear the alert memory for this target.
+		mu.Lock()
+		delete(notified, target.ID)
+		mu.Unlock()
+	}
+}
+
+// applyHealthResult persists the outcome of one connection test on the
+// target row: timestamp, success/failed status and the sanitized message.
+// Update errors are ignored — the next scan round will retry anyway.
+func (s *StorageTargetService) applyHealthResult(ctx context.Context, target *model.StorageTarget, at time.Time, healthy bool, message string) {
+	status := "failed"
+	if healthy {
+		status = "success"
+	}
+	target.LastTestedAt = &at
+	target.LastTestStatus = status
+	target.LastTestMessage = sanitizeMessage(message)
+	_ = s.targets.Update(ctx, target)
+}
+
+// notifyUnhealthyTransition emits storage_unhealthy exactly once per outage:
+// the first failure marks the target as notified and sends the event, and
+// repeated failures stay silent until a successful check clears the memory.
+// A nil dispatcher disables alerting entirely.
+func (s *StorageTargetService) notifyUnhealthyTransition(ctx context.Context, dispatcher EventDispatcher, mu *sync.Mutex, notified map[uint]bool, target *model.StorageTarget, previousStatus string, message string) {
+	if dispatcher == nil {
+		return
+	}
+	mu.Lock()
+	firstFailure := !notified[target.ID]
+	if firstFailure {
+		notified[target.ID] = true
+	}
+	mu.Unlock()
+	if !firstFailure {
+		return
+	}
+	// previousStatus is retained for future use: distinguishing "never
+	// tested" from "dropped from success".
+	_ = previousStatus
+	title := "BackupX 存储目标连接失败"
+	body := fmt.Sprintf("存储目标:%s (类型: %s)\n错误:%s", target.Name, target.Type, message)
+	fields := map[string]any{
+		"storageTargetId":   target.ID,
+		"storageTargetName": target.Name,
+		"storageType":       target.Type,
+		"error":             message,
+	}
+	_ = dispatcher.DispatchEvent(ctx, model.NotificationEventStorageUnhealthy, title, body, fields)
+}
+
func (s *StorageTargetService) StartGoogleDriveOAuth(ctx context.Context, input GoogleDriveAuthStartInput, origin string) (*GoogleDriveAuthStartResult, error) {
origin = normalizeOrigin(origin)
if origin == "" {
@@ -394,6 +571,10 @@ func (s *StorageTargetService) buildStorageTarget(ctx context.Context, existing
if err != nil {
return nil, apperror.Internal("STORAGE_TARGET_ENCRYPT_FAILED", "无法保存存储目标配置", err)
}
+ quota := input.QuotaBytes
+ if quota < 0 {
+ quota = 0
+ }
item := &model.StorageTarget{
Name: strings.TrimSpace(input.Name),
Type: input.Type,
@@ -402,6 +583,7 @@ func (s *StorageTargetService) buildStorageTarget(ctx context.Context, existing
ConfigCiphertext: ciphertext,
ConfigVersion: 1,
LastTestStatus: "unknown",
+ QuotaBytes: quota,
}
if existing != nil {
item.LastTestedAt = existing.LastTestedAt
@@ -515,6 +697,7 @@ func toStorageTargetSummary(item *model.StorageTarget) StorageTargetSummary {
LastTestedAt: item.LastTestedAt,
LastTestStatus: item.LastTestStatus,
LastTestMessage: item.LastTestMessage,
+ QuotaBytes: item.QuotaBytes,
UpdatedAt: item.UpdatedAt,
}
}
diff --git a/server/internal/service/task_export_service.go b/server/internal/service/task_export_service.go
new file mode 100644
index 0000000..f690458
--- /dev/null
+++ b/server/internal/service/task_export_service.go
@@ -0,0 +1,318 @@
+package service
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+)
+
+// TaskExportService handles JSON export / import of backup task configuration.
+// Use cases:
+//  1. Cluster migration (moving task config from an old Master to a new one)
+//  2. Disaster recovery (tasks kept as local files, rebuilt after a Master loss)
+//  3. Configuration audit (versioned JSON snapshots managed in Git)
+//
+// For security, exports and imports never carry sensitive fields:
+//   - database passwords (DBPasswordCiphertext): skipped, must be re-entered after import
+//   - storage target configuration: matched by name only, credentials are never copied
+//   - node binding: matched by name; falls back to NodeID=0 (local) when absent
+type TaskExportService struct {
+	tasks    *BackupTaskService
+	taskRepo repository.BackupTaskRepository
+	targets  repository.StorageTargetRepository
+	nodes    repository.NodeRepository
+}
+
+// NewTaskExportService wires the repositories plus the task service used to
+// create tasks during import.
+func NewTaskExportService(
+	tasks *BackupTaskService,
+	taskRepo repository.BackupTaskRepository,
+	targets repository.StorageTargetRepository,
+	nodes repository.NodeRepository,
+) *TaskExportService {
+	return &TaskExportService{tasks: tasks, taskRepo: taskRepo, targets: targets, nodes: nodes}
+}
+
+// ExportedTask is the export wire format: storage targets / nodes / upstream
+// tasks are referenced by name, and no sensitive data is included.
+type ExportedTask struct {
+	Name            string         `json:"name"`
+	Type            string         `json:"type"`
+	Enabled         bool           `json:"enabled"`
+	CronExpr        string         `json:"cronExpr,omitempty"`
+	SourcePath      string         `json:"sourcePath,omitempty"`
+	SourcePaths     []string       `json:"sourcePaths,omitempty"`
+	ExcludePatterns []string       `json:"excludePatterns,omitempty"`
+	DBHost          string         `json:"dbHost,omitempty"`
+	DBPort          int            `json:"dbPort,omitempty"`
+	DBUser          string         `json:"dbUser,omitempty"`
+	DBName          string         `json:"dbName,omitempty"`
+	DBPath          string         `json:"dbPath,omitempty"`
+	ExtraConfig     map[string]any `json:"extraConfig,omitempty"`
+	// Referenced by name: resolved back to IDs at import time.
+	StorageTargetNames      []string `json:"storageTargetNames"`
+	ReplicationTargetNames  []string `json:"replicationTargetNames,omitempty"`
+	NodeName                string   `json:"nodeName,omitempty"`
+	DependsOnTaskNames      []string `json:"dependsOnTaskNames,omitempty"`
+	Tags                    string   `json:"tags,omitempty"`
+	Compression             string   `json:"compression,omitempty"`
+	Encrypt                 bool     `json:"encrypt,omitempty"`
+	RetentionDays           int      `json:"retentionDays,omitempty"`
+	MaxBackups              int      `json:"maxBackups,omitempty"`
+	VerifyEnabled           bool     `json:"verifyEnabled,omitempty"`
+	VerifyCronExpr          string   `json:"verifyCronExpr,omitempty"`
+	VerifyMode              string   `json:"verifyMode,omitempty"`
+	SLAHoursRPO             int      `json:"slaHoursRpo,omitempty"`
+	AlertOnConsecutiveFails int      `json:"alertOnConsecutiveFails,omitempty"`
+	MaintenanceWindows      string   `json:"maintenanceWindows,omitempty"`
+}
+
+// ExportPayload is the top-level export document, with metadata.
+type ExportPayload struct {
+	Version    string         `json:"version"`
+	ExportedAt time.Time      `json:"exportedAt"`
+	TaskCount  int            `json:"taskCount"`
+	Tasks      []ExportedTask `json:"tasks"`
+	Notice     string         `json:"notice"`
+}
+
+// ImportResult is the per-task outcome of a best-effort import.
+type ImportResult struct {
+	Name    string `json:"name"`
+	TaskID  uint   `json:"taskId,omitempty"`
+	Success bool   `json:"success"`
+	Error   string `json:"error,omitempty"`
+	Skipped bool   `json:"skipped,omitempty"`
+}
+
+// Export serializes backup tasks to JSON. An empty taskIDs exports every
+// task; otherwise only the listed IDs are included. Target/node lookup
+// failures are tolerated (best-effort name resolution: a missing name simply
+// drops the reference from the exported task).
+func (s *TaskExportService) Export(ctx context.Context, taskIDs []uint) (*ExportPayload, error) {
+	items, err := s.taskRepo.List(ctx, repository.BackupTaskListOptions{})
+	if err != nil {
+		return nil, apperror.Internal("TASK_EXPORT_LIST_FAILED", "无法获取任务列表", err)
+	}
+	// ID → name maps used to replace numeric references with stable names.
+	targetNames := map[uint]string{}
+	if all, err := s.targets.List(ctx); err == nil {
+		for _, t := range all {
+			targetNames[t.ID] = t.Name
+		}
+	}
+	nodeNames := map[uint]string{}
+	if all, err := s.nodes.List(ctx); err == nil {
+		for _, n := range all {
+			nodeNames[n.ID] = n.Name
+		}
+	}
+	taskNames := map[uint]string{}
+	for _, t := range items {
+		taskNames[t.ID] = t.Name
+	}
+	// Optional ID filter: an empty filter means "export everything".
+	idFilter := map[uint]bool{}
+	for _, id := range taskIDs {
+		idFilter[id] = true
+	}
+	exported := make([]ExportedTask, 0, len(items))
+	for i := range items {
+		item := items[i]
+		if len(idFilter) > 0 && !idFilter[item.ID] {
+			continue
+		}
+		et := s.toExported(&item, targetNames, nodeNames, taskNames)
+		exported = append(exported, et)
+	}
+	return &ExportPayload{
+		Version:    "v1",
+		ExportedAt: time.Now().UTC(),
+		TaskCount:  len(exported),
+		Tasks:      exported,
+		Notice:     "敏感字段(数据库密码、存储凭证)已排除,导入后需人工补全。",
+	}, nil
+}
+
+// Import creates tasks from an export payload. Best-effort: one failing
+// entry never aborts the batch. Conflict policy: an existing task with the
+// same name is skipped, never overwritten.
+//
+// Two phases, so dependencies may point at tasks created by this very
+// payload:
+//  1. create every task with its DependsOn list cleared;
+//  2. resolve DependsOnTaskNames against the (now complete) name→ID map and
+//     update each created task.
+func (s *TaskExportService) Import(ctx context.Context, payload ExportPayload) ([]ImportResult, error) {
+	// Preload name → ID lookups (best-effort for targets/nodes).
+	targetsByName := map[string]uint{}
+	if all, err := s.targets.List(ctx); err == nil {
+		for _, t := range all {
+			targetsByName[t.Name] = t.ID
+		}
+	}
+	nodesByName := map[string]uint{}
+	if all, err := s.nodes.List(ctx); err == nil {
+		for _, n := range all {
+			nodesByName[n.Name] = n.ID
+		}
+	}
+	tasksByName := map[string]uint{}
+	existing, err := s.taskRepo.List(ctx, repository.BackupTaskListOptions{})
+	if err != nil {
+		return nil, apperror.Internal("TASK_IMPORT_LIST_FAILED", "无法读取当前任务列表", err)
+	}
+	for _, t := range existing {
+		tasksByName[t.Name] = t.ID
+	}
+	results := make([]ImportResult, 0, len(payload.Tasks))
+	// Phase 1: create tasks (DependsOn is deferred to phase 2).
+	created := map[string]uint{}
+	for _, t := range payload.Tasks {
+		if t.Name == "" {
+			continue
+		}
+		if _, dup := tasksByName[t.Name]; dup {
+			results = append(results, ImportResult{Name: t.Name, Skipped: true, Success: true, Error: "已存在同名任务,跳过"})
+			continue
+		}
+		input := s.toUpsertInput(t, targetsByName, nodesByName, nil)
+		detail, err := s.tasks.Create(ctx, input)
+		if err != nil {
+			results = append(results, ImportResult{Name: t.Name, Success: false, Error: appErrorMessage(err)})
+			continue
+		}
+		created[t.Name] = detail.ID
+		tasksByName[t.Name] = detail.ID
+		results = append(results, ImportResult{Name: t.Name, TaskID: detail.ID, Success: true})
+	}
+	// Phase 2: link dependencies (upstream task name → freshly created ID).
+	for _, t := range payload.Tasks {
+		if len(t.DependsOnTaskNames) == 0 {
+			continue
+		}
+		id, ok := created[t.Name]
+		if !ok {
+			continue
+		}
+		deps := []uint{}
+		for _, name := range t.DependsOnTaskNames {
+			// Drop unknown upstreams and trivial self-references.
+			if depID, ok := tasksByName[name]; ok && depID != id {
+				deps = append(deps, depID)
+			}
+		}
+		if len(deps) == 0 {
+			continue
+		}
+		input := s.toUpsertInput(t, targetsByName, nodesByName, deps)
+		if _, err := s.tasks.Update(ctx, id, input); err != nil {
+			// Task exists but linking failed: downgrade to a warning on the
+			// already-recorded result instead of failing the import.
+			for idx := range results {
+				if results[idx].Name == t.Name {
+					results[idx].Error = fmt.Sprintf("任务已创建,但依赖更新失败: %s", appErrorMessage(err))
+					break
+				}
+			}
+		}
+	}
+	return results, nil
+}
+
+// toExported converts a persisted task row into the name-referenced wire
+// format. The stored JSON columns (SourcePaths / ExcludePatterns /
+// ExtraConfig) are decoded best-effort: malformed JSON leaves the field
+// empty rather than failing the export.
+func (s *TaskExportService) toExported(item *model.BackupTask, targetNames, nodeNames, taskNames map[uint]string) ExportedTask {
+	sourcePaths := []string{}
+	if strings.TrimSpace(item.SourcePaths) != "" {
+		_ = json.Unmarshal([]byte(item.SourcePaths), &sourcePaths)
+	}
+	excludes := []string{}
+	if strings.TrimSpace(item.ExcludePatterns) != "" {
+		_ = json.Unmarshal([]byte(item.ExcludePatterns), &excludes)
+	}
+	var extra map[string]any
+	if strings.TrimSpace(item.ExtraConfig) != "" {
+		_ = json.Unmarshal([]byte(item.ExtraConfig), &extra)
+	}
+	// Numeric ID references become names; IDs with no known name are dropped.
+	storageNames := namesFromIDs(collectTargetIDs(item), targetNames)
+	replicationNames := namesFromIDs(parseUintCSV(item.ReplicationTargetIDs), targetNames)
+	dependsOnNames := namesFromIDs(parseUintCSV(item.DependsOnTaskIDs), taskNames)
+	nodeName := ""
+	if item.NodeID > 0 {
+		nodeName = nodeNames[item.NodeID]
+	}
+	return ExportedTask{
+		Name:                    item.Name,
+		Type:                    item.Type,
+		Enabled:                 item.Enabled,
+		CronExpr:                item.CronExpr,
+		SourcePath:              item.SourcePath,
+		SourcePaths:             sourcePaths,
+		ExcludePatterns:         excludes,
+		DBHost:                  item.DBHost,
+		DBPort:                  item.DBPort,
+		DBUser:                  item.DBUser,
+		DBName:                  item.DBName,
+		DBPath:                  item.DBPath,
+		ExtraConfig:             extra,
+		StorageTargetNames:      storageNames,
+		ReplicationTargetNames:  replicationNames,
+		NodeName:                nodeName,
+		DependsOnTaskNames:      dependsOnNames,
+		Tags:                    item.Tags,
+		Compression:             item.Compression,
+		Encrypt:                 item.Encrypt,
+		RetentionDays:           item.RetentionDays,
+		MaxBackups:              item.MaxBackups,
+		VerifyEnabled:           item.VerifyEnabled,
+		VerifyCronExpr:          item.VerifyCronExpr,
+		VerifyMode:              item.VerifyMode,
+		SLAHoursRPO:             item.SLAHoursRPO,
+		AlertOnConsecutiveFails: item.AlertOnConsecutiveFails,
+		MaintenanceWindows:      item.MaintenanceWindows,
+	}
+}
+
+// toUpsertInput builds the create/update input for an exported task. Names
+// that do not resolve are silently dropped; an unknown NodeName yields the
+// zero value, i.e. NodeID=0 (local execution).
+func (s *TaskExportService) toUpsertInput(t ExportedTask, targetsByName, nodesByName map[string]uint, deps []uint) BackupTaskUpsertInput {
+	return BackupTaskUpsertInput{
+		Name:                    t.Name,
+		Type:                    t.Type,
+		Enabled:                 t.Enabled,
+		CronExpr:                t.CronExpr,
+		SourcePath:              t.SourcePath,
+		SourcePaths:             t.SourcePaths,
+		ExcludePatterns:         t.ExcludePatterns,
+		DBHost:                  t.DBHost,
+		DBPort:                  t.DBPort,
+		DBUser:                  t.DBUser,
+		DBName:                  t.DBName,
+		DBPath:                  t.DBPath,
+		ExtraConfig:             t.ExtraConfig,
+		StorageTargetIDs:        idsFromNames(t.StorageTargetNames, targetsByName),
+		ReplicationTargetIDs:    idsFromNames(t.ReplicationTargetNames, targetsByName),
+		NodeID:                  nodesByName[t.NodeName],
+		Tags:                    t.Tags,
+		Compression:             t.Compression,
+		Encrypt:                 t.Encrypt,
+		RetentionDays:           t.RetentionDays,
+		MaxBackups:              t.MaxBackups,
+		VerifyEnabled:           t.VerifyEnabled,
+		VerifyCronExpr:          t.VerifyCronExpr,
+		VerifyMode:              t.VerifyMode,
+		SLAHoursRPO:             t.SLAHoursRPO,
+		AlertOnConsecutiveFails: t.AlertOnConsecutiveFails,
+		MaintenanceWindows:      t.MaintenanceWindows,
+		DependsOnTaskIDs:        deps,
+	}
+}
+
+// namesFromIDs maps IDs to names via lookup; unknown IDs are skipped.
+func namesFromIDs(ids []uint, lookup map[uint]string) []string {
+	out := make([]string, 0, len(ids))
+	for _, id := range ids {
+		if name, ok := lookup[id]; ok {
+			out = append(out, name)
+		}
+	}
+	return out
+}
+
+// idsFromNames is the reverse mapping; unknown names are skipped.
+func idsFromNames(names []string, lookup map[string]uint) []uint {
+	out := make([]uint, 0, len(names))
+	for _, name := range names {
+		if id, ok := lookup[name]; ok {
+			out = append(out, id)
+		}
+	}
+	return out
+}
diff --git a/server/internal/service/task_template_service.go b/server/internal/service/task_template_service.go
new file mode 100644
index 0000000..1731d22
--- /dev/null
+++ b/server/internal/service/task_template_service.go
@@ -0,0 +1,240 @@
+package service
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+)
+
+// TaskTemplateService manages task templates and template-driven bulk task
+// creation.
+type TaskTemplateService struct {
+	templates repository.TaskTemplateRepository
+	tasks     *BackupTaskService
+}
+
+// NewTaskTemplateService wires the template repository and the task service
+// used when a template is applied.
+func NewTaskTemplateService(templates repository.TaskTemplateRepository, tasks *BackupTaskService) *TaskTemplateService {
+	return &TaskTemplateService{templates: templates, tasks: tasks}
+}
+
+// TaskTemplateSummary is the list-view projection of a template.
+type TaskTemplateSummary struct {
+	ID          uint   `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+	TaskType    string `json:"taskType"`
+	CreatedBy   string `json:"createdBy"`
+	CreatedAt   string `json:"createdAt"`
+	UpdatedAt   string `json:"updatedAt"`
+}
+
+// TaskTemplateDetail adds the decoded payload to the summary.
+type TaskTemplateDetail struct {
+	TaskTemplateSummary
+	Payload BackupTaskUpsertInput `json:"payload"`
+}
+
+// TaskTemplateUpsertInput is the create/update input for a template.
+// Payload reuses the BackupTaskUpsertInput structure verbatim.
+type TaskTemplateUpsertInput struct {
+	Name        string                `json:"name" binding:"required,min=1,max=128"`
+	Description string                `json:"description" binding:"max=500"`
+	Payload     BackupTaskUpsertInput `json:"payload" binding:"required"`
+}
+
+// TaskTemplateApplyInput creates tasks in bulk from a template.
+// Each Variables entry overlays its fields onto the template Payload to
+// produce one new task:
+//   - name is required (overrides the template name; becomes the task name)
+//   - sourcePath / sourcePaths / dbHost / dbName override when provided
+type TaskTemplateApplyInput struct {
+	Variables []TaskTemplateVariables `json:"variables" binding:"required,min=1,max=100"`
+}
+
+// TaskTemplateVariables is one per-task override set.
+type TaskTemplateVariables struct {
+	Name        string   `json:"name" binding:"required,min=1,max=100"`
+	SourcePath  string   `json:"sourcePath"`
+	SourcePaths []string `json:"sourcePaths"`
+	DBHost      string   `json:"dbHost"`
+	DBName      string   `json:"dbName"`
+	Tags        string   `json:"tags"`
+	NodeID      *uint    `json:"nodeId"`
+}
+
+// TaskTemplateApplyResult is the creation outcome for a single task.
+type TaskTemplateApplyResult struct {
+	Name    string `json:"name"`
+	TaskID  uint   `json:"taskId,omitempty"`
+	Success bool   `json:"success"`
+	Error   string `json:"error,omitempty"`
+}
+
+// List returns summaries of every stored template.
+func (s *TaskTemplateService) List(ctx context.Context) ([]TaskTemplateSummary, error) {
+	items, err := s.templates.List(ctx)
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_LIST_FAILED", "无法获取任务模板列表", err)
+	}
+	result := make([]TaskTemplateSummary, 0, len(items))
+	for i := range items {
+		result = append(result, toTemplateSummary(&items[i]))
+	}
+	return result, nil
+}
+
+// Get loads one template and decodes its stored payload JSON into a
+// BackupTaskUpsertInput; a 404 AppError is returned when the ID is unknown.
+func (s *TaskTemplateService) Get(ctx context.Context, id uint) (*TaskTemplateDetail, error) {
+	item, err := s.templates.FindByID(ctx, id)
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_GET_FAILED", "无法获取任务模板", err)
+	}
+	if item == nil {
+		return nil, apperror.New(404, "TASK_TEMPLATE_NOT_FOUND", "任务模板不存在", nil)
+	}
+	var payload BackupTaskUpsertInput
+	if err := json.Unmarshal([]byte(item.Payload), &payload); err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_DECODE_FAILED", "无法解析模板内容", err)
+	}
+	detail := &TaskTemplateDetail{TaskTemplateSummary: toTemplateSummary(item), Payload: payload}
+	return detail, nil
+}
+
+// Create stores a new template after validating that the name is non-blank
+// and unique. The payload is persisted as JSON; TaskType is denormalized
+// from the payload for list filtering.
+func (s *TaskTemplateService) Create(ctx context.Context, createdBy string, input TaskTemplateUpsertInput) (*TaskTemplateDetail, error) {
+	if strings.TrimSpace(input.Name) == "" {
+		return nil, apperror.BadRequest("TASK_TEMPLATE_INVALID", "名称不能为空", nil)
+	}
+	existing, err := s.templates.FindByName(ctx, strings.TrimSpace(input.Name))
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_LOOKUP_FAILED", "无法校验模板名", err)
+	}
+	if existing != nil {
+		return nil, apperror.Conflict("TASK_TEMPLATE_NAME_EXISTS", "模板名称已存在", nil)
+	}
+	payloadJSON, err := json.Marshal(input.Payload)
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_ENCODE_FAILED", "无法序列化模板参数", err)
+	}
+	item := &model.TaskTemplate{
+		Name:        strings.TrimSpace(input.Name),
+		Description: strings.TrimSpace(input.Description),
+		TaskType:    strings.TrimSpace(input.Payload.Type),
+		Payload:     string(payloadJSON),
+		CreatedBy:   strings.TrimSpace(createdBy),
+	}
+	if err := s.templates.Create(ctx, item); err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_CREATE_FAILED", "无法创建任务模板", err)
+	}
+	// Re-read via Get so the caller receives the decoded payload form.
+	return s.Get(ctx, item.ID)
+}
+
+// Update rewrites name/description/payload of an existing template; a name
+// change is checked for uniqueness first.
+func (s *TaskTemplateService) Update(ctx context.Context, id uint, input TaskTemplateUpsertInput) (*TaskTemplateDetail, error) {
+	item, err := s.templates.FindByID(ctx, id)
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_GET_FAILED", "无法获取任务模板", err)
+	}
+	if item == nil {
+		return nil, apperror.New(404, "TASK_TEMPLATE_NOT_FOUND", "任务模板不存在", nil)
+	}
+	payloadJSON, err := json.Marshal(input.Payload)
+	if err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_ENCODE_FAILED", "无法序列化模板参数", err)
+	}
+	// Only re-check uniqueness when the (trimmed) name actually changes.
+	if strings.TrimSpace(input.Name) != item.Name {
+		dup, err := s.templates.FindByName(ctx, strings.TrimSpace(input.Name))
+		if err != nil {
+			return nil, apperror.Internal("TASK_TEMPLATE_LOOKUP_FAILED", "无法校验模板名", err)
+		}
+		if dup != nil && dup.ID != id {
+			return nil, apperror.Conflict("TASK_TEMPLATE_NAME_EXISTS", "模板名称已存在", nil)
+		}
+	}
+	item.Name = strings.TrimSpace(input.Name)
+	item.Description = strings.TrimSpace(input.Description)
+	item.TaskType = strings.TrimSpace(input.Payload.Type)
+	item.Payload = string(payloadJSON)
+	if err := s.templates.Update(ctx, item); err != nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_UPDATE_FAILED", "无法更新任务模板", err)
+	}
+	return s.Get(ctx, item.ID)
+}
+
+// Delete removes a template; unknown IDs yield a 404 AppError.
+func (s *TaskTemplateService) Delete(ctx context.Context, id uint) error {
+	item, err := s.templates.FindByID(ctx, id)
+	if err != nil {
+		return apperror.Internal("TASK_TEMPLATE_GET_FAILED", "无法获取任务模板", err)
+	}
+	if item == nil {
+		return apperror.New(404, "TASK_TEMPLATE_NOT_FOUND", "任务模板不存在", nil)
+	}
+	return s.templates.Delete(ctx, id)
+}
+
+// Apply creates tasks in bulk from a template. Best-effort: one failure does
+// not affect the other entries. Each Variables entry overrides the task name
+// and, when non-empty, sourcePath/sourcePaths/dbHost/dbName/tags/nodeId on
+// the template payload.
+func (s *TaskTemplateService) Apply(ctx context.Context, id uint, input TaskTemplateApplyInput) ([]TaskTemplateApplyResult, error) {
+	template, err := s.Get(ctx, id)
+	if err != nil {
+		return nil, err
+	}
+	if s.tasks == nil {
+		return nil, apperror.Internal("TASK_TEMPLATE_APPLY_UNAVAILABLE", "任务创建服务未注入", nil)
+	}
+	results := make([]TaskTemplateApplyResult, 0, len(input.Variables))
+	for _, v := range input.Variables {
+		payload := mergeVariables(template.Payload, v)
+		detail, createErr := s.tasks.Create(ctx, payload)
+		result := TaskTemplateApplyResult{Name: v.Name}
+		if createErr != nil {
+			// Same user-facing message extraction as the task import path
+			// (appErrorMessage unwraps *apperror.AppError, else Error()).
+			result.Error = appErrorMessage(createErr)
+		} else {
+			result.Success = true
+			result.TaskID = detail.ID
+		}
+		results = append(results, result)
+	}
+	return results, nil
+}
+
+// mergeVariables overlays one Variables entry onto the template payload and
+// returns a new input (the template itself is never mutated). Only non-blank
+// override fields replace template values; Name is always taken from the
+// variables entry.
+func mergeVariables(base BackupTaskUpsertInput, v TaskTemplateVariables) BackupTaskUpsertInput {
+	out := base
+	out.Name = strings.TrimSpace(v.Name)
+	if strings.TrimSpace(v.SourcePath) != "" {
+		out.SourcePath = strings.TrimSpace(v.SourcePath)
+	}
+	if len(v.SourcePaths) > 0 {
+		out.SourcePaths = v.SourcePaths
+	}
+	if strings.TrimSpace(v.DBHost) != "" {
+		out.DBHost = strings.TrimSpace(v.DBHost)
+	}
+	if strings.TrimSpace(v.DBName) != "" {
+		out.DBName = strings.TrimSpace(v.DBName)
+	}
+	if strings.TrimSpace(v.Tags) != "" {
+		out.Tags = strings.TrimSpace(v.Tags)
+	}
+	if v.NodeID != nil {
+		out.NodeID = *v.NodeID
+	}
+	return out
+}
+
+// toTemplateSummary projects a model row into the list DTO. The timestamp
+// layout string is RFC 3339 (identical to time.RFC3339).
+func toTemplateSummary(item *model.TaskTemplate) TaskTemplateSummary {
+	return TaskTemplateSummary{
+		ID:          item.ID,
+		Name:        item.Name,
+		Description: item.Description,
+		TaskType:    item.TaskType,
+		CreatedBy:   item.CreatedBy,
+		CreatedAt:   item.CreatedAt.Format("2006-01-02T15:04:05Z07:00"),
+		UpdatedAt:   item.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"),
+	}
+}
+
+// Keeps the fmt import referenced so the compiler does not reject the file.
+// NOTE(review): fmt appears otherwise unused in this file — prefer removing
+// both this line and the fmt import over keeping the blank-identifier hack.
+var _ = fmt.Sprintf
diff --git a/server/internal/service/user_service.go b/server/internal/service/user_service.go
new file mode 100644
index 0000000..6107950
--- /dev/null
+++ b/server/internal/service/user_service.go
@@ -0,0 +1,160 @@
+package service
+
+import (
+ "context"
+ "strings"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+ "backupx/server/internal/security"
+)
+
+// UserService manages accounts (admin-only endpoints).
+// Bootstrap (no users yet) is handled by AuthService.Setup, which creates
+// the first administrator; this service serves the second user onwards.
+type UserService struct {
+	users repository.UserRepository
+}
+
+// NewUserService wires the user repository.
+func NewUserService(users repository.UserRepository) *UserService {
+	return &UserService{users: users}
+}
+
+// UserSummary is a user list item (password hash deliberately excluded).
+type UserSummary struct {
+	ID          uint   `json:"id"`
+	Username    string `json:"username"`
+	DisplayName string `json:"displayName"`
+	Email       string `json:"email"`
+	Role        string `json:"role"`
+	Disabled    bool   `json:"disabled"`
+	CreatedAt   string `json:"createdAt"`
+}
+
+// UserUpsertInput is the create/update input. Password is optional on update
+// (blank keeps the existing hash) but required on create.
+type UserUpsertInput struct {
+	Username    string `json:"username" binding:"required,min=3,max=64"`
+	Password    string `json:"password" binding:"omitempty,min=8,max=128"`
+	DisplayName string `json:"displayName" binding:"required,min=1,max=128"`
+	Email       string `json:"email" binding:"omitempty,max=255"`
+	Role        string `json:"role" binding:"required,oneof=admin operator viewer"`
+	Disabled    bool   `json:"disabled"`
+}
+
+// List returns summaries of every account (no password material).
+func (s *UserService) List(ctx context.Context) ([]UserSummary, error) {
+	items, err := s.users.List(ctx)
+	if err != nil {
+		return nil, apperror.Internal("USER_LIST_FAILED", "无法获取用户列表", err)
+	}
+	result := make([]UserSummary, 0, len(items))
+	for i := range items {
+		result = append(result, toUserSummary(&items[i]))
+	}
+	return result, nil
+}
+
+// Create adds a new account: role must validate, a password is mandatory on
+// creation, and usernames must be unique. The password is stored only as a
+// hash produced by security.HashPassword.
+func (s *UserService) Create(ctx context.Context, input UserUpsertInput) (*UserSummary, error) {
+	if !model.IsValidRole(input.Role) {
+		return nil, apperror.BadRequest("USER_INVALID", "非法的角色", nil)
+	}
+	if strings.TrimSpace(input.Password) == "" {
+		return nil, apperror.BadRequest("USER_INVALID", "创建用户必须指定密码", nil)
+	}
+	existing, err := s.users.FindByUsername(ctx, strings.TrimSpace(input.Username))
+	if err != nil {
+		return nil, apperror.Internal("USER_LOOKUP_FAILED", "无法校验用户名", err)
+	}
+	if existing != nil {
+		return nil, apperror.Conflict("USER_USERNAME_EXISTS", "用户名已存在", nil)
+	}
+	hash, err := security.HashPassword(input.Password)
+	if err != nil {
+		return nil, apperror.Internal("USER_HASH_FAILED", "无法处理密码", err)
+	}
+	user := &model.User{
+		Username:     strings.TrimSpace(input.Username),
+		PasswordHash: hash,
+		DisplayName:  strings.TrimSpace(input.DisplayName),
+		Email:        strings.TrimSpace(input.Email),
+		Role:         input.Role,
+		Disabled:     input.Disabled,
+	}
+	if err := s.users.Create(ctx, user); err != nil {
+		return nil, apperror.Internal("USER_CREATE_FAILED", "无法创建用户", err)
+	}
+	summary := toUserSummary(user)
+	return &summary, nil
+}
+
+// Update edits an existing account. The role is re-validated, username
+// conflicts are rejected, and a blank password keeps the current hash.
+// Mirroring Delete, the last administrator is protected: it cannot be
+// demoted or disabled, otherwise the system would lose all admin access.
+func (s *UserService) Update(ctx context.Context, id uint, input UserUpsertInput) (*UserSummary, error) {
+	existing, err := s.users.FindByID(ctx, id)
+	if err != nil {
+		return nil, apperror.Internal("USER_GET_FAILED", "无法获取用户", err)
+	}
+	if existing == nil {
+		return nil, apperror.New(404, "USER_NOT_FOUND", "用户不存在", nil)
+	}
+	if !model.IsValidRole(input.Role) {
+		return nil, apperror.BadRequest("USER_INVALID", "非法的角色", nil)
+	}
+	// Last-admin safeguard (mirrors Delete): refuse to demote or disable the
+	// only remaining administrator.
+	if existing.Role == model.UserRoleAdmin && (input.Role != model.UserRoleAdmin || input.Disabled) {
+		count, err := s.users.CountByRole(ctx, model.UserRoleAdmin)
+		if err != nil {
+			return nil, apperror.Internal("USER_COUNT_FAILED", "无法统计管理员数量", err)
+		}
+		if count <= 1 {
+			return nil, apperror.BadRequest("USER_LAST_ADMIN", "不能禁用或降级系统最后一个管理员", nil)
+		}
+	}
+	// Username uniqueness check, only when the name actually changes.
+	if strings.TrimSpace(input.Username) != existing.Username {
+		dup, err := s.users.FindByUsername(ctx, strings.TrimSpace(input.Username))
+		if err != nil {
+			return nil, apperror.Internal("USER_LOOKUP_FAILED", "无法校验用户名", err)
+		}
+		if dup != nil {
+			return nil, apperror.Conflict("USER_USERNAME_EXISTS", "用户名已存在", nil)
+		}
+	}
+	existing.Username = strings.TrimSpace(input.Username)
+	existing.DisplayName = strings.TrimSpace(input.DisplayName)
+	existing.Email = strings.TrimSpace(input.Email)
+	existing.Role = input.Role
+	existing.Disabled = input.Disabled
+	if strings.TrimSpace(input.Password) != "" {
+		hash, err := security.HashPassword(input.Password)
+		if err != nil {
+			return nil, apperror.Internal("USER_HASH_FAILED", "无法处理密码", err)
+		}
+		existing.PasswordHash = hash
+	}
+	if err := s.users.Update(ctx, existing); err != nil {
+		return nil, apperror.Internal("USER_UPDATE_FAILED", "无法更新用户", err)
+	}
+	summary := toUserSummary(existing)
+	return &summary, nil
+}
+
+// Delete removes an account. The last remaining administrator cannot be
+// deleted (prevents total loss of admin access).
+// NOTE(review): CountByRole may also count disabled admins — confirm the
+// repository semantics if disabled accounts should not qualify as "admin".
+func (s *UserService) Delete(ctx context.Context, id uint) error {
+	existing, err := s.users.FindByID(ctx, id)
+	if err != nil {
+		return apperror.Internal("USER_GET_FAILED", "无法获取用户", err)
+	}
+	if existing == nil {
+		return apperror.New(404, "USER_NOT_FOUND", "用户不存在", nil)
+	}
+	// Refuse to delete the final administrator.
+	if existing.Role == model.UserRoleAdmin {
+		count, err := s.users.CountByRole(ctx, model.UserRoleAdmin)
+		if err != nil {
+			return apperror.Internal("USER_COUNT_FAILED", "无法统计管理员数量", err)
+		}
+		if count <= 1 {
+			return apperror.BadRequest("USER_LAST_ADMIN", "不能删除系统最后一个管理员", nil)
+		}
+	}
+	return s.users.Delete(ctx, id)
+}
+
+// toUserSummary projects a user row into the DTO (RFC 3339 timestamp,
+// password hash excluded).
+func toUserSummary(u *model.User) UserSummary {
+	return UserSummary{
+		ID:          u.ID,
+		Username:    u.Username,
+		DisplayName: u.DisplayName,
+		Email:       u.Email,
+		Role:        u.Role,
+		Disabled:    u.Disabled,
+		CreatedAt:   u.CreatedAt.Format("2006-01-02T15:04:05Z07:00"),
+	}
+}
diff --git a/server/internal/service/verification_service.go b/server/internal/service/verification_service.go
new file mode 100644
index 0000000..220faf3
--- /dev/null
+++ b/server/internal/service/verification_service.go
@@ -0,0 +1,515 @@
+package service
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "backupx/server/internal/apperror"
+ "backupx/server/internal/backup"
+ "backupx/server/internal/model"
+ "backupx/server/internal/repository"
+ "backupx/server/internal/storage"
+ "backupx/server/internal/storage/codec"
+ "backupx/server/pkg/compress"
+ backupcrypto "backupx/server/pkg/crypto"
+)
+
+// VerificationService manages the lifecycle of backup verification (restore
+// drill) records.
+//
+// Execution model v1: runs on the Master only.
+//   - downloads the backup object into a temporary sandbox (cross-node
+//     local_disk backups fail because the Master cannot reach the remote
+//     file; a clear error tells the user why)
+//   - decrypts + decompresses
+//   - runs the quick checks of the backup.Verify* family per task type
+//   - never touches the task's source data
+//
+// Agent-side execution (verifying a backup locally on the remote node) is a
+// future extension point.
+type VerificationService struct {
+	verifications   repository.VerificationRecordRepository
+	records         repository.BackupRecordRepository
+	tasks           repository.BackupTaskRepository
+	targets         repository.StorageTargetRepository
+	nodeRepo        repository.NodeRepository
+	storageRegistry *storage.Registry
+	logHub          *backup.LogHub
+	cipher          *codec.ConfigCipher
+	notifier        VerificationNotifier
+	tempDir         string
+	semaphore       chan struct{}
+	async           func(func())
+	now             func() time.Time
+}
+
+// VerificationNotifier pushes verification completion/failure notifications
+// to the user. Optional: when not injected, only the record is written.
+type VerificationNotifier interface {
+	NotifyVerificationResult(ctx context.Context, task *model.BackupTask, record *model.VerificationRecord) error
+}
+
+// noopVerificationNotifier is the default, do-nothing implementation.
+type noopVerificationNotifier struct{}
+
+func (noopVerificationNotifier) NotifyVerificationResult(context.Context, *model.BackupTask, *model.VerificationRecord) error {
+	return nil
+}
+
+// VerificationEventNotifier adapts NotificationService event dispatch for
+// the verify_failed event.
+type VerificationEventNotifier struct {
+	dispatcher EventDispatcher
+}
+
+// EventDispatcher abstracts event dispatch (implemented by NotificationService).
+type EventDispatcher interface {
+	DispatchEvent(ctx context.Context, eventType, title, body string, fields map[string]any) error
+}
+
+// NewVerificationEventNotifier builds the event adapter; a nil dispatcher
+// degrades to a no-op notifier.
+func NewVerificationEventNotifier(dispatcher EventDispatcher) VerificationNotifier {
+	if dispatcher == nil {
+		return noopVerificationNotifier{}
+	}
+	return &VerificationEventNotifier{dispatcher: dispatcher}
+}
+
+// NotifyVerificationResult emits a verify_failed event; nil records and any
+// non-failed status are ignored.
+func (v *VerificationEventNotifier) NotifyVerificationResult(ctx context.Context, task *model.BackupTask, record *model.VerificationRecord) error {
+	if record == nil || record.Status != model.VerificationRecordStatusFailed {
+		return nil
+	}
+	taskName := "未知任务"
+	if task != nil {
+		taskName = task.Name
+	}
+	title := "BackupX 备份验证失败"
+	body := fmt.Sprintf("任务:%s\n验证记录:#%d\n错误:%s", taskName, record.ID, record.ErrorMessage)
+	fields := map[string]any{
+		"taskId":         record.TaskID,
+		"taskName":       taskName,
+		"verifyId":       record.ID,
+		"backupRecordId": record.BackupRecordID,
+		"error":          record.ErrorMessage,
+	}
+	return v.dispatcher.DispatchEvent(ctx, model.NotificationEventVerifyFailed, title, body, fields)
+}
+
+// NewVerificationService wires repositories and runtime knobs.
+// tempDir defaults to /tmp/backupx-verify when empty; maxConcurrent (the
+// sandbox concurrency limit, enforced via the semaphore channel) defaults to
+// 2 when non-positive. The notifier starts as a no-op (see SetNotifier).
+func NewVerificationService(
+	verifications repository.VerificationRecordRepository,
+	records repository.BackupRecordRepository,
+	tasks repository.BackupTaskRepository,
+	targets repository.StorageTargetRepository,
+	nodeRepo repository.NodeRepository,
+	storageRegistry *storage.Registry,
+	logHub *backup.LogHub,
+	cipher *codec.ConfigCipher,
+	tempDir string,
+	maxConcurrent int,
+) *VerificationService {
+	if tempDir == "" {
+		tempDir = "/tmp/backupx-verify"
+	}
+	if maxConcurrent <= 0 {
+		maxConcurrent = 2
+	}
+	return &VerificationService{
+		verifications:   verifications,
+		records:         records,
+		tasks:           tasks,
+		targets:         targets,
+		nodeRepo:        nodeRepo,
+		storageRegistry: storageRegistry,
+		logHub:          logHub,
+		cipher:          cipher,
+		notifier:        noopVerificationNotifier{},
+		tempDir:         tempDir,
+		semaphore:       make(chan struct{}, maxConcurrent),
+		// async and now are injectable seams (goroutine spawn / clock).
+		async: func(job func()) { go job() },
+		now:   func() time.Time { return time.Now().UTC() },
+	}
+}
+
+// SetNotifier injects a notifier; nil is ignored (current notifier kept).
+func (s *VerificationService) SetNotifier(notifier VerificationNotifier) {
+	if notifier != nil {
+		s.notifier = notifier
+	}
+}
+
+// VerificationRecordSummary is the list-view projection of a verification run.
+type VerificationRecordSummary struct {
+	ID              uint       `json:"id"`
+	BackupRecordID  uint       `json:"backupRecordId"`
+	TaskID          uint       `json:"taskId"`
+	TaskName        string     `json:"taskName"`
+	NodeID          uint       `json:"nodeId"`
+	Mode            string     `json:"mode"`
+	Status          string     `json:"status"`
+	Summary         string     `json:"summary"`
+	ErrorMessage    string     `json:"errorMessage"`
+	DurationSeconds int        `json:"durationSeconds"`
+	StartedAt       time.Time  `json:"startedAt"`
+	CompletedAt     *time.Time `json:"completedAt,omitempty"`
+	TriggeredBy     string     `json:"triggeredBy"`
+	BackupFileName  string     `json:"backupFileName,omitempty"`
+}
+
+// VerificationRecordDetail adds the captured log output to the summary.
+type VerificationRecordDetail struct {
+	VerificationRecordSummary
+	LogContent string            `json:"logContent"`
+	LogEvents  []backup.LogEvent `json:"logEvents,omitempty"`
+}
+
+// VerificationRecordListInput carries list filters and paging parameters.
+type VerificationRecordListInput struct {
+	TaskID         *uint
+	BackupRecordID *uint
+	Status         string
+	DateFrom       *time.Time
+	DateTo         *time.Time
+	Limit          int
+	Offset         int
+}
+
+// StartByTask triggers a verification of the task's latest successful
+// backup. Used by the scheduler and the manual UI button.
+// NOTE(review): records[0] is assumed to be the newest successful record —
+// confirm that ListSuccessfulByTask orders results newest-first.
+func (s *VerificationService) StartByTask(ctx context.Context, taskID uint, mode, triggeredBy string) (*VerificationRecordDetail, error) {
+	records, err := s.records.ListSuccessfulByTask(ctx, taskID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_RECORD_LIST_FAILED", "无法获取备份记录", err)
+	}
+	if len(records) == 0 {
+		return nil, apperror.BadRequest("VERIFY_NO_SOURCE", "该任务尚无成功的备份记录可验证", nil)
+	}
+	return s.Start(ctx, records[0].ID, mode, triggeredBy)
+}
+
+// Start triggers one verification run: it creates a VerificationRecord and
+// then executes asynchronously on the Master.
+//
+// Preconditions: the backup record exists, its status is success, its task
+// still exists, and (cluster safety) it is not a cross-node local_disk
+// backup the Master cannot reach.
+func (s *VerificationService) Start(ctx context.Context, backupRecordID uint, mode, triggeredBy string) (*VerificationRecordDetail, error) {
+	record, err := s.records.FindByID(ctx, backupRecordID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_RECORD_GET_FAILED", "无法获取备份记录", err)
+	}
+	if record == nil {
+		return nil, apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "备份记录不存在", nil)
+	}
+	if record.Status != model.BackupRecordStatusSuccess {
+		return nil, apperror.BadRequest("VERIFY_SOURCE_INVALID", "只能验证状态为成功的备份记录", nil)
+	}
+	task, err := s.tasks.FindByID(ctx, record.TaskID)
+	if err != nil {
+		return nil, apperror.Internal("BACKUP_TASK_GET_FAILED", "无法获取关联任务", err)
+	}
+	if task == nil {
+		return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "关联的备份任务不存在", nil)
+	}
+	// Cluster safety: a cross-node local_disk backup is unreachable from the
+	// Master → reject with a clear message.
+	if err := s.validateClusterAccessible(ctx, record); err != nil {
+		return nil, err
+	}
+	// Normalize BEFORE defaulting so a whitespace-only mode also falls back
+	// to quick instead of being rejected as invalid.
+	mode = strings.ToLower(strings.TrimSpace(mode))
+	if mode == "" {
+		mode = model.VerificationModeQuick
+	}
+	if mode != model.VerificationModeQuick && mode != model.VerificationModeDeep {
+		return nil, apperror.BadRequest("VERIFY_MODE_INVALID", "不支持的验证模式", nil)
+	}
+	startedAt := s.now()
+	verification := &model.VerificationRecord{
+		BackupRecordID: backupRecordID,
+		TaskID:         record.TaskID,
+		NodeID:         record.NodeID,
+		Mode:           mode,
+		Status:         model.VerificationRecordStatusRunning,
+		StartedAt:      startedAt,
+		TriggeredBy:    strings.TrimSpace(triggeredBy),
+	}
+	if err := s.verifications.Create(ctx, verification); err != nil {
+		return nil, apperror.Internal("VERIFY_RECORD_CREATE_FAILED", "无法创建验证记录", err)
+	}
+	// Detach from the request context: the job outlives the HTTP request.
+	run := func() {
+		s.executeLocally(context.Background(), verification.ID, task, record)
+	}
+	s.async(run)
+	return s.getDetail(ctx, verification.ID)
+}
+
+// validateClusterAccessible mirrors BackupExecutionService's cross-node
+// local_disk guard: when the backup was produced on a remote node and stored
+// on that node's local disk, the Master cannot download it, so the run is
+// rejected up front instead of validating the wrong machine's data.
+// Repository lookups are best-effort: any lookup error allows the run.
+func (s *VerificationService) validateClusterAccessible(ctx context.Context, record *model.BackupRecord) error {
+	// Nothing to check for local records or when node lookup is unavailable.
+	if record == nil || record.NodeID == 0 || s.nodeRepo == nil {
+		return nil
+	}
+	node, nodeErr := s.nodeRepo.FindByID(ctx, record.NodeID)
+	if nodeErr != nil || node == nil || node.IsLocal {
+		return nil
+	}
+	target, targetErr := s.targets.FindByID(ctx, record.StorageTargetID)
+	if targetErr != nil || target == nil {
+		return nil
+	}
+	if !strings.EqualFold(target.Type, "local_disk") {
+		return nil
+	}
+	return apperror.BadRequest("VERIFY_CROSS_NODE_LOCAL_DISK",
+		fmt.Sprintf("备份位于节点 %s 的本地磁盘(local_disk),Master 无法跨节点验证。", node.Name),
+		nil)
+}
+
+// executeLocally runs one verification end to end on this node:
+// download → decrypt → decompress → per-type validation.
+// It is invoked asynchronously; concurrency is bounded by s.semaphore.
+// The deferred finalize persists the terminal status/summary/log and closes
+// the live log stream; on failure a best-effort notification is sent.
+func (s *VerificationService) executeLocally(ctx context.Context, verID uint, task *model.BackupTask, backupRecord *model.BackupRecord) {
+	s.semaphore <- struct{}{}
+	defer func() { <-s.semaphore }()
+
+	logger := backup.NewExecutionLogger(verID, s.logHub)
+	// Pessimistic default: any early return below leaves the run failed.
+	status := model.VerificationRecordStatusFailed
+	errMessage := ""
+	summary := ""
+
+	defer func() {
+		_ = s.finalize(ctx, verID, status, errMessage, summary, logger.String())
+		s.logHub.Complete(verID, status)
+		// Best-effort failure notification; errors are deliberately ignored.
+		if status == model.VerificationRecordStatusFailed && s.notifier != nil {
+			if record, err := s.verifications.FindByID(ctx, verID); err == nil && record != nil {
+				_ = s.notifier.NotifyVerificationResult(ctx, task, record)
+			}
+		}
+	}()
+
+	// Fix: log the actual mode of this run instead of hard-coding "quick",
+	// which mislabeled deep verifications in the execution log.
+	mode := model.VerificationModeQuick
+	if ver, err := s.verifications.FindByID(ctx, verID); err == nil && ver != nil && ver.Mode != "" {
+		mode = ver.Mode
+	}
+	logger.Infof("开始验证备份记录 #%d(模式:%s)", backupRecord.ID, mode)
+
+	if err := os.MkdirAll(s.tempDir, 0o755); err != nil {
+		errMessage = err.Error()
+		logger.Errorf("创建验证临时父目录失败:%v", err)
+		return
+	}
+	sandbox, err := os.MkdirTemp(s.tempDir, "verify-*")
+	if err != nil {
+		errMessage = err.Error()
+		logger.Errorf("创建沙箱目录失败:%v", err)
+		return
+	}
+	defer os.RemoveAll(sandbox)
+
+	target, err := s.targets.FindByID(ctx, backupRecord.StorageTargetID)
+	if err != nil || target == nil {
+		errMessage = "存储目标不可用"
+		logger.Errorf("获取存储目标失败:%v", err)
+		return
+	}
+	configMap := map[string]any{}
+	if err := s.cipher.DecryptJSON(target.ConfigCiphertext, &configMap); err != nil {
+		errMessage = err.Error()
+		logger.Errorf("解密存储配置失败:%v", err)
+		return
+	}
+	provider, err := s.storageRegistry.Create(ctx, target.Type, configMap)
+	if err != nil {
+		errMessage = err.Error()
+		logger.Errorf("创建存储客户端失败:%v", err)
+		return
+	}
+	// Prefer the recorded file name; fall back to the storage path basename.
+	fileName := backupRecord.FileName
+	if strings.TrimSpace(fileName) == "" {
+		fileName = filepath.Base(backupRecord.StoragePath)
+	}
+	artifactPath := filepath.Join(sandbox, filepath.Base(fileName))
+	logger.Infof("下载备份:%s", backupRecord.StoragePath)
+	reader, err := provider.Download(ctx, backupRecord.StoragePath)
+	if err != nil {
+		errMessage = err.Error()
+		logger.Errorf("下载备份失败:%v", err)
+		return
+	}
+	if err := writeReaderToFile(artifactPath, reader); err != nil {
+		errMessage = err.Error()
+		logger.Errorf("写入沙箱失败:%v", err)
+		return
+	}
+	preparedPath, err := s.prepareArtifact(artifactPath, logger)
+	if err != nil {
+		errMessage = err.Error()
+		logger.Errorf("准备归档失败:%v", err)
+		return
+	}
+	// Dispatch to the per-type verification strategy.
+	report, verifyErr := s.verifyByType(task.Type, preparedPath, backupRecord.Checksum, logger)
+	if verifyErr != nil {
+		errMessage = verifyErr.Error()
+		if report != nil && report.Detail != "" {
+			summary = report.Detail
+		}
+		logger.Errorf("验证未通过:%v", verifyErr)
+		return
+	}
+	status = model.VerificationRecordStatusSuccess
+	if report != nil {
+		summary = report.Detail
+	}
+	logger.Infof("验证通过:%s", summary)
+}
+
+// prepareArtifact peels off transport layers by file suffix and returns the
+// path of the readable payload: an ".enc" suffix is decrypted with the
+// service key, and a ".gz" suffix (possibly revealed by decryption) is
+// gunzipped. Files without either suffix pass through unchanged.
+func (s *VerificationService) prepareArtifact(artifactPath string, logger *backup.ExecutionLogger) (string, error) {
+	path := artifactPath
+	if strings.HasSuffix(strings.ToLower(path), ".enc") {
+		logger.Infof("检测到加密后缀,开始解密")
+		plain, err := backupcrypto.DecryptFile(s.cipher.Key(), path)
+		if err != nil {
+			return "", err
+		}
+		path = plain
+	}
+	if strings.HasSuffix(strings.ToLower(path), ".gz") {
+		logger.Infof("检测到 gzip,解压")
+		unpacked, err := compress.GunzipFile(path)
+		if err != nil {
+			return "", err
+		}
+		path = unpacked
+	}
+	return path, nil
+}
+
+// verifyByType dispatches the prepared artifact to the Verify* strategy
+// matching the task type (file/sqlite/mysql/postgresql/saphana); unknown
+// types yield an error.
+// NOTE(review): the checksum parameter is accepted but never forwarded —
+// "" is passed to VerifyTarArchive below. Confirm whether the recorded
+// checksum applies to the prepared (decrypted/decompressed) artifact
+// before wiring it through.
+func (s *VerificationService) verifyByType(taskType, artifactPath, checksum string, logger *backup.ExecutionLogger) (*backup.VerifyReport, error) {
+	kind := strings.ToLower(strings.TrimSpace(taskType))
+	switch kind {
+	case "file":
+		logger.Infof("执行文件归档校验")
+		return backup.VerifyTarArchive(artifactPath, "")
+	case "sqlite":
+		logger.Infof("执行 SQLite 文件头校验")
+		return backup.VerifySQLiteFile(artifactPath)
+	case "mysql":
+		logger.Infof("执行 MySQL dump 校验")
+		return backup.VerifyMySQLDump(artifactPath)
+	case "postgresql":
+		logger.Infof("执行 PostgreSQL dump 校验")
+		return backup.VerifyPostgreSQLDump(artifactPath)
+	case "saphana":
+		logger.Infof("执行 SAP HANA 归档校验")
+		return backup.VerifySAPHANAArchive(artifactPath)
+	default:
+		return nil, fmt.Errorf("unsupported task type for verification: %s", taskType)
+	}
+}
+
+// finalize persists the terminal state of a verification run: status, trimmed
+// error message, completion time and duration. Blank summary/log values leave
+// any previously stored content untouched.
+func (s *VerificationService) finalize(ctx context.Context, verID uint, status, errMessage, summary, logContent string) error {
+	rec, err := s.verifications.FindByID(ctx, verID)
+	if err != nil {
+		return err
+	}
+	if rec == nil {
+		return fmt.Errorf("verification record %d not found", verID)
+	}
+	now := s.now()
+	rec.Status = status
+	rec.ErrorMessage = strings.TrimSpace(errMessage)
+	if strings.TrimSpace(summary) != "" {
+		rec.Summary = summary
+	}
+	if trimmedLog := strings.TrimSpace(logContent); trimmedLog != "" {
+		rec.LogContent = trimmedLog
+	}
+	rec.CompletedAt = &now
+	rec.DurationSeconds = int(now.Sub(rec.StartedAt).Seconds())
+	return s.verifications.Update(ctx, rec)
+}
+
+// Get returns the full detail of one verification record (including log
+// content); thin public wrapper over getDetail.
+func (s *VerificationService) Get(ctx context.Context, id uint) (*VerificationRecordDetail, error) {
+	return s.getDetail(ctx, id)
+}
+
+// List returns verification record summaries matching the given filters
+// (task, source record, status, date range) with limit/offset paging.
+func (s *VerificationService) List(ctx context.Context, input VerificationRecordListInput) ([]VerificationRecordSummary, error) {
+	opts := repository.VerificationRecordListOptions{
+		TaskID:         input.TaskID,
+		BackupRecordID: input.BackupRecordID,
+		Status:         strings.TrimSpace(input.Status),
+		DateFrom:       input.DateFrom,
+		DateTo:         input.DateTo,
+		Limit:          input.Limit,
+		Offset:         input.Offset,
+	}
+	items, err := s.verifications.List(ctx, opts)
+	if err != nil {
+		return nil, apperror.Internal("VERIFY_RECORD_LIST_FAILED", "无法获取验证记录列表", err)
+	}
+	summaries := make([]VerificationRecordSummary, len(items))
+	for i := range items {
+		summaries[i] = toVerificationSummary(&items[i])
+	}
+	return summaries, nil
+}
+
+// LatestByTask returns the task's most recent verification record, or
+// (nil, nil) when the task has never been verified. Used by the task detail
+// page to show the "last verification status".
+func (s *VerificationService) LatestByTask(ctx context.Context, taskID uint) (*VerificationRecordSummary, error) {
+	latest, err := s.verifications.FindLatestByTask(ctx, taskID)
+	switch {
+	case err != nil:
+		return nil, apperror.Internal("VERIFY_RECORD_GET_FAILED", "无法获取最新验证记录", err)
+	case latest == nil:
+		// Not an error: the task simply has no verification history yet.
+		return nil, nil
+	}
+	out := toVerificationSummary(latest)
+	return &out, nil
+}
+
+// SubscribeLogs attaches a live log subscription for the given verification
+// record after confirming it exists. It returns the event channel plus a
+// cancel function the caller must invoke to release the subscription.
+func (s *VerificationService) SubscribeLogs(ctx context.Context, id uint, buffer int) (<-chan backup.LogEvent, func(), error) {
+	rec, err := s.verifications.FindByID(ctx, id)
+	switch {
+	case err != nil:
+		return nil, nil, apperror.Internal("VERIFY_RECORD_GET_FAILED", "无法获取验证记录", err)
+	case rec == nil:
+		return nil, nil, apperror.New(404, "VERIFY_RECORD_NOT_FOUND", "验证记录不存在", nil)
+	}
+	ch, cancel := s.logHub.Subscribe(id, buffer)
+	return ch, cancel, nil
+}
+
+func (s *VerificationService) getDetail(ctx context.Context, id uint) (*VerificationRecordDetail, error) {
+ record, err := s.verifications.FindByID(ctx, id)
+ if err != nil {
+ return nil, apperror.Internal("VERIFY_RECORD_GET_FAILED", "无法获取验证记录详情", err)
+ }
+ if record == nil {
+ return nil, apperror.New(404, "VERIFY_RECORD_NOT_FOUND", "验证记录不存在", nil)
+ }
+ detail := &VerificationRecordDetail{
+ VerificationRecordSummary: toVerificationSummary(record),
+ LogContent: record.LogContent,
+ }
+ if record.Status == model.VerificationRecordStatusRunning && s.logHub != nil {
+ events := s.logHub.Snapshot(record.ID)
+ detail.LogEvents = events
+ if len(events) > 0 {
+ lines := make([]string, 0, len(events))
+ for _, event := range events {
+ lines = append(lines, event.Message)
+ }
+ detail.LogContent = strings.Join(lines, "\n")
+ }
+ }
+ return detail, nil
+}
+
+// toVerificationSummary flattens a VerificationRecord — with its preloaded
+// Task and BackupRecord associations — into the API summary shape. The backup
+// file name is only copied when non-blank so the field stays omitted for
+// records whose association carries no name.
+func toVerificationSummary(item *model.VerificationRecord) VerificationRecordSummary {
+	out := VerificationRecordSummary{
+		ID:              item.ID,
+		BackupRecordID:  item.BackupRecordID,
+		TaskID:          item.TaskID,
+		TaskName:        item.Task.Name,
+		NodeID:          item.NodeID,
+		Mode:            item.Mode,
+		Status:          item.Status,
+		Summary:         item.Summary,
+		ErrorMessage:    item.ErrorMessage,
+		DurationSeconds: item.DurationSeconds,
+		StartedAt:       item.StartedAt,
+		CompletedAt:     item.CompletedAt,
+		TriggeredBy:     item.TriggeredBy,
+	}
+	if strings.TrimSpace(item.BackupRecord.FileName) != "" {
+		out.BackupFileName = item.BackupRecord.FileName
+	}
+	return out
+}
diff --git a/web/src/components/backup-records/BackupRecordLogDrawer.tsx b/web/src/components/backup-records/BackupRecordLogDrawer.tsx
index ca38a79..1bc1540 100644
--- a/web/src/components/backup-records/BackupRecordLogDrawer.tsx
+++ b/web/src/components/backup-records/BackupRecordLogDrawer.tsx
@@ -1,9 +1,17 @@
-import { Alert, Button, Descriptions, Drawer, Space, Spin, Tag, Typography } from '@arco-design/web-react'
+import { Alert, Button, Descriptions, Drawer, Message, Space, Spin, Tag, Typography } from '@arco-design/web-react'
import { useEffect, useMemo, useState } from 'react'
-import { deleteBackupRecord, downloadBackupRecord, getBackupRecord, restoreBackupRecord, streamBackupRecordLogs } from '../../services/backup-records'
+import { useNavigate } from 'react-router-dom'
+import { deleteBackupRecord, downloadBackupRecord, getBackupRecord, streamBackupRecordLogs } from '../../services/backup-records'
+import { getBackupTask } from '../../services/backup-tasks'
+import { startRestoreFromBackup } from '../../services/restore-records'
+import { startVerifyByRecord } from '../../services/verification-records'
+import { useAuthStore } from '../../stores/auth'
+import { canWrite } from '../../utils/permissions'
import type { BackupLogEvent, BackupRecordDetail, BackupRecordStatus, StorageUploadResultItem } from '../../types/backup-records'
+import type { BackupTaskDetail } from '../../types/backup-tasks'
import { resolveErrorMessage } from '../../utils/error'
import { formatBytes, formatDateTime, formatDuration } from '../../utils/format'
+import { RestoreConfirmModal } from '../restore-records/RestoreConfirmModal'
interface BackupRecordLogDrawerProps {
visible: boolean
@@ -31,12 +39,20 @@ function buildLogText(record: BackupRecordDetail | null, events: BackupLogEvent[
}
export function BackupRecordLogDrawer({ visible, recordId, onCancel, onChanged }: BackupRecordLogDrawerProps) {
+ const navigate = useNavigate()
+ const currentUser = useAuthStore((state) => state.user)
+ const writable = canWrite(currentUser)
const [record, setRecord] = useState(null)
const [events, setEvents] = useState([])
const [loading, setLoading] = useState(false)
const [acting, setActing] = useState(false)
const [error, setError] = useState('')
const [streamError, setStreamError] = useState('')
+ const [restoreModalVisible, setRestoreModalVisible] = useState(false)
+ const [restoreTask, setRestoreTask] = useState(null)
+ const [restoreLoading, setRestoreLoading] = useState(false)
+ const [restorePreparing, setRestorePreparing] = useState(false)
+ const [verifyLoading, setVerifyLoading] = useState(false)
useEffect(() => {
if (!visible || !recordId) {
@@ -141,19 +157,57 @@ export function BackupRecordLogDrawer({ visible, recordId, onCancel, onChanged }
}
}
- async function handleRestore() {
+  // handleOpenRestore fetches the owning task and opens the restore confirm
+  // modal; the async restore only starts after explicit user confirmation.
+  async function handleOpenRestore() {
+    if (!record) {
+      return
+    }
+    setRestorePreparing(true)
+    try {
+      const task = await getBackupTask(record.taskId)
+      setRestoreTask(task)
+      setRestoreModalVisible(true)
+    } catch (prepareError) {
+      Message.error(resolveErrorMessage(prepareError, '加载任务信息失败'))
+    } finally {
+      setRestorePreparing(false)
+    }
+  }
+
+  // handleVerify starts a quick verification of this backup record; results are viewed on the verification drill page.
+  async function handleVerify() {
+    if (!recordId) return
+    setVerifyLoading(true)
+    try {
+      const verify = await startVerifyByRecord(recordId, 'quick')
+      Message.success('验证已启动,正在打开结果')
+      navigate(`/verify/records?verifyId=${verify.id}`)
+      onCancel()
+    } catch (e) {
+      Message.error(resolveErrorMessage(e, '启动验证失败'))
+    } finally {
+      setVerifyLoading(false)
+    }
+  }
+
+ async function handleConfirmRestore() {
if (!recordId) {
return
}
- setActing(true)
+ setRestoreLoading(true)
try {
- await restoreBackupRecord(recordId)
- setStreamError('恢复命令已提交')
+ const restore = await startRestoreFromBackup(recordId)
+ Message.success('恢复已启动,正在打开日志')
+ setRestoreModalVisible(false)
+ setRestoreTask(null)
await onChanged?.()
+ navigate(`/restore/records?restoreId=${restore.id}`)
+ onCancel()
} catch (restoreError) {
- setStreamError(resolveErrorMessage(restoreError, '恢复备份失败'))
+ Message.error(resolveErrorMessage(restoreError, '启动恢复失败'))
} finally {
- setActing(false)
+ setRestoreLoading(false)
}
}
@@ -214,12 +268,30 @@ export function BackupRecordLogDrawer({ visible, recordId, onCancel, onChanged }
-
-
+ {writable && (
+
+ )}
+ {writable && (
+
+ )}
+ {writable && (
+
+ )}
{record.storageUploadResults && record.storageUploadResults.length > 1 && (
@@ -240,6 +312,18 @@ export function BackupRecordLogDrawer({ visible, recordId, onCancel, onChanged }
) : null}
+ {
+ if (restoreLoading) return
+ setRestoreModalVisible(false)
+ setRestoreTask(null)
+ }}
+ onConfirm={() => void handleConfirmRestore()}
+ />
)
}
diff --git a/web/src/components/backup-tasks/BackupTaskFormDrawer.tsx b/web/src/components/backup-tasks/BackupTaskFormDrawer.tsx
index 09fa165..c4e10ff 100644
--- a/web/src/components/backup-tasks/BackupTaskFormDrawer.tsx
+++ b/web/src/components/backup-tasks/BackupTaskFormDrawer.tsx
@@ -5,6 +5,7 @@ import { CronInput } from '../CronInput'
import type { StorageTargetDetail, StorageTargetPayload, StorageTargetSummary } from '../../types/storage-targets'
import type { StorageConnectionTestResult } from '../../types/storage-targets'
import type { BackupTaskDetail, BackupTaskPayload, BackupTaskType } from '../../types/backup-tasks'
+import type { NodeSummary } from '../../types/nodes'
import { DatabasePicker } from '../common/DatabasePicker'
import { DirectoryPicker } from '../common/DirectoryPicker'
import { StorageTargetFormDrawer } from '../storage-targets/StorageTargetFormDrawer'
@@ -28,6 +29,9 @@ interface BackupTaskFormDrawerProps {
initialValue: BackupTaskDetail | null
storageTargets: StorageTargetSummary[]
localNodeId?: number
+ nodes?: NodeSummary[]
+ /** 系统内全部任务,用于上游依赖多选 */
+ allTasks?: { id: number; name: string }[]
onCancel: () => void
onSubmit: (value: BackupTaskPayload, taskId?: number) => Promise
onCreateStorageTarget?: (value: StorageTargetPayload) => Promise
@@ -61,10 +65,18 @@ function createEmptyDraft(storageTargets?: StorageTargetSummary[]): BackupTaskPa
encrypt: false,
maxBackups: 10,
extraConfig: undefined,
+ verifyEnabled: false,
+ verifyCronExpr: '',
+ verifyMode: 'quick',
+ slaHoursRpo: 0,
+ alertOnConsecutiveFails: 1,
+ replicationTargetIds: [],
+ maintenanceWindows: '',
+ dependsOnTaskIds: [],
}
}
-export function BackupTaskFormDrawer({ visible, loading, initialValue, storageTargets, localNodeId, onCancel, onSubmit, onCreateStorageTarget, onTestStorageTarget, onGoogleDriveAuth, onStorageTargetCreated }: BackupTaskFormDrawerProps) {
+export function BackupTaskFormDrawer({ visible, loading, initialValue, storageTargets, localNodeId, nodes, allTasks, onCancel, onSubmit, onCreateStorageTarget, onTestStorageTarget, onGoogleDriveAuth, onStorageTargetCreated }: BackupTaskFormDrawerProps) {
const [draft, setDraft] = useState(createEmptyDraft())
const [excludePatternsText, setExcludePatternsText] = useState('')
const [currentStep, setCurrentStep] = useState(0)
@@ -115,12 +127,20 @@ export function BackupTaskFormDrawer({ visible, loading, initialValue, storageTa
storageTargetId: editTargetIds[0] ?? 0,
storageTargetIds: editTargetIds,
nodeId: (initialValue as any).nodeId ?? 0,
- tags: (initialValue as any).tags ?? '',
+ tags: initialValue.tags ?? '',
retentionDays: initialValue.retentionDays,
compression: initialValue.compression,
encrypt: initialValue.encrypt,
maxBackups: initialValue.maxBackups,
extraConfig: initialValue.extraConfig,
+ verifyEnabled: initialValue.verifyEnabled ?? false,
+ verifyCronExpr: initialValue.verifyCronExpr ?? '',
+ verifyMode: (initialValue.verifyMode ?? 'quick') as 'quick' | 'deep',
+ slaHoursRpo: initialValue.slaHoursRpo ?? 0,
+ alertOnConsecutiveFails: initialValue.alertOnConsecutiveFails ?? 1,
+ replicationTargetIds: initialValue.replicationTargetIds ?? [],
+ maintenanceWindows: initialValue.maintenanceWindows ?? '',
+ dependsOnTaskIds: initialValue.dependsOnTaskIds ?? [],
})
setExcludePatternsText(initialValue.excludePatterns.join('\n'))
setCurrentStep(0)
@@ -142,6 +162,21 @@ export function BackupTaskFormDrawer({ visible, loading, initialValue, storageTa
[storageTargets],
)
+ // 执行节点选项:本地节点显示 "本机 (local)",远程节点带状态后缀
+ const nodeOptions = useMemo(() => {
+ const list = nodes ?? []
+ return [
+ { label: '本机 (Master)', value: 0 },
+ ...list
+ .filter((item) => !item.isLocal)
+ .map((item) => ({
+ label: `${item.name}${item.status === 'online' ? '' : '(离线)'}`,
+ value: item.id,
+ disabled: item.status !== 'online',
+ })),
+ ]
+ }, [nodes])
+
function updateDraft(patch: Partial) {
setDraft((current) => ({ ...current, ...patch }))
}
@@ -257,6 +292,17 @@ export function BackupTaskFormDrawer({ visible, loading, initialValue, storageTa
备份类型