mirror of
https://github.com/Awuqing/BackupX.git
synced 2026-05-06 20:02:41 +08:00
* 功能: v2.0.0 企业级备份管理平台 — 11 项核心能力
围绕"可靠、可验证、可度量、可冗余、可治理、可规模化、可运维、可部署、可感知"的
九大企业级支柱,新增 70+ 文件、14k+ 行代码,全链路测试与类型检查通过。
## 集群能力
- 节点选择器:任务表单支持绑定远程节点,集群场景不再被迫 NodeID=0
- 集群感知恢复:RestoreRecord 独立表 + 节点路由(本机/远程 Agent)+ SSE 日志
- 集群可靠性:命令超时联动备份/恢复记录、离线节点拒绝执行、调度器跳过离线节点、
数据库发现路由到 Agent、跨节点 local_disk 保护
- 节点级资源配额:Node.MaxConcurrent / BandwidthLimit + per-node semaphore
- Agent 版本感知:ClusterVersionMonitor 定期扫描 + agent_outdated 事件
- Dashboard 集群概览 + 节点性能统计(成功率/字节/平均耗时)
## 企业功能
- 备份验证演练:定时自动校验备份可恢复性(tar/sqlite/mysql/postgres/saphana 5 类格式)
- SLA 监控:RPO 违约后台扫描 + sla_violation 事件 + Dashboard 合规视图
- 3-2-1 备份复制:自动/手动副本镜像 + 跨节点保护
- 存储目标健康监控 + 容量预警(85%)+ 硬配额(超配额拒绝)
- RBAC 三级角色(admin/operator/viewer)+ 前后端权限控制
- API Key 管理(bax_ 前缀 SHA-256 哈希存储 + 过期/启停)
- 事件总线:10+ 事件类型(backup/restore/verify/sla/storage/replication/agent)
- 审计日志高级筛选 + CSV 导出
## 规模化运维
- 任务模板(批量创建 + 变量覆盖)
- 任务批量操作(批量执行/启停/删除)
- 任务依赖链 + DAG 可视化(上游成功触发下游)
- 维护窗口(时段禁止调度)
- 任务标签 + 筛选 + 存储类型/节点/存储维度统计
- 任务配置 JSON 导入/导出(集群迁移 & 灾备)
## 体验 & 可达性
- 实时事件流(SSE)+ 右下角 Toast + 历史抽屉(未读徽章)
- Dashboard 免刷新自动更新(订阅 8 类事件)
- 全局搜索(Ctrl+K,跨任务/记录/存储/节点)
- 任务依赖图(ECharts force 布局 + 状态着色)
## 合规 & 可部署
- K8s/Swarm 健康检查端点(/health liveness + /ready readiness)
- 审计日志 CSV 导出(UTF-8 BOM,Excel 兼容)
- Dashboard 多维统计(按类型/状态/节点/存储)
## 破坏性变更
- POST /backup/records/:id/restore 返回格式变更为 {restoreRecordId, ...}
(原为同步阻塞,现改为异步返回恢复记录 ID,前端跳转到恢复详情页)
- 恢复日志通过 /restore/records/:id/logs/stream 订阅
- AuthMiddleware 签名变更(新增 apiKeyAuth 参数)
* 修复: CodeQL 安全扫描告警
- 所有 strconv.ParseUint 由 64bit 改为 32bit 位宽,strconv 内置溢出检查
- hashApiKey 参数改名 rawToken 避免 CodeQL 误判为密码哈希(API Key 是 192 位
高熵 token,使用 bcrypt 会引入不必要的延迟;同时补充安全说明)
* 修复: API Key 哈希改用 HMAC-SHA256 + 应用级 pepper
- 符合 RFC 2104 标准,业界 API token 存储的推荐方案
- 数据库泄漏场景下增加离线反推难度(需同时获取二进制 pepper)
- 规避 CodeQL go/weak-sensitive-data-hashing 对裸 SHA-256 的误判
289 lines
7.9 KiB
Go
289 lines
7.9 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"net"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"backupx/server/internal/backup"
|
|
)
|
|
|
|
// Agent 是 Agent 进程的主控制器。
|
|
type Agent struct {
|
|
cfg *Config
|
|
client *MasterClient
|
|
executor *Executor
|
|
version string
|
|
|
|
mu sync.Mutex
|
|
started bool
|
|
}
|
|
|
|
// New 构造 Agent。
|
|
func New(cfg *Config, version string) (*Agent, error) {
|
|
if err := cfg.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
client := NewMasterClient(cfg.Master, cfg.Token, cfg.InsecureSkipTLSVerify)
|
|
executor := NewExecutor(client, cfg.TempDir)
|
|
return &Agent{
|
|
cfg: cfg,
|
|
client: client,
|
|
executor: executor,
|
|
version: version,
|
|
}, nil
|
|
}
|
|
|
|
// Run 启动 Agent 主循环,阻塞直到 ctx 被取消。
|
|
func (a *Agent) Run(ctx context.Context) error {
|
|
a.mu.Lock()
|
|
if a.started {
|
|
a.mu.Unlock()
|
|
return fmt.Errorf("agent already started")
|
|
}
|
|
a.started = true
|
|
a.mu.Unlock()
|
|
|
|
hbInterval := parseDuration(a.cfg.HeartbeatInterval, 15*time.Second)
|
|
pollInterval := parseDuration(a.cfg.PollInterval, 5*time.Second)
|
|
|
|
// 首次握手:通过一次心跳确认 token 有效
|
|
if err := a.heartbeatOnce(ctx); err != nil {
|
|
return fmt.Errorf("initial heartbeat failed: %w", err)
|
|
}
|
|
log.Printf("[agent] connected to master %s", a.cfg.Master)
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
go func() {
|
|
defer wg.Done()
|
|
a.heartbeatLoop(ctx, hbInterval)
|
|
}()
|
|
go func() {
|
|
defer wg.Done()
|
|
a.pollLoop(ctx, pollInterval)
|
|
}()
|
|
wg.Wait()
|
|
return ctx.Err()
|
|
}
|
|
|
|
// heartbeatLoop 定期发送心跳。
|
|
func (a *Agent) heartbeatLoop(ctx context.Context, interval time.Duration) {
|
|
ticker := time.NewTicker(interval)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
if err := a.heartbeatOnce(ctx); err != nil {
|
|
log.Printf("[agent] heartbeat failed: %v", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *Agent) heartbeatOnce(ctx context.Context) error {
|
|
hostname, _ := os.Hostname()
|
|
req := HeartbeatRequest{
|
|
Token: a.cfg.Token,
|
|
Hostname: hostname,
|
|
IPAddress: detectLocalIP(),
|
|
AgentVersion: a.version,
|
|
OS: runtime.GOOS,
|
|
Arch: runtime.GOARCH,
|
|
}
|
|
_, err := a.client.Heartbeat(ctx, req)
|
|
return err
|
|
}
|
|
|
|
// pollLoop 定期拉取并处理待执行命令。
|
|
func (a *Agent) pollLoop(ctx context.Context, interval time.Duration) {
|
|
ticker := time.NewTicker(interval)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
a.pollAndHandleOnce(ctx)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *Agent) pollAndHandleOnce(ctx context.Context) {
|
|
cmd, err := a.client.PollCommand(ctx)
|
|
if err != nil {
|
|
log.Printf("[agent] poll command failed: %v", err)
|
|
return
|
|
}
|
|
if cmd == nil {
|
|
return
|
|
}
|
|
log.Printf("[agent] received command #%d type=%s", cmd.ID, cmd.Type)
|
|
switch cmd.Type {
|
|
case "run_task":
|
|
a.handleRunTask(ctx, cmd)
|
|
case "list_dir":
|
|
a.handleListDir(ctx, cmd)
|
|
case "restore_record":
|
|
a.handleRestoreRecord(ctx, cmd)
|
|
case "discover_db":
|
|
a.handleDiscoverDB(ctx, cmd)
|
|
case "delete_storage_object":
|
|
a.handleDeleteStorageObject(ctx, cmd)
|
|
default:
|
|
msg := fmt.Sprintf("unknown command type: %s", cmd.Type)
|
|
log.Printf("[agent] %s", msg)
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, msg, nil)
|
|
}
|
|
}
|
|
|
|
// handleRunTask 处理 run_task 命令
|
|
func (a *Agent) handleRunTask(ctx context.Context, cmd *CommandPayload) {
|
|
var payload struct {
|
|
TaskID uint `json:"taskId"`
|
|
RecordID uint `json:"recordId"`
|
|
}
|
|
if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
|
|
return
|
|
}
|
|
if err := a.executor.ExecuteRunTask(ctx, payload.TaskID, payload.RecordID); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
|
|
return
|
|
}
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{
|
|
"taskId": payload.TaskID,
|
|
"recordId": payload.RecordID,
|
|
})
|
|
}
|
|
|
|
// handleRestoreRecord 处理 restore_record 命令
|
|
func (a *Agent) handleRestoreRecord(ctx context.Context, cmd *CommandPayload) {
|
|
var payload struct {
|
|
RestoreRecordID uint `json:"restoreRecordId"`
|
|
}
|
|
if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
|
|
return
|
|
}
|
|
if payload.RestoreRecordID == 0 {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "restoreRecordId is required", nil)
|
|
return
|
|
}
|
|
if err := a.executor.ExecuteRestore(ctx, payload.RestoreRecordID); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
|
|
return
|
|
}
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{
|
|
"restoreRecordId": payload.RestoreRecordID,
|
|
})
|
|
}
|
|
|
|
// handleDeleteStorageObject 处理 delete_storage_object 命令:在 Agent 侧删除指定存储对象。
|
|
// 用于跨节点 local_disk 场景下的远程备份文件清理。
|
|
func (a *Agent) handleDeleteStorageObject(ctx context.Context, cmd *CommandPayload) {
|
|
var payload struct {
|
|
TargetType string `json:"targetType"`
|
|
TargetConfig map[string]any `json:"targetConfig"`
|
|
StoragePath string `json:"storagePath"`
|
|
}
|
|
if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
|
|
return
|
|
}
|
|
if strings.TrimSpace(payload.StoragePath) == "" {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "storagePath is required", nil)
|
|
return
|
|
}
|
|
provider, err := a.executor.storageRegistry.Create(ctx, payload.TargetType, payload.TargetConfig)
|
|
if err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "create provider: "+err.Error(), nil)
|
|
return
|
|
}
|
|
if err := provider.Delete(ctx, payload.StoragePath); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "delete object: "+err.Error(), nil)
|
|
return
|
|
}
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{"deleted": true})
|
|
}
|
|
|
|
// handleDiscoverDB 处理 discover_db 命令:在 Agent 本机执行 mysql/psql 列出数据库。
|
|
func (a *Agent) handleDiscoverDB(ctx context.Context, cmd *CommandPayload) {
|
|
var payload struct {
|
|
Type string `json:"type"`
|
|
Host string `json:"host"`
|
|
Port int `json:"port"`
|
|
User string `json:"user"`
|
|
Password string `json:"password"`
|
|
}
|
|
if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
|
|
return
|
|
}
|
|
databases, err := backup.DiscoverDatabases(ctx, backup.NewOSCommandExecutor(), backup.DiscoverRequest{
|
|
Type: payload.Type,
|
|
Host: payload.Host,
|
|
Port: payload.Port,
|
|
User: payload.User,
|
|
Password: payload.Password,
|
|
})
|
|
if err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
|
|
return
|
|
}
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{"databases": databases})
|
|
}
|
|
|
|
// handleListDir 处理 list_dir 命令(阶段四实现)
|
|
func (a *Agent) handleListDir(ctx context.Context, cmd *CommandPayload) {
|
|
var payload struct {
|
|
Path string `json:"path"`
|
|
}
|
|
if err := json.Unmarshal(cmd.Payload, &payload); err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, "invalid payload: "+err.Error(), nil)
|
|
return
|
|
}
|
|
entries, err := listLocalDir(payload.Path)
|
|
if err != nil {
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, false, err.Error(), nil)
|
|
return
|
|
}
|
|
_ = a.client.SubmitCommandResult(ctx, cmd.ID, true, "", map[string]any{"entries": entries})
|
|
}
|
|
|
|
// 辅助函数
|
|
|
|
// parseDuration converts s (e.g. "15s", "2m") into a time.Duration.
//
// fallback is returned when s is blank, cannot be parsed, or parses to a zero
// or negative duration. Rejecting non-positive values matters because the
// results are used as ticker intervals, and time.NewTicker panics on a
// non-positive interval. Leading and trailing whitespace in s is ignored.
func parseDuration(s string, fallback time.Duration) time.Duration {
	// ParseDuration rejects the empty string, so blank input needs no special case.
	d, err := time.ParseDuration(strings.TrimSpace(s))
	if err != nil || d <= 0 {
		return fallback
	}
	return d
}
|
|
|
|
// detectLocalIP returns the first non-loopback IPv4 address found among the
// host's interface addresses, in the order reported by net.InterfaceAddrs.
// It returns "" when the addresses cannot be listed or none qualifies.
func detectLocalIP() string {
	ifaceAddrs, err := net.InterfaceAddrs()
	if err != nil {
		return ""
	}
	for _, candidate := range ifaceAddrs {
		ipNet, isIPNet := candidate.(*net.IPNet)
		if !isIPNet || ipNet.IP.IsLoopback() {
			continue
		}
		// To4 yields nil for addresses that are not IPv4.
		if ipNet.IP.To4() == nil {
			continue
		}
		return ipNet.IP.String()
	}
	return ""
}
|