功能: 修复并实现多节点集群部署 (#38)

基础修复:
- 新增节点离线检测:每 15s 扫描,超 45s 未心跳的远程节点自动置离线
- 节点删除前检查关联任务,避免孤立备份任务
- BackupTaskRepository 新增 CountByNodeID/ListByNodeID

Master 端 Agent 协议:
- 新增 AgentCommand 模型与命令队列仓储(pending/dispatched/succeeded/failed/timeout)
- 新增 AgentService:任务下发、命令轮询、结果回收、超时扫描
- 新增专用 Agent HTTP API(X-Agent-Token 认证):
  /api/agent/heartbeat
  /api/agent/commands/poll
  /api/agent/commands/:id/result
  /api/agent/tasks/:id
  /api/agent/records/:id
- BackupExecutionService 支持 node 路由:task.NodeID 指向远程节点时自动入队派发

Agent CLI(backupx agent 子命令):
- 配置:YAML 文件 / 环境变量 / CLI 参数,优先级 CLI > 文件 > 环境
- 心跳循环 + 命令轮询循环 + 优雅退出
- 本地复用 BackupRunner 与 storage registry 执行备份并直接上传
- 支持 run_task 和 list_dir 两种命令

远程目录浏览:
- NodeService 支持通过 Agent RPC 列出远程节点目录(15s 超时)

前端:
- NodesPage 添加节点后展示 Agent 启动命令和环境变量配置

文档:
- README 中英文重写"多节点集群"章节,含架构图、步骤、限制、CLI 参考
This commit is contained in:
Wu Qing
2026-04-17 12:29:08 +08:00
committed by GitHub
parent e04774ff68
commit 757b0fa5ed
27 changed files with 2224 additions and 24 deletions

View File

@@ -0,0 +1,266 @@
package agent
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"backupx/server/internal/backup"
"backupx/server/internal/storage"
storageRclone "backupx/server/internal/storage/rclone"
"backupx/server/pkg/compress"
)
// Executor runs commands locally on the Agent host.
type Executor struct {
// client is the Master API client; this file uses it to fetch task specs
// and to push record updates (status, logs, final result).
client *MasterClient
// tempDir is created on demand by ExecuteRunTask and passed to the backup
// runner as TaskSpec.TempDir.
tempDir string
// backupRegistry resolves the backup runner for a task type.
backupRegistry *backup.Registry
// storageRegistry creates a storage provider from a target type and its
// decoded configuration.
storageRegistry *storage.Registry
}
// NewExecutor builds an Executor bound to the given Master client and
// temporary directory. Both registries are populated up front: the backup
// registry with the file, SQLite, MySQL, PostgreSQL and SAP HANA runners, and
// the storage registry with the explicit rclone-backed factories followed by
// storageRclone.RegisterAllBackends.
func NewExecutor(client *MasterClient, tempDir string) *Executor {
	executor := &Executor{
		client:  client,
		tempDir: tempDir,
	}

	executor.backupRegistry = backup.NewRegistry(
		backup.NewFileRunner(),
		backup.NewSQLiteRunner(),
		backup.NewMySQLRunner(nil),
		backup.NewPostgreSQLRunner(nil),
		backup.NewSAPHANARunner(nil),
	)

	executor.storageRegistry = storage.NewRegistry(
		storageRclone.NewLocalDiskFactory(),
		storageRclone.NewS3Factory(),
		storageRclone.NewWebDAVFactory(),
		storageRclone.NewGoogleDriveFactory(),
		storageRclone.NewAliyunOSSFactory(),
		storageRclone.NewTencentCOSFactory(),
		storageRclone.NewQiniuKodoFactory(),
		storageRclone.NewFTPFactory(),
		storageRclone.NewRcloneFactory(),
	)
	storageRclone.RegisterAllBackends(executor.storageRegistry)

	return executor
}
// ExecuteRunTask handles a run_task command: fetch the task spec, validate
// it, run the matching backup runner, optionally gzip the artifact, upload it
// to every storage target, and report the final record to the Master.
//
// Every failure path first sends a best-effort "failed" record update to the
// Master and then returns the underlying error. On success the returned error
// is whatever the final record update returns.
//
// Tasks with Encrypt=true are rejected: the Agent reports the failure and
// returns an error (the encryption key is held by the Master only and is not
// distributed to agents). Tasks without any storage target are rejected
// before any backup work starts, because the artifact would have nowhere to
// go and would simply be deleted again.
func (e *Executor) ExecuteRunTask(ctx context.Context, taskID, recordID uint) error {
	// 1) Fetch the task spec from the Master.
	spec, err := e.client.GetTaskSpec(ctx, taskID)
	if err != nil {
		e.reportRecordFailure(ctx, recordID, fmt.Sprintf("拉取任务规格失败: %v", err))
		return err
	}
	if spec.Encrypt {
		msg := "Agent 不支持加密备份(加密密钥仅在 Master 端持有)"
		e.reportRecordFailure(ctx, recordID, msg)
		return fmt.Errorf("%s", msg)
	}
	// Fail fast: validate the storage targets before running the backup so a
	// misconfigured task does not pay for a full dump/compress/hash cycle.
	if len(spec.StorageTargets) == 0 {
		e.reportRecordFailure(ctx, recordID, "没有关联的存储目标")
		return fmt.Errorf("no storage targets")
	}
	e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 开始执行任务 %s (type=%s)\n", spec.Name, spec.Type))

	// 2) Build the backup.TaskSpec and look up the runner for its type.
	startedAt := time.Now().UTC()
	if err := os.MkdirAll(e.tempDir, 0o755); err != nil {
		e.reportRecordFailure(ctx, recordID, fmt.Sprintf("创建临时目录失败: %v", err))
		return err
	}
	backupSpec := buildBackupTaskSpec(spec, startedAt, e.tempDir)
	runner, err := e.backupRegistry.Runner(backupSpec.Type)
	if err != nil {
		e.reportRecordFailure(ctx, recordID, fmt.Sprintf("不支持的备份类型: %v", err))
		return err
	}

	// 3) Run the runner; its log lines are forwarded to the Master record.
	logger := newRecordLogger(ctx, e.client, recordID)
	result, err := runner.Run(ctx, backupSpec, logger)
	if err != nil {
		e.reportRecordFailure(ctx, recordID, err.Error())
		return err
	}
	// Remove the runner's working directory once this function returns,
	// whether the upload succeeded or not.
	defer os.RemoveAll(result.TempDir)

	// 4) Optional gzip compression (skipped when the artifact already ends in .gz).
	finalPath := result.ArtifactPath
	if strings.EqualFold(spec.Compression, "gzip") && !strings.HasSuffix(strings.ToLower(finalPath), ".gz") {
		e.appendLog(ctx, recordID, "[agent] 开始压缩备份文件\n")
		compressedPath, compressErr := compress.GzipFile(finalPath)
		if compressErr != nil {
			e.reportRecordFailure(ctx, recordID, fmt.Sprintf("压缩失败: %v", compressErr))
			return compressErr
		}
		finalPath = compressedPath
	}
	info, err := os.Stat(finalPath)
	if err != nil {
		e.reportRecordFailure(ctx, recordID, fmt.Sprintf("获取文件信息失败: %v", err))
		return err
	}
	fileName := filepath.Base(finalPath)
	fileSize := info.Size()
	storagePath := backup.BuildStorageKey(spec.Type, startedAt, fileName)

	// 5) Compute the checksum once, then upload the artifact to every target.
	checksum, err := computeFileSHA256(finalPath)
	if err != nil {
		e.reportRecordFailure(ctx, recordID, fmt.Sprintf("计算 checksum 失败: %v", err))
		return err
	}
	for _, target := range spec.StorageTargets {
		// The first failing target aborts the whole task.
		if err := e.uploadToTarget(ctx, recordID, target, finalPath, storagePath, fileSize, spec.TaskID); err != nil {
			e.reportRecordFailure(ctx, recordID, fmt.Sprintf("上传到 %s 失败: %v", target.Name, err))
			return err
		}
		e.appendLog(ctx, recordID, fmt.Sprintf("[agent] 已上传到存储目标 %s\n", target.Name))
	}

	// 6) Report final success together with the artifact metadata.
	return e.client.UpdateRecord(ctx, recordID, RecordUpdate{
		Status:      "success",
		FileName:    fileName,
		FileSize:    fileSize,
		Checksum:    checksum,
		StoragePath: storagePath,
		LogAppend:   fmt.Sprintf("[agent] 任务完成,总计 %d 字节\n", fileSize),
	})
}
// uploadToTarget uploads the artifact at filePath to a single storage target
// under objectKey. There is deliberately no upload-level retry here; the
// original note states that rclone already retries at a lower level.
//
// The target's raw config, when present, is decoded into a map and handed to
// the storage registry to create the provider. The upload carries the task
// and record IDs as string metadata.
func (e *Executor) uploadToTarget(ctx context.Context, recordID uint, target StorageTargetConfig, filePath, objectKey string, fileSize int64, taskID uint) error {
	// Stays nil when the target carries no config payload.
	var providerConfig map[string]any
	if len(target.Config) != 0 {
		if decodeErr := jsonUnmarshalMap(target.Config, &providerConfig); decodeErr != nil {
			return fmt.Errorf("parse storage config: %w", decodeErr)
		}
	}

	provider, createErr := e.storageRegistry.Create(ctx, target.Type, providerConfig)
	if createErr != nil {
		return fmt.Errorf("create provider: %w", createErr)
	}

	artifact, openErr := os.Open(filePath)
	if openErr != nil {
		return fmt.Errorf("open artifact: %w", openErr)
	}
	defer artifact.Close()

	metadata := map[string]string{
		"recordId": fmt.Sprint(recordID),
		"taskId":   fmt.Sprint(taskID),
	}
	return provider.Upload(ctx, objectKey, artifact, fileSize, metadata)
}
// appendLog appends one log line to the Master-side record. It is best
// effort: a failed update is ignored so logging never interrupts the task.
func (e *Executor) appendLog(ctx context.Context, recordID uint, line string) {
	update := RecordUpdate{LogAppend: line}
	// Error intentionally discarded: log delivery must not fail the backup.
	_ = e.client.UpdateRecord(ctx, recordID, update)
}
// reportRecordFailure marks the record as failed on the Master, storing msg
// as the error message and appending it to the record log. The update is
// best effort; its error is ignored because the caller is already returning
// the original failure.
func (e *Executor) reportRecordFailure(ctx context.Context, recordID uint, msg string) {
	logLine := fmt.Sprintf("[agent] 错误: %s\n", msg)
	update := RecordUpdate{
		Status:       "failed",
		ErrorMessage: msg,
		LogAppend:    logLine,
	}
	_ = e.client.UpdateRecord(ctx, recordID, update)
}
// buildBackupTaskSpec converts the Agent-side TaskSpec into a backup.TaskSpec.
//
// SourcePaths and ExcludePatterns arrive as newline-separated strings; each
// line is trimmed and blank lines are dropped (a blank input yields a nil
// slice). DBName is split with splitCommaOrNewline. startedAt and tempDir are
// copied through unchanged.
func buildBackupTaskSpec(spec *TaskSpec, startedAt time.Time, tempDir string) backup.TaskSpec {
	// nonEmptyLines splits raw on '\n' and keeps the trimmed, non-blank lines.
	nonEmptyLines := func(raw string) []string {
		var lines []string
		for _, line := range strings.Split(raw, "\n") {
			line = strings.TrimSpace(line)
			if line == "" {
				continue
			}
			lines = append(lines, line)
		}
		return lines
	}

	database := backup.DatabaseSpec{
		Host:     spec.DBHost,
		Port:     spec.DBPort,
		User:     spec.DBUser,
		Password: spec.DBPassword,
		Path:     spec.DBPath,
		Names:    splitCommaOrNewline(spec.DBName),
	}

	return backup.TaskSpec{
		ID:              spec.TaskID,
		Name:            spec.Name,
		Type:            spec.Type,
		SourcePath:      spec.SourcePath,
		SourcePaths:     nonEmptyLines(spec.SourcePaths),
		ExcludePatterns: nonEmptyLines(spec.ExcludePatterns),
		Database:        database,
		Compression:     spec.Compression,
		Encrypt:         spec.Encrypt,
		StartedAt:       startedAt,
		TempDir:         tempDir,
	}
}
// recordLogger forwards runner log output to the Master-side record.
// Per the original note it implements backup.LogWriter, and each line is
// appended to the record's log content (record.log_content).
type recordLogger struct {
// ctx is the context used for every record update sent by WriteLine.
ctx context.Context
// client is the Master API client used to send the updates.
client *MasterClient
// recordID identifies the record that receives the log lines.
recordID uint
}
// newRecordLogger returns a recordLogger that sends log lines for recordID
// through client, using ctx for every update.
func newRecordLogger(ctx context.Context, client *MasterClient, recordID uint) *recordLogger {
	logger := new(recordLogger)
	logger.ctx = ctx
	logger.client = client
	logger.recordID = recordID
	return logger
}
// WriteLine appends message plus a trailing newline to the record log on the
// Master. Delivery is best effort: the update error is ignored.
func (l *recordLogger) WriteLine(message string) {
	update := RecordUpdate{LogAppend: message + "\n"}
	_ = l.client.UpdateRecord(l.ctx, l.recordID, update)
}
// 辅助函数
// computeFileSHA256 streams the file at path through SHA-256 and returns the
// digest as a lowercase hex string. It returns an empty string and the
// underlying error if the file cannot be opened or read.
func computeFileSHA256(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}
// splitCommaOrNewline splits s on commas, newlines and semicolons, trims
// surrounding whitespace from each piece and drops pieces that end up empty.
// It returns nil when nothing remains.
func splitCommaOrNewline(s string) []string {
	isSeparator := func(r rune) bool {
		switch r {
		case ',', '\n', ';':
			return true
		}
		return false
	}

	var names []string
	for _, field := range strings.FieldsFunc(s, isSeparator) {
		trimmed := strings.TrimSpace(field)
		if trimmed == "" {
			continue
		}
		names = append(names, trimmed)
	}
	return names
}