mirror of
https://github.com/Awuqing/BackupX.git
synced 2026-05-11 18:10:23 +08:00
基础修复:
- 新增节点离线检测:每 15s 扫描,超 45s 未心跳的远程节点自动置离线
- 节点删除前检查关联任务,避免孤立备份任务
- BackupTaskRepository 新增 CountByNodeID/ListByNodeID

Master 端 Agent 协议:
- 新增 AgentCommand 模型与命令队列仓储(pending/dispatched/succeeded/failed/timeout)
- 新增 AgentService:任务下发、命令轮询、结果回收、超时扫描
- 新增专用 Agent HTTP API(X-Agent-Token 认证):
  /api/agent/heartbeat
  /api/agent/commands/poll
  /api/agent/commands/:id/result
  /api/agent/tasks/:id
  /api/agent/records/:id
- BackupExecutionService 支持 node 路由:task.NodeID 指向远程节点时自动入队派发

Agent CLI(backupx agent 子命令):
- 配置:YAML 文件 / 环境变量 / CLI 参数,优先级 CLI > 文件 > 环境
- 心跳循环 + 命令轮询循环 + 优雅退出
- 本地复用 BackupRunner 与 storage registry 执行备份并直接上传
- 支持 run_task 和 list_dir 两种命令

远程目录浏览:
- NodeService 支持通过 Agent RPC 列出远程节点目录(15s 超时)

前端:
- NodesPage 添加节点后展示 Agent 启动命令和环境变量配置

文档:
- README 中英文重写"多节点集群"章节,含架构图、步骤、限制、CLI 参考
349 lines
11 KiB
Go
349 lines
11 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"errors"
|
||
"fmt"
|
||
"strings"
|
||
"time"
|
||
|
||
"backupx/server/internal/apperror"
|
||
"backupx/server/internal/model"
|
||
"backupx/server/internal/repository"
|
||
"backupx/server/internal/storage/codec"
|
||
)
|
||
|
||
// AgentService 实现 Master 端 Agent 协议,提供给远程 Agent 通过 HTTP 调用。
|
||
// 所有方法使用 Agent Token 进行节点认证,避免暴露 JWT 给 Agent。
|
||
type AgentService struct {
|
||
nodeRepo repository.NodeRepository
|
||
taskRepo repository.BackupTaskRepository
|
||
recordRepo repository.BackupRecordRepository
|
||
storageRepo repository.StorageTargetRepository
|
||
cmdRepo repository.AgentCommandRepository
|
||
cipher *codec.ConfigCipher
|
||
}
|
||
|
||
func NewAgentService(
|
||
nodeRepo repository.NodeRepository,
|
||
taskRepo repository.BackupTaskRepository,
|
||
recordRepo repository.BackupRecordRepository,
|
||
storageRepo repository.StorageTargetRepository,
|
||
cmdRepo repository.AgentCommandRepository,
|
||
cipher *codec.ConfigCipher,
|
||
) *AgentService {
|
||
return &AgentService{
|
||
nodeRepo: nodeRepo,
|
||
taskRepo: taskRepo,
|
||
recordRepo: recordRepo,
|
||
storageRepo: storageRepo,
|
||
cmdRepo: cmdRepo,
|
||
cipher: cipher,
|
||
}
|
||
}
|
||
|
||
// AuthenticatedNode 通过 token 解析并返回节点。失败返回 401。
|
||
func (s *AgentService) AuthenticatedNode(ctx context.Context, token string) (*model.Node, error) {
|
||
if strings.TrimSpace(token) == "" {
|
||
return nil, apperror.Unauthorized("NODE_INVALID_TOKEN", "缺少认证令牌", nil)
|
||
}
|
||
node, err := s.nodeRepo.FindByToken(ctx, token)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if node == nil {
|
||
return nil, apperror.Unauthorized("NODE_INVALID_TOKEN", "无效的节点认证令牌", nil)
|
||
}
|
||
return node, nil
|
||
}
|
||
|
||
// AgentCommandPayload is the description of a command as it is returned to an
// agent.
type AgentCommandPayload struct {
	// ID is the command's identifier in the command queue.
	ID uint `json:"id"`
	// Type names the command kind.
	Type string `json:"type"`
	// Payload carries the command arguments as raw JSON; it is omitted from
	// the encoded form when empty.
	Payload json.RawMessage `json:"payload,omitempty"`
}
|
||
|
||
// PollCommand 为指定节点拉取一条 pending 命令;无命令时返回 (nil, nil)。
|
||
func (s *AgentService) PollCommand(ctx context.Context, node *model.Node) (*AgentCommandPayload, error) {
|
||
cmd, err := s.cmdRepo.ClaimPending(ctx, node.ID)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if cmd == nil {
|
||
return nil, nil
|
||
}
|
||
return &AgentCommandPayload{
|
||
ID: cmd.ID,
|
||
Type: cmd.Type,
|
||
Payload: json.RawMessage(cmd.Payload),
|
||
}, nil
|
||
}
|
||
|
||
// AgentCommandResult is the outcome of a command as reported by an agent.
type AgentCommandResult struct {
	// Success reports whether the command completed successfully.
	Success bool `json:"success"`
	// ErrorMessage is the agent's error description, if any.
	ErrorMessage string `json:"errorMessage,omitempty"`
	// Result is the command output as raw JSON, if any.
	Result json.RawMessage `json:"result,omitempty"`
}
|
||
|
||
// SubmitCommandResult 接收 Agent 上报的命令结果。
|
||
func (s *AgentService) SubmitCommandResult(ctx context.Context, node *model.Node, cmdID uint, result AgentCommandResult) error {
|
||
cmd, err := s.cmdRepo.FindByID(ctx, cmdID)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if cmd == nil {
|
||
return apperror.New(404, "AGENT_COMMAND_NOT_FOUND", "命令不存在", fmt.Errorf("command %d not found", cmdID))
|
||
}
|
||
if cmd.NodeID != node.ID {
|
||
return apperror.Unauthorized("AGENT_COMMAND_FORBIDDEN", "命令不属于当前节点", nil)
|
||
}
|
||
now := time.Now().UTC()
|
||
if result.Success {
|
||
cmd.Status = model.AgentCommandStatusSucceeded
|
||
} else {
|
||
cmd.Status = model.AgentCommandStatusFailed
|
||
}
|
||
cmd.ErrorMessage = result.ErrorMessage
|
||
if len(result.Result) > 0 {
|
||
cmd.Result = string(result.Result)
|
||
}
|
||
cmd.CompletedAt = &now
|
||
return s.cmdRepo.Update(ctx, cmd)
|
||
}
|
||
|
||
// AgentTaskSpec 给 Agent 返回的任务规格,包含解密后的存储配置,供 Agent 直接执行。
|
||
// 敏感信息:此接口仅供 Agent 调用(token 认证),避免通过公共 API 泄露。
|
||
type AgentTaskSpec struct {
|
||
TaskID uint `json:"taskId"`
|
||
Name string `json:"name"`
|
||
Type string `json:"type"`
|
||
SourcePath string `json:"sourcePath,omitempty"`
|
||
SourcePaths string `json:"sourcePaths,omitempty"`
|
||
ExcludePatterns string `json:"excludePatterns,omitempty"`
|
||
DBHost string `json:"dbHost,omitempty"`
|
||
DBPort int `json:"dbPort,omitempty"`
|
||
DBUser string `json:"dbUser,omitempty"`
|
||
DBPassword string `json:"dbPassword,omitempty"`
|
||
DBName string `json:"dbName,omitempty"`
|
||
DBPath string `json:"dbPath,omitempty"`
|
||
ExtraConfig string `json:"extraConfig,omitempty"`
|
||
Compression string `json:"compression"`
|
||
Encrypt bool `json:"encrypt"`
|
||
StorageTargets []AgentStorageTargetConfig `json:"storageTargets"`
|
||
}
|
||
|
||
// AgentStorageTargetConfig describes one storage target handed to an agent,
// with its configuration already decrypted.
type AgentStorageTargetConfig struct {
	// ID is the storage target's identifier.
	ID uint `json:"id"`
	// Type is the storage target's type.
	Type string `json:"type"`
	// Name is the storage target's name.
	Name string `json:"name"`
	// Config is the decrypted configuration, passed through as raw JSON.
	Config json.RawMessage `json:"config"`
}
|
||
|
||
// GetTaskSpec 返回 Agent 执行任务所需的完整规格。
|
||
func (s *AgentService) GetTaskSpec(ctx context.Context, node *model.Node, taskID uint) (*AgentTaskSpec, error) {
|
||
task, err := s.taskRepo.FindByID(ctx, taskID)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if task == nil {
|
||
return nil, apperror.New(404, "BACKUP_TASK_NOT_FOUND", "任务不存在", nil)
|
||
}
|
||
if task.NodeID != node.ID {
|
||
return nil, apperror.Unauthorized("BACKUP_TASK_FORBIDDEN", "任务不属于当前节点", nil)
|
||
}
|
||
// 解密数据库密码(若有)
|
||
dbPassword := ""
|
||
if task.DBPasswordCiphertext != "" {
|
||
plain, decErr := s.cipher.Decrypt(task.DBPasswordCiphertext)
|
||
if decErr != nil {
|
||
return nil, fmt.Errorf("decrypt db password: %w", decErr)
|
||
}
|
||
dbPassword = string(plain)
|
||
}
|
||
// 解密存储目标配置
|
||
targets := collectTargetIDs(task)
|
||
storageTargets := make([]AgentStorageTargetConfig, 0, len(targets))
|
||
for _, tid := range targets {
|
||
target, err := s.storageRepo.FindByID(ctx, tid)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if target == nil {
|
||
continue
|
||
}
|
||
configRaw, err := s.cipher.Decrypt(target.ConfigCiphertext)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("decrypt storage config: %w", err)
|
||
}
|
||
storageTargets = append(storageTargets, AgentStorageTargetConfig{
|
||
ID: target.ID,
|
||
Type: target.Type,
|
||
Name: target.Name,
|
||
Config: json.RawMessage(configRaw),
|
||
})
|
||
}
|
||
return &AgentTaskSpec{
|
||
TaskID: task.ID,
|
||
Name: task.Name,
|
||
Type: task.Type,
|
||
SourcePath: task.SourcePath,
|
||
SourcePaths: task.SourcePaths,
|
||
ExcludePatterns: task.ExcludePatterns,
|
||
DBHost: task.DBHost,
|
||
DBPort: task.DBPort,
|
||
DBUser: task.DBUser,
|
||
DBPassword: dbPassword,
|
||
DBName: task.DBName,
|
||
DBPath: task.DBPath,
|
||
ExtraConfig: task.ExtraConfig,
|
||
Compression: task.Compression,
|
||
Encrypt: task.Encrypt,
|
||
StorageTargets: storageTargets,
|
||
}, nil
|
||
}
|
||
|
||
// AgentRecordUpdate is what an agent reports about a backup record: its
// status and, where available, details of the produced artifact.
type AgentRecordUpdate struct {
	// Status is one of: running | success | failed.
	Status string `json:"status"`
	// FileName, FileSize, Checksum and StoragePath describe the backup artifact.
	FileName    string `json:"fileName,omitempty"`
	FileSize    int64  `json:"fileSize,omitempty"`
	Checksum    string `json:"checksum,omitempty"`
	StoragePath string `json:"storagePath,omitempty"`
	// ErrorMessage is the agent's error description, if any.
	ErrorMessage string `json:"errorMessage,omitempty"`
	// LogAppend is an incremental log chunk that gets appended to the
	// record's log content.
	LogAppend string `json:"logAppend,omitempty"`
}
|
||
|
||
// UpdateRecord 更新备份记录的状态/日志。Agent 在执行过程中可多次调用。
|
||
func (s *AgentService) UpdateRecord(ctx context.Context, node *model.Node, recordID uint, update AgentRecordUpdate) error {
|
||
record, err := s.recordRepo.FindByID(ctx, recordID)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if record == nil {
|
||
return apperror.New(404, "BACKUP_RECORD_NOT_FOUND", "记录不存在", nil)
|
||
}
|
||
// 通过 task.NodeID 判断是否属于当前 agent
|
||
task, err := s.taskRepo.FindByID(ctx, record.TaskID)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if task == nil || task.NodeID != node.ID {
|
||
return apperror.Unauthorized("BACKUP_RECORD_FORBIDDEN", "记录不属于当前节点", nil)
|
||
}
|
||
if update.Status != "" {
|
||
record.Status = update.Status
|
||
}
|
||
if update.FileName != "" {
|
||
record.FileName = update.FileName
|
||
}
|
||
if update.FileSize > 0 {
|
||
record.FileSize = update.FileSize
|
||
}
|
||
if update.Checksum != "" {
|
||
record.Checksum = update.Checksum
|
||
}
|
||
if update.StoragePath != "" {
|
||
record.StoragePath = update.StoragePath
|
||
}
|
||
if update.ErrorMessage != "" {
|
||
record.ErrorMessage = update.ErrorMessage
|
||
}
|
||
if update.LogAppend != "" {
|
||
if record.LogContent == "" {
|
||
record.LogContent = update.LogAppend
|
||
} else {
|
||
record.LogContent += update.LogAppend
|
||
}
|
||
}
|
||
if update.Status == model.BackupRecordStatusSuccess || update.Status == model.BackupRecordStatusFailed {
|
||
now := time.Now().UTC()
|
||
record.CompletedAt = &now
|
||
record.DurationSeconds = int(now.Sub(record.StartedAt).Seconds())
|
||
}
|
||
if err := s.recordRepo.Update(ctx, record); err != nil {
|
||
return err
|
||
}
|
||
// 同步更新任务的 last_status
|
||
if update.Status == model.BackupRecordStatusSuccess || update.Status == model.BackupRecordStatusFailed {
|
||
task.LastStatus = update.Status
|
||
_ = s.taskRepo.Update(ctx, task)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// EnqueueCommand Master 端调用:给指定节点插入一条待执行命令。
|
||
// 返回命令 ID。
|
||
func (s *AgentService) EnqueueCommand(ctx context.Context, nodeID uint, cmdType string, payload any) (uint, error) {
|
||
if nodeID == 0 {
|
||
return 0, errors.New("nodeID is required")
|
||
}
|
||
payloadBytes, err := json.Marshal(payload)
|
||
if err != nil {
|
||
return 0, fmt.Errorf("marshal payload: %w", err)
|
||
}
|
||
cmd := &model.AgentCommand{
|
||
NodeID: nodeID,
|
||
Type: cmdType,
|
||
Status: model.AgentCommandStatusPending,
|
||
Payload: string(payloadBytes),
|
||
}
|
||
if err := s.cmdRepo.Create(ctx, cmd); err != nil {
|
||
return 0, err
|
||
}
|
||
return cmd.ID, nil
|
||
}
|
||
|
||
// WaitForCommandResult 同步等待指定命令完成(用于 list_dir 这类 RPC 式调用)。
|
||
// timeout 为 0 表示不限,建议传 10~30s。
|
||
func (s *AgentService) WaitForCommandResult(ctx context.Context, cmdID uint, timeout time.Duration) (*model.AgentCommand, error) {
|
||
deadline := time.Now().Add(timeout)
|
||
for {
|
||
cmd, err := s.cmdRepo.FindByID(ctx, cmdID)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if cmd == nil {
|
||
return nil, apperror.New(404, "AGENT_COMMAND_NOT_FOUND", "命令不存在", nil)
|
||
}
|
||
switch cmd.Status {
|
||
case model.AgentCommandStatusSucceeded, model.AgentCommandStatusFailed, model.AgentCommandStatusTimeout:
|
||
return cmd, nil
|
||
}
|
||
if timeout > 0 && time.Now().After(deadline) {
|
||
return nil, apperror.New(504, "AGENT_COMMAND_TIMEOUT", "等待 Agent 响应超时", nil)
|
||
}
|
||
select {
|
||
case <-ctx.Done():
|
||
return nil, ctx.Err()
|
||
case <-time.After(300 * time.Millisecond):
|
||
}
|
||
}
|
||
}
|
||
|
||
// StartCommandTimeoutMonitor 启动后台定时任务,把超时命令标记为 timeout。
|
||
func (s *AgentService) StartCommandTimeoutMonitor(ctx context.Context, interval time.Duration, timeout time.Duration) {
|
||
if interval <= 0 {
|
||
interval = 30 * time.Second
|
||
}
|
||
if timeout <= 0 {
|
||
timeout = 10 * time.Minute
|
||
}
|
||
ticker := time.NewTicker(interval)
|
||
go func() {
|
||
defer ticker.Stop()
|
||
for {
|
||
select {
|
||
case <-ctx.Done():
|
||
return
|
||
case <-ticker.C:
|
||
threshold := time.Now().UTC().Add(-timeout)
|
||
_, _ = s.cmdRepo.MarkStaleTimeout(ctx, threshold)
|
||
}
|
||
}
|
||
}()
|
||
}
|