mirror of
https://github.com/Awuqing/BackupX.git
synced 2026-05-12 02:20:36 +08:00
基础修复: - 新增节点离线检测:每 15s 扫描,超 45s 未心跳的远程节点自动置离线 - 节点删除前检查关联任务,避免孤立备份任务 - BackupTaskRepository 新增 CountByNodeID/ListByNodeID Master 端 Agent 协议: - 新增 AgentCommand 模型与命令队列仓储(pending/dispatched/succeeded/failed/timeout) - 新增 AgentService:任务下发、命令轮询、结果回收、超时扫描 - 新增专用 Agent HTTP API(X-Agent-Token 认证): /api/agent/heartbeat /api/agent/commands/poll /api/agent/commands/:id/result /api/agent/tasks/:id /api/agent/records/:id - BackupExecutionService 支持 node 路由:task.NodeID 指向远程节点时自动入队派发 Agent CLI(backupx agent 子命令): - 配置:YAML 文件 / 环境变量 / CLI 参数,优先级 CLI > 文件 > 环境 - 心跳循环 + 命令轮询循环 + 优雅退出 - 本地复用 BackupRunner 与 storage registry 执行备份并直接上传 - 支持 run_task 和 list_dir 两种命令 远程目录浏览: - NodeService 支持通过 Agent RPC 列出远程节点目录(15s 超时) 前端: - NodesPage 添加节点后展示 Agent 启动命令和环境变量配置 文档: - README 中英文重写"多节点集群"章节,含架构图、步骤、限制、CLI 参考
102 lines
3.2 KiB
Go
102 lines
3.2 KiB
Go
package repository
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"time"
|
||
|
||
"backupx/server/internal/model"
|
||
"gorm.io/gorm"
|
||
)
|
||
|
||
// AgentCommandRepository 维护 Agent 命令队列。
|
||
type AgentCommandRepository interface {
|
||
Create(ctx context.Context, cmd *model.AgentCommand) error
|
||
FindByID(ctx context.Context, id uint) (*model.AgentCommand, error)
|
||
// ClaimPending 以原子方式把该节点一条 pending 命令置为 dispatched,
|
||
// 并返回领取到的命令。无命令时返回 (nil, nil)。
|
||
ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error)
|
||
Update(ctx context.Context, cmd *model.AgentCommand) error
|
||
// MarkStaleTimeout 把 dispatched 状态但超时未完成的命令标记为 timeout。
|
||
MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error)
|
||
}
|
||
|
||
type GormAgentCommandRepository struct {
|
||
db *gorm.DB
|
||
}
|
||
|
||
func NewAgentCommandRepository(db *gorm.DB) *GormAgentCommandRepository {
|
||
return &GormAgentCommandRepository{db: db}
|
||
}
|
||
|
||
func (r *GormAgentCommandRepository) Create(ctx context.Context, cmd *model.AgentCommand) error {
|
||
return r.db.WithContext(ctx).Create(cmd).Error
|
||
}
|
||
|
||
func (r *GormAgentCommandRepository) FindByID(ctx context.Context, id uint) (*model.AgentCommand, error) {
|
||
var item model.AgentCommand
|
||
if err := r.db.WithContext(ctx).First(&item, id).Error; err != nil {
|
||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||
return nil, nil
|
||
}
|
||
return nil, err
|
||
}
|
||
return &item, nil
|
||
}
|
||
|
||
// ClaimPending 使用 UPDATE...WHERE id=(SELECT...) 的两步方式实现原子领取。
|
||
// SQLite 不支持 SELECT FOR UPDATE,这里用事务 + 乐观锁。
|
||
func (r *GormAgentCommandRepository) ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error) {
|
||
var claimed *model.AgentCommand
|
||
err := r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||
var item model.AgentCommand
|
||
err := tx.Where("node_id = ? AND status = ?", nodeID, model.AgentCommandStatusPending).
|
||
Order("id asc").First(&item).Error
|
||
if err != nil {
|
||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
now := time.Now().UTC()
|
||
result := tx.Model(&model.AgentCommand{}).
|
||
Where("id = ? AND status = ?", item.ID, model.AgentCommandStatusPending).
|
||
Updates(map[string]any{
|
||
"status": model.AgentCommandStatusDispatched,
|
||
"dispatched_at": &now,
|
||
})
|
||
if result.Error != nil {
|
||
return result.Error
|
||
}
|
||
if result.RowsAffected == 0 {
|
||
// 被其它 worker 抢占,放弃
|
||
return nil
|
||
}
|
||
item.Status = model.AgentCommandStatusDispatched
|
||
item.DispatchedAt = &now
|
||
claimed = &item
|
||
return nil
|
||
})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return claimed, nil
|
||
}
|
||
|
||
func (r *GormAgentCommandRepository) Update(ctx context.Context, cmd *model.AgentCommand) error {
|
||
return r.db.WithContext(ctx).Save(cmd).Error
|
||
}
|
||
|
||
func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error) {
|
||
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
|
||
Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
|
||
Updates(map[string]any{
|
||
"status": model.AgentCommandStatusTimeout,
|
||
"error_message": "agent did not report result before timeout",
|
||
})
|
||
if result.Error != nil {
|
||
return 0, result.Error
|
||
}
|
||
return result.RowsAffected, nil
|
||
}
|