Files
BackupX/server/internal/repository/agent_command_repository.go
Wu Qing 7a6ffd4ddd feat(BackupX): 修复跨节点备份恢复终态处理 (#60)
* feat(BackupX): 修复集群部署管理逻辑

* feat(BackupX): 修复节点池任务运行归属

* feat(BackupX): 修复跨节点恢复路由

* feat(BackupX): 修复跨节点备份恢复终态处理

* test(BackupX): 稳定安装流HTTP测试
2026-05-09 23:03:25 +08:00

189 lines
7.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package repository
import (
"context"
"errors"
"time"
"backupx/server/internal/model"
"gorm.io/gorm"
)
// AgentCommandRepository 维护 Agent 命令队列。
type AgentCommandRepository interface {
Create(ctx context.Context, cmd *model.AgentCommand) error
FindByID(ctx context.Context, id uint) (*model.AgentCommand, error)
// ClaimPending 以原子方式把该节点一条 pending 命令置为 dispatched
// 并返回领取到的命令。无命令时返回 (nil, nil)。
ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error)
Update(ctx context.Context, cmd *model.AgentCommand) error
// CompleteDispatched 只在命令仍处于 dispatched 时写入终态。
// 返回 false 表示命令已被超时监控或其它流程终结,调用方不应覆盖。
CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error)
// MarkStaleTimeout 把 dispatched 状态但超时未完成的命令标记为 timeout。
// 返回被标记的行数。不返回具体命令(供背景监控简单调用)。
MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error)
// TimeoutActive 只在命令仍处于 pending/dispatched 时写入 timeout。
// 返回 false 表示命令已被 Agent 回写为终态,调用方不应覆盖。
TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error)
// ListStaleDispatched 列出 dispatched 但已超时、尚未被标记的命令。
// 调用方需要把它们逐一标记 timeout 并联动关联记录状态。
ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
// ListStaleActive 列出 pending/dispatched 但已超时、尚未完成的命令。
// pending 使用 created_at 判定dispatched 使用 dispatched_at 判定。
ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error)
// ListPendingByNode 列出某节点下的所有 pending/dispatched 命令。
// 用于删除节点或节点离线时的清理。
ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error)
}
type GormAgentCommandRepository struct {
db *gorm.DB
}
func NewAgentCommandRepository(db *gorm.DB) *GormAgentCommandRepository {
return &GormAgentCommandRepository{db: db}
}
func (r *GormAgentCommandRepository) Create(ctx context.Context, cmd *model.AgentCommand) error {
return r.db.WithContext(ctx).Create(cmd).Error
}
func (r *GormAgentCommandRepository) FindByID(ctx context.Context, id uint) (*model.AgentCommand, error) {
var item model.AgentCommand
if err := r.db.WithContext(ctx).First(&item, id).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, nil
}
return nil, err
}
return &item, nil
}
// ClaimPending 使用 UPDATE...WHERE id=(SELECT...) 的两步方式实现原子领取。
// SQLite 不支持 SELECT FOR UPDATE这里用事务 + 乐观锁。
func (r *GormAgentCommandRepository) ClaimPending(ctx context.Context, nodeID uint) (*model.AgentCommand, error) {
var claimed *model.AgentCommand
err := r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var item model.AgentCommand
err := tx.Where("node_id = ? AND status = ?", nodeID, model.AgentCommandStatusPending).
Order("id asc").First(&item).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
now := time.Now().UTC()
result := tx.Model(&model.AgentCommand{}).
Where("id = ? AND status = ?", item.ID, model.AgentCommandStatusPending).
Updates(map[string]any{
"status": model.AgentCommandStatusDispatched,
"dispatched_at": &now,
})
if result.Error != nil {
return result.Error
}
if result.RowsAffected == 0 {
// 被其它 worker 抢占,放弃
return nil
}
item.Status = model.AgentCommandStatusDispatched
item.DispatchedAt = &now
claimed = &item
return nil
})
if err != nil {
return nil, err
}
return claimed, nil
}
func (r *GormAgentCommandRepository) Update(ctx context.Context, cmd *model.AgentCommand) error {
return r.db.WithContext(ctx).Save(cmd).Error
}
func (r *GormAgentCommandRepository) CompleteDispatched(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
Where("id = ? AND node_id = ? AND status = ?", cmd.ID, cmd.NodeID, model.AgentCommandStatusDispatched).
Updates(map[string]any{
"status": cmd.Status,
"error_message": cmd.ErrorMessage,
"result": cmd.Result,
"completed_at": cmd.CompletedAt,
})
if result.Error != nil {
return false, result.Error
}
return result.RowsAffected > 0, nil
}
func (r *GormAgentCommandRepository) MarkStaleTimeout(ctx context.Context, threshold time.Time) (int64, error) {
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
Updates(map[string]any{
"status": model.AgentCommandStatusTimeout,
"error_message": "agent did not report result before timeout",
})
if result.Error != nil {
return 0, result.Error
}
return result.RowsAffected, nil
}
func (r *GormAgentCommandRepository) TimeoutActive(ctx context.Context, cmd *model.AgentCommand) (bool, error) {
result := r.db.WithContext(ctx).Model(&model.AgentCommand{}).
Where("id = ? AND status IN ?", cmd.ID, []string{model.AgentCommandStatusPending, model.AgentCommandStatusDispatched}).
Updates(map[string]any{
"status": model.AgentCommandStatusTimeout,
"error_message": cmd.ErrorMessage,
"completed_at": cmd.CompletedAt,
})
if result.Error != nil {
return false, result.Error
}
return result.RowsAffected > 0, nil
}
// ListStaleDispatched 列出 dispatched 但 dispatched_at 早于 threshold 的命令。
func (r *GormAgentCommandRepository) ListStaleDispatched(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
var items []model.AgentCommand
if err := r.db.WithContext(ctx).
Where("status = ? AND dispatched_at < ?", model.AgentCommandStatusDispatched, threshold).
Order("id asc").
Find(&items).Error; err != nil {
return nil, err
}
return items, nil
}
func (r *GormAgentCommandRepository) ListStaleActive(ctx context.Context, threshold time.Time) ([]model.AgentCommand, error) {
var items []model.AgentCommand
if err := r.db.WithContext(ctx).
Where(
"(status = ? AND created_at < ?) OR (status = ? AND dispatched_at < ?)",
model.AgentCommandStatusPending, threshold,
model.AgentCommandStatusDispatched, threshold,
).
Order("id asc").
Find(&items).Error; err != nil {
return nil, err
}
return items, nil
}
// ListPendingByNode 列出某节点下所有待执行pending 或 dispatched命令。
func (r *GormAgentCommandRepository) ListPendingByNode(ctx context.Context, nodeID uint) ([]model.AgentCommand, error) {
var items []model.AgentCommand
if err := r.db.WithContext(ctx).
Where("node_id = ? AND status IN ?", nodeID, []string{
model.AgentCommandStatusPending,
model.AgentCommandStatusDispatched,
}).
Order("id asc").
Find(&items).Error; err != nil {
return nil, err
}
return items, nil
}