mirror of
https://github.com/Awuqing/BackupX.git
synced 2026-05-06 20:02:41 +08:00
* 功能: v2.0.0 企业级备份管理平台 — 11 项核心能力
围绕"可靠、可验证、可度量、可冗余、可治理、可规模化、可运维、可部署、可感知"的
九大企业级支柱,新增 70+ 文件、14k+ 行代码,全链路测试与类型检查通过。
## 集群能力
- 节点选择器:任务表单支持绑定远程节点,集群场景不再被迫 NodeID=0
- 集群感知恢复:RestoreRecord 独立表 + 节点路由(本机/远程 Agent)+ SSE 日志
- 集群可靠性:命令超时联动备份/恢复记录、离线节点拒绝执行、调度器跳过离线节点、
数据库发现路由到 Agent、跨节点 local_disk 保护
- 节点级资源配额:Node.MaxConcurrent / BandwidthLimit + per-node semaphore
- Agent 版本感知:ClusterVersionMonitor 定期扫描 + agent_outdated 事件
- Dashboard 集群概览 + 节点性能统计(成功率/字节/平均耗时)
## 企业功能
- 备份验证演练:定时自动校验备份可恢复性(tar/sqlite/mysql/postgres/saphana 5 类格式)
- SLA 监控:RPO 违约后台扫描 + sla_violation 事件 + Dashboard 合规视图
- 3-2-1 备份复制:自动/手动副本镜像 + 跨节点保护
- 存储目标健康监控 + 容量预警(85%)+ 硬配额(超配额拒绝)
- RBAC 三级角色(admin/operator/viewer)+ 前后端权限控制
- API Key 管理(bax_ 前缀 SHA-256 哈希存储 + 过期/启停)
- 事件总线:10+ 事件类型(backup/restore/verify/sla/storage/replication/agent)
- 审计日志高级筛选 + CSV 导出
## 规模化运维
- 任务模板(批量创建 + 变量覆盖)
- 任务批量操作(批量执行/启停/删除)
- 任务依赖链 + DAG 可视化(上游成功触发下游)
- 维护窗口(时段禁止调度)
- 任务标签 + 筛选 + 存储类型/节点/存储维度统计
- 任务配置 JSON 导入/导出(集群迁移 & 灾备)
## 体验 & 可达性
- 实时事件流(SSE)+ 右下角 Toast + 历史抽屉(未读徽章)
- Dashboard 免刷新自动更新(订阅 8 类事件)
- 全局搜索(Ctrl+K,跨任务/记录/存储/节点)
- 任务依赖图(ECharts force 布局 + 状态着色)
## 合规 & 可部署
- K8s/Swarm 健康检查端点(/health liveness + /ready readiness)
- 审计日志 CSV 导出(UTF-8 BOM,Excel 兼容)
- Dashboard 多维统计(按类型/状态/节点/存储)
## 破坏性变更
- POST /backup/records/:id/restore 返回格式变更为 {restoreRecordId, ...}
(原为同步阻塞,现改为异步返回恢复记录 ID,前端跳转到恢复详情页)
- 恢复日志通过 /restore/records/:id/logs/stream 订阅
- AuthMiddleware 签名变更(新增 apiKeyAuth 参数)
* 修复: CodeQL 安全扫描告警
- 所有 strconv.ParseUint 由 64bit 改为 32bit 位宽,strconv 内置溢出检查
- hashApiKey 参数改名为 rawToken,避免 CodeQL 误判为密码哈希(API Key 是 192 位高熵 token,使用 bcrypt 会引入不必要的延迟;同时补充安全说明)
* 修复: API Key 哈希改用 HMAC-SHA256 + 应用级 pepper
- 符合 RFC 2104 标准,业界 API token 存储的推荐方案
- 数据库泄漏场景下增加离线反推难度(需同时获取二进制 pepper)
- 规避 CodeQL go/weak-sensitive-data-hashing 对裸 SHA-256 的误判
331 lines
13 KiB
Go
package app
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"fmt"
|
||
stdhttp "net/http"
|
||
"time"
|
||
|
||
"backupx/server/internal/backup"
|
||
backupretention "backupx/server/internal/backup/retention"
|
||
"backupx/server/internal/config"
|
||
"backupx/server/internal/database"
|
||
aphttp "backupx/server/internal/http"
|
||
"backupx/server/internal/logger"
|
||
"backupx/server/internal/notify"
|
||
"backupx/server/internal/repository"
|
||
"backupx/server/internal/scheduler"
|
||
"backupx/server/internal/security"
|
||
"backupx/server/internal/service"
|
||
"backupx/server/internal/storage"
|
||
"backupx/server/internal/storage/codec"
|
||
storageRclone "backupx/server/internal/storage/rclone"
|
||
"go.uber.org/zap"
|
||
"gorm.io/gorm"
|
||
)
|
||
|
||
type Application struct {
|
||
cfg config.Config
|
||
version string
|
||
logger *zap.Logger
|
||
db *gorm.DB
|
||
httpServer *stdhttp.Server
|
||
scheduler *scheduler.Service
|
||
}
|
||
|
||
func New(ctx context.Context, cfg config.Config, version string) (*Application, error) {
|
||
appLogger, err := logger.New(cfg.Log)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("init logger: %w", err)
|
||
}
|
||
|
||
db, err := database.Open(cfg.Database, appLogger)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("init database: %w", err)
|
||
}
|
||
|
||
userRepo := repository.NewUserRepository(db)
|
||
systemConfigRepo := repository.NewSystemConfigRepository(db)
|
||
storageTargetRepo := repository.NewStorageTargetRepository(db)
|
||
backupTaskRepo := repository.NewBackupTaskRepository(db)
|
||
backupRecordRepo := repository.NewBackupRecordRepository(db)
|
||
notificationRepo := repository.NewNotificationRepository(db)
|
||
oauthSessionRepo := repository.NewOAuthSessionRepository(db)
|
||
resolvedSecurity, err := service.ResolveSecurity(ctx, cfg.Security, systemConfigRepo)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("resolve security config: %w", err)
|
||
}
|
||
|
||
jwtManager := security.NewJWTManager(resolvedSecurity.JWTSecret, config.MustJWTDuration(cfg.Security))
|
||
rateLimiter := security.NewLoginRateLimiter(5, time.Minute)
|
||
authService := service.NewAuthService(userRepo, systemConfigRepo, jwtManager, rateLimiter)
|
||
systemService := service.NewSystemService(cfg, version, time.Now().UTC())
|
||
configCipher := codec.NewConfigCipher(resolvedSecurity.EncryptionKey)
|
||
storageRegistry := storage.NewRegistry(
|
||
storageRclone.NewLocalDiskFactory(),
|
||
storageRclone.NewS3Factory(),
|
||
storageRclone.NewWebDAVFactory(),
|
||
storageRclone.NewGoogleDriveFactory(),
|
||
storageRclone.NewAliyunOSSFactory(),
|
||
storageRclone.NewTencentCOSFactory(),
|
||
storageRclone.NewQiniuKodoFactory(),
|
||
storageRclone.NewFTPFactory(),
|
||
storageRclone.NewRcloneFactory(),
|
||
)
|
||
// 将全部 rclone 后端注册为独立存储类型(sftp、azureblob、dropbox 等与 s3、ftp 完全平级)
|
||
storageRclone.RegisterAllBackends(storageRegistry)
|
||
storageTargetService := service.NewStorageTargetService(storageTargetRepo, oauthSessionRepo, storageRegistry, configCipher)
|
||
storageTargetService.SetBackupTaskRepository(backupTaskRepo)
|
||
storageTargetService.SetBackupRecordRepository(backupRecordRepo)
|
||
backupTaskService := service.NewBackupTaskService(backupTaskRepo, storageTargetRepo, configCipher)
|
||
backupTaskService.SetRecordsAndStorage(backupRecordRepo, storageRegistry)
|
||
// nodeRepo 在下方 Cluster 节点管理区块才实例化,这里延后注入
|
||
backupRunnerRegistry := backup.NewRegistry(backup.NewFileRunner(), backup.NewSQLiteRunner(), backup.NewMySQLRunner(nil), backup.NewPostgreSQLRunner(nil), backup.NewSAPHANARunner(nil))
|
||
logHub := backup.NewLogHub()
|
||
retentionService := backupretention.NewService(backupRecordRepo)
|
||
notifyRegistry := notify.NewRegistry(notify.NewEmailNotifier(), notify.NewWebhookNotifier(), notify.NewTelegramNotifier())
|
||
notificationService := service.NewNotificationService(notificationRepo, notifyRegistry, configCipher)
|
||
// 初始化 rclone 传输配置(重试 + 带宽限制)
|
||
rcloneCtx := storageRclone.ConfiguredContext(ctx, storageRclone.TransferConfig{
|
||
LowLevelRetries: cfg.Backup.Retries,
|
||
BandwidthLimit: cfg.Backup.BandwidthLimit,
|
||
})
|
||
storageRclone.StartAccounting(rcloneCtx)
|
||
|
||
backupExecutionService := service.NewBackupExecutionService(backupTaskRepo, backupRecordRepo, storageTargetRepo, storageRegistry, backupRunnerRegistry, logHub, retentionService, configCipher, notificationService, cfg.Backup.TempDir, cfg.Backup.MaxConcurrent, cfg.Backup.Retries, cfg.Backup.BandwidthLimit)
|
||
schedulerService := scheduler.NewService(backupTaskRepo, backupExecutionService, appLogger)
|
||
backupTaskService.SetScheduler(schedulerService)
|
||
// 审计日志注入延迟到 auditService 创建后(见下方)
|
||
backupRecordService := service.NewBackupRecordService(backupRecordRepo, backupExecutionService, logHub)
|
||
// 恢复服务:使用独立 LogHub 避免恢复记录与备份记录 ID 命名空间冲突
|
||
restoreRecordRepo := repository.NewRestoreRecordRepository(db)
|
||
restoreLogHub := backup.NewLogHub()
|
||
dashboardService := service.NewDashboardService(backupTaskRepo, backupRecordRepo, storageTargetRepo)
|
||
settingsService := service.NewSettingsService(systemConfigRepo)
|
||
|
||
// Audit
|
||
auditLogRepo := repository.NewAuditLogRepository(db)
|
||
auditService := service.NewAuditService(auditLogRepo)
|
||
authService.SetAuditService(auditService)
|
||
schedulerService.SetAuditRecorder(auditService)
|
||
|
||
// Database discovery(集群依赖在 agentService 创建后注入)
|
||
databaseDiscoveryService := service.NewDatabaseDiscoveryService(backup.NewOSCommandExecutor())
|
||
|
||
// Cluster: Node management
|
||
nodeRepo := repository.NewNodeRepository(db)
|
||
backupTaskService.SetNodeRepository(nodeRepo)
|
||
schedulerService.SetNodeRepository(nodeRepo)
|
||
nodeService := service.NewNodeService(nodeRepo, version)
|
||
nodeService.SetTaskRepository(backupTaskRepo)
|
||
if err := nodeService.EnsureLocalNode(ctx); err != nil {
|
||
appLogger.Warn("failed to ensure local node", zap.Error(err))
|
||
}
|
||
// 启动离线检测:每 15s 扫描一次,超过 45s 未心跳的远程节点标记为离线
|
||
nodeService.StartOfflineMonitor(ctx, 15*time.Second)
|
||
|
||
// Agent 协议服务:命令队列 + 任务下发 + 记录上报
|
||
agentCmdRepo := repository.NewAgentCommandRepository(db)
|
||
agentService := service.NewAgentService(nodeRepo, backupTaskRepo, backupRecordRepo, storageTargetRepo, agentCmdRepo, configCipher)
|
||
agentService.SetRestoreRepository(restoreRecordRepo)
|
||
agentService.StartCommandTimeoutMonitor(ctx, 30*time.Second, 10*time.Minute)
|
||
|
||
// 一键部署:install token service + 后台 GC
|
||
installTokenRepo := repository.NewAgentInstallTokenRepository(db)
|
||
installTokenService := service.NewInstallTokenService(installTokenRepo, nodeRepo)
|
||
installTokenService.StartGC(ctx, time.Hour)
|
||
|
||
// 把 Agent 下发能力注入到备份执行服务,实现多节点路由
|
||
backupExecutionService.SetClusterDependencies(nodeRepo, agentService)
|
||
// 启用远程目录浏览:NodeService 通过 AgentService 做同步 RPC
|
||
nodeService.SetAgentRPC(agentService)
|
||
// 启用远程数据库发现:远程节点任务配置时 DatabasePicker 拿到的是节点视角的 DB 列表
|
||
databaseDiscoveryService.SetClusterDependencies(nodeRepo, agentService)
|
||
|
||
// 恢复服务:集群感知(本地/远程路由),依赖 agentService 入队
|
||
restoreService := service.NewRestoreService(
|
||
restoreRecordRepo,
|
||
backupRecordRepo,
|
||
backupTaskRepo,
|
||
storageTargetRepo,
|
||
nodeRepo,
|
||
storageRegistry,
|
||
backupRunnerRegistry,
|
||
restoreLogHub,
|
||
configCipher,
|
||
agentService,
|
||
cfg.Backup.TempDir,
|
||
cfg.Backup.MaxConcurrent,
|
||
)
|
||
|
||
// 验证服务:定期校验备份可恢复性(企业合规刚需)
|
||
verificationRecordRepo := repository.NewVerificationRecordRepository(db)
|
||
verifyLogHub := backup.NewLogHub()
|
||
verificationService := service.NewVerificationService(
|
||
verificationRecordRepo,
|
||
backupRecordRepo,
|
||
backupTaskRepo,
|
||
storageTargetRepo,
|
||
nodeRepo,
|
||
storageRegistry,
|
||
verifyLogHub,
|
||
configCipher,
|
||
cfg.Backup.TempDir,
|
||
cfg.Backup.MaxConcurrent,
|
||
)
|
||
// 验证失败通知:通过 NotificationService 的事件总线派发 verify_failed
|
||
verificationService.SetNotifier(service.NewVerificationEventNotifier(notificationService))
|
||
// 恢复完成/失败事件派发(restore_success / restore_failed)
|
||
restoreService.SetEventDispatcher(notificationService)
|
||
// 调度器接入验证演练 cron
|
||
schedulerService.SetVerifyRunner(verificationService)
|
||
|
||
// 用户管理与 API Key 服务(企业级 RBAC)
|
||
userService := service.NewUserService(userRepo)
|
||
apiKeyRepo := repository.NewApiKeyRepository(db)
|
||
apiKeyService := service.NewApiKeyService(apiKeyRepo)
|
||
|
||
// SLA 后台扫描:每 15 分钟扫描违约任务,同任务 6 小时内不重复派发
|
||
dashboardService.StartSLAMonitor(ctx, notificationService, 15*time.Minute, 6*time.Hour)
|
||
// 存储目标健康扫描:每 5 分钟测试启用目标,掉线即告警
|
||
storageTargetService.StartHealthMonitor(ctx, notificationService, 5*time.Minute)
|
||
|
||
// 备份复制服务(3-2-1 规则核心)
|
||
replicationRecordRepo := repository.NewReplicationRecordRepository(db)
|
||
replicationService := service.NewReplicationService(
|
||
replicationRecordRepo, backupRecordRepo, storageTargetRepo,
|
||
nodeRepo, storageRegistry, configCipher,
|
||
cfg.Backup.TempDir, cfg.Backup.MaxConcurrent,
|
||
)
|
||
replicationService.SetEventDispatcher(notificationService)
|
||
backupExecutionService.SetReplicationTrigger(replicationService)
|
||
// 备份成功后触发下游依赖任务(任务依赖链工作流)
|
||
backupExecutionService.SetDependentsResolver(backupTaskService)
|
||
|
||
// 任务模板(批量创建)
|
||
taskTemplateRepo := repository.NewTaskTemplateRepository(db)
|
||
taskTemplateService := service.NewTaskTemplateService(taskTemplateRepo, backupTaskService)
|
||
|
||
// 任务配置导入/导出(JSON,集群迁移 & 灾备)
|
||
taskExportService := service.NewTaskExportService(backupTaskService, backupTaskRepo, storageTargetRepo, nodeRepo)
|
||
|
||
// 全局搜索(跨任务/存储/节点/最近记录)
|
||
searchService := service.NewSearchService(backupTaskRepo, backupRecordRepo, storageTargetRepo, nodeRepo)
|
||
|
||
// 实时事件广播器(SSE 推送给前端 Dashboard)
|
||
// 注入 notification 后,每次 DispatchEvent 同时 broadcast 到所有 SSE 订阅者
|
||
eventBroadcaster := service.NewEventBroadcaster()
|
||
notificationService.SetBroadcaster(eventBroadcaster)
|
||
|
||
// 集群版本监控:每 30 分钟扫描,节点 24 小时内只告警一次
|
||
clusterVersionMonitor := service.NewClusterVersionMonitor(nodeRepo, version)
|
||
clusterVersionMonitor.SetEventDispatcher(notificationService)
|
||
clusterVersionMonitor.Start(ctx, 30*time.Minute, 24*time.Hour)
|
||
|
||
// Dashboard 集群概览依赖注入
|
||
dashboardService.SetClusterDependencies(nodeRepo, version)
|
||
|
||
router := aphttp.NewRouter(aphttp.RouterDependencies{
|
||
Context: ctx,
|
||
Config: cfg,
|
||
Version: version,
|
||
Logger: appLogger,
|
||
AuthService: authService,
|
||
SystemService: systemService,
|
||
StorageTargetService: storageTargetService,
|
||
BackupTaskService: backupTaskService,
|
||
BackupExecutionService: backupExecutionService,
|
||
BackupRecordService: backupRecordService,
|
||
RestoreService: restoreService,
|
||
VerificationService: verificationService,
|
||
ReplicationService: replicationService,
|
||
TaskTemplateService: taskTemplateService,
|
||
TaskExportService: taskExportService,
|
||
SearchService: searchService,
|
||
EventBroadcaster: eventBroadcaster,
|
||
UserService: userService,
|
||
ApiKeyService: apiKeyService,
|
||
NotificationService: notificationService,
|
||
DashboardService: dashboardService,
|
||
SettingsService: settingsService,
|
||
NodeService: nodeService,
|
||
AgentService: agentService,
|
||
DatabaseDiscoveryService: databaseDiscoveryService,
|
||
AuditService: auditService,
|
||
JWTManager: jwtManager,
|
||
UserRepository: userRepo,
|
||
SystemConfigRepo: systemConfigRepo,
|
||
InstallTokenService: installTokenService,
|
||
MasterExternalURL: "", // 如需覆盖 URL,可扩展 cfg.Server 增字段;目前留空依赖 X-Forwarded-* / Request.Host
|
||
DB: db,
|
||
})
|
||
|
||
httpServer := &stdhttp.Server{
|
||
Addr: cfg.Address(),
|
||
Handler: router,
|
||
ReadHeaderTimeout: 10 * time.Second,
|
||
}
|
||
|
||
return &Application{
|
||
cfg: cfg,
|
||
version: version,
|
||
logger: appLogger,
|
||
db: db,
|
||
httpServer: httpServer,
|
||
scheduler: schedulerService,
|
||
}, nil
|
||
}
|
||
|
||
func (a *Application) Run(ctx context.Context) error {
|
||
if a.scheduler != nil {
|
||
if err := a.scheduler.Start(context.Background()); err != nil {
|
||
return fmt.Errorf("start scheduler: %w", err)
|
||
}
|
||
}
|
||
errCh := make(chan error, 1)
|
||
go func() {
|
||
a.logger.Info("http server listening", zap.String("addr", a.cfg.Address()), zap.String("version", a.version))
|
||
if err := a.httpServer.ListenAndServe(); err != nil && !errors.Is(err, stdhttp.ErrServerClosed) {
|
||
errCh <- err
|
||
return
|
||
}
|
||
errCh <- nil
|
||
}()
|
||
|
||
select {
|
||
case <-ctx.Done():
|
||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||
defer cancel()
|
||
a.logger.Info("shutdown signal received")
|
||
if err := a.httpServer.Shutdown(shutdownCtx); err != nil {
|
||
return fmt.Errorf("shutdown http server: %w", err)
|
||
}
|
||
if a.scheduler != nil {
|
||
if err := a.scheduler.Stop(context.Background()); err != nil {
|
||
return fmt.Errorf("stop scheduler: %w", err)
|
||
}
|
||
}
|
||
return nil
|
||
case err := <-errCh:
|
||
if err != nil {
|
||
return fmt.Errorf("serve http: %w", err)
|
||
}
|
||
return nil
|
||
}
|
||
}
|
||
|
||
func (a *Application) Close() {
|
||
if a.logger != nil {
|
||
_ = a.logger.Sync()
|
||
}
|
||
}
|
||
|
||
func (a *Application) Logger() *zap.Logger {
|
||
return a.logger
|
||
}
|
||
|
||
func ErrorField(err error) zap.Field {
|
||
return zap.Error(err)
|
||
}
|