Track health check ListObjects as Class A operations

This commit is contained in:
DullJZ
2025-11-06 01:41:51 +08:00
parent c7e984aac2
commit 1402d40c05
6 changed files with 99 additions and 2 deletions

View File

@@ -184,12 +184,16 @@ func (m *Manager) initHealthMonitoring() {
// 创建S3健康检查器
healthChecker := health.NewS3Checker(healthConfig)
// 设置操作记录器以统计健康检查的 ListObjects 操作
healthChecker.SetOperationRecorder(reporter)
// 创建健康监控器
m.healthMonitor = health.NewMonitor(healthChecker, reporter)
// 创建统计收集器
statsCollector := health.NewS3StatsCollector(30 * time.Second)
// 设置操作记录器以统计 Stats 收集的 ListObjects 操作
statsCollector.SetOperationRecorder(reporter)
// 创建统计监控器
m.statsMonitor = health.NewStatsMonitor(

View File

@@ -1,6 +1,8 @@
package bucket
import (
"log"
"github.com/DullJZ/s3-balance/internal/health"
"github.com/DullJZ/s3-balance/internal/metrics"
)
@@ -64,3 +66,53 @@ func (r *MetricsReporter) ReportStats(stats *health.Stats) {
r.metrics.SetBucketUsage(stats.TargetID, stats.UsedSize, bucket.Config.MaxSizeBytes)
}
}
// RecordOperation implements the health.OperationRecorder interface.
//
// It translates the health-package category into this package's
// OperationCategory, bumps the Prometheus counter when a metrics sink is
// configured, and then updates the bucket's operation count (persisting it
// through the storage service when one is available). Calls for an unknown
// targetID or an unrecognised category are ignored.
func (r *MetricsReporter) RecordOperation(targetID string, category health.OperationCategory) {
	// Grab the bucket and the storage handle under the read lock, then release
	// it before doing any metrics, database, or logging work.
	r.manager.mu.RLock()
	store := r.manager.storage
	target, found := r.manager.buckets[targetID]
	r.manager.mu.RUnlock()
	if !found {
		return
	}

	// Map health.OperationCategory onto bucket.OperationCategory.
	var opClass OperationCategory
	switch category {
	case health.OperationTypeA:
		opClass = OperationTypeA
	case health.OperationTypeB:
		opClass = OperationTypeB
	default:
		return
	}
	opLabel := string(opClass)

	// Update the Prometheus metric.
	if r.metrics != nil {
		r.metrics.RecordBackendOperation(targetID, opLabel)
	}

	// Update the operation count. The boolean returned by the bucket is taken
	// to mean "this bucket is now disabled" (see the log line below).
	var limitReached bool
	if store == nil {
		// No storage service: only the in-memory counter is updated.
		limitReached = target.RecordOperation(opClass)
	} else if persisted, err := store.IncrementBucketOperation(targetID, opLabel); err != nil {
		log.Printf("Failed to persist health check operation count for bucket %s: %v", targetID, err)
		// The database update failed; still count the operation in memory.
		limitReached = target.RecordOperation(opClass)
	} else {
		// Sync the in-memory counter to the value the database returned.
		limitReached = target.SetOperationCount(opClass, persisted)
	}

	if !limitReached {
		return
	}
	log.Printf("Bucket %s disabled after exceeding %s-type operation limit (detected by health check)", targetID, opClass)
}

View File

@@ -34,7 +34,8 @@ func (t *S3Target) GetEndpoint() string {
// S3Checker is the S3 health checker.
type S3Checker struct {
	// config is the checker configuration.
	config Config
	// opRecorder is optional. It is installed with SetOperationRecorder and,
	// when non-nil, is told about the ListObjectsV2 call the simple check
	// issues so that the call is counted as a backend operation.
	opRecorder OperationRecorder
}
// NewS3Checker 创建S3健康检查器
@@ -54,6 +55,11 @@ func NewS3Checker(config Config) *S3Checker {
}
}
// SetOperationRecorder sets the operation recorder used by the checker.
// The recorder is stored as-is; passing nil leaves the checker without a
// recorder, in which case performSimpleCheck skips recording.
func (c *S3Checker) SetOperationRecorder(recorder OperationRecorder) {
c.opRecorder = recorder
}
// Check 执行S3健康检查
func (c *S3Checker) Check(ctx context.Context, target Target) Status {
s3Target, ok := target.(*S3Target)
@@ -121,6 +127,12 @@ func (c *S3Checker) performSimpleCheck(ctx context.Context, target *S3Target) er
Bucket: aws.String(target.Bucket),
MaxKeys: aws.Int32(1),
})
// 记录操作(ListObjectsV2 是 Class A 操作)
if c.opRecorder != nil {
c.opRecorder.RecordOperation(target.GetID(), OperationTypeA)
}
return err
}

View File

@@ -26,7 +26,8 @@ type Stats struct {
// S3StatsCollector S3统计信息收集器
type S3StatsCollector struct {
timeout time.Duration
timeout time.Duration
opRecorder OperationRecorder
}
// NewS3StatsCollector 创建S3统计信息收集器
@@ -39,6 +40,11 @@ func NewS3StatsCollector(timeout time.Duration) *S3StatsCollector {
}
}
// SetOperationRecorder sets the operation recorder used by the collector.
// The recorder is stored as-is; passing nil leaves the collector without a
// recorder, in which case CollectStats skips recording.
func (c *S3StatsCollector) SetOperationRecorder(recorder OperationRecorder) {
c.opRecorder = recorder
}
// CollectStats 收集S3存储桶统计信息
func (c *S3StatsCollector) CollectStats(ctx context.Context, target Target) (*Stats, error) {
s3Target, ok := target.(*S3Target)
@@ -58,6 +64,12 @@ func (c *S3StatsCollector) CollectStats(ctx context.Context, target Target) (*St
Bucket: aws.String(s3Target.Bucket),
ContinuationToken: continuationToken,
})
// 记录操作(每次 ListObjectsV2 调用都是 Class A 操作)
if c.opRecorder != nil {
c.opRecorder.RecordOperation(s3Target.GetID(), OperationTypeA)
}
if err != nil {
return nil, err
}

View File

@@ -66,3 +66,19 @@ type HealthReporter interface {
// ReportHealth 报告健康状态
ReportHealth(targetID string, status Status)
}
// OperationCategory is the category of a backend operation.
// It is a string type; the values declared below are "A" and "B".
type OperationCategory string
const (
// OperationTypeA is the write-class category (ListObjects, PutObject, etc.).
OperationTypeA OperationCategory = "A"
// OperationTypeB is the read-class category (GetObject).
OperationTypeB OperationCategory = "B"
)
// OperationRecorder is the interface for recording backend operations.
type OperationRecorder interface {
// RecordOperation records one backend operation of the given category
// against the target identified by targetID.
RecordOperation(targetID string, category OperationCategory)
}