perf(sync): 优化大表同步分页与批量写入

- 同步分析和预览改为分页扫描差异,避免一次性加载源表和目标表

- 直接导入与源查询同步支持分页读取和分批提交,降低低内存机器 OOM 风险

- 各数据库 ApplyChanges 统一使用参数化批量 INSERT,减少大表同步 SQL 超时

- MySQL 批量写入按行数和参数数量拆分,兼容超宽表场景

- 补充批量插入、分页差异和源查询同步回归测试
This commit is contained in:
Syngnat
2026-05-26 08:27:15 +08:00
parent aa2177d35a
commit 5ab50db51c
27 changed files with 2846 additions and 319 deletions

View File

@@ -5,11 +5,14 @@ import (
"GoNavi-Wails/internal/db"
"GoNavi-Wails/internal/logger"
"fmt"
"math"
"sort"
"strings"
"time"
)
// defaultSyncApplyBatchSize is the maximum number of changed rows (deletes,
// updates and inserts combined) placed in one batch when a change set is split
// for submission, and the fallback used when a non-positive batch size is given.
const defaultSyncApplyBatchSize = 1000
// SyncConfig defines the parameters for a synchronization task
type SyncConfig struct {
SourceConfig connection.ConnectionConfig `json:"sourceConfig"`
@@ -251,6 +254,54 @@ func (s *SyncEngine) RunSync(config SyncConfig) SyncResult {
return
}
if handled, inserted, err := s.tryApplyDirectImportInPages(config, &result, i, totalTables, tableName, sourceDB, targetDB, plan, cols, targetCols, opts, sourceType, targetType, applyTableName); handled {
if err != nil {
logger.Error(err, "分页流式导入失败:表=%s", tableName)
s.appendLog(config.JobID, &result, "error", fmt.Sprintf(" -> 分页流式导入失败: %v", err))
return
}
result.RowsInserted += inserted
if inserted > 0 {
s.appendLog(config.JobID, &result, "info", fmt.Sprintf(" -> 分页流式导入完成:插入=%d 行", inserted))
} else {
s.appendLog(config.JobID, &result, "info", " -> 源表无可导入数据")
}
if len(plan.PostDataSQL) > 0 {
s.progress(config.JobID, i, totalTables, tableName, "创建索引")
if err := executeSQLStatements(targetDB.Exec, plan.PostDataSQL); err != nil {
s.appendLog(config.JobID, &result, "error", fmt.Sprintf("创建索引失败:表=%s 错误=%v", tableName, err))
return
}
}
result.TablesSynced++
return
}
if handled, counts, err := s.tryApplyDiffInPages(config, &result, i, totalTables, tableName, sourceDB, targetDB, plan, cols, targetCols, opts, sourceType, targetType, applyTableName, pkCol); handled {
if err != nil {
logger.Error(err, "分页差异同步失败:表=%s", tableName)
s.appendLog(config.JobID, &result, "error", fmt.Sprintf(" -> 分页差异同步失败: %v", err))
return
}
result.RowsInserted += counts.Inserts
result.RowsUpdated += counts.Updates
result.RowsDeleted += counts.Deletes
if counts.Inserts > 0 || counts.Updates > 0 || counts.Deletes > 0 {
s.appendLog(config.JobID, &result, "info", fmt.Sprintf(" -> 分页差异同步完成:插入=%d 更新=%d 删除=%d", counts.Inserts, counts.Updates, counts.Deletes))
} else {
s.appendLog(config.JobID, &result, "info", " -> 数据一致,无需变更.")
}
if len(plan.PostDataSQL) > 0 {
s.progress(config.JobID, i, totalTables, tableName, "创建索引")
if err := executeSQLStatements(targetDB.Exec, plan.PostDataSQL); err != nil {
s.appendLog(config.JobID, &result, "error", fmt.Sprintf("创建索引失败:表=%s 错误=%v", tableName, err))
return
}
}
result.TablesSynced++
return
}
s.progress(config.JobID, i, totalTables, tableName, "读取源表数据")
sourceRows, _, err := sourceDB.Query(fmt.Sprintf("SELECT * FROM %s", quoteQualifiedIdentByType(sourceType, sourceQueryTable)))
if err != nil {
@@ -401,7 +452,7 @@ func (s *SyncEngine) RunSync(config SyncConfig) SyncResult {
if len(changeSet.Inserts) > 0 || len(changeSet.Updates) > 0 || len(changeSet.Deletes) > 0 {
s.appendLog(config.JobID, &result, "info", fmt.Sprintf(" -> 需插入: %d 行, 需更新: %d 行, 需删除: %d 行", len(changeSet.Inserts), len(changeSet.Updates), len(changeSet.Deletes)))
if applier, ok := targetDB.(db.BatchApplier); ok {
if err := applier.ApplyChanges(applyTableName, changeSet); err != nil {
if err := s.applyChangesInBatches(config.JobID, &result, applyTableName, applier, changeSet); err != nil {
s.appendLog(config.JobID, &result, "error", fmt.Sprintf(" -> 应用变更失败: %v", err))
return
}
@@ -497,6 +548,75 @@ func (s *SyncEngine) fail(jobID string, totalTables int, res SyncResult, msg str
return res
}
// applyChangesInBatches submits changes to applier in chunks of at most
// defaultSyncApplyBatchSize rows.
//
// An empty change set is a no-op. When more than one chunk is needed, a summary
// line and a per-chunk progress line are appended to the job log, and a failure
// is wrapped with the position of the chunk that failed. With a single chunk
// the applier's error is returned unchanged. Submission stops at the first
// failing chunk.
func (s *SyncEngine) applyChangesInBatches(jobID string, res *SyncResult, tableName string, applier db.BatchApplier, changes connection.ChangeSet) error {
	chunks := splitChangeSetBatches(changes, defaultSyncApplyBatchSize)
	total := len(chunks)
	if total == 0 {
		return nil
	}

	// Only announce batching when the change set was actually split.
	multi := total > 1
	if multi {
		s.appendLog(jobID, res, "info", fmt.Sprintf(" -> 大批量变更将拆分为 %d 批提交(每批最多 %d 行)", total, defaultSyncApplyBatchSize))
	}

	for i := range chunks {
		seq := i + 1
		if multi {
			progress := fmt.Sprintf(" -> 提交批次 %d/%d插入=%d 更新=%d 删除=%d",
				seq, total, len(chunks[i].Inserts), len(chunks[i].Updates), len(chunks[i].Deletes))
			s.appendLog(jobID, res, "info", progress)
		}

		err := applier.ApplyChanges(tableName, chunks[i])
		if err == nil {
			continue
		}
		if multi {
			return fmt.Errorf("批次 %d/%d 失败: %w", seq, total, err)
		}
		return err
	}
	return nil
}
// splitChangeSetBatches partitions changes into consecutive change sets that
// each hold at most batchSize rows.
//
// Rows are consumed in the order deletes, updates, inserts, so a batch that
// straddles a boundary may contain more than one kind of change. Every batch
// carries the LocatorStrategy of the input. A non-positive batchSize falls back
// to defaultSyncApplyBatchSize, and an input without any rows yields nil.
func splitChangeSetBatches(changes connection.ChangeSet, batchSize int) []connection.ChangeSet {
	if batchSize <= 0 {
		batchSize = defaultSyncApplyBatchSize
	}

	rowCount := len(changes.Deletes) + len(changes.Updates) + len(changes.Inserts)
	if rowCount == 0 {
		return nil
	}

	// newBatch returns an empty change set that inherits the locator strategy.
	newBatch := func() connection.ChangeSet {
		return connection.ChangeSet{LocatorStrategy: changes.LocatorStrategy}
	}

	expected := int(math.Ceil(float64(rowCount) / float64(batchSize)))
	result := make([]connection.ChangeSet, 0, expected)
	pending := newBatch()
	filled := 0

	// reserve accounts for one more row, first closing the pending batch when
	// it has already reached batchSize.
	reserve := func() {
		if filled >= batchSize {
			result = append(result, pending)
			pending = newBatch()
			filled = 0
		}
		filled++
	}

	for i := range changes.Deletes {
		reserve()
		pending.Deletes = append(pending.Deletes, changes.Deletes[i])
	}
	for i := range changes.Updates {
		reserve()
		pending.Updates = append(pending.Updates, changes.Updates[i])
	}
	for i := range changes.Inserts {
		reserve()
		pending.Inserts = append(pending.Inserts, changes.Inserts[i])
	}

	// Emit the trailing, possibly partial, batch.
	if filled > 0 {
		result = append(result, pending)
	}
	return result
}
func (s *SyncEngine) execDDLStatements(jobID string, res *SyncResult, database db.Database, tableName string, stage string, statements []string) error {
for _, statement := range statements {
sqlText := strings.TrimSpace(statement)