refactor: single-pass hashing during upload via TeeReader

The previous approach read the file twice (once for SHA-256, once for upload),
doubling disk I/O. Under concurrent multi-target uploads this becomes a
bottleneck.

New design — hashingReader wraps io.TeeReader + a SHA-256 hash.Hash (sha256.New()):
  file.Read() → TeeReader → hash.Write() (hash) + provider (upload)
A single read pass yields both the byte count and the SHA-256 simultaneously.

Each upload goroutine independently opens the file and computes its own
hash. The first successful target writes the checksum to the record via
sync.Once. Zero extra disk I/O, zero extra memory copies, and safe for
concurrent use.
This commit is contained in:
Awuqing
2026-03-31 13:08:10 +08:00
parent 7568d8a2a2
commit ad5c25f38e
2 changed files with 31 additions and 39 deletions

View File

@@ -1,10 +1,7 @@
package backup
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"os"
"path/filepath"
"strings"
@@ -24,20 +21,6 @@ func createTempArtifact(baseDir, taskName string, extension string) (string, str
return tempDir, filepath.Join(tempDir, fileName), nil
}
// SHA256File streams the file at path through a SHA-256 hasher and returns
// the resulting digest encoded as a hexadecimal string.
//
// If the file cannot be opened, or reading it fails part-way, the returned
// string is empty and the underlying error is wrapped with context.
func SHA256File(path string) (string, error) {
	src, openErr := os.Open(path)
	if openErr != nil {
		return "", fmt.Errorf("open file for checksum: %w", openErr)
	}
	defer src.Close()

	// Feed the file into the hasher in chunks so the whole file never has
	// to be held in memory.
	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, src)
	if copyErr != nil {
		return "", fmt.Errorf("compute checksum: %w", copyErr)
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}
func sanitizeFileName(value string) string {
builder := strings.Builder{}
for _, char := range strings.TrimSpace(value) {