⚡️ perf(import-export): 降低 OceanBase 导出链路内存占用

- 为 optional driver-agent 补齐 streamQuery 分片协议,避免大结果集整批缓冲到内存
- 在 OceanBase 整表导出和查询结果导出前强校验 driver-agent revision,旧版代理直接拦截并提示重装
- 为 driver-agent 增加大查询和流式导出完成后的 GC/FreeOSMemory 回收逻辑
- 补充导出前校验、流式分片消费和 agent 内存回收的定向测试
- 更新 driver-agent revisions 以匹配新的流式导出协议
This commit is contained in:
Syngnat
2026-06-18 11:32:08 +08:00
parent 6bd87fa568
commit c8fe90cbee
8 changed files with 801 additions and 22 deletions

View File

@@ -7,8 +7,11 @@ import (
"fmt"
"os"
"reflect"
"runtime"
"runtime/debug"
"strconv"
"strings"
"sync/atomic"
"time"
"GoNavi-Wails/internal/connection"
@@ -33,6 +36,7 @@ type agentResponse struct {
Error string `json:"error,omitempty"`
Data interface{} `json:"data,omitempty"`
Fields []string `json:"fields,omitempty"`
ChunkType string `json:"chunkType,omitempty"`
RowsAffected int64 `json:"rowsAffected,omitempty"`
}
@@ -44,6 +48,7 @@ const (
agentMethodOpenSession = "openSession"
agentMethodCloseSession = "closeSession"
agentMethodQuery = "query"
agentMethodStreamQuery = "streamQuery"
agentMethodExec = "exec"
agentMethodGetDatabases = "getDatabases"
agentMethodGetTables = "getTables"
@@ -58,9 +63,27 @@ const (
// legacyClickHouseDefaultTimeout is the fallback time budget applied when a
// request carries no positive timeout and the agent driver type is
// "clickhouse".
const legacyClickHouseDefaultTimeout = 2 * time.Hour

const (
	// Values placed in agentResponse.ChunkType by the streamQuery handler:
	// one "columns" chunk, zero or more "rows" chunks, then a "done" chunk.
	agentChunkColumns = "columns"
	agentChunkRows    = "rows"
	agentChunkDone    = "done"

	// agentStreamBatchSize is the number of buffered rows at which the stream
	// writer emits a "rows" chunk.
	agentStreamBatchSize = 256

	// agentMemoryTrimRowsThreshold is the minimum row count of a finished job
	// for which a memory trim is requested.
	agentMemoryTrimRowsThreshold = 100_000

	// agentMemoryTrimMinInterval is the minimum spacing between two trims.
	agentMemoryTrimMinInterval = 3 * time.Second
)
var (
agentDriverType string
agentDatabaseFactory func() db.Database
agentDriverType string
agentDatabaseFactory func() db.Database
agentMemoryTrimRunning atomic.Bool
agentMemoryTrimLastAt atomic.Int64
runAgentMemoryTrimAsync = func(fn func()) {
go fn()
}
agentMemoryTrimFn = func() {
runtime.GC()
debug.FreeOSMemory()
}
)
type agentRuntime struct {
@@ -99,11 +122,22 @@ func main() {
continue
}
if strings.TrimSpace(req.Method) == agentMethodStreamQuery {
if err := handleStreamRequest(runtimeState, req, writer); err != nil {
fmt.Fprintf(os.Stderr, "写入流式响应失败:%v\n", err)
break
}
continue
}
resp := handleRequest(runtimeState, req)
if err := writeResponse(writer, resp); err != nil {
fmt.Fprintf(os.Stderr, "写入响应失败:%v\n", err)
break
}
if strings.TrimSpace(req.Method) == agentMethodQuery {
maybeReleaseAgentMemory("query-response", countAgentResponseRows(resp.Data))
}
}
runtimeState.close()
@@ -288,6 +322,108 @@ func handleRequest(runtimeState *agentRuntime, req agentRequest) agentResponse {
return resp
}
// agentStreamResponseWriter turns streamed query results into chunked agent
// responses written to a buffered writer.
type agentStreamResponseWriter struct {
	// writer is the destination for every chunk.
	writer *bufio.Writer
	// requestID is copied into the ID of each emitted response.
	requestID int64
	// columns is the column order announced by SetColumns.
	columns []string
	// rows holds rows accepted since the last flush.
	rows [][]any
	// rowCount is the total number of rows accepted so far.
	rowCount int64
}
// newAgentStreamResponseWriter returns a stream writer bound to writer whose
// chunks all carry requestID. Columns and rows start out empty.
func newAgentStreamResponseWriter(writer *bufio.Writer, requestID int64) *agentStreamResponseWriter {
	w := new(agentStreamResponseWriter)
	w.writer = writer
	w.requestID = requestID
	return w
}
// SetColumns stores a private copy of columns and immediately writes a
// "columns" chunk announcing them. It returns the write error, if any.
func (w *agentStreamResponseWriter) SetColumns(columns []string) error {
	// Copy so later changes to the caller's slice cannot alter the header.
	w.columns = append([]string(nil), columns...)

	header := agentResponse{
		ID:        w.requestID,
		Success:   true,
		ChunkType: agentChunkColumns,
		Fields:    w.columns,
	}
	return writeResponse(w.writer, header)
}
// ConsumeRow accepts a row keyed by column name, projects it onto the column
// order recorded by SetColumns, and forwards it to ConsumeRowValues. It fails
// when no columns have been recorded. Columns absent from row become nil.
func (w *agentStreamResponseWriter) ConsumeRow(row map[string]interface{}) error {
	width := len(w.columns)
	if width == 0 {
		return fmt.Errorf("流式查询缺少列定义")
	}

	ordered := make([]interface{}, 0, width)
	for _, name := range w.columns {
		ordered = append(ordered, row[name])
	}
	return w.ConsumeRowValues(ordered)
}
// ConsumeRowValues buffers a copy of values as one row, bumps the total row
// counter, and flushes the buffer once it holds agentStreamBatchSize rows.
func (w *agentStreamResponseWriter) ConsumeRowValues(values []interface{}) error {
	// Copy the values so the producer may reuse its slice for the next row.
	w.rows = append(w.rows, append([]interface{}(nil), values...))
	w.rowCount++

	if len(w.rows) >= agentStreamBatchSize {
		return w.flushRows()
	}
	return nil
}
// flushRows writes the buffered rows as a single "rows" chunk and clears the
// buffer. With nothing buffered it writes nothing and returns nil.
func (w *agentStreamResponseWriter) flushRows() error {
	pending := w.rows
	if len(pending) == 0 {
		return nil
	}
	// Detach the buffer before writing; the next batch starts from nil.
	w.rows = nil

	chunk := agentResponse{
		ID:        w.requestID,
		Success:   true,
		ChunkType: agentChunkRows,
		Data:      pending,
	}
	return writeResponse(w.writer, chunk)
}
// finish flushes any rows still buffered at the end of a stream.
func (w *agentStreamResponseWriter) finish() error {
	err := w.flushRows()
	return err
}
// handleStreamRequest serves one streamQuery request by writing a sequence of
// chunked responses to writer.
//
// The query runs on the session resolved from req.SessionID when there is one,
// otherwise on the runtime's database instance. On success the remaining
// buffered rows are flushed, a "done" chunk is written, and a memory trim is
// requested based on the streamed row count. On a query error the buffered
// rows are flushed best-effort and a failure response is written instead.
//
// The returned error is non-nil only when writing a response fails; query
// errors are reported to the peer through the failure response.
func handleStreamRequest(runtimeState *agentRuntime, req agentRequest, writer *bufio.Writer) error {
	resp := agentResponse{ID: req.ID, Success: true}
	if runtimeState.inst == nil {
		return writeResponse(writer, fail(resp, "connection not open"))
	}
	streamWriter := newAgentStreamResponseWriter(writer, req.ID)

	session, pinned, err := runtimeState.session(req.SessionID)
	if err != nil {
		return writeResponse(writer, fail(resp, err.Error()))
	}

	// Choose the execution target; the reason label follows the chosen path.
	var streamErr error
	reason := "stream-query-db"
	if pinned {
		reason = "stream-query-session"
		streamErr = streamStatementWithOptionalTimeout(session, req.Query, req.TimeoutMs, streamWriter)
	} else {
		streamErr = streamDatabaseWithOptionalTimeout(runtimeState.inst, req.Query, req.TimeoutMs, streamWriter)
	}

	if streamErr != nil {
		// Best-effort flush of rows already buffered; the failure response
		// below is what the peer acts on.
		_ = streamWriter.finish()
		return writeResponse(writer, fail(resp, streamErr.Error()))
	}

	if err := streamWriter.finish(); err != nil {
		return err
	}
	done := agentResponse{ID: req.ID, Success: true, ChunkType: agentChunkDone}
	if err := writeResponse(writer, done); err != nil {
		return err
	}
	maybeReleaseAgentMemory(reason, streamWriter.rowCount)
	return nil
}
func (r *agentRuntime) nextID() string {
r.ensureSessionMap()
r.nextSessionID++
@@ -459,6 +595,82 @@ func queryStatementWithOptionalTimeout(inst db.StatementExecer, query string, ti
return queryWithOptionalTimeout(queryRunner, query, timeoutMs)
}
// streamWithOptionalTimeout runs query on inst and feeds the results to
// consumer. A positive timeoutMs bounds the call with a context deadline. For
// the "clickhouse" driver a non-positive timeoutMs falls back to
// legacyClickHouseDefaultTimeout. Otherwise the query runs without a deadline.
func streamWithOptionalTimeout(inst db.StreamQueryExecer, query string, timeoutMs int64, consumer db.QueryStreamConsumer) error {
	budgetMs := timeoutMs
	if budgetMs <= 0 {
		if strings.EqualFold(strings.TrimSpace(agentDriverType), "clickhouse") {
			budgetMs = int64(legacyClickHouseDefaultTimeout / time.Millisecond)
		}
	}

	if budgetMs > 0 {
		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(budgetMs)*time.Millisecond)
		defer cancel()
		return inst.StreamQueryContext(ctx, query, consumer)
	}
	return inst.StreamQuery(query, consumer)
}
// streamBufferedQueryResult replays an already-materialized result set through
// consumer: columns first, then every row in order. Consumers that implement
// db.QueryStreamValueConsumer receive rows as value slices ordered by fields;
// all others receive the original maps. The first consumer error stops the
// replay and is returned.
func streamBufferedQueryResult(fields []string, data []map[string]interface{}, consumer db.QueryStreamConsumer) error {
	if err := consumer.SetColumns(fields); err != nil {
		return err
	}

	valueConsumer, positional := consumer.(db.QueryStreamValueConsumer)
	for _, record := range data {
		var err error
		if positional {
			ordered := make([]interface{}, len(fields))
			for i, name := range fields {
				ordered[i] = record[name]
			}
			err = valueConsumer.ConsumeRowValues(ordered)
		} else {
			err = consumer.ConsumeRow(record)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// streamStatementWithOptionalTimeout streams query results from a statement
// executor into consumer. Executors that implement db.StreamQueryExecer stream
// natively. Others run the query in buffered form and replay the result.
func streamStatementWithOptionalTimeout(inst db.StatementExecer, query string, timeoutMs int64, consumer db.QueryStreamConsumer) error {
	streamer, canStream := inst.(db.StreamQueryExecer)
	if !canStream {
		rows, columns, err := queryStatementWithOptionalTimeout(inst, query, timeoutMs)
		if err != nil {
			return err
		}
		return streamBufferedQueryResult(columns, rows, consumer)
	}
	return streamWithOptionalTimeout(streamer, query, timeoutMs, consumer)
}
// streamDatabaseWithOptionalTimeout streams query results from a database
// instance into consumer, trying three strategies in order:
//
//  1. stream directly when inst implements db.StreamQueryExecer;
//  2. open a temporary session when inst implements db.SessionExecerProvider,
//     stream through it, and close it afterwards;
//  3. run a buffered query and replay the result.
//
// A failure to open the temporary session is not reported; the function falls
// through to the buffered strategy.
func streamDatabaseWithOptionalTimeout(inst db.Database, query string, timeoutMs int64, consumer db.QueryStreamConsumer) error {
	if streamer, ok := inst.(db.StreamQueryExecer); ok {
		return streamWithOptionalTimeout(streamer, query, timeoutMs, consumer)
	}

	if provider, ok := inst.(db.SessionExecerProvider); ok {
		// Same timeout rule as the query itself, applied to opening the session.
		budgetMs := timeoutMs
		if budgetMs <= 0 && strings.EqualFold(strings.TrimSpace(agentDriverType), "clickhouse") {
			budgetMs = int64(legacyClickHouseDefaultTimeout / time.Millisecond)
		}

		openCtx := context.Background()
		if budgetMs > 0 {
			var cancel context.CancelFunc
			openCtx, cancel = context.WithTimeout(context.Background(), time.Duration(budgetMs)*time.Millisecond)
			// Deferred to function return so it outlives the session's use.
			defer cancel()
		}

		if session, err := provider.OpenSessionExecer(openCtx); err == nil {
			defer session.Close()
			return streamStatementWithOptionalTimeout(session, query, timeoutMs, consumer)
		}
	}

	rows, columns, err := queryWithOptionalTimeout(inst, query, timeoutMs)
	if err != nil {
		return err
	}
	return streamBufferedQueryResult(columns, rows, consumer)
}
func execWithOptionalTimeout(inst agentExecRunner, query string, timeoutMs int64) (int64, error) {
effectiveTimeoutMs := timeoutMs
if effectiveTimeoutMs <= 0 && strings.EqualFold(strings.TrimSpace(agentDriverType), "clickhouse") {
@@ -478,3 +690,41 @@ func execWithOptionalTimeout(inst agentExecRunner, query string, timeoutMs int64
// execStatementWithOptionalTimeout executes query on a statement executor by
// delegating to execWithOptionalTimeout, returning its affected-row count and
// error unchanged.
func execStatementWithOptionalTimeout(inst db.StatementExecer, query string, timeoutMs int64) (int64, error) {
	affected, err := execWithOptionalTimeout(inst, query, timeoutMs)
	return affected, err
}
// countAgentResponseRows reports how many rows a response payload carries.
// Only a []map[string]interface{} payload is counted; any other dynamic type,
// including nil, yields 0.
func countAgentResponseRows(data interface{}) int64 {
	switch rows := data.(type) {
	case []map[string]interface{}:
		return int64(len(rows))
	default:
		return 0
	}
}
// maybeReleaseAgentMemory requests a memory trim after a job that produced
// rows rows. Jobs below agentMemoryTrimRowsThreshold are ignored, as are
// requests made while another trim is already scheduled or running. The trim
// runs through runAgentMemoryTrimAsync, waits out any remaining minimum
// interval, calls agentMemoryTrimFn, then records the completion time.
//
// reason labels the call site; the function body does not read it.
func maybeReleaseAgentMemory(reason string, rows int64) {
	// The CompareAndSwap is evaluated only for large jobs, so small jobs never
	// touch the running flag.
	if rows < agentMemoryTrimRowsThreshold || !agentMemoryTrimRunning.CompareAndSwap(false, true) {
		return
	}

	trim := func() {
		defer agentMemoryTrimRunning.Store(false)
		if wait := nextAgentMemoryTrimDelay(); wait > 0 {
			time.Sleep(wait)
		}
		agentMemoryTrimFn()
		agentMemoryTrimLastAt.Store(time.Now().UnixNano())
	}
	runAgentMemoryTrimAsync(trim)
}
// nextAgentMemoryTrimDelay returns how long the next trim must wait so that
// trims stay at least agentMemoryTrimMinInterval apart. It returns 0 when no
// trim has been recorded or the interval has already elapsed.
//
// The result never exceeds agentMemoryTrimMinInterval. The recorded timestamp
// is a wall-clock UnixNano value with no monotonic reading, so a backwards
// clock step can place it in the future; the wait is then clamped to one
// interval rather than growing with the size of the step.
func nextAgentMemoryTrimDelay() time.Duration {
	lastUnixNano := agentMemoryTrimLastAt.Load()
	if lastUnixNano <= 0 {
		return 0
	}

	elapsed := time.Since(time.Unix(0, lastUnixNano))
	switch {
	case elapsed < 0:
		// Timestamp lies in the future: wait one full interval at most.
		return agentMemoryTrimMinInterval
	case elapsed >= agentMemoryTrimMinInterval:
		return 0
	default:
		return agentMemoryTrimMinInterval - elapsed
	}
}

View File

@@ -190,6 +190,64 @@ func (f *fakeAgentStatementSession) Close() error {
return nil
}
// fakeAgentStreamSession is a test double for a session that can stream query
// results. It records how it was used and always produces the same two rows.
type fakeAgentStreamSession struct {
	closed      bool // set by Close
	streamCalls int  // number of StreamQueryContext invocations
	deadlineSet bool // set when a stream call received a context with a deadline
}

// Exec is a no-op that reports zero affected rows.
func (f *fakeAgentStreamSession) Exec(query string) (int64, error) {
	return 0, nil
}

// ExecContext is a no-op that reports zero affected rows.
func (f *fakeAgentStreamSession) ExecContext(ctx context.Context, query string) (int64, error) {
	return 0, nil
}

// Close marks the session as closed.
func (f *fakeAgentStreamSession) Close() error {
	f.closed = true
	return nil
}

// StreamQuery streams the fixed result set using a background context.
func (f *fakeAgentStreamSession) StreamQuery(query string, consumer db.QueryStreamConsumer) error {
	return f.StreamQueryContext(context.Background(), query, consumer)
}

// StreamQueryContext records the call, announces the columns "id" and "name",
// and delivers the rows (1, "alice") and (2, "bob"). Rows go through
// ConsumeRowValues when the consumer supports it, otherwise through ConsumeRow.
func (f *fakeAgentStreamSession) StreamQueryContext(ctx context.Context, query string, consumer db.QueryStreamConsumer) error {
	f.streamCalls++
	if _, hasDeadline := ctx.Deadline(); hasDeadline {
		f.deadlineSet = true
	}
	if err := consumer.SetColumns([]string{"id", "name"}); err != nil {
		return err
	}

	fixtures := []struct {
		id   int
		name string
	}{
		{1, "alice"},
		{2, "bob"},
	}
	valueConsumer, positional := consumer.(db.QueryStreamValueConsumer)
	for _, fx := range fixtures {
		var err error
		if positional {
			err = valueConsumer.ConsumeRowValues([]interface{}{fx.id, fx.name})
		} else {
			err = consumer.ConsumeRow(map[string]interface{}{"id": fx.id, "name": fx.name})
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// fakeAgentSessionStreamDB is a test database that hands out streaming
// sessions. It embeds fakeAgentTimeoutDB for the remaining database behavior.
type fakeAgentSessionStreamDB struct {
	fakeAgentTimeoutDB
	session   *fakeAgentStreamSession // most recently opened session
	openCalls int                     // number of OpenSessionExecer invocations
}

// OpenSessionExecer counts the call, creates a fresh fakeAgentStreamSession,
// remembers it in f.session, and returns it with a nil error.
func (f *fakeAgentSessionStreamDB) OpenSessionExecer(ctx context.Context) (db.StatementExecer, error) {
	f.openCalls++
	opened := &fakeAgentStreamSession{}
	f.session = opened
	return opened, nil
}
func TestQueryWithOptionalTimeout_UsesQueryContext(t *testing.T) {
fake := &fakeAgentTimeoutDB{}
data, fields, err := queryWithOptionalTimeout(fake, "SELECT 1", int64((2 * time.Second).Milliseconds()))
@@ -306,3 +364,135 @@ func TestHandleRequest_UsesPinnedSessionForSessionScopedQueryAndExec(t *testing.
t.Fatal("expected pinned session to close")
}
}
// TestHandleStreamRequest_UsesSessionStreamerAndWritesChunks checks that a
// streamQuery request without a pinned session opens a temporary session,
// streams through it with a deadline, closes it, and emits exactly three
// newline-separated responses (columns, rows, done). It also checks that a
// two-row stream does not trigger a memory trim.
func TestHandleStreamRequest_UsesSessionStreamerAndWritesChunks(t *testing.T) {
	// Snapshot every package-level value the test overrides.
	savedDriverType := agentDriverType
	savedAsync := runAgentMemoryTrimAsync
	savedTrim := agentMemoryTrimFn
	savedLastAt := agentMemoryTrimLastAt.Load()
	defer func() {
		runAgentMemoryTrimAsync = savedAsync
		agentMemoryTrimFn = savedTrim
		agentMemoryTrimRunning.Store(false)
		agentMemoryTrimLastAt.Store(savedLastAt)
		agentDriverType = savedDriverType
	}()

	agentDriverType = "oceanbase"
	agentMemoryTrimRunning.Store(false)
	agentMemoryTrimLastAt.Store(0)

	fake := &fakeAgentSessionStreamDB{}
	runtimeState := &agentRuntime{
		inst:     fake,
		sessions: make(map[string]db.StatementExecer),
	}

	// Run the trim synchronously and count invocations instead of collecting.
	trimmed := 0
	runAgentMemoryTrimAsync = func(fn func()) { fn() }
	agentMemoryTrimFn = func() { trimmed++ }

	var out bytes.Buffer
	writer := bufio.NewWriter(&out)
	request := agentRequest{
		ID:        9,
		Method:    agentMethodStreamQuery,
		Query:     "SELECT * FROM person_info",
		TimeoutMs: int64((2 * time.Second).Milliseconds()),
	}
	if err := handleStreamRequest(runtimeState, request, writer); err != nil {
		t.Fatalf("handleStreamRequest 返回错误: %v", err)
	}

	// Execution path: one temporary session, streamed once, then closed.
	if fake.openCalls != 1 {
		t.Fatalf("expected OpenSessionExecer called once, got %d", fake.openCalls)
	}
	if fake.session == nil || fake.session.streamCalls != 1 {
		t.Fatalf("expected session streamer used once, session=%#v", fake.session)
	}
	if !fake.session.deadlineSet {
		t.Fatal("expected stream query context deadline to be set")
	}
	if !fake.session.closed {
		t.Fatal("expected session to close after streaming")
	}
	if fake.queryCalled || fake.queryContextCalled {
		t.Fatalf("unexpected fallback query path, Query=%v QueryContext=%v", fake.queryCalled, fake.queryContextCalled)
	}

	// Wire format: one JSON document per line.
	lines := strings.Split(strings.TrimSpace(out.String()), "\n")
	if len(lines) != 3 {
		t.Fatalf("expected 3 stream responses, got %d: %q", len(lines), out.String())
	}

	var columnsResp struct {
		Success   bool     `json:"success"`
		ChunkType string   `json:"chunkType"`
		Fields    []string `json:"fields"`
	}
	if err := json.Unmarshal([]byte(lines[0]), &columnsResp); err != nil {
		t.Fatalf("decode columns response failed: %v", err)
	}
	columnsOK := columnsResp.Success && columnsResp.ChunkType == agentChunkColumns && len(columnsResp.Fields) == 2
	if !columnsOK {
		t.Fatalf("unexpected columns response: %#v", columnsResp)
	}

	var rowsResp struct {
		Success   bool            `json:"success"`
		ChunkType string          `json:"chunkType"`
		Data      [][]interface{} `json:"data"`
	}
	if err := json.Unmarshal([]byte(lines[1]), &rowsResp); err != nil {
		t.Fatalf("decode rows response failed: %v", err)
	}
	rowsOK := rowsResp.Success && rowsResp.ChunkType == agentChunkRows && len(rowsResp.Data) == 2
	if !rowsOK {
		t.Fatalf("unexpected rows response: %#v", rowsResp)
	}
	if got := rowsResp.Data[1][1]; got != "bob" {
		t.Fatalf("unexpected streamed row payload: %v", rowsResp.Data)
	}

	var doneResp struct {
		Success   bool   `json:"success"`
		ChunkType string `json:"chunkType"`
	}
	if err := json.Unmarshal([]byte(lines[2]), &doneResp); err != nil {
		t.Fatalf("decode done response failed: %v", err)
	}
	if !(doneResp.Success && doneResp.ChunkType == agentChunkDone) {
		t.Fatalf("unexpected done response: %#v", doneResp)
	}

	if trimmed != 0 {
		t.Fatalf("小流式任务不应触发内存回收got=%d", trimmed)
	}
}
// TestMaybeReleaseAgentMemory_TriggersTrimForLargeJobs checks that a job whose
// row count equals agentMemoryTrimRowsThreshold triggers exactly one trim.
func TestMaybeReleaseAgentMemory_TriggersTrimForLargeJobs(t *testing.T) {
	// Snapshot and later restore the package-level seams this test replaces.
	savedAsync, savedTrim := runAgentMemoryTrimAsync, agentMemoryTrimFn
	savedLastAt := agentMemoryTrimLastAt.Load()
	defer func() {
		runAgentMemoryTrimAsync = savedAsync
		agentMemoryTrimFn = savedTrim
		agentMemoryTrimRunning.Store(false)
		agentMemoryTrimLastAt.Store(savedLastAt)
	}()

	// Start with no trim in flight and none recorded, so no delay applies.
	agentMemoryTrimRunning.Store(false)
	agentMemoryTrimLastAt.Store(0)

	triggered := 0
	runAgentMemoryTrimAsync = func(job func()) { job() }
	agentMemoryTrimFn = func() { triggered++ }

	maybeReleaseAgentMemory("test-large-query", agentMemoryTrimRowsThreshold)

	if triggered != 1 {
		t.Fatalf("大查询完成后应触发一次内存回收got=%d", triggered)
	}
}