feat: optimize test report UI and add LLM usage tracking

2026-06-27 02:21:23 +08:00 · 2025-06-09 16:04:13 +08:00
parent e85802cdda
commit 96da4515a1
7 changed files with 850 additions and 287 deletions
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2506090029
+v5.0.0-beta-2506091704
--- a/report.go
+++ b/report.go
--- a/step.go
+++ b/step.go
@@ -59,10 +59,11 @@ type TStep struct {
 // one step contains one or multiple actions
 type ActionResult struct {
 	option.MobileAction `json:",inline"`
-	StartTime           int64                   `json:"start_time"`            // action start time
-	Elapsed             int64                   `json:"elapsed_ms"`            // action elapsed time(ms)
-	Error               error                   `json:"error"`                 // action execution result
-	SubActions          []*uixt.SubActionResult `json:"sub_actions,omitempty"` // store sub-actions
+	StartTime           int64                           `json:"start_time"`            // action start time
+	Elapsed             int64                           `json:"elapsed_ms"`            // action elapsed time(ms)
+	Error               error                           `json:"error"`                 // action execution result
+	Plannings           []*uixt.PlanningExecutionResult `json:"plannings,omitempty"`   // store planning results for start_to_goal actions
+	SubActions          []*uixt.SubActionResult         `json:"sub_actions,omitempty"` // store sub-actions for other actions
 }

 // one testcase contains one or multiple steps
--- a/step_ui.go
+++ b/step_ui.go
@@ -909,10 +909,10 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err

 			// handle start_to_goal action
 			if action.Method == option.ACTION_StartToGoal {
-				subActionResults, err := uiDriver.StartToGoal(ctx,
+				planningResults, err := uiDriver.StartToGoal(ctx,
 					action.Params.(string), action.GetOptions()...)
 				actionResult.Elapsed = time.Since(actionStartTime).Milliseconds()
-				actionResult.SubActions = subActionResults
+				actionResult.Plannings = planningResults
 				stepResult.Actions = append(stepResult.Actions, actionResult)
 				if err != nil {
 					if !code.IsErrorPredefined(err) {
--- a/uixt/ai/planner.go
+++ b/uixt/ai/planner.go
@@ -28,11 +28,12 @@ type PlanningOptions struct {

 // PlanningResult represents the result of planning
 type PlanningResult struct {
-	ToolCalls []schema.ToolCall `json:"tool_calls"`
-	Thought   string            `json:"thought"`
-	Content   string            `json:"content"` // original content from model
-	Error     string            `json:"error,omitempty"`
-	ModelName string            `json:"model_name"` // model name used for planning
+	ToolCalls []schema.ToolCall  `json:"tool_calls"`
+	Thought   string             `json:"thought"`
+	Content   string             `json:"content"` // original content from model
+	Error     string             `json:"error,omitempty"`
+	ModelName string             `json:"model_name"`      // model name used for planning
+	Usage     *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
 }

 func NewPlanner(ctx context.Context, modelConfig *ModelConfig) (*Planner, error) {
@@ -81,7 +82,7 @@ func (p *Planner) RegisterTools(tools []*schema.ToolInfo) error {
 }

 // Call performs UI planning using Vision Language Model
-func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) {
+func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *PlanningResult, err error) {
 	// validate input parameters
 	if err := validatePlanningInput(opts); err != nil {
 		return nil, errors.Wrap(err, "validate planning parameters failed")
@@ -116,6 +117,13 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
 	}
 	logResponse(message)

+	defer func() {
+		// Extract usage information if available
+		if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
+			result.Usage = message.ResponseMeta.Usage
+		}
+	}()
+
 	// handle tool calls
 	if len(message.ToolCalls) > 0 {
 		// append tool call message
@@ -130,7 +138,7 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
 			ToolCallID: toolCallID,
 		})
 		// history will be appended with tool calls execution result
-		result := &PlanningResult{
+		result = &PlanningResult{
 			ToolCalls: message.ToolCalls,
 			Thought:   message.Content,
 			ModelName: string(p.modelConfig.ModelType),
@@ -139,7 +147,7 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
 	}

 	// parse message content to actions (tool calls)
-	result, err := p.parser.Parse(message.Content, opts.Size)
+	result, err = p.parser.Parse(message.Content, opts.Size)
 	if err != nil {
 		result = &PlanningResult{
 			Thought:   message.Content,
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -6,21 +6,23 @@ import (
 	"time"

 	"github.com/cloudwego/eino/schema"
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/pkg/errors"
+	"github.com/rs/zerolog/log"
+
 	"github.com/httprunner/httprunner/v5/code"
 	"github.com/httprunner/httprunner/v5/internal/builtin"
 	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/ai"
 	"github.com/httprunner/httprunner/v5/uixt/option"
-	"github.com/mark3labs/mcp-go/mcp"
-	"github.com/pkg/errors"
-	"github.com/rs/zerolog/log"
+	"github.com/httprunner/httprunner/v5/uixt/types"
 )

-func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*SubActionResult, error) {
+func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
 	options := option.NewActionOptions(opts...)
 	log.Info().Int("max_retry_times", options.MaxRetryTimes).Msg("StartToGoal")

-	var allSubActions []*SubActionResult
+	var allPlannings []*PlanningExecutionResult
 	var attempt int
 	for {
 		attempt++
@@ -30,7 +32,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 		select {
 		case <-ctx.Done():
 			log.Warn().Msg("interrupted in StartToGoal")
-			return allSubActions, errors.Wrap(code.InterruptError, "StartToGoal interrupted")
+			return allPlannings, errors.Wrap(code.InterruptError, "StartToGoal interrupted")
 		default:
 		}

@@ -41,7 +43,8 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 			// Add ResetHistory option for the first attempt
 			planningOpts = append(planningOpts, option.WithResetHistory(true))
 		}
-		result, err := dExt.PlanNextAction(ctx, prompt, planningOpts...)
+
+		planningResult, err := dExt.PlanNextAction(ctx, prompt, planningOpts...)
 		if err != nil {
 			// Check if this is a LLM service request error that should be retried
 			if errors.Is(err, code.LLMRequestServiceError) {
@@ -49,68 +52,81 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 					Msg("LLM service request failed, retrying...")
 				continue
 			}
-			allSubActions = append(allSubActions, &SubActionResult{
-				ActionName:  "plan_next_action",
-				Arguments:   prompt,
-				Error:       err,
-				StartTime:   planningStartTime.Unix(),
-				Elapsed:     time.Since(planningStartTime).Milliseconds(),
-				SessionData: dExt.GetSession().GetData(true),
-			})
-			return allSubActions, err
+			// Create planning result with error
+			errorResult := &PlanningExecutionResult{
+				PlanningResult: ai.PlanningResult{
+					Thought:   "Planning failed",
+					ModelName: "",
+					Error:     err.Error(),
+				},
+				StartTime: planningStartTime.Unix(),
+				Elapsed:   time.Since(planningStartTime).Milliseconds(),
+			}
+			allPlannings = append(allPlannings, errorResult)
+			return allPlannings, err
 		}

+		// Set planning execution timing
+		planningResult.StartTime = planningStartTime.Unix()
+		planningResult.SubActions = []*SubActionResult{}
+
 		// Check if task is finished BEFORE executing actions
-		if dExt.isTaskFinished(result) {
+		if dExt.isTaskFinished(planningResult) {
 			log.Info().Msg("task finished, stopping StartToGoal")
-			// Create a sub-action result to record the planning result even when task is finished
-			subActionResult := &SubActionResult{
-				ActionName:  "plan_next_action",
-				Arguments:   prompt,
-				StartTime:   planningStartTime.Unix(),
-				Elapsed:     time.Since(planningStartTime).Milliseconds(),
-				Thought:     result.Thought,
-				ModelName:   result.ModelName,
-				SessionData: dExt.GetSession().GetData(true),
-			}
-			allSubActions = append(allSubActions, subActionResult)
-			return allSubActions, nil
+			planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
+			allPlannings = append(allPlannings, planningResult)
+			return allPlannings, nil
 		}

 		// Invoke tool calls
-		for _, toolCall := range result.ToolCalls {
+		for _, toolCall := range planningResult.ToolCalls {
 			// Check for context cancellation before each action
 			select {
 			case <-ctx.Done():
 				log.Warn().Msg("interrupted in invokeToolCalls")
-				return allSubActions, errors.Wrap(code.InterruptError, "invokeToolCalls interrupted")
+				planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
+				allPlannings = append(allPlannings, planningResult)
+				return allPlannings, errors.Wrap(code.InterruptError, "invokeToolCalls interrupted")
 			default:
 			}

-			subActionStartTime := time.Now()
-			// Create sub-action result
-			subActionResult := &SubActionResult{
-				ActionName: toolCall.Function.Name,
-				Arguments:  toolCall.Function.Arguments,
-				StartTime:  subActionStartTime.Unix(),
-				Thought:    result.Thought,
-				ModelName:  result.ModelName,
-			}
+			// Execute each tool call in a separate function to ensure proper defer execution
+			err := func() error {
+				subActionStartTime := time.Now()
+				subActionResult := &SubActionResult{
+					ActionName: toolCall.Function.Name,
+					Arguments:  toolCall.Function.Arguments,
+					StartTime:  subActionStartTime.Unix(),
+				}

-			if err := dExt.invokeToolCall(ctx, toolCall); err != nil {
-				subActionResult.Error = err
-				allSubActions = append(allSubActions, subActionResult)
-				return allSubActions, err
-			}
-			subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds()
+				// Use defer to ensure sub-action is always processed and added to results
+				defer func() {
+					subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds()
+					subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data
+					planningResult.SubActions = append(planningResult.SubActions, subActionResult)
+				}()

-			// Collect sub-action specific attachments and reset session data
-			subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data
-			allSubActions = append(allSubActions, subActionResult)
+				// Execute the tool call
+				if err := dExt.invokeToolCall(ctx, toolCall); err != nil {
+					subActionResult.Error = err
+					return err
+				}
+				return nil
+			}()
+			if err != nil {
+				planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
+				planningResult.Error = err.Error()
+				allPlannings = append(allPlannings, planningResult)
+				return allPlannings, err
+			}
 		}

+		// Complete this planning cycle
+		planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
+		allPlannings = append(allPlannings, planningResult)
+
 		if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes {
-			return allSubActions, errors.New("reached max retry times")
+			return allPlannings, errors.New("reached max retry times")
 		}
 	}
 }
@@ -119,13 +135,13 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
 	log.Info().Str("prompt", prompt).Msg("performing AI action")

 	// plan next action
-	result, err := dExt.PlanNextAction(ctx, prompt, opts...)
+	planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...)
 	if err != nil {
 		return err
 	}

 	// Invoke tool calls
-	for _, toolCall := range result.ToolCalls {
+	for _, toolCall := range planningResult.ToolCalls {
 		err = dExt.invokeToolCall(ctx, toolCall)
 		if err != nil {
 			return err
@@ -135,7 +151,8 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
 	return nil
 }

-func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*ai.PlanningResult, error) {
+// PlanNextAction performs planning and returns unified planning information
+func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
 	if dExt.LLMService == nil {
 		return nil, errors.New("LLM service is not initialized")
 	}
@@ -144,14 +161,21 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 	options := option.NewActionOptions(opts...)
 	resetHistory := options.ResetHistory

+	// Step 1: Take screenshot
+	screenshotStartTime := time.Now()
 	// Use GetScreenResult to handle screenshot capture, save, and session tracking
 	screenResult, err := dExt.GetScreenResult(
 		option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
 	)
+	screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
 	if err != nil {
 		return nil, err
 	}

+	// Clear session data after taking the planning screenshot so it is not included in sub-actions.
+	// The screenshot itself is kept in screenResult and attached to planningResult.ScreenResult below.
+	dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions
+
 	// convert buffer to base64 string for LLM
 	screenShotBase64 := "data:image/jpeg;base64," +
 		base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
@@ -162,6 +186,8 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 		return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
 	}

+	// Step 2: Call model
+	modelCallStartTime := time.Now()
 	planningOpts := &ai.PlanningOptions{
 		UserInstruction: prompt,
 		Message: &schema.Message{
@@ -180,22 +206,48 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 	}

 	result, err := dExt.LLMService.Call(ctx, planningOpts)
+	modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
+
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to get next action from planner")
 	}
-	return result, nil
+
+	// Step 3: Collect the planned action names (parsing was already done in LLMService.Call; the names are recorded separately)
+	actionNames := make([]string, len(result.ToolCalls))
+	for i, toolCall := range result.ToolCalls {
+		actionNames[i] = toolCall.Function.Name
+	}
+
+	// Create unified planning result that embeds ai.PlanningResult
+	planningResult := &PlanningExecutionResult{
+		PlanningResult: *result, // Inherit all fields from ai.PlanningResult
+		// Planning process timing and metadata
+		ScreenshotElapsed: screenshotElapsed,
+		ImagePath:         screenResult.ImagePath,
+		Resolution:        &screenResult.Resolution,
+		ScreenResult:      screenResult,
+		ModelCallElapsed:  modelCallElapsed,
+		ToolCallsCount:    len(result.ToolCalls),
+		ActionNames:       actionNames,
+		// Execution timing (will be set by StartToGoal)
+		StartTime:  0,   // Will be set by caller
+		Elapsed:    0,   // Will be set by caller
+		SubActions: nil, // Will be populated during execution
+	}
+
+	return planningResult, nil
 }

 // isTaskFinished checks if the task is completed based on the planning result
-func (dExt *XTDriver) isTaskFinished(result *ai.PlanningResult) bool {
+func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
 	// Check if there are no tool calls (no actions to execute)
-	if len(result.ToolCalls) == 0 {
+	if len(planningResult.ToolCalls) == 0 {
 		log.Info().Msg("no tool calls returned, task may be finished")
 		return true
 	}

 	// Check if any tool call is a "finished" action
-	for _, toolCall := range result.ToolCalls {
+	for _, toolCall := range planningResult.ToolCalls {
 		if toolCall.Function.Name == "uixt__finished" {
 			log.Info().Str("reason", toolCall.Function.Arguments).Msg("finished action detected")
 			return true
@@ -236,15 +288,30 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
 	return nil
 }

+// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results
+type PlanningExecutionResult struct {
+	ai.PlanningResult // Embed all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName, Usage)
+	// Planning process information
+	ScreenshotElapsed int64         `json:"screenshot_elapsed_ms"` // screenshot elapsed time(ms)
+	ImagePath         string        `json:"image_path"`            // screenshot image path
+	Resolution        *types.Size   `json:"resolution"`            // image resolution
+	ScreenResult      *ScreenResult `json:"screen_result"`         // complete screen result data
+	ModelCallElapsed  int64         `json:"model_call_elapsed_ms"` // model call elapsed time(ms)
+	ToolCallsCount    int           `json:"tool_calls_count"`      // number of tool calls generated
+	ActionNames       []string      `json:"action_names"`          // names of parsed actions
+	// Execution information
+	StartTime  int64              `json:"start_time"`            // planning start time
+	Elapsed    int64              `json:"elapsed_ms"`            // planning elapsed time(ms)
+	SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
+}
+
 // SubActionResult represents a sub-action within a start_to_goal action
 type SubActionResult struct {
-	ActionName string      `json:"action_name"`          // name of the sub-action (e.g., "tap", "input")
-	Arguments  interface{} `json:"arguments,omitempty"`  // arguments passed to the sub-action
-	StartTime  int64       `json:"start_time"`           // sub-action start time
-	Elapsed    int64       `json:"elapsed_ms"`           // sub-action elapsed time(ms)
-	Error      error       `json:"error,omitempty"`      // sub-action execution result
-	Thought    string      `json:"thought,omitempty"`    // sub-action thought
-	ModelName  string      `json:"model_name,omitempty"` // model name used for AI actions
+	ActionName string      `json:"action_name"`         // name of the sub-action (e.g., "tap", "input")
+	Arguments  interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
+	StartTime  int64       `json:"start_time"`          // sub-action start time
+	Elapsed    int64       `json:"elapsed_ms"`          // sub-action elapsed time(ms)
+	Error      error       `json:"error,omitempty"`     // sub-action execution result
 	SessionData
 }

--- a/uixt/driver_ext_ai_test.go
+++ b/uixt/driver_ext_ai_test.go
@@ -52,9 +52,10 @@ func TestDriverExt_StartToGoal(t *testing.T) {

 func TestDriverExt_PlanNextAction(t *testing.T) {
 	driver := setupDriverExt(t)
-	result, err := driver.PlanNextAction(context.Background(), "启动抖音")
+	planningResult, err := driver.PlanNextAction(context.Background(), "启动抖音")
 	assert.Nil(t, err)
-	t.Log(result)
+	assert.NotNil(t, planningResult) // planningResult must be non-nil when err is nil
+	t.Log(planningResult)
 }

 func TestXTDriver_isTaskFinished(t *testing.T) {
@@ -62,65 +63,73 @@ func TestXTDriver_isTaskFinished(t *testing.T) {

 	tests := []struct {
 		name     string
-		result   *ai.PlanningResult
+		result   *PlanningExecutionResult
 		expected bool
 	}{
 		{
 			name: "no tool calls - task finished",
-			result: &ai.PlanningResult{
-				ToolCalls: []schema.ToolCall{},
-				Thought:   "No actions needed",
+			result: &PlanningExecutionResult{
+				PlanningResult: ai.PlanningResult{
+					ToolCalls: []schema.ToolCall{},
+					Thought:   "No actions needed",
+				},
 			},
 			expected: true,
 		},
 		{
 			name: "finished action - task finished",
-			result: &ai.PlanningResult{
-				ToolCalls: []schema.ToolCall{
-					{
-						Function: schema.FunctionCall{
-							Name:      "uixt__finished",
-							Arguments: `{"content": "Task completed successfully"}`,
+			result: &PlanningExecutionResult{
+				PlanningResult: ai.PlanningResult{
+					ToolCalls: []schema.ToolCall{
+						{
+							Function: schema.FunctionCall{
+								Name:      "uixt__finished",
+								Arguments: `{"content": "Task completed successfully"}`,
+							},
 						},
 					},
+					Thought: "Task completed",
 				},
-				Thought: "Task completed",
 			},
 			expected: true,
 		},
 		{
 			name: "regular action - task not finished",
-			result: &ai.PlanningResult{
-				ToolCalls: []schema.ToolCall{
-					{
-						Function: schema.FunctionCall{
-							Name:      string(option.ACTION_TapXY),
-							Arguments: `{"x": 100, "y": 200}`,
+			result: &PlanningExecutionResult{
+				PlanningResult: ai.PlanningResult{
+					ToolCalls: []schema.ToolCall{
+						{
+							Function: schema.FunctionCall{
+								Name:      string(option.ACTION_TapXY),
+								Arguments: `{"x": 100, "y": 200}`,
+							},
 						},
 					},
+					Thought: "Click on button",
 				},
-				Thought: "Click on button",
 			},
 			expected: false,
 		},
 		{
 			name: "multiple actions with finished - task finished",
-			result: &ai.PlanningResult{
-				ToolCalls: []schema.ToolCall{
-					{
-						Function: schema.FunctionCall{
-							Name:      string(option.ACTION_TapXY),
-							Arguments: `{"x": 100, "y": 200}`,
-						},
-					},
-					{
-						Function: schema.FunctionCall{
-							Name:      "uixt__finished",
-							Arguments: `{"content": "All tasks completed"}`,
+			result: &PlanningExecutionResult{
+				PlanningResult: ai.PlanningResult{
+					ToolCalls: []schema.ToolCall{
+						{
+							Function: schema.FunctionCall{
+								Name:      string(option.ACTION_TapXY),
+								Arguments: `{"x": 100, "y": 200}`,
+							},
+						},
+						{
+							Function: schema.FunctionCall{
+								Name:      "uixt__finished",
+								Arguments: `{"content": "All tasks completed"}`,
+							},
 						},
 					},
+					Thought: "Complete all actions",
 				},
-				Thought: "Complete all actions",
 			},
 			expected: true,
 		},