diff --git a/internal/version/VERSION b/internal/version/VERSION index f9a3ba97..9caa1f35 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506241342 +v5.0.0-beta-2506241525 diff --git a/report.go b/report.go index 61ffb0b0..fbc9933e 100644 --- a/report.go +++ b/report.go @@ -417,16 +417,53 @@ func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} { continue } for _, action := range step.Actions { - if action.Plannings == nil { - continue - } - for _, planning := range action.Plannings { - if planning.Usage == nil { - continue + // Calculate planning usage + if action.Plannings != nil { + for _, planning := range action.Plannings { + if planning.Usage != nil { + totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens + totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens + totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens + } + } + } + + // Calculate AI operations usage (ai_query, ai_action, ai_assert) + if action.AIResult != nil { + var usage *map[string]interface{} + + switch action.AIResult.Type { + case "query": + if action.AIResult.QueryResult != nil && action.AIResult.QueryResult.Usage != nil { + usage = &map[string]interface{}{ + "prompt_tokens": action.AIResult.QueryResult.Usage.PromptTokens, + "completion_tokens": action.AIResult.QueryResult.Usage.CompletionTokens, + "total_tokens": action.AIResult.QueryResult.Usage.TotalTokens, + } + } + case "action": + if action.AIResult.PlanningResult != nil && action.AIResult.PlanningResult.Usage != nil { + usage = &map[string]interface{}{ + "prompt_tokens": action.AIResult.PlanningResult.Usage.PromptTokens, + "completion_tokens": action.AIResult.PlanningResult.Usage.CompletionTokens, + "total_tokens": action.AIResult.PlanningResult.Usage.TotalTokens, + } + } + case "assert": + if action.AIResult.AssertionResult != nil && 
action.AIResult.AssertionResult.Usage != nil { + usage = &map[string]interface{}{ + "prompt_tokens": action.AIResult.AssertionResult.Usage.PromptTokens, + "completion_tokens": action.AIResult.AssertionResult.Usage.CompletionTokens, + "total_tokens": action.AIResult.AssertionResult.Usage.TotalTokens, + } + } + } + + if usage != nil { + totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + (*usage)["prompt_tokens"].(int) + totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + (*usage)["completion_tokens"].(int) + totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + (*usage)["total_tokens"].(int) } - totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens - totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens - totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens } } } @@ -1318,6 +1355,21 @@ const htmlTemplate = ` color: #495057; } + .structured-data { + background: #f8f9fa; + border: 1px solid #28a745; + border-radius: 6px; + padding: 10px 12px; + margin: 8px 0; + font-size: 0.85em; + color: #495057; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + white-space: pre-wrap; + word-wrap: break-word; + max-height: 200px; + overflow-y: auto; + } + @media screen and (max-width: 768px) { .planning-three-columns { flex-direction: column; @@ -2431,15 +2483,37 @@ const htmlTemplate = ` {{end}}
- {{if $action.AIResult.ModelName}} -
🤖 Model: {{$action.AIResult.ModelName}}
+ {{/* Model name and usage come from the per-type result; a nil result pointer skips its branch instead of aborting template execution */}} + {{if and (eq $action.AIResult.Type "query") $action.AIResult.QueryResult}} + {{if $action.AIResult.QueryResult.ModelName}} +
🤖 Model: {{$action.AIResult.QueryResult.ModelName}}
+ {{end}} + {{if $action.AIResult.QueryResult.Usage}} +
📊 Tokens: {{$action.AIResult.QueryResult.Usage.PromptTokens}} in / {{$action.AIResult.QueryResult.Usage.CompletionTokens}} out / {{$action.AIResult.QueryResult.Usage.TotalTokens}} total
+ {{end}} + {{/* Display structured data for query results */}} + {{if $action.AIResult.QueryResult.Data}} +
📥 Structured Data:
+
{{safeHTML (toJSON $action.AIResult.QueryResult.Data)}}
+ {{end}} + {{else if and (eq $action.AIResult.Type "action") $action.AIResult.PlanningResult}} + {{if $action.AIResult.PlanningResult.ModelName}} +
🤖 Model: {{$action.AIResult.PlanningResult.ModelName}}
+ {{end}} + {{if $action.AIResult.PlanningResult.Usage}} +
📊 Tokens: {{$action.AIResult.PlanningResult.Usage.PromptTokens}} in / {{$action.AIResult.PlanningResult.Usage.CompletionTokens}} out / {{$action.AIResult.PlanningResult.Usage.TotalTokens}} total
+ {{end}} + {{else if and (eq $action.AIResult.Type "assert") $action.AIResult.AssertionResult}} + {{if $action.AIResult.AssertionResult.ModelName}} +
🤖 Model: {{$action.AIResult.AssertionResult.ModelName}}
+ {{end}} + {{if $action.AIResult.AssertionResult.Usage}} +
📊 Tokens: {{$action.AIResult.AssertionResult.Usage.PromptTokens}} in / {{$action.AIResult.AssertionResult.Usage.CompletionTokens}} out / {{$action.AIResult.AssertionResult.Usage.TotalTokens}} total
+ {{end}} {{end}} {{if $action.AIResult.Resolution}}
📐 Resolution: {{$action.AIResult.Resolution.Width}}x{{$action.AIResult.Resolution.Height}}
{{end}} - {{if $action.AIResult.Usage}} -
📊 Tokens: {{$action.AIResult.Usage.PromptTokens}} in / {{$action.AIResult.Usage.CompletionTokens}} out / {{$action.AIResult.Usage.TotalTokens}} total
- {{end}} {{if $action.AIResult.Content}}
💬 {{title $action.AIResult.Type}} Result: {{$action.AIResult.Content}}
{{end}} @@ -2495,9 +2569,6 @@ const htmlTemplate = `
{{base $screenshot.ImagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} - {{end}}
Screenshot @@ -2560,12 +2631,6 @@ const htmlTemplate = `
{{base $imagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} - {{else if index $screenshot "resolution"}} - {{$resolution := index $screenshot "resolution"}} - {{index $resolution "width"}}x{{index $resolution "height"}} - {{end}}
Screenshot diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go index 58880c51..2760f6ff 100644 --- a/uixt/ai/asserter.go +++ b/uixt/ai/asserter.go @@ -30,8 +30,10 @@ type AssertOptions struct { // AssertionResult represents the response from an AI assertion type AssertionResult struct { - Pass bool `json:"pass"` - Thought string `json:"thought"` + Pass bool `json:"pass"` + Thought string `json:"thought"` + ModelName string `json:"model_name"` // model name used for assertion + Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics } // Asserter handles assertion using different AI models @@ -85,7 +87,7 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro } // Assert performs the assertion check on the screenshot -func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error) { +func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (result *AssertionResult, err error) { // Validate input parameters if err := validateAssertionInput(opts); err != nil { return nil, errors.Wrap(err, "validate assertion parameters failed") @@ -132,8 +134,15 @@ Here is the assertion. 
Please tell whether it is truthy according to the screens return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } + defer func() { + // Attach token usage only when a result exists: the error returns below leave the named result nil + if result != nil && message.ResponseMeta != nil && message.ResponseMeta.Usage != nil { + result.Usage = message.ResponseMeta.Usage + } + }() + // Parse result - result, err := parseAssertionResult(message.Content) + result, err = parseAssertionResult(message.Content, a.modelConfig.ModelType) if err != nil { return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error()) } @@ -159,7 +168,7 @@ func validateAssertionInput(opts *AssertOptions) error { } // parseAssertionResult parses the model response into AssertionResponse -func parseAssertionResult(content string) (*AssertionResult, error) { +func parseAssertionResult(content string, modelType option.LLMServiceType) (*AssertionResult, error) { var result AssertionResult // Use the generic structured response parser @@ -170,5 +179,6 @@ func parseAssertionResult(content string) (*AssertionResult, error) { return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error()) } + result.ModelName = string(modelType) return &result, nil } diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go index d9bdaaba..18fd3cdd 100644 --- a/uixt/ai/asserter_test.go +++ b/uixt/ai/asserter_test.go @@ -136,7 +136,7 @@ func TestParseAssertionResult(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := parseAssertionResult(tt.input) + result, err := parseAssertionResult(tt.input, option.DOUBAO_1_5_UI_TARS_250328) if tt.shouldSucceed { require.NoError(t, err) assert.NotNil(t, result) diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go index 75d5dbc3..54194fad 100644 --- a/uixt/ai/querier.go +++ b/uixt/ai/querier.go @@ -32,9 +32,11 @@ type QueryOptions struct { // QueryResult represents the response from an AI query type QueryResult struct { - Content string `json:"content"` // The extracted
content/information - Thought string `json:"thought"` // The reasoning process - Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided + Content string `json:"content"` // The extracted content/information + Thought string `json:"thought"` // The reasoning process + Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided + ModelName string `json:"model_name"` // model name used for query + Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics } // Querier handles query operations using different AI models @@ -89,7 +91,7 @@ func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error) // callModelWithLogging calls the model with automatic logging and timing // Query performs the information extraction from the screenshot -func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) { +func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (result *QueryResult, err error) { // Validate input parameters if err := validateQueryInput(opts); err != nil { return nil, errors.Wrap(err, "validate query parameters failed") @@ -141,8 +143,15 @@ Here is the query. Please extract the requested information from the screenshot. 
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } + defer func() { + // Attach token usage only when a result exists: any error return after this point leaves the named result nil + if result != nil && message.ResponseMeta != nil && message.ResponseMeta.Usage != nil { + result.Usage = message.ResponseMeta.Usage + } + }() + // Parse result - result, err := parseQueryResult(message.Content) + result, err = parseQueryResult(message.Content, q.modelConfig.ModelType) if err != nil { return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error()) } @@ -168,18 +177,20 @@ func validateQueryInput(opts *QueryOptions) error { } // parseQueryResult parses the model response into QueryResult -func parseQueryResult(content string) (*QueryResult, error) { +func parseQueryResult(content string, modelType option.LLMServiceType) (*QueryResult, error) { var result QueryResult // Use the generic structured response parser with enhanced error recovery if err := parseStructuredResponse(content, &result); err != nil { // If parseStructuredResponse fails completely, treat content as plain text return &QueryResult{ - Content: content, - Thought: "Failed to parse response, returning raw content", + Content: content, + Thought: "Failed to parse response, returning raw content", + ModelName: string(modelType), }, nil } + result.ModelName = string(modelType) return &result, nil } diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go index 3793f7bf..99a0fed1 100644 --- a/uixt/ai/querier_test.go +++ b/uixt/ai/querier_test.go @@ -87,7 +87,6 @@ func loadTestImage(t *testing.T, path string) (string, types.Size) { } // Test functions - func TestParseQueryResult(t *testing.T) { tests := []struct { name string @@ -130,7 +129,7 @@ func TestParseQueryResult(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := parseQueryResult(tt.content) + result, err := parseQueryResult(tt.content, option.DOUBAO_1_5_UI_TARS_250328) assert.NoError(t, err) assert.Equal(t, tt.expected.Content, result.Content) assert.Equal(t,
tt.expected.Thought, result.Thought) diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index f7f21009..e6aaf723 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -155,8 +155,6 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio ScreenshotElapsed: screenshotElapsed, ImagePath: screenResult.ImagePath, Resolution: &screenResult.Resolution, - ModelName: planningResult.ModelName, - Usage: planningResult.Usage, PlanningResult: &planningResult.PlanningResult, Thought: planningResult.Thought, Content: planningResult.Content, @@ -331,13 +329,11 @@ type PlanningExecutionResult struct { // AIExecutionResult represents a unified result structure for all AI operations type AIExecutionResult struct { - Type string `json:"type"` // operation type: "query", "action", "assert" - ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds - ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds - ImagePath string `json:"image_path"` // path to screenshot used for operation - Resolution *types.Size `json:"resolution"` // screen resolution - ModelName string `json:"model_name"` // model name used for operation - Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics + Type string `json:"type"` // operation type: "query", "action", "assert" + ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds + ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds + ImagePath string `json:"image_path"` // path to screenshot used for operation + Resolution *types.Size `json:"resolution"` // screen resolution // Operation-specific results (only one will be populated based on Type) QueryResult *ai.QueryResult `json:"query_result,omitempty"` // for ai_query operations