From 96da4515a1509f2f36db20fee510bdac4f66cce6 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 9 Jun 2025 16:04:13 +0800 Subject: [PATCH] feat: optimize test report UI and add LLM usage tracking --- internal/version/VERSION | 2 +- report.go | 828 +++++++++++++++++++++++++++++-------- step.go | 9 +- step_ui.go | 4 +- uixt/ai/planner.go | 24 +- uixt/driver_ext_ai.go | 195 ++++++--- uixt/driver_ext_ai_test.go | 75 ++-- 7 files changed, 850 insertions(+), 287 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index c1ba7e00..69e16753 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506090029 +v5.0.0-beta-2506091704 diff --git a/report.go b/report.go index 66cde1a1..0b70fd2e 100644 --- a/report.go +++ b/report.go @@ -305,9 +305,18 @@ func (g *HTMLReportGenerator) calculateTotalSubActions() int { for _, step := range testCase.Records { if step.Actions != nil { for _, action := range step.Actions { + // Count sub-actions from regular actions if action.SubActions != nil { total += len(action.SubActions) } + // Count sub-actions from planning results + if action.Plannings != nil { + for _, planning := range action.Plannings { + if planning.SubActions != nil { + total += len(planning.SubActions) + } + } + } } } } @@ -315,8 +324,8 @@ func (g *HTMLReportGenerator) calculateTotalSubActions() int { return total } -// calculateTotalRequests calculates the total number of requests across all test cases -func (g *HTMLReportGenerator) calculateTotalRequests() int { +// calculateTotalPlannings calculates the total number of planning results across all test cases +func (g *HTMLReportGenerator) calculateTotalPlannings() int { total := 0 if g.SummaryData == nil || g.SummaryData.Details == nil { return total @@ -329,12 +338,8 @@ func (g *HTMLReportGenerator) calculateTotalRequests() int { for _, step := range testCase.Records { if step.Actions != nil { for _, action := range step.Actions { - if 
action.SubActions != nil { - for _, subAction := range action.SubActions { - if subAction.Requests != nil { - total += len(subAction.Requests) - } - } + if action.Plannings != nil { + total += len(action.Plannings) } } } @@ -343,11 +348,16 @@ func (g *HTMLReportGenerator) calculateTotalRequests() int { return total } -// calculateTotalScreenshots calculates the total number of screenshots across all test cases -func (g *HTMLReportGenerator) calculateTotalScreenshots() int { - total := 0 +// calculateTotalUsage calculates the total token usage across all test cases +func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} { + totalUsage := map[string]interface{}{ + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + } + if g.SummaryData == nil || g.SummaryData.Details == nil { - return total + return totalUsage } for _, testCase := range g.SummaryData.Details { @@ -355,31 +365,25 @@ func (g *HTMLReportGenerator) calculateTotalScreenshots() int { continue } for _, step := range testCase.Records { - // Count screenshots in actions - if step.Actions != nil { - for _, action := range step.Actions { - if action.SubActions != nil { - for _, subAction := range action.SubActions { - if subAction.ScreenResults != nil { - total += len(subAction.ScreenResults) - } - } - } - } + if step.Actions == nil { + continue } - // Count screenshots in attachments - if step.Attachments != nil { - if attachments, ok := step.Attachments.(map[string]any); ok { - if screenResults, exists := attachments["screen_results"]; exists { - if screenResultsSlice, ok := screenResults.([]any); ok { - total += len(screenResultsSlice) - } + for _, action := range step.Actions { + if action.Plannings == nil { + continue + } + for _, planning := range action.Plannings { + if planning.Usage == nil { + continue } + totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens + totalUsage["completion_tokens"] = 
totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens + totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens } } } } - return total + return totalUsage } // GenerateReport generates the complete HTML test report @@ -390,13 +394,13 @@ func (g *HTMLReportGenerator) GenerateReport(outputFile string) error { // Create template functions funcMap := template.FuncMap{ - "formatDuration": g.formatDuration, - "encodeImageBase64": g.encodeImageToBase64, - "getStepLogs": g.getStepLogsForTemplate, - "calculateTotalActions": g.calculateTotalActions, - "calculateTotalSubActions": g.calculateTotalSubActions, - "calculateTotalRequests": g.calculateTotalRequests, - "calculateTotalScreenshots": g.calculateTotalScreenshots, + "formatDuration": g.formatDuration, + "encodeImageBase64": g.encodeImageToBase64, + "getStepLogs": g.getStepLogsForTemplate, + "calculateTotalActions": g.calculateTotalActions, + "calculateTotalSubActions": g.calculateTotalSubActions, + "calculateTotalPlannings": g.calculateTotalPlannings, + "calculateTotalUsage": g.calculateTotalUsage, "getSummaryContentBase64": func() string { return base64.StdEncoding.EncodeToString([]byte(g.SummaryContent)) }, @@ -502,19 +506,26 @@ const htmlTemplate = ` gap: 15px; } - .start-time { + .download-section { background: rgba(255, 255, 255, 0.2); - padding: 12px 20px; + padding: 15px 20px; border-radius: 8px; backdrop-filter: blur(10px); - min-width: 200px; + min-width: 240px; + text-align: center; + } + + .download-title { + font-size: 0.9em; + font-weight: 600; + margin-bottom: 10px; + opacity: 0.9; } .download-buttons { display: flex; gap: 10px; width: 100%; - max-width: 240px; } .download-btn { @@ -548,19 +559,6 @@ const htmlTemplate = ` transform: translateY(0); } - .time-label { - display: block; - font-size: 0.9em; - opacity: 0.8; - margin-bottom: 4px; - } - - .time-value { - display: block; - font-size: 1.1em; - font-weight: bold; - } - .summary { background: white; 
padding: 25px; @@ -690,70 +688,138 @@ const htmlTemplate = ` box-shadow: 0 2px 4px rgba(0,0,0,0.1); } + .test-cases { + margin-top: 20px; + } + + .test-case { + background: white; + margin-bottom: 40px; + border-radius: 15px; + box-shadow: 0 6px 12px rgba(0,0,0,0.1); + overflow: hidden; + border: 2px solid #e9ecef; + padding-bottom: 8px; + } + + .test-case h2 { + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + color: #495057; + margin: 0; + padding: 20px 30px; + font-size: 1.5em; + font-weight: 600; + border-bottom: 2px solid #dee2e6; + display: flex; + justify-content: space-between; + align-items: center; + } + + .case-info { + display: flex; + align-items: center; + gap: 15px; + } + .step-container { background: white; - margin-bottom: 20px; - border-radius: 10px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); + margin-bottom: 8px; + border-radius: 12px; + box-shadow: 0 4px 8px rgba(0,0,0,0.1); overflow: hidden; + border: 1px solid #e9ecef; + } + + .step-container:first-of-type { + margin-top: 8px; } .step-header { - background: #f8f9fa; - padding: 20px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + padding: 25px 30px; cursor: pointer; - border-bottom: 1px solid #dee2e6; - transition: background-color 0.3s; + border-bottom: 2px solid #dee2e6; + transition: all 0.3s ease; } .step-header:hover { - background: #e9ecef; + background: linear-gradient(135deg, #e9ecef 0%, #dee2e6 100%); + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .step-header h3 { display: flex; align-items: center; - gap: 15px; + gap: 20px; margin: 0; font-size: 1.3em; + font-weight: 500; + } + + .step-info-group { + margin-left: auto; + display: flex; + align-items: center; + gap: 12px; + min-width: 300px; + justify-content: flex-end; + } + + .step-status { + min-width: 70px; + text-align: center; + } + + .step-duration { + min-width: 80px; + text-align: center; + } + + .step-type-fixed { + min-width: 120px; + text-align: center; } 
.step-number { - background: #007bff; + background: linear-gradient(135deg, #007bff 0%, #0056b3 100%); color: white; - width: 30px; - height: 30px; + width: 36px; + height: 36px; border-radius: 50%; display: flex; align-items: center; justify-content: center; - font-size: 0.9em; + font-size: 1.0em; font-weight: bold; + box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3); } .status-badge { - padding: 5px 12px; + padding: 6px 14px; border-radius: 20px; - font-size: 0.8em; + font-size: 0.85em; font-weight: bold; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .status-badge.success { - background: #28a745; + background: linear-gradient(135deg, #28a745 0%, #20c997 100%); color: white; } .status-badge.failure { - background: #dc3545; + background: linear-gradient(135deg, #dc3545 0%, #c82333 100%); color: white; } .duration { - background: #6c757d; + background: linear-gradient(135deg, #6c757d 0%, #5a6268 100%); color: white; - padding: 3px 8px; + padding: 4px 10px; border-radius: 12px; font-size: 0.8em; + box-shadow: 0 1px 3px rgba(0,0,0,0.2); } .toggle-icon { @@ -766,22 +832,20 @@ const htmlTemplate = ` transform: rotate(-90deg); } - .step-meta { - margin-top: 10px; - color: #6c757d; - } - .step-type { - background: #17a2b8; + background: linear-gradient(135deg, #17a2b8 0%, #138496 100%); color: white; - padding: 2px 8px; - border-radius: 10px; + padding: 3px 10px; + border-radius: 12px; font-size: 0.8em; + box-shadow: 0 1px 3px rgba(0,0,0,0.2); } .step-content { - padding: 25px; + padding: 30px; display: none; + background: #fafbfc; + border-top: 1px solid #e9ecef; } .step-content.show { @@ -789,41 +853,73 @@ const htmlTemplate = ` } .actions-section, .validators-section, .screenshots-section, .logs-section { - margin-bottom: 25px; + margin-bottom: 30px; } .actions-section h4, .validators-section h4, .screenshots-section h4, .logs-section h4 { color: #495057; - margin-bottom: 15px; - padding-bottom: 8px; - border-bottom: 1px solid #dee2e6; + margin-bottom: 20px; + padding-bottom: 
12px; + border-bottom: 2px solid #dee2e6; + font-size: 1.2em; + font-weight: 600; } .action-item { - background: #f8f9fa; - border: 1px solid #dee2e6; - border-radius: 8px; - padding: 15px; - margin-bottom: 15px; + background: white; + border: 2px solid #e9ecef; + border-radius: 12px; + padding: 20px; + margin-bottom: 20px; + box-shadow: 0 2px 6px rgba(0,0,0,0.08); + transition: all 0.3s ease; + } + + .action-item:hover { + border-color: #007bff; + box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15); + transform: translateY(-1px); } .action-header { display: flex; align-items: center; - gap: 15px; - margin-bottom: 10px; + gap: 18px; + margin-bottom: 15px; cursor: pointer; - transition: background-color 0.3s; - padding: 8px; - border-radius: 6px; + transition: all 0.3s ease; + padding: 12px 15px; + border-radius: 8px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; } .action-header:hover { - background-color: rgba(0, 123, 255, 0.1); + background: linear-gradient(135deg, rgba(0, 123, 255, 0.1) 0%, rgba(0, 123, 255, 0.05) 100%); + border-color: #007bff; + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 123, 255, 0.2); } .action-header strong { color: #007bff; + font-size: 1.1em; + font-weight: 600; + } + + .action-description { + color: #6c757d; + font-style: italic; + margin: 10px 0; + padding: 10px 15px; + white-space: pre-wrap; + word-wrap: break-word; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + background: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + font-size: 0.9em; + line-height: 1.4; } .action-toggle { @@ -849,26 +945,196 @@ const htmlTemplate = ` display: block; } - .action-params { - color: #6c757d; - font-style: italic; - margin-bottom: 10px; - white-space: pre-wrap; - word-wrap: break-word; - font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; - background: #f8f9fa; - border: 1px solid #dee2e6; - border-radius: 4px; - padding: 10px; - font-size: 0.9em; - 
line-height: 1.4; - } - .error { color: #dc3545; font-weight: bold; } + .planning-results { + margin-top: 15px; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + } + + .planning-item { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + padding: 15px; + margin-bottom: 15px; + } + + .planning-item:last-child { + margin-bottom: 0; + } + + .planning-header { + display: flex; + align-items: center; + gap: 15px; + margin-bottom: 15px; + padding-bottom: 10px; + border-bottom: 1px solid #dee2e6; + } + + .planning-label { + background: #007bff; + color: white; + padding: 4px 12px; + border-radius: 15px; + font-size: 0.9em; + font-weight: bold; + } + + .plan-next-action { + margin: 15px 0; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 2px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); + } + + .plan-next-action h5 { + color: #495057; + margin-bottom: 10px; + font-size: 1.0em; + font-weight: 600; + } + + .planning-two-columns { + display: flex; + gap: 20px; + margin: 15px 0; + } + + .planning-column-left, .planning-column-right { + flex: 1; + min-width: 0; + } + + .planning-step-compact { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + } + + .step-header-compact { + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + padding: 10px 12px; + border-bottom: 1px solid #dee2e6; + border-radius: 8px 8px 0 0; + display: flex; + align-items: center; + justify-content: space-between; + } + + .step-name { + font-weight: 600; + color: #495057; + font-size: 0.9em; + } + + .screenshot-display { + padding: 12px; + } + + .screenshot-item-compact { + text-align: center; + } + + .screenshot-item-compact .screenshot-image { + min-height: 200px; + padding: 10px 0; + background: 
linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border-radius: 6px; + display: flex; + justify-content: center; + align-items: center; + } + + .screenshot-item-compact .screenshot-image img { + max-width: 100%; + max-height: 180px; + border-radius: 4px; + cursor: pointer; + transition: transform 0.2s; + object-fit: contain; + box-shadow: 0 2px 6px rgba(0,0,0,0.1); + } + + .screenshot-item-compact .screenshot-image img:hover { + transform: scale(1.02); + } + + .model-output-compact { + padding: 12px; + } + + .model-info, .tool-calls-info, .actions-info, .usage-info { + background: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 4px; + padding: 8px 10px; + margin: 6px 0; + font-size: 0.85em; + color: #495057; + } + + @media screen and (max-width: 768px) { + .planning-two-columns { + flex-direction: column; + gap: 15px; + } + } + + .raw-content { + margin-top: 10px; + } + + .raw-content pre { + background: #f1f3f4; + border: 1px solid #dadce0; + border-radius: 4px; + padding: 8px; + font-size: 0.8em; + max-height: 150px; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + } + + .step-screenshots { + margin-top: 10px; + } + + .action-details { + display: flex; + align-items: center; + gap: 10px; + } + + .action-details .action-name { + background: #6f42c1; + color: white; + padding: 2px 8px; + border-radius: 12px; + font-size: 0.8em; + font-weight: bold; + } + + .action-details .action-desc { + color: #6c757d; + font-style: italic; + font-size: 0.9em; + } + + + .sub-actions { margin-top: 15px; padding-left: 20px; @@ -1449,7 +1715,7 @@ const htmlTemplate = ` gap: 15px; } - .start-time { + .download-section { width: 100%; text-align: center; min-width: auto; @@ -1458,7 +1724,6 @@ const htmlTemplate = ` .download-buttons { justify-content: center; width: 100%; - max-width: 300px; } .download-btn { @@ -1489,14 +1754,56 @@ const htmlTemplate = ` } .step-header h3 { - font-size: 1.1em; - gap: 10px; + font-size: 1.2em; + gap: 15px; + 
flex-direction: column; + align-items: flex-start; + } + + .step-info-group { + min-width: auto; + width: 100%; + justify-content: space-between; + margin-left: 0; + margin-top: 8px; + } + + .step-status { + min-width: 60px; + } + + .step-duration { + min-width: 70px; + } + + .step-type-fixed { + min-width: 100px; } .step-number { - width: 25px; - height: 25px; - font-size: 0.8em; + width: 32px; + height: 32px; + font-size: 0.9em; + } + + .test-case h2 { + font-size: 1.3em; + padding: 15px 20px; + flex-direction: column; + align-items: flex-start; + gap: 10px; + } + + .case-info { + align-self: flex-end; + } + + .step-header { + padding: 20px 25px; + } + + .step-content { + padding: 25px 20px; } .action-header { @@ -1585,6 +1892,34 @@ const htmlTemplate = ` margin: 0; } } + + .action-content { + margin-top: 10px; + } + + .action-content strong { + color: #6f42c1; + display: block; + margin-bottom: 8px; + font-size: 0.95em; + } + + .action-output { + background: #f8f9fa; + border: 2px solid #6f42c1; + border-radius: 6px; + padding: 10px; + font-size: 0.85em; + max-height: 120px; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + color: #495057; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + line-height: 1.4; + } + + @@ -1593,22 +1928,21 @@ const htmlTemplate = `

🚀 HttpRunner Test Report

-
Automated Testing Results
+
Start Time: {{.Time.StartAt.Format "2006-01-02 15:04:05"}}
-
- Start Time: - {{.Time.StartAt.Format "2006-01-02 15:04:05"}} -
-
- - +
+
📥 Download
+
+ + +
@@ -1617,17 +1951,13 @@ const htmlTemplate = `

📊 Test Summary

-
-
{{.Stat.TestCases.Total}}
-
Total Test Cases
-
{{.Stat.TestCases.Success}}
-
Passed
+
Passed TestCases
{{.Stat.TestCases.Fail}}
-
Failed
+
Failed TestCases
{{.Stat.TestSteps.Total}}
@@ -1642,17 +1972,26 @@ const htmlTemplate = `
Total Sub-Actions
-
{{calculateTotalRequests}}
-
Total Requests
-
-
-
{{calculateTotalScreenshots}}
-
Total Screenshots
+
{{calculateTotalPlannings}}
+
Total Plannings
{{printf "%.1f" .Time.Duration}}s
Duration
+ {{$usage := calculateTotalUsage}} +
+
{{index $usage "prompt_tokens"}}
+
Input Tokens
+
+
+
{{index $usage "completion_tokens"}}
+
Output Tokens
+
+
+
{{index $usage "total_tokens"}}
+
Total Tokens
+
@@ -1682,13 +2021,15 @@ const htmlTemplate = `
{{range $caseIndex, $testCase := .Details}}
-

📋 {{$testCase.Name}}

-
- - {{if $testCase.Success}}✓ PASS{{else}}✗ FAIL{{end}} - - {{printf "%.1f" $testCase.Time.Duration}}s -
+

+ 📋 {{$testCase.Name}} +
+ + {{if $testCase.Success}}✓ PASS{{else}}✗ FAIL{{end}} + + {{printf "%.1f" $testCase.Time.Duration}}s +
+

{{range $stepIndex, $step := $testCase.Records}}
@@ -1696,15 +2037,15 @@ const htmlTemplate = `

{{add $stepIndex 1}} {{$step.Name}} - - {{if $step.Success}}✓ PASS{{else}}✗ FAIL{{end}} - - {{formatDuration $step.Elapsed}} - +
+ + {{if $step.Success}}✓ PASS{{else}}✗ FAIL{{end}} + + {{formatDuration $step.Elapsed}} + {{$step.StepType}} + +

-
- {{$step.StepType}} -
@@ -1720,8 +2061,156 @@ const htmlTemplate = ` {{if $action.Error}}Error: {{$action.Error}}{{end}}
+
{{$action.Params}}
-
{{$action.Params}}
+ + {{if $action.Plannings}} +
+ {{range $planningIndex, $planning := $action.Plannings}} +
+
+ 🧠 Planning & Execution {{add $planningIndex 1}} + {{formatDuration $planning.Elapsed}} + {{if $planning.Error}}Error: {{$planning.Error}}{{end}} +
+ + {{if $planning.Thought}} +
{{$planning.Thought}}
+ {{end}} + + +
+
📋 Planning
+ + +
+ +
+
+
+ 📸 Take Screenshot + {{formatDuration $planning.ScreenshotElapsed}} +
+ {{if $planning.ScreenResult}} +
+ {{$screenshot := $planning.ScreenResult}} + {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} + {{if $base64Image}} +
+
+ Planning Screenshot +
+
+ {{end}} +
+ {{end}} +
+
+ + +
+
+
+ 🤖 Call Model & Parse Result + {{formatDuration $planning.ModelCallElapsed}} +
+
+ {{if $planning.ModelName}} +
🤖 Model: {{$planning.ModelName}}
+ {{end}} + {{if $planning.Usage}} +
📊 Tokens: {{$planning.Usage.PromptTokens}} in / {{$planning.Usage.CompletionTokens}} out / {{$planning.Usage.TotalTokens}} total
+ {{end}} + {{if $planning.ToolCallsCount}} +
🔧 Tool Calls: {{$planning.ToolCallsCount}}
+ {{end}} + {{if $planning.ActionNames}} +
🎯 Actions: {{safeHTML (toJSON $planning.ActionNames)}}
+ {{end}} +
+
+
+
+
+ + {{if $planning.SubActions}} +
+
🎯 Actions
+ {{range $subAction := $planning.SubActions}} +
+
+ {{$subAction.ActionName}} + {{formatDuration $subAction.Elapsed}} + {{if $subAction.Error}}Error: {{$subAction.Error}}{{end}} +
+ +
+
+ {{if $subAction.Arguments}} +
Arguments: {{safeHTML (toJSON $subAction.Arguments)}}
+ {{end}} + + {{if $subAction.Requests}} +
+ +
+ {{range $request := $subAction.Requests}} +
+
+ {{$request.RequestMethod}} + {{$request.RequestUrl}} + Status: {{$request.ResponseStatus}} + {{formatDuration $request.ResponseDuration}} +
+ {{if $request.RequestBody}} +
Request: {{$request.RequestBody}}
+ {{end}} + {{if $request.ResponseBody}} +
Response: {{$request.ResponseBody}}
+ {{end}} +
+ {{end}} +
+
+ {{end}} +
+ + {{if $subAction.ScreenResults}} +
+
+
📸 Screenshots
+
+ {{range $screenshot := $subAction.ScreenResults}} + {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} + {{if $base64Image}} +
+
+ {{base $screenshot.ImagePath}} + {{if $screenshot.Resolution}} + {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} + {{end}} +
+
+ Screenshot +
+
+ {{end}} + {{end}} +
+
+
+ {{end}} +
+
+ {{end}} +
+ {{end}} +
+ {{end}} +
+ {{end}} {{if $action.SubActions}}
@@ -1734,21 +2223,10 @@ const htmlTemplate = `
- {{if $subAction.Thought}} -
{{$subAction.Thought}}
- {{end}} - {{if $subAction.Arguments}}
Arguments: {{safeHTML (toJSON $subAction.Arguments)}}
{{end}} - {{if $subAction.ModelName}} -
- 🤖 Model: - {{$subAction.ModelName}} -
- {{end}} - {{if $subAction.Requests}}
Expected: {{$validator.expect}}
- {{if $validator.msg}} -
{{$validator.msg}}
+ {{if and $validator.msg (ne $validator.check_result "pass")}} +
{{$validator.msg}}
{{end}}
{{end}} diff --git a/step.go b/step.go index 609b351d..9b32ca58 100644 --- a/step.go +++ b/step.go @@ -59,10 +59,11 @@ type TStep struct { // one step contains one or multiple actions type ActionResult struct { option.MobileAction `json:",inline"` - StartTime int64 `json:"start_time"` // action start time - Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms) - Error error `json:"error"` // action execution result - SubActions []*uixt.SubActionResult `json:"sub_actions,omitempty"` // store sub-actions + StartTime int64 `json:"start_time"` // action start time + Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms) + Error error `json:"error"` // action execution result + Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions + SubActions []*uixt.SubActionResult `json:"sub_actions,omitempty"` // store sub-actions for other actions } // one testcase contains one or multiple steps diff --git a/step_ui.go b/step_ui.go index ca2c430b..ab1d18f0 100644 --- a/step_ui.go +++ b/step_ui.go @@ -909,10 +909,10 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err // handle start_to_goal action if action.Method == option.ACTION_StartToGoal { - subActionResults, err := uiDriver.StartToGoal(ctx, + planningResults, err := uiDriver.StartToGoal(ctx, action.Params.(string), action.GetOptions()...) 
actionResult.Elapsed = time.Since(actionStartTime).Milliseconds() - actionResult.SubActions = subActionResults + actionResult.Plannings = planningResults stepResult.Actions = append(stepResult.Actions, actionResult) if err != nil { if !code.IsErrorPredefined(err) { diff --git a/uixt/ai/planner.go b/uixt/ai/planner.go index fdcc4152..ea9d823d 100644 --- a/uixt/ai/planner.go +++ b/uixt/ai/planner.go @@ -28,11 +28,12 @@ type PlanningOptions struct { // PlanningResult represents the result of planning type PlanningResult struct { - ToolCalls []schema.ToolCall `json:"tool_calls"` - Thought string `json:"thought"` - Content string `json:"content"` // original content from model - Error string `json:"error,omitempty"` - ModelName string `json:"model_name"` // model name used for planning + ToolCalls []schema.ToolCall `json:"tool_calls"` + Thought string `json:"thought"` + Content string `json:"content"` // original content from model + Error string `json:"error,omitempty"` + ModelName string `json:"model_name"` // model name used for planning + Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics } func NewPlanner(ctx context.Context, modelConfig *ModelConfig) (*Planner, error) { @@ -81,7 +82,7 @@ func (p *Planner) RegisterTools(tools []*schema.ToolInfo) error { } // Call performs UI planning using Vision Language Model -func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) { +func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *PlanningResult, err error) { // validate input parameters if err := validatePlanningInput(opts); err != nil { return nil, errors.Wrap(err, "validate planning parameters failed") @@ -116,6 +117,13 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes } logResponse(message) + defer func() { + // Extract usage information if available + if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil { + result.Usage = 
message.ResponseMeta.Usage + } + }() + // handle tool calls if len(message.ToolCalls) > 0 { // append tool call message @@ -130,7 +138,7 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes ToolCallID: toolCallID, }) // history will be appended with tool calls execution result - result := &PlanningResult{ + result = &PlanningResult{ ToolCalls: message.ToolCalls, Thought: message.Content, ModelName: string(p.modelConfig.ModelType), @@ -139,7 +147,7 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes } // parse message content to actions (tool calls) - result, err := p.parser.Parse(message.Content, opts.Size) + result, err = p.parser.Parse(message.Content, opts.Size) if err != nil { result = &PlanningResult{ Thought: message.Content, diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index e00cf6b2..2dba01a3 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -6,21 +6,23 @@ import ( "time" "github.com/cloudwego/eino/schema" + "github.com/mark3labs/mcp-go/mcp" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/ai" "github.com/httprunner/httprunner/v5/uixt/option" - "github.com/mark3labs/mcp-go/mcp" - "github.com/pkg/errors" - "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/uixt/types" ) -func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*SubActionResult, error) { +func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) { options := option.NewActionOptions(opts...) 
log.Info().Int("max_retry_times", options.MaxRetryTimes).Msg("StartToGoal") - var allSubActions []*SubActionResult + var allPlannings []*PlanningExecutionResult var attempt int for { attempt++ @@ -30,7 +32,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op select { case <-ctx.Done(): log.Warn().Msg("interrupted in StartToGoal") - return allSubActions, errors.Wrap(code.InterruptError, "StartToGoal interrupted") + return allPlannings, errors.Wrap(code.InterruptError, "StartToGoal interrupted") default: } @@ -41,7 +43,8 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op // Add ResetHistory option for the first attempt planningOpts = append(planningOpts, option.WithResetHistory(true)) } - result, err := dExt.PlanNextAction(ctx, prompt, planningOpts...) + + planningResult, err := dExt.PlanNextAction(ctx, prompt, planningOpts...) if err != nil { // Check if this is a LLM service request error that should be retried if errors.Is(err, code.LLMRequestServiceError) { @@ -49,68 +52,81 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op Msg("LLM service request failed, retrying...") continue } - allSubActions = append(allSubActions, &SubActionResult{ - ActionName: "plan_next_action", - Arguments: prompt, - Error: err, - StartTime: planningStartTime.Unix(), - Elapsed: time.Since(planningStartTime).Milliseconds(), - SessionData: dExt.GetSession().GetData(true), - }) - return allSubActions, err + // Create planning result with error + errorResult := &PlanningExecutionResult{ + PlanningResult: ai.PlanningResult{ + Thought: "Planning failed", + ModelName: "", + Error: err.Error(), + }, + StartTime: planningStartTime.Unix(), + Elapsed: time.Since(planningStartTime).Milliseconds(), + } + allPlannings = append(allPlannings, errorResult) + return allPlannings, err } + // Set planning execution timing + planningResult.StartTime = planningStartTime.Unix() + planningResult.SubActions = 
[]*SubActionResult{} + // Check if task is finished BEFORE executing actions - if dExt.isTaskFinished(result) { + if dExt.isTaskFinished(planningResult) { log.Info().Msg("task finished, stopping StartToGoal") - // Create a sub-action result to record the planning result even when task is finished - subActionResult := &SubActionResult{ - ActionName: "plan_next_action", - Arguments: prompt, - StartTime: planningStartTime.Unix(), - Elapsed: time.Since(planningStartTime).Milliseconds(), - Thought: result.Thought, - ModelName: result.ModelName, - SessionData: dExt.GetSession().GetData(true), - } - allSubActions = append(allSubActions, subActionResult) - return allSubActions, nil + planningResult.Elapsed = time.Since(planningStartTime).Milliseconds() + allPlannings = append(allPlannings, planningResult) + return allPlannings, nil } // Invoke tool calls - for _, toolCall := range result.ToolCalls { + for _, toolCall := range planningResult.ToolCalls { // Check for context cancellation before each action select { case <-ctx.Done(): log.Warn().Msg("interrupted in invokeToolCalls") - return allSubActions, errors.Wrap(code.InterruptError, "invokeToolCalls interrupted") + planningResult.Elapsed = time.Since(planningStartTime).Milliseconds() + allPlannings = append(allPlannings, planningResult) + return allPlannings, errors.Wrap(code.InterruptError, "invokeToolCalls interrupted") default: } - subActionStartTime := time.Now() - // Create sub-action result - subActionResult := &SubActionResult{ - ActionName: toolCall.Function.Name, - Arguments: toolCall.Function.Arguments, - StartTime: subActionStartTime.Unix(), - Thought: result.Thought, - ModelName: result.ModelName, - } + // Execute each tool call in a separate function to ensure proper defer execution + err := func() error { + subActionStartTime := time.Now() + subActionResult := &SubActionResult{ + ActionName: toolCall.Function.Name, + Arguments: toolCall.Function.Arguments, + StartTime: subActionStartTime.Unix(), + } - if 
err := dExt.invokeToolCall(ctx, toolCall); err != nil { - subActionResult.Error = err - allSubActions = append(allSubActions, subActionResult) - return allSubActions, err - } - subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds() + // Use defer to ensure sub-action is always processed and added to results + defer func() { + subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds() + subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data + planningResult.SubActions = append(planningResult.SubActions, subActionResult) + }() - // Collect sub-action specific attachments and reset session data - subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data - allSubActions = append(allSubActions, subActionResult) + // Execute the tool call + if err := dExt.invokeToolCall(ctx, toolCall); err != nil { + subActionResult.Error = err + return err + } + return nil + }() + if err != nil { + planningResult.Elapsed = time.Since(planningStartTime).Milliseconds() + planningResult.Error = err.Error() + allPlannings = append(allPlannings, planningResult) + return allPlannings, err + } } + // Complete this planning cycle + planningResult.Elapsed = time.Since(planningStartTime).Milliseconds() + allPlannings = append(allPlannings, planningResult) + if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes { - return allSubActions, errors.New("reached max retry times") + return allPlannings, errors.New("reached max retry times") } } } @@ -119,13 +135,13 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio log.Info().Str("prompt", prompt).Msg("performing AI action") // plan next action - result, err := dExt.PlanNextAction(ctx, prompt, opts...) + planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...) 
if err != nil { return err } // Invoke tool calls - for _, toolCall := range result.ToolCalls { + for _, toolCall := range planningResult.ToolCalls { err = dExt.invokeToolCall(ctx, toolCall) if err != nil { return err @@ -135,7 +151,8 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio return nil } -func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*ai.PlanningResult, error) { +// PlanNextAction performs planning and returns unified planning information +func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) { if dExt.LLMService == nil { return nil, errors.New("LLM service is not initialized") } @@ -144,14 +161,21 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. options := option.NewActionOptions(opts...) resetHistory := options.ResetHistory + // Step 1: Take screenshot + screenshotStartTime := time.Now() // Use GetScreenResult to handle screenshot capture, save, and session tracking screenResult, err := dExt.GetScreenResult( option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), ) + screenshotElapsed := time.Since(screenshotStartTime).Milliseconds() if err != nil { return nil, err } + // Clear session data after planning screenshot to avoid including it in sub-actions + // The planning screenshot is already stored in planningResult.ScreenResult + dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions + // convert buffer to base64 string for LLM screenShotBase64 := "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes()) @@ -162,6 +186,8 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. 
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error()) } + // Step 2: Call model + modelCallStartTime := time.Now() planningOpts := &ai.PlanningOptions{ UserInstruction: prompt, Message: &schema.Message{ @@ -180,22 +206,48 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. } result, err := dExt.LLMService.Call(ctx, planningOpts) + modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() + if err != nil { return nil, errors.Wrap(err, "failed to get next action from planner") } - return result, nil + + // Step 3: Parse result (this is already done in LLMService.Call, but we record it separately) + actionNames := make([]string, len(result.ToolCalls)) + for i, toolCall := range result.ToolCalls { + actionNames[i] = toolCall.Function.Name + } + + // Create unified planning result that embeds ai.PlanningResult + planningResult := &PlanningExecutionResult{ + PlanningResult: *result, // Copy all fields of ai.PlanningResult into the embedded struct + // Planning process timing and metadata + ScreenshotElapsed: screenshotElapsed, + ImagePath: screenResult.ImagePath, + Resolution: &screenResult.Resolution, + ScreenResult: screenResult, + ModelCallElapsed: modelCallElapsed, + ToolCallsCount: len(result.ToolCalls), + ActionNames: actionNames, + // Execution timing (will be set by StartToGoal) + StartTime: 0, // Will be set by caller + Elapsed: 0, // Will be set by caller + SubActions: nil, // Will be populated during execution + } + + return planningResult, nil } // isTaskFinished checks if the task is completed based on the planning result -func (dExt *XTDriver) isTaskFinished(result *ai.PlanningResult) bool { +func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool { // Check if there are no tool calls (no actions to execute) - if len(result.ToolCalls) == 0 { + if len(planningResult.ToolCalls) == 0 { log.Info().Msg("no tool calls returned, task may be finished") return true } // Check if any tool call is a
"finished" action - for _, toolCall := range result.ToolCalls { + for _, toolCall := range planningResult.ToolCalls { if toolCall.Function.Name == "uixt__finished" { log.Info().Str("reason", toolCall.Function.Arguments).Msg("finished action detected") return true @@ -236,15 +288,30 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa return nil } +// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results +type PlanningExecutionResult struct { + ai.PlanningResult // Embedded: promotes all fields of ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName, Usage) + // Planning process information + ScreenshotElapsed int64 `json:"screenshot_elapsed_ms"` // screenshot elapsed time(ms) + ImagePath string `json:"image_path"` // screenshot image path + Resolution *types.Size `json:"resolution"` // image resolution + ScreenResult *ScreenResult `json:"screen_result"` // complete screen result data + ModelCallElapsed int64 `json:"model_call_elapsed_ms"` // model call elapsed time(ms) + ToolCallsCount int `json:"tool_calls_count"` // number of tool calls generated + ActionNames []string `json:"action_names"` // names of parsed actions + // Execution information + StartTime int64 `json:"start_time"` // planning start time + Elapsed int64 `json:"elapsed_ms"` // planning elapsed time(ms) + SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning +} + // SubActionResult represents a sub-action within a start_to_goal action type SubActionResult struct { - ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input") - Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action - StartTime int64 `json:"start_time"` // sub-action start time - Elapsed int64 `json:"elapsed_ms"` // sub-action elapsed time(ms) - Error error `json:"error,omitempty"` // sub-action execution result - Thought string
`json:"thought,omitempty"` // sub-action thought - ModelName string `json:"model_name,omitempty"` // model name used for AI actions + ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input") + Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action + StartTime int64 `json:"start_time"` // sub-action start time + Elapsed int64 `json:"elapsed_ms"` // sub-action elapsed time(ms) + Error error `json:"error,omitempty"` // sub-action execution result SessionData } diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index b8c6d1ea..5db77431 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -52,9 +52,10 @@ func TestDriverExt_StartToGoal(t *testing.T) { func TestDriverExt_PlanNextAction(t *testing.T) { driver := setupDriverExt(t) - result, err := driver.PlanNextAction(context.Background(), "启动抖音") + planningResult, err := driver.PlanNextAction(context.Background(), "启动抖音") assert.Nil(t, err) - t.Log(result) + assert.NotNil(t, planningResult) // Should always return planningResult + t.Log(planningResult) } func TestXTDriver_isTaskFinished(t *testing.T) { @@ -62,65 +63,73 @@ func TestXTDriver_isTaskFinished(t *testing.T) { tests := []struct { name string - result *ai.PlanningResult + result *PlanningExecutionResult expected bool }{ { name: "no tool calls - task finished", - result: &ai.PlanningResult{ - ToolCalls: []schema.ToolCall{}, - Thought: "No actions needed", + result: &PlanningExecutionResult{ + PlanningResult: ai.PlanningResult{ + ToolCalls: []schema.ToolCall{}, + Thought: "No actions needed", + }, }, expected: true, }, { name: "finished action - task finished", - result: &ai.PlanningResult{ - ToolCalls: []schema.ToolCall{ - { - Function: schema.FunctionCall{ - Name: "uixt__finished", - Arguments: `{"content": "Task completed successfully"}`, + result: &PlanningExecutionResult{ + PlanningResult: ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: 
schema.FunctionCall{ + Name: "uixt__finished", + Arguments: `{"content": "Task completed successfully"}`, + }, }, }, + Thought: "Task completed", }, - Thought: "Task completed", }, expected: true, }, { name: "regular action - task not finished", - result: &ai.PlanningResult{ - ToolCalls: []schema.ToolCall{ - { - Function: schema.FunctionCall{ - Name: string(option.ACTION_TapXY), - Arguments: `{"x": 100, "y": 200}`, + result: &PlanningExecutionResult{ + PlanningResult: ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: schema.FunctionCall{ + Name: string(option.ACTION_TapXY), + Arguments: `{"x": 100, "y": 200}`, + }, }, }, + Thought: "Click on button", }, - Thought: "Click on button", }, expected: false, }, { name: "multiple actions with finished - task finished", - result: &ai.PlanningResult{ - ToolCalls: []schema.ToolCall{ - { - Function: schema.FunctionCall{ - Name: string(option.ACTION_TapXY), - Arguments: `{"x": 100, "y": 200}`, - }, - }, - { - Function: schema.FunctionCall{ - Name: "uixt__finished", - Arguments: `{"content": "All tasks completed"}`, + result: &PlanningExecutionResult{ + PlanningResult: ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: schema.FunctionCall{ + Name: string(option.ACTION_TapXY), + Arguments: `{"x": 100, "y": 200}`, + }, + }, + { + Function: schema.FunctionCall{ + Name: "uixt__finished", + Arguments: `{"content": "All tasks completed"}`, + }, }, }, + Thought: "Complete all actions", }, - Thought: "Complete all actions", }, expected: true, },