diff --git a/internal/version/VERSION b/internal/version/VERSION index e3cc677a..6926ae89 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506181717 +v5.0.0-beta-2506182235 diff --git a/report.go b/report.go index 0a42c048..9cc202b2 100644 --- a/report.go +++ b/report.go @@ -605,13 +605,23 @@ func (g *HTMLReportGenerator) GenerateReport(outputFile string) error { result := buf.String() return strings.TrimSpace(result) }, - "mul": func(a, b float64) float64 { return a * b }, "add": func(a, b int) int { return a + b }, - "sub": func(a, b int) int { return a - b }, - "lt": func(a, b int) bool { return a < b }, - "gt": func(a, b int) bool { return a > b }, "base": filepath.Base, "index": func(m map[string]any, key string) any { return m[key] }, + "extractThought": func(content string) string { + if content == "" { + return "" + } + // Try to parse as JSON to extract thought field + var data map[string]interface{} + if err := json.Unmarshal([]byte(content), &data); err == nil { + if thought, ok := data["thought"].(string); ok { + return thought + } + } + // If not JSON or no thought field, return original content + return content + }, } // Parse template @@ -850,8 +860,6 @@ const htmlTemplate = ` word-break: break-all; } - - .test-cases { margin-top: 20px; } @@ -1429,25 +1437,7 @@ const htmlTemplate = ` } } - .raw-content { - margin-top: 10px; - } - .raw-content pre { - background: #f1f3f4; - border: 1px solid #dadce0; - border-radius: 4px; - padding: 8px; - font-size: 0.8em; - max-height: 150px; - overflow-y: auto; - white-space: pre-wrap; - word-wrap: break-word; - } - - .step-screenshots { - margin-top: 10px; - } .action-details { display: flex; @@ -1470,12 +1460,6 @@ const htmlTemplate = ` font-size: 0.9em; } - - - - - - .thought { background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); border: 2px solid #2196f3; @@ -1500,31 +1484,7 @@ const htmlTemplate = ` line-height: 1; } - .model-name-container { - background: #f8f9fa; - border: 1px solid #e9ecef; - border-radius: 6px; - padding: 8px 12px; - margin: 8px 0; - font-size: 0.9em; - display: flex; - align-items: center; - gap: 8px; - } - .model-label { - font-weight: 600; - color: #495057; - } - - .model-value { - font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; - background: #e9ecef; - padding: 2px 6px; - border-radius: 4px; - color: #495057; - font-size: 0.85em; - } .arguments { background: #f8f9fa; @@ -1536,92 +1496,7 @@ const htmlTemplate = ` font-size: 0.9em; } - .requests { - margin-top: 15px; - } - .requests-toggle { - background: #6c757d; - color: white; - border: none; - padding: 6px 12px; - border-radius: 4px; - cursor: pointer; - font-size: 0.8em; - margin-bottom: 10px; - transition: background-color 0.3s; - } - - .requests-toggle:hover { - background: #5a6268; - } - - .requests-content { - display: none; - } - - .requests-content.show { - display: block; - } - - .request-item { - background: #f1f3f4; - border: 1px solid #dadce0; - border-radius: 4px; - padding: 8px; - margin: 6px 0; - } - - .request-header { - display: flex; - align-items: center; - gap: 10px; - margin-bottom: 6px; - } - - .method { - background: #007bff; - color: white; - padding: 2px 6px; - border-radius: 4px; - font-size: 0.8em; - font-weight: bold; - } - - .url { - color: #495057; - font-family: monospace; - font-size: 0.9em; - } - - .status { - padding: 2px 6px; - border-radius: 4px; - font-size: 0.8em; - font-weight: bold; - } - - .status.success { - background: #d4edda; - color: #155724; - } - - .status.failure { - background: #f8d7da; - color: #721c24; - } - - .request-body, .response-body { - background: #ffffff; - border: 1px solid #e9ecef; - border-radius: 4px; - padding: 6px; - margin: 4px 0; - font-family: monospace; - font-size: 0.8em; - max-height: 100px; - overflow-y: auto; - } .screenshots-section { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); @@ -1644,6 +1519,30 @@ const htmlTemplate = ` gap: 10px; } + .screenshots-horizontal { + display: flex; + gap: 15px; + overflow-x: auto; + padding: 10px 0; + } + + .screenshots-horizontal .screenshot-item { + flex: 0 0 auto; + min-width: 200px; + max-width: 300px; + margin-bottom: 0; + } + + .screenshots-horizontal .screenshot-image { + min-height: 200px; + padding: 10px 0; + } + + .screenshots-horizontal .screenshot-image img { + max-height: 250px; + width: auto; + } + .screenshot-item { background: white; border: 1px solid #dee2e6; @@ -1740,8 +1639,18 @@ const htmlTemplate = ` .validator-header { display: flex; align-items: center; - gap: 10px; - margin-bottom: 8px; + gap: 15px; + margin-bottom: 15px; + padding: 12px 15px; + border-radius: 8px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + } + + .validator-header strong { + color: #007bff; + font-size: 1.1em; + font-weight: 600; } .check-type, .assert-type { @@ -1756,9 +1665,84 @@ const htmlTemplate = ` font-weight: bold; } - .validator-expect, .validator-message { - margin: 4px 0; + .validator-expect, .validator-message { + margin: 8px 0; font-size: 0.9em; + padding: 8px 12px; + background: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + } + + .validator-ai-content { + margin-top: 15px; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + } + + .validator-ai-layout { + display: flex; + gap: 20px; + margin: 15px 0; + } + + .validator-column-screenshot { + flex: 0.9; + min-width: 250px; + max-width: 35%; + } + + .validator-column-analysis { + flex: 1.6; + min-width: 350px; + } + + .validator-step-compact { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + height: fit-content; + } + + .validator-ai-details { + padding: 12px; + } + + .validator-thought { + background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); + border: 2px solid #2196f3; + border-radius: 12px; + padding: 15px; + margin: 10px 0; + font-style: italic; + color: #1565c0; + font-size: 1.0em; + font-weight: 500; + box-shadow: 0 2px 8px rgba(33, 150, 243, 0.15); + white-space: pre-wrap; + word-wrap: break-word; + } + + @media screen and (max-width: 768px) { + .validator-ai-layout { + flex-direction: column; + gap: 15px; + } + + .validator-column-screenshot { + flex: none; + min-width: auto; + max-width: none; + } + + .validator-column-analysis { + flex: none; + min-width: auto; + } } .logs-section { @@ -2132,6 +2116,18 @@ const htmlTemplate = ` gap: 10px; } + .screenshots-horizontal { + flex-direction: column; + overflow-x: visible; + } + + .screenshots-horizontal .screenshot-item { + flex: none; + min-width: auto; + max-width: none; + width: 100%; + } + .screenshot-image { min-height: 250px; padding: 15px 0; @@ -2349,7 +2345,6 @@ const htmlTemplate = `
{{if $action.Plannings}} -
{{range $planningIndex, $planning := $action.Plannings}}
@@ -2468,81 +2463,94 @@ const htmlTemplate = ` {{/* SubActions are now displayed in the right panel, so we don't show them here */}}
{{end}} -
{{end}} + {{/* Handle special case: ai_query needs enhanced display even when not in planning */}} {{if $action.SubActions}} -
{{range $subAction := $action.SubActions}} -
-
- {{$subAction.ActionName}} - {{formatDuration $subAction.Elapsed}} -
- -
-
- {{if $subAction.Arguments}} -
Arguments: {{safeHTML (toJSON $subAction.Arguments)}}
- {{end}} - - {{if $subAction.Requests}} -
- -
- {{range $request := $subAction.Requests}} -
-
- {{$request.RequestMethod}} - {{$request.RequestUrl}} - Status: {{$request.ResponseStatus}} - {{formatDuration $request.ResponseDuration}} -
- {{if $request.RequestBody}} -
Request: {{$request.RequestBody}}
- {{end}} - {{if $request.ResponseBody}} -
Response: {{$request.ResponseBody}}
- {{end}} -
+ {{if eq $subAction.ActionName "ai_query"}} +
+ +
+ + {{$stepLogs := getStepLogs $step}} + {{$queryThought := ""}} + {{$queryModel := ""}} + {{$queryUsage := ""}} + {{$queryScreenshot := ""}} + {{$queryResult := ""}} + {{range $logEntry := $stepLogs}} + {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}} + {{$content := index $logEntry.Fields "content"}} + {{if $content}} + {{$queryResult = $content}} {{end}} -
-
+ {{end}} + {{if and (eq $logEntry.Message "call model service for query") (index $logEntry.Fields "model")}} + {{$queryModel = index $logEntry.Fields "model"}} + {{end}} + {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}} + {{$inputTokens := index $logEntry.Fields "input_tokens"}} + {{$outputTokens := index $logEntry.Fields "output_tokens"}} + {{$totalTokens := index $logEntry.Fields "total_tokens"}} + {{$queryUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}} + {{end}} + {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}} + {{$queryScreenshot = index $logEntry.Fields "imagePath"}} + {{end}} {{end}} -
- {{if $subAction.ScreenResults}} -
-
-
📸 Screenshots
-
- {{range $screenshot := $subAction.ScreenResults}} - {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} - {{if $base64Image}} -
-
- {{base $screenshot.ImagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} + + {{if $queryResult}} +
{{$queryResult}}
+ {{end}} + + +
+ + {{if $queryScreenshot}} +
+
+
+ 📸 Query Screenshot +
+
+ {{$base64Image := encodeImageBase64 $queryScreenshot}} + {{if $base64Image}} +
+
+ Query Screenshot +
+
{{end}}
-
- Screenshot +
+
+ {{end}} + + +
+
+
+ 🤖 AI Query +
+
+ {{if $queryModel}} +
🤖 Model: {{$queryModel}}
+ {{end}} + {{if $queryUsage}} +
{{$queryUsage}}
+ {{end}}
- {{end}} - {{end}}
- {{end}}
-
- {{end}} -
+ {{end}} {{end}} + {{end}} + {{/* Other SubActions (non-ai_query) are displayed in the Planning section's right panel to avoid duplication */}}
{{end}} @@ -2552,18 +2560,97 @@ const htmlTemplate = ` {{if and $step.Data $step.Data.validators}}
-

Validators

- {{range $validator := $step.Data.validators}} +

🔍 Validators

+ {{range $validatorIndex, $validator := $step.Data.validators}}
- {{$validator.check}} - {{$validator.assert}} - {{$validator.check_result}} + {{$validator.check}} - {{$validator.assert}} + + {{if eq $validator.check_result "pass"}}✓ PASS{{else}}✗ FAIL{{end}} +
Expected: {{$validator.expect}}
{{if and $validator.msg (ne $validator.check_result "pass")}}
{{$validator.msg}}
{{end}} + + + {{if eq $validator.check "ui_ai"}} +
+ + {{$stepLogs := getStepLogs $step}} + {{$validationThought := ""}} + {{$validationModel := ""}} + {{$validationUsage := ""}} + {{$validationScreenshot := ""}} + {{range $logEntry := $stepLogs}} + {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}} + {{$content := index $logEntry.Fields "content"}} + {{if $content}} + {{$validationThought = $content}} + {{end}} + {{end}} + {{if and (eq $logEntry.Message "call model service for assertion") (index $logEntry.Fields "model")}} + {{$validationModel = index $logEntry.Fields "model"}} + {{end}} + {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}} + {{$inputTokens := index $logEntry.Fields "input_tokens"}} + {{$outputTokens := index $logEntry.Fields "output_tokens"}} + {{$totalTokens := index $logEntry.Fields "total_tokens"}} + {{$validationUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}} + {{end}} + {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}} + {{$validationScreenshot = index $logEntry.Fields "imagePath"}} + {{end}} + {{end}} + + + {{if $validationThought}} +
{{extractThought $validationThought}}
+ {{end}} + + +
+ + {{if $validationScreenshot}} +
+
+
+ 📸 Validation Screenshot +
+
+ {{$base64Image := encodeImageBase64 $validationScreenshot}} + {{if $base64Image}} +
+
+ Validation Screenshot +
+
+ {{end}} +
+
+
+ {{end}} + + +
+
+
+ 🤖 AI Analysis +
+
+ {{if $validationModel}} +
🤖 Model: {{$validationModel}}
+ {{end}} + {{if $validationUsage}} +
{{$validationUsage}}
+ {{end}} +
+
+
+
+
+ {{end}}
{{end}}
@@ -2576,22 +2663,35 @@ const htmlTemplate = ` {{if index $attachments "screen_results"}}

Screenshots

- {{range $screenshot := index $attachments "screen_results"}} - {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} - {{if $base64Image}} -
-
- {{base $screenshot.ImagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} - {{end}} -
-
- Screenshot +
+ {{range $screenshot := index $attachments "screen_results"}} + {{$imagePath := ""}} + {{if $screenshot.ImagePath}} + {{$imagePath = $screenshot.ImagePath}} + {{else if index $screenshot "image_path"}} + {{$imagePath = index $screenshot "image_path"}} + {{end}} + {{if $imagePath}} + {{$base64Image := encodeImageBase64 $imagePath}} + {{if $base64Image}} +
+
+ {{base $imagePath}} + {{if $screenshot.Resolution}} + {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} + {{else if index $screenshot "resolution"}} + {{$resolution := index $screenshot "resolution"}} + {{index $resolution "width"}}x{{index $resolution "height"}} + {{end}} +
+
+ Screenshot +
+ {{end}} + {{end}} + {{end}}
- {{end}} - {{end}}
{{end}} {{end}} @@ -2736,19 +2836,6 @@ const htmlTemplate = ` } } - function toggleRequests(buttonElement) { - const requestsDiv = buttonElement.parentElement; - const requestsContent = requestsDiv.querySelector('.requests-content'); - - if (requestsContent.classList.contains('show')) { - requestsContent.classList.remove('show'); - buttonElement.textContent = buttonElement.textContent.replace('Hide', 'Show'); - } else { - requestsContent.classList.add('show'); - buttonElement.textContent = buttonElement.textContent.replace('Show', 'Hide'); - } - } - function toggleRequestsCompact(buttonElement) { const requestsDiv = buttonElement.parentElement; const requestsContent = requestsDiv.querySelector('.requests-content-compact'); @@ -2762,8 +2849,6 @@ const htmlTemplate = ` } } - - function openImageModal(src) { const modal = document.getElementById('imageModal'); const modalImg = document.getElementById('modalImage'); @@ -2783,8 +2868,6 @@ const htmlTemplate = ` } } - - // Auto-expand all steps on load to show actions document.addEventListener('DOMContentLoaded', function() { // Expand all steps to show the actions list diff --git a/step_ui.go b/step_ui.go index 2a67bd1d..1fec9f4b 100644 --- a/step_ui.go +++ b/step_ui.go @@ -793,6 +793,16 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err stepResult.Actions = append(stepResult.Actions, actionResult) } + // Get session data and add to attachments, clear session for next step + if uiDriver != nil { + sessionData := uiDriver.GetSession().GetData(true) // clear session after getting data + if len(sessionData.ScreenResults) > 0 { + attachments["screen_results"] = sessionData.ScreenResults + log.Debug().Int("count", len(sessionData.ScreenResults)). + Str("step", step.Name()).Msg("added screen results to step attachments") + } + } + var config *TConfig if s.caseRunner != nil && s.caseRunner.Config != nil { config = s.caseRunner.Config.Get() diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 74dfc7e2..72dc58f9 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -164,7 +164,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. // Step 1: Take screenshot screenshotStartTime := time.Now() // Use GetScreenResult to handle screenshot capture, save, and session tracking - screenResult, err := dExt.GetScreenResult( + screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), ) screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()