diff --git a/internal/sdk/ga4.go b/internal/sdk/ga4.go index c7f72a89..6a2995dd 100644 --- a/internal/sdk/ga4.go +++ b/internal/sdk/ga4.go @@ -206,6 +206,6 @@ func SendGA4Event(name string, params map[string]interface{}) { } err := ga4Client.SendEvent(event) if err != nil { - log.Error().Err(err).Msg("send GA4 event failed") + log.Warn().Err(err).Msg("send GA4 event failed") } } diff --git a/internal/version/VERSION b/internal/version/VERSION index 63fd7a19..9b90e21e 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506171946 +v5.0.0-beta-2506191048 diff --git a/report.go b/report.go index d9508c03..5e803bcd 100644 --- a/report.go +++ b/report.go @@ -8,6 +8,7 @@ import ( "html/template" "os" "path/filepath" + "sort" "strings" "time" @@ -43,10 +44,11 @@ type HTMLReportGenerator struct { // LogEntry represents a single log entry type LogEntry struct { - Time string `json:"time"` - Level string `json:"level"` - Message string `json:"message"` - Fields map[string]any `json:"-"` // Store all other fields + Time string `json:"time"` + Level string `json:"level"` + Message string `json:"message"` + Fields map[string]any `json:"-"` // Store all other fields + LogIndex int `json:"-"` // Original index to maintain order for same timestamps } // NewHTMLReportGenerator creates a new HTML report generator @@ -126,6 +128,7 @@ func (g *HTMLReportGenerator) loadLogData() error { defer file.Close() scanner := bufio.NewScanner(file) + logIndex := 0 // Track original order for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" { @@ -141,8 +144,10 @@ func (g *HTMLReportGenerator) loadLogData() error { // Create LogEntry with basic fields logEntry := LogEntry{ - Fields: make(map[string]any), + Fields: make(map[string]any), + LogIndex: logIndex, // Store original order } + logIndex++ // Extract standard fields if time, ok := rawEntry["time"].(string); ok { @@ -168,36 +173,63 @@ func (g *HTMLReportGenerator) loadLogData() error { return scanner.Err() } -// getStepLogs filters log entries for a specific test step based on time range +// getStepLogs filters log entries for a specific test step based on step boundaries func (g *HTMLReportGenerator) getStepLogs(stepName string, startTime int64, elapsed int64) []LogEntry { if len(g.LogData) == 0 { return nil } var stepLogs []LogEntry + var inCurrentStep bool = false - // startTime is in seconds, elapsed is in milliseconds - // Calculate end time (startTime in seconds + elapsed in milliseconds converted to seconds) - endTime := startTime + elapsed/1000 - - // Convert Unix timestamps to time.Time for comparison - startTimeObj := time.Unix(startTime, 0) - endTimeObj := time.Unix(endTime, 0) - + // Simple approach: use step start/end markers for precise boundaries for _, logEntry := range g.LogData { - // Parse log entry time - logTime, err := g.parseLogTime(logEntry.Time) - if err != nil { - continue + // Check for step boundaries to control inclusion + if logEntry.Message == RUN_STEP_START { + if stepFieldValue, exists := logEntry.Fields["step"]; exists { + if stepFieldValue == stepName { + inCurrentStep = true + stepLogs = append(stepLogs, logEntry) + continue + } else if inCurrentStep { + // This is a different step starting, we're done + break + } + } } - // Check if log entry falls within step time range - if (logTime.Equal(startTimeObj) || logTime.After(startTimeObj)) && - (logTime.Equal(endTimeObj) || logTime.Before(endTimeObj)) { + if logEntry.Message == RUN_STEP_END { + if stepFieldValue, exists := logEntry.Fields["step"]; exists { + if stepFieldValue == stepName { + stepLogs = append(stepLogs, logEntry) + inCurrentStep = false + continue + } + } + } + + // Only include logs when we're in the current step + if inCurrentStep { stepLogs = append(stepLogs, logEntry) } } + // Sort logs by time, then by original index for stable ordering + sort.Slice(stepLogs, func(i, j int) bool { + timeI, errI := g.parseLogTime(stepLogs[i].Time) + timeJ, errJ := g.parseLogTime(stepLogs[j].Time) + + if errI != nil || errJ != nil { + return stepLogs[i].LogIndex < stepLogs[j].LogIndex + } + + if timeI.Equal(timeJ) { + // For same timestamps, use original log index to maintain order + return stepLogs[i].LogIndex < stepLogs[j].LogIndex + } + return timeI.Before(timeJ) + }) + return stepLogs } @@ -293,79 +325,39 @@ func (g *HTMLReportGenerator) getStepLogsForTemplate(step *StepResult) []LogEntr // calculateTotalActions calculates the total number of actions across all test cases func (g *HTMLReportGenerator) calculateTotalActions() int { - total := 0 - if g.SummaryData == nil || g.SummaryData.Details == nil { - return total - } - - for _, testCase := range g.SummaryData.Details { - if testCase.Records == nil { - continue - } - for _, step := range testCase.Records { - if step.Actions != nil { - total += len(step.Actions) - } - } - } - return total + return g.iterateTestData(func(action *ActionResult) int { + return 1 // Count each action + }) } // calculateTotalSubActions calculates the total number of sub-actions across all test cases func (g *HTMLReportGenerator) calculateTotalSubActions() int { - total := 0 - if g.SummaryData == nil || g.SummaryData.Details == nil { - return total - } - - for _, testCase := range g.SummaryData.Details { - if testCase.Records == nil { - continue + return g.iterateTestData(func(action *ActionResult) int { + total := 0 + // Count sub-actions from regular actions + if action.SubActions != nil { + total += len(action.SubActions) } - for _, step := range testCase.Records { - if step.Actions != nil { - for _, action := range step.Actions { - // Count sub-actions from regular actions - if action.SubActions != nil { - total += len(action.SubActions) - } - // Count sub-actions from planning results - if action.Plannings != nil { - for _, planning := range action.Plannings { - if planning.SubActions != nil { - total += len(planning.SubActions) - } - } - } + // Count sub-actions from planning results + if action.Plannings != nil { + for _, planning := range action.Plannings { + if planning.SubActions != nil { + total += len(planning.SubActions) } } } - } - return total + return total + }) } // calculateTotalPlannings calculates the total number of planning results across all test cases func (g *HTMLReportGenerator) calculateTotalPlannings() int { - total := 0 - if g.SummaryData == nil || g.SummaryData.Details == nil { - return total - } - - for _, testCase := range g.SummaryData.Details { - if testCase.Records == nil { - continue + return g.iterateTestData(func(action *ActionResult) int { + if action.Plannings != nil { + return len(action.Plannings) } - for _, step := range testCase.Records { - if step.Actions != nil { - for _, action := range step.Actions { - if action.Plannings != nil { - total += len(action.Plannings) - } - } - } - } - } - return total + return 0 + }) } // calculateTotalUsage calculates the total token usage across all test cases @@ -406,6 +398,28 @@ func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} { return totalUsage } +// iterateTestData is a helper function that iterates through all actions and applies a counting function +func (g *HTMLReportGenerator) iterateTestData(countFunc func(*ActionResult) int) int { + total := 0 + if g.SummaryData == nil || g.SummaryData.Details == nil { + return total + } + + for _, testCase := range g.SummaryData.Details { + if testCase.Records == nil { + continue + } + for _, step := range testCase.Records { + if step.Actions != nil { + for _, action := range step.Actions { + total += countFunc(action) + } + } + } + } + return total +} + // GenerateReport generates the complete HTML test report func (g *HTMLReportGenerator) GenerateReport(outputFile string) error { if outputFile == "" { @@ -438,13 +452,23 @@ func (g *HTMLReportGenerator) GenerateReport(outputFile string) error { result := buf.String() return strings.TrimSpace(result) }, - "mul": func(a, b float64) float64 { return a * b }, "add": func(a, b int) int { return a + b }, - "sub": func(a, b int) int { return a - b }, - "lt": func(a, b int) bool { return a < b }, - "gt": func(a, b int) bool { return a > b }, "base": filepath.Base, "index": func(m map[string]any, key string) any { return m[key] }, + "extractThought": func(content string) string { + if content == "" { + return "" + } + // Try to parse as JSON to extract thought field + var data map[string]interface{} + if err := json.Unmarshal([]byte(content), &data); err == nil { + if thought, ok := data["thought"].(string); ok { + return thought + } + } + // If not JSON or no thought field, return original content + return content + }, } // Parse template @@ -683,8 +707,6 @@ const htmlTemplate = ` word-break: break-all; } - - .test-cases { margin-top: 20px; } @@ -1262,25 +1284,7 @@ const htmlTemplate = ` } } - .raw-content { - margin-top: 10px; - } - .raw-content pre { - background: #f1f3f4; - border: 1px solid #dadce0; - border-radius: 4px; - padding: 8px; - font-size: 0.8em; - max-height: 150px; - overflow-y: auto; - white-space: pre-wrap; - word-wrap: break-word; - } - - .step-screenshots { - margin-top: 10px; - } .action-details { display: flex; @@ -1303,12 +1307,6 @@ const htmlTemplate = ` font-size: 0.9em; } - - - - - - .thought { background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); border: 2px solid #2196f3; @@ -1333,31 +1331,7 @@ const htmlTemplate = ` line-height: 1; } - .model-name-container { - background: #f8f9fa; - border: 1px solid #e9ecef; - border-radius: 6px; - padding: 8px 12px; - margin: 8px 0; - font-size: 0.9em; - display: flex; - align-items: center; - gap: 8px; - } - .model-label { - font-weight: 600; - color: #495057; - } - - .model-value { - font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; - background: #e9ecef; - padding: 2px 6px; - border-radius: 4px; - color: #495057; - font-size: 0.85em; - } .arguments { background: #f8f9fa; @@ -1369,92 +1343,7 @@ const htmlTemplate = ` font-size: 0.9em; } - .requests { - margin-top: 15px; - } - .requests-toggle { - background: #6c757d; - color: white; - border: none; - padding: 6px 12px; - border-radius: 4px; - cursor: pointer; - font-size: 0.8em; - margin-bottom: 10px; - transition: background-color 0.3s; - } - - .requests-toggle:hover { - background: #5a6268; - } - - .requests-content { - display: none; - } - - .requests-content.show { - display: block; - } - - .request-item { - background: #f1f3f4; - border: 1px solid #dadce0; - border-radius: 4px; - padding: 8px; - margin: 6px 0; - } - - .request-header { - display: flex; - align-items: center; - gap: 10px; - margin-bottom: 6px; - } - - .method { - background: #007bff; - color: white; - padding: 2px 6px; - border-radius: 4px; - font-size: 0.8em; - font-weight: bold; - } - - .url { - color: #495057; - font-family: monospace; - font-size: 0.9em; - } - - .status { - padding: 2px 6px; - border-radius: 4px; - font-size: 0.8em; - font-weight: bold; - } - - .status.success { - background: #d4edda; - color: #155724; - } - - .status.failure { - background: #f8d7da; - color: #721c24; - } - - .request-body, .response-body { - background: #ffffff; - border: 1px solid #e9ecef; - border-radius: 4px; - padding: 6px; - margin: 4px 0; - font-family: monospace; - font-size: 0.8em; - max-height: 100px; - overflow-y: auto; - } .screenshots-section { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); @@ -1477,6 +1366,30 @@ const htmlTemplate = ` gap: 10px; } + .screenshots-horizontal { + display: flex; + gap: 15px; + overflow-x: auto; + padding: 10px 0; + } + + .screenshots-horizontal .screenshot-item { + flex: 0 0 auto; + min-width: 200px; + max-width: 300px; + margin-bottom: 0; + } + + .screenshots-horizontal .screenshot-image { + min-height: 200px; + padding: 10px 0; + } + + .screenshots-horizontal .screenshot-image img { + max-height: 250px; + width: auto; + } + .screenshot-item { background: white; border: 1px solid #dee2e6; @@ -1573,8 +1486,18 @@ const htmlTemplate = ` .validator-header { display: flex; align-items: center; - gap: 10px; - margin-bottom: 8px; + gap: 15px; + margin-bottom: 15px; + padding: 12px 15px; + border-radius: 8px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + } + + .validator-header strong { + color: #007bff; + font-size: 1.1em; + font-weight: 600; } .check-type, .assert-type { @@ -1589,9 +1512,84 @@ const htmlTemplate = ` font-weight: bold; } - .validator-expect, .validator-message { - margin: 4px 0; + .validator-expect, .validator-message { + margin: 8px 0; font-size: 0.9em; + padding: 8px 12px; + background: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + } + + .validator-ai-content { + margin-top: 15px; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + } + + .validator-ai-layout { + display: flex; + gap: 20px; + margin: 15px 0; + } + + .validator-column-screenshot { + flex: 0.9; + min-width: 250px; + max-width: 35%; + } + + .validator-column-analysis { + flex: 1.6; + min-width: 350px; + } + + .validator-step-compact { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + height: fit-content; + } + + .validator-ai-details { + padding: 12px; + } + + .validator-thought { + background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%); + border: 2px solid #2196f3; + border-radius: 12px; + padding: 15px; + margin: 10px 0; + font-style: italic; + color: #1565c0; + font-size: 1.0em; + font-weight: 500; + box-shadow: 0 2px 8px rgba(33, 150, 243, 0.15); + white-space: pre-wrap; + word-wrap: break-word; + } + + @media screen and (max-width: 768px) { + .validator-ai-layout { + flex-direction: column; + gap: 15px; + } + + .validator-column-screenshot { + flex: none; + min-width: auto; + max-width: none; + } + + .validator-column-analysis { + flex: none; + min-width: auto; + } } .logs-section { @@ -1965,6 +1963,18 @@ const htmlTemplate = ` gap: 10px; } + .screenshots-horizontal { + flex-direction: column; + overflow-x: visible; + } + + .screenshots-horizontal .screenshot-item { + flex: none; + min-width: auto; + max-width: none; + width: 100%; + } + .screenshot-image { min-height: 250px; padding: 15px 0; @@ -2182,7 +2192,6 @@ const htmlTemplate = `
{{if $action.Plannings}} -
{{range $planningIndex, $planning := $action.Plannings}}
@@ -2301,81 +2310,94 @@ const htmlTemplate = ` {{/* SubActions are now displayed in the right panel, so we don't show them here */}}
{{end}} -
{{end}} + {{/* Handle special case: ai_query needs enhanced display even when not in planning */}} {{if $action.SubActions}} -
{{range $subAction := $action.SubActions}} -
-
- {{$subAction.ActionName}} - {{formatDuration $subAction.Elapsed}} -
- -
-
- {{if $subAction.Arguments}} -
Arguments: {{safeHTML (toJSON $subAction.Arguments)}}
- {{end}} - - {{if $subAction.Requests}} -
- -
- {{range $request := $subAction.Requests}} -
-
- {{$request.RequestMethod}} - {{$request.RequestUrl}} - Status: {{$request.ResponseStatus}} - {{formatDuration $request.ResponseDuration}} -
- {{if $request.RequestBody}} -
Request: {{$request.RequestBody}}
- {{end}} - {{if $request.ResponseBody}} -
Response: {{$request.ResponseBody}}
- {{end}} -
+ {{if eq $subAction.ActionName "ai_query"}} +
+ +
+ + {{$stepLogs := getStepLogs $step}} + {{$queryThought := ""}} + {{$queryModel := ""}} + {{$queryUsage := ""}} + {{$queryScreenshot := ""}} + {{$queryResult := ""}} + {{range $logEntry := $stepLogs}} + {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}} + {{$content := index $logEntry.Fields "content"}} + {{if $content}} + {{$queryResult = $content}} {{end}} -
-
+ {{end}} + {{if and (eq $logEntry.Message "call model service for query") (index $logEntry.Fields "model")}} + {{$queryModel = index $logEntry.Fields "model"}} + {{end}} + {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}} + {{$inputTokens := index $logEntry.Fields "input_tokens"}} + {{$outputTokens := index $logEntry.Fields "output_tokens"}} + {{$totalTokens := index $logEntry.Fields "total_tokens"}} + {{$queryUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}} + {{end}} + {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}} + {{$queryScreenshot = index $logEntry.Fields "imagePath"}} + {{end}} {{end}} -
- {{if $subAction.ScreenResults}} -
-
-
📸 Screenshots
-
- {{range $screenshot := $subAction.ScreenResults}} - {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} - {{if $base64Image}} -
-
- {{base $screenshot.ImagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} + + {{if $queryResult}} +
{{$queryResult}}
+ {{end}} + + +
+ + {{if $queryScreenshot}} +
+
+
+ 📸 Query Screenshot +
+
+ {{$base64Image := encodeImageBase64 $queryScreenshot}} + {{if $base64Image}} +
+
+ Query Screenshot +
+
{{end}}
-
- Screenshot +
+
+ {{end}} + + +
+
+
+ 🤖 AI Query +
+
+ {{if $queryModel}} +
🤖 Model: {{$queryModel}}
+ {{end}} + {{if $queryUsage}} +
{{$queryUsage}}
+ {{end}}
- {{end}} - {{end}}
- {{end}}
-
- {{end}} -
+ {{end}} {{end}} + {{end}} + {{/* Other SubActions (non-ai_query) are displayed in the Planning section's right panel to avoid duplication */}}
{{end}} @@ -2385,18 +2407,97 @@ const htmlTemplate = ` {{if and $step.Data $step.Data.validators}}
-

Validators

- {{range $validator := $step.Data.validators}} +

🔍 Validators

+ {{range $validatorIndex, $validator := $step.Data.validators}}
- {{$validator.check}} - {{$validator.assert}} - {{$validator.check_result}} + {{$validator.check}} - {{$validator.assert}} + + {{if eq $validator.check_result "pass"}}✓ PASS{{else}}✗ FAIL{{end}} +
Expected: {{$validator.expect}}
{{if and $validator.msg (ne $validator.check_result "pass")}}
{{$validator.msg}}
{{end}} + + + {{if or (eq $validator.check "ui_ai") (eq $validator.assert "ai_assert")}} +
+ + {{$stepLogs := getStepLogs $step}} + {{$validationThought := ""}} + {{$validationModel := ""}} + {{$validationUsage := ""}} + {{$validationScreenshot := ""}} + {{range $logEntry := $stepLogs}} + {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}} + {{$content := index $logEntry.Fields "content"}} + {{if $content}} + {{$validationThought = $content}} + {{end}} + {{end}} + {{if and (eq $logEntry.Message "call model service for assertion") (index $logEntry.Fields "model")}} + {{$validationModel = index $logEntry.Fields "model"}} + {{end}} + {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}} + {{$inputTokens := index $logEntry.Fields "input_tokens"}} + {{$outputTokens := index $logEntry.Fields "output_tokens"}} + {{$totalTokens := index $logEntry.Fields "total_tokens"}} + {{$validationUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}} + {{end}} + {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}} + {{$validationScreenshot = index $logEntry.Fields "imagePath"}} + {{end}} + {{end}} + + + {{if $validationThought}} +
{{extractThought $validationThought}}
+ {{end}} + + +
+ + {{if $validationScreenshot}} +
+
+
+ 📸 Validation Screenshot +
+
+ {{$base64Image := encodeImageBase64 $validationScreenshot}} + {{if $base64Image}} +
+
+ Validation Screenshot +
+
+ {{end}} +
+
+
+ {{end}} + + +
+
+
+ 🤖 AI Analysis +
+
+ {{if $validationModel}} +
🤖 Model: {{$validationModel}}
+ {{end}} + {{if $validationUsage}} +
{{$validationUsage}}
+ {{end}} +
+
+
+
+
+ {{end}}
{{end}}
@@ -2409,22 +2510,35 @@ const htmlTemplate = ` {{if index $attachments "screen_results"}}

Screenshots

- {{range $screenshot := index $attachments "screen_results"}} - {{$base64Image := encodeImageBase64 $screenshot.ImagePath}} - {{if $base64Image}} -
-
- {{base $screenshot.ImagePath}} - {{if $screenshot.Resolution}} - {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} - {{end}} -
-
- Screenshot +
+ {{range $screenshot := index $attachments "screen_results"}} + {{$imagePath := ""}} + {{if $screenshot.ImagePath}} + {{$imagePath = $screenshot.ImagePath}} + {{else if index $screenshot "image_path"}} + {{$imagePath = index $screenshot "image_path"}} + {{end}} + {{if $imagePath}} + {{$base64Image := encodeImageBase64 $imagePath}} + {{if $base64Image}} +
+
+ {{base $imagePath}} + {{if $screenshot.Resolution}} + {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}} + {{else if index $screenshot "resolution"}} + {{$resolution := index $screenshot "resolution"}} + {{index $resolution "width"}}x{{index $resolution "height"}} + {{end}} +
+
+ Screenshot +
+ {{end}} + {{end}} + {{end}}
- {{end}} - {{end}}
{{end}} {{end}} @@ -2569,19 +2683,6 @@ const htmlTemplate = ` } } - function toggleRequests(buttonElement) { - const requestsDiv = buttonElement.parentElement; - const requestsContent = requestsDiv.querySelector('.requests-content'); - - if (requestsContent.classList.contains('show')) { - requestsContent.classList.remove('show'); - buttonElement.textContent = buttonElement.textContent.replace('Hide', 'Show'); - } else { - requestsContent.classList.add('show'); - buttonElement.textContent = buttonElement.textContent.replace('Show', 'Hide'); - } - } - function toggleRequestsCompact(buttonElement) { const requestsDiv = buttonElement.parentElement; const requestsContent = requestsDiv.querySelector('.requests-content-compact'); @@ -2595,8 +2696,6 @@ const htmlTemplate = ` } } - - function openImageModal(src) { const modal = document.getElementById('imageModal'); const modalImg = document.getElementById('modalImage'); @@ -2616,8 +2715,6 @@ const htmlTemplate = ` } } - - // Auto-expand all steps on load to show actions document.addEventListener('DOMContentLoaded', function() { // Expand all steps to show the actions list diff --git a/runner.go b/runner.go index e565d628..73ac82b1 100644 --- a/runner.go +++ b/runner.go @@ -729,6 +729,11 @@ func (r *SessionRunner) Start(givenVars map[string]interface{}) (summary *TestCa return summary, nil } +const ( + RUN_STEP_START = "run step start" + RUN_STEP_END = "run step end" +) + func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) { // check for interrupt signal before running step select { @@ -748,7 +753,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) stepName := step.Name() stepType := string(step.Type()) - log.Info().Str("step", stepName).Str("type", stepType).Msg("run step start") + log.Info().Str("step", stepName).Str("type", stepType).Msg(RUN_STEP_START) // run times of step loopTimes := step.Config().Loops @@ -785,7 +790,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) Bool("success", true). Int64("elapsed(ms)", stepResult.Elapsed). Interface("exportVars", stepResult.ExportVars). - Msg("run step end") + Msg(RUN_STEP_END) continue } // run step failed @@ -793,7 +798,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) Str("type", stepType). Bool("success", false). Int64("elapsed(ms)", stepResult.Elapsed). - Msg("run step end") + Msg(RUN_STEP_END) return stepResult, err } diff --git a/step.go b/step.go index 6b232cea..143abe24 100644 --- a/step.go +++ b/step.go @@ -58,7 +58,7 @@ type TStep struct { // one step contains one or multiple actions type ActionResult struct { option.MobileAction `json:",inline"` - StartTime int64 `json:"start_time"` // action start time + StartTime int64 `json:"start_time"` // action start time in millisecond(ms) Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms) Error error `json:"error"` // action execution result Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions @@ -69,7 +69,7 @@ type ActionResult struct { type StepResult struct { Name string `json:"name" yaml:"name"` // step name Identifier string `json:"identifier,omitempty" yaml:"identifier,omitempty"` // step identifier - StartTime int64 `json:"start_time" yaml:"time"` // step start time + StartTime int64 `json:"start_time" yaml:"time"` // step start time in millisecond(ms) StepType StepType `json:"step_type" yaml:"step_type"` // step type, testcase/request/transaction/rendezvous Success bool `json:"success" yaml:"success"` // step execution result Elapsed int64 `json:"elapsed_ms" yaml:"elapsed_ms"` // step execution time in millisecond(ms) diff --git a/step_function.go b/step_function.go index 3aae4bce..d06c18cb 100644 --- a/step_function.go +++ b/step_function.go @@ -27,10 +27,7 @@ func (s *StepFunction) Type() StepType { } func (s *StepFunction) Config() *StepConfig { - return &StepConfig{ - StepName: s.StepName, - Variables: s.Variables, - } + return &s.StepConfig } func (s *StepFunction) Run(r *SessionRunner) (*StepResult, error) { @@ -57,7 +54,7 @@ func runStepFunction(r *SessionRunner, step IStep) (stepResult *StepResult, err StepType: step.Type(), Success: false, ContentSize: 0, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } defer func() { attachments := uixt.Attachments{} diff --git a/step_rendezvous.go b/step_rendezvous.go index c1c46518..545c4d33 100644 --- a/step_rendezvous.go +++ b/step_rendezvous.go @@ -26,10 +26,7 @@ func (s *StepRendezvous) Type() StepType { } func (s *StepRendezvous) Config() *StepConfig { - return &StepConfig{ - StepName: s.StepName, - Variables: s.Variables, - } + return &s.StepConfig } func (s *StepRendezvous) Run(r *SessionRunner) (*StepResult, error) { diff --git a/step_request.go b/step_request.go index 6976b06a..25b88666 100644 --- a/step_request.go +++ b/step_request.go @@ -285,7 +285,7 @@ func runStepRequest(r *SessionRunner, step IStep) (stepResult *StepResult, err e StepType: step.Type(), Success: false, ContentSize: 0, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } defer func() { diff --git a/step_shell.go b/step_shell.go index 52263757..ef52b1a2 100644 --- a/step_shell.go +++ b/step_shell.go @@ -30,10 +30,7 @@ func (s *StepShell) Type() StepType { } func (s *StepShell) Config() *StepConfig { - return &StepConfig{ - StepName: s.StepName, - Variables: s.Variables, - } + return &s.StepConfig } func (s *StepShell) Run(r *SessionRunner) (*StepResult, error) { @@ -63,10 +60,7 @@ func (s *StepShellValidation) Type() StepType { } func (s *StepShellValidation) Config() *StepConfig { - return &StepConfig{ - StepName: s.StepName, - Variables: s.Variables, - } + return &s.StepConfig } func (s *StepShellValidation) Run(r *SessionRunner) (*StepResult, error) { @@ -101,7 +95,7 @@ func runStepShell(r *SessionRunner, step IStep) (stepResult *StepResult, err err StepType: step.Type(), Success: false, ContentSize: 0, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } defer func() { stepResult.Elapsed = time.Since(start).Milliseconds() diff --git a/step_testcase.go b/step_testcase.go index 43df5bb4..480e4864 100644 --- a/step_testcase.go +++ b/step_testcase.go @@ -51,7 +51,7 @@ func (s *StepTestCaseWithOptionalArgs) Run(r *SessionRunner) (stepResult *StepRe Name: s.Name(), StepType: s.Type(), Success: false, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } defer func() { diff --git a/step_ui.go b/step_ui.go index a7357ce9..da922435 100644 --- a/step_ui.go +++ b/step_ui.go @@ -691,11 +691,11 @@ func (s *StepMobileUIValidation) Type() StepType { } func (s *StepMobileUIValidation) Config() *StepConfig { - return &StepConfig{ - StepName: s.StepName, - Variables: s.Variables, - Validators: s.Validators, - } + // Get the original StepConfig from embedded StepMobile + config := &s.StepMobile.StepConfig + // Sync validators to the StepConfig + config.Validators = s.Validators + return config } func (s *StepMobileUIValidation) Run(r *SessionRunner) (*StepResult, error) { @@ -709,7 +709,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err StepType: step.Type(), Success: false, ContentSize: 0, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } var stepVariables map[string]interface{} @@ -781,7 +781,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err Method: option.ACTION_GetForegroundApp, Params: "[ForDebug] check foreground app", }, - StartTime: startTime.Unix(), + StartTime: startTime.UnixMilli(), } subActionResults, err1 := uiDriver.ExecuteAction( context.Background(), actionResult.MobileAction) @@ -793,6 +793,16 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err stepResult.Actions = append(stepResult.Actions, actionResult) } + // Get session data and add to attachments, clear session for next step + if uiDriver != nil { + sessionData := uiDriver.GetSession().GetData(true) // clear session after getting data + if len(sessionData.ScreenResults) > 0 { + attachments["screen_results"] = sessionData.ScreenResults + log.Debug().Int("count", len(sessionData.ScreenResults)). + Str("step", step.Name()).Msg("added screen results to step attachments") + } + } + var config *TConfig if s.caseRunner != nil && s.caseRunner.Config != nil { config = s.caseRunner.Config.Get() @@ -815,7 +825,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err Method: option.ACTION_ClosePopups, Params: "[ForDebug] close popups handler", }, - StartTime: startTime.Unix(), + StartTime: startTime.UnixMilli(), } subActionResults, err2 := uiDriver.ExecuteAction( context.Background(), actionResult.MobileAction) @@ -842,10 +852,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err return stepResult, errors.Wrap(code.InterruptError, "mobile UI runner interrupted") default: actionStartTime := time.Now() - actionResult := &ActionResult{ - MobileAction: action, - StartTime: actionStartTime.Unix(), // action 开始时间 - } + // Parse action params first for variable substitution if action.Params, err = s.caseRunner.parser.Parse(action.Params, stepVariables); err != nil { if !code.IsErrorPredefined(err) { err = errors.Wrap(code.ParseError, @@ -854,6 +861,12 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err return stepResult, err } + // Create ActionResult with parsed params for accurate reporting + actionResult := &ActionResult{ + MobileAction: action, // Now contains parsed params + StartTime: actionStartTime.UnixMilli(), // action start time + } + // Apply global configuration from testcase config if s.caseRunner != nil && s.caseRunner.Config != nil { config := s.caseRunner.Config.Get() @@ -951,55 +964,111 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err } // validate - validateResults, err := validateUI(uiDriver, stepValidators) - if err != nil { - if !code.IsErrorPredefined(err) { - err = errors.Wrap(code.MobileUIValidationError, err.Error()) - } - return - } + validateResults, err := validateUI(uiDriver, stepValidators, s.caseRunner.parser, stepVariables) if len(validateResults) > 0 { + // Always save validation results if any exist, regardless of success or failure sessionData := &SessionData{ Validators: validateResults, } stepResult.Data = sessionData } + if err != nil { + // Handle validation error after saving results + if !code.IsErrorPredefined(err) { + err = errors.Wrap(code.MobileUIValidationError, err.Error()) + } + return stepResult, err + } + stepResult.Success = true return stepResult, nil } -func validateUI(ud *uixt.XTDriver, iValidators []interface{}) (validateResults []*ValidationResult, err error) { +func validateUI(ud *uixt.XTDriver, iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) (validateResults []*ValidationResult, err error) { + // Parse all validators for variable substitution + parsedValidators, err := parseStepValidators(iValidators, parser, stepVariables) + if err != nil { + return nil, err + } + + // Execute validation for each parsed validator + for _, validator := range parsedValidators { + // Debug: print validator details + log.Debug(). + Str("check", validator.Check). + Str("assert", validator.Assert). + Interface("expect", validator.Expect). + Str("message", validator.Message). + Msg("processing validator") + + validationResult := &ValidationResult{ + Validator: validator, // Use parsed validator for accurate reporting + CheckResult: "fail", + } + + // Check if this is a UI validator or AI assert validator + if !strings.HasPrefix(validator.Check, "ui_") && validator.Assert != "ai_assert" { + validationResult.CheckResult = "skip" + log.Warn().Interface("validator", validator).Msg("skip validator") + validateResults = append(validateResults, validationResult) + continue + } + + // Validate expected value type + expected, ok := validator.Expect.(string) + if !ok { + return nil, errors.New("validator expect should be string") + } + + // Perform validation + err = ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message) + if err != nil { + // Add the failed validation result to the list before returning error + validateResults = append(validateResults, validationResult) + return validateResults, errors.Wrap(err, "step validation failed") + } + + validationResult.CheckResult = "pass" + validateResults = append(validateResults, validationResult) + } + + return validateResults, nil +} + +// parseStepValidators parses all validators for variable substitution +func parseStepValidators(iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) ([]Validator, error) { + var parsedValidators []Validator + for _, iValidator := range iValidators { validator, ok := iValidator.(Validator) if !ok { return nil, errors.New("validator type error") } - validataResult := &ValidationResult{ - Validator: validator, - CheckResult: "fail", + parsedValidator := validator + + // Parse Expect field for variable substitution + if expectedStr, ok := validator.Expect.(string); ok { + if parsedExpected, err := parser.Parse(expectedStr, stepVariables); err != nil { + return nil, errors.Wrap(err, "failed to parse validator expect field") + } else { + parsedValidator.Expect = parsedExpected + } } - // parse check value - if !strings.HasPrefix(validator.Check, "ui_") { - validataResult.CheckResult = "skip" - log.Warn().Interface("validator", validator).Msg("skip validator") - validateResults = append(validateResults, validataResult) - continue + // Parse Message field for variable substitution + if validator.Message != "" { + if parsedMessage, err := parser.Parse(validator.Message, stepVariables); err != nil { + return nil, errors.Wrap(err, "failed to parse validator message field") + } else { + if msgStr, ok := parsedMessage.(string); ok { + parsedValidator.Message = msgStr + } + } } - expected, ok := validator.Expect.(string) - if !ok { - return nil, errors.New("validator expect should be string") - } - - err := ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message) - if err != nil { - return validateResults, errors.Wrap(err, "step validation failed") - } - - validataResult.CheckResult = "pass" - validateResults = append(validateResults, validataResult) + parsedValidators = append(parsedValidators, parsedValidator) } - return validateResults, nil + + return parsedValidators, nil } diff --git a/step_websocket.go b/step_websocket.go index 91702334..04dc925a 100644 --- a/step_websocket.go +++ b/step_websocket.go @@ -381,7 +381,7 @@ func runStepWebSocket(r *SessionRunner, step IStep) (stepResult *StepResult, err StepType: step.Type(), Success: false, ContentSize: 0, - StartTime: start.Unix(), + StartTime: start.UnixMilli(), } defer func() { diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go index 8fd0ddf0..58880c51 100644 --- a/uixt/ai/asserter.go +++ b/uixt/ai/asserter.go @@ -10,10 +10,10 @@ import ( "github.com/cloudwego/eino/schema" "github.com/getkin/kin-openapi/openapi3gen" "github.com/httprunner/httprunner/v5/code" - "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" + "github.com/rs/zerolog/log" ) // IAsserter interface defines the contract for assertion operations @@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error { // parseAssertionResult parses the model response into AssertionResponse func parseAssertionResult(content string) (*AssertionResult, error) { - // Extract JSON content from response - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - return nil, errors.New("could not extract JSON from response") - } - - // Parse JSON response var result AssertionResult - if err := json.Unmarshal([]byte(jsonContent), &result); err != nil { + + // Use the generic structured response parser + if err := parseStructuredResponse(content, &result); err != nil { + log.Warn(). + Interface("original_content", content). + Msg("parse assertion result failed") return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error()) } diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go index 9012260a..d9bdaaba 100644 --- a/uixt/ai/asserter_test.go +++ b/uixt/ai/asserter_test.go @@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) { }) } } + +// Test the main parseAssertionResult function with problematic input +func TestParseAssertionResult(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid JSON response", + input: `{"pass": true, "thought": "Assertion passed"}`, + shouldSucceed: true, + }, + { + name: "response with UTF-8 replacement characters", + input: "浅蓝色的搜索框,里面显示着输入的\"ma\",而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}", + shouldSucceed: true, + }, + { + name: "malformed JSON with extraction", + input: `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`, + shouldSucceed: true, + }, + { + name: "completely malformed but analyzable", + input: "This assertion test passed and was successful", + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseAssertionResult(tt.input) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotNil(t, result) + assert.NotEmpty(t, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} diff --git a/uixt/ai/parser_default.go b/uixt/ai/parser_default.go index 69dcb4ab..5169dc1a 100644 --- a/uixt/ai/parser_default.go +++ b/uixt/ai/parser_default.go @@ -5,7 +5,6 @@ import ( "strings" "github.com/cloudwego/eino/schema" - "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" @@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string { func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) { content = strings.TrimSpace(content) - // Extract JSON content from markdown code blocks - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - return nil, fmt.Errorf("no valid JSON content found in response") - } - - // Define a temporary struct to parse the expected JSON format - var jsonResponse struct { - Actions []Action `json:"actions"` - Thought string `json:"thought"` - Error string `json:"error"` - } - - if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil { + // Use the generic structured response parser + var jsonResponse PlanningJSONResponse + if err := parseStructuredResponse(content, &jsonResponse); err != nil { return nil, fmt.Errorf("failed to parse VLM response: %v", err) } diff --git a/uixt/ai/planner_prompts.go b/uixt/ai/planner_prompts.go index 57c1704f..64d0b3f4 100644 --- a/uixt/ai/planner_prompts.go +++ b/uixt/ai/planner_prompts.go @@ -41,6 +41,8 @@ var doubao_1_5_ui_tars_action_mapping = map[string]option.ActionName{ "type": option.ACTION_Input, "scroll": option.ACTION_Swipe, // swipe up/down/left/right "wait": option.ACTION_Sleep, + "press_home": option.ACTION_Home, + "press_back": option.ACTION_Back, "finished": option.ACTION_Finished, } @@ -138,5 +140,7 @@ var doubao_1_5_thinking_vision_pro_action_mapping = map[string]option.ActionName "type": option.ACTION_Input, "scroll": option.ACTION_Swipe, // swipe up/down/left/right "wait": option.ACTION_Sleep, + "press_home": option.ACTION_Home, + "press_back": option.ACTION_Back, "finished": option.ACTION_Finished, } diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go index 02f3676e..75d5dbc3 100644 --- a/uixt/ai/querier.go +++ b/uixt/ai/querier.go @@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error { // parseQueryResult parses the model response into QueryResult func parseQueryResult(content string) (*QueryResult, error) { - // Extract JSON content from response - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - // If no JSON found, treat the entire content as the result - // This handles cases where the model returns plain text instead of JSON - return &QueryResult{ - Content: content, - Thought: "Direct response from model", - }, nil - } - - // Parse JSON response var result QueryResult - if err := json.Unmarshal([]byte(jsonContent), &result); err != nil { - // If JSON parsing fails, treat the content as plain text result + + // Use the generic structured response parser with enhanced error recovery + if err := parseStructuredResponse(content, &result); err != nil { + // If parseStructuredResponse fails completely, treat content as plain text return &QueryResult{ Content: content, - Thought: "Failed to parse as JSON, returning raw content", + Thought: "Failed to parse response, returning raw content", }, nil } diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go index 38ecdc00..3793f7bf 100644 --- a/uixt/ai/querier_test.go +++ b/uixt/ai/querier_test.go @@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) { expected *QueryResult }{ { - name: "valid JSON response", - content: `{ - "content": "这是一个14行8列的连连看游戏界面,包含25种不同的图案", - "thought": "通过分析图片,我识别出了游戏界面的结构和图案类型" - }`, + name: "valid JSON response", + content: `{"content": "extracted information", "thought": "analysis complete"}`, expected: &QueryResult{ - Content: "这是一个14行8列的连连看游戏界面,包含25种不同的图案", - Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型", + Content: "extracted information", + Thought: "analysis complete", }, }, { name: "JSON in markdown", - content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```", + content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```", expected: &QueryResult{ - Content: "游戏界面分析结果", - Thought: "分析过程", + Content: "data from markdown", + Thought: "parsed from code block", }, }, { name: "plain text response", - content: "这是一个连连看游戏界面,包含多种图案。", + content: "This is just plain text without JSON structure", expected: &QueryResult{ - Content: "这是一个连连看游戏界面,包含多种图案。", - Thought: "Direct response from model", + Content: "This is just plain text without JSON structure", + Thought: "Failed to parse as JSON, returning raw content", }, }, { name: "invalid JSON", content: `{"content": "incomplete json", "missing_closing_brace": true`, expected: &QueryResult{ - Content: `{"content": "incomplete json", "missing_closing_brace": true`, - Thought: "Direct response from model", + Content: "incomplete json", + Thought: "Partial extraction from malformed response", }, }, } diff --git a/uixt/ai/session.go b/uixt/ai/session.go index d5707fe3..ced09e09 100644 --- a/uixt/ai/session.go +++ b/uixt/ai/session.go @@ -73,6 +73,11 @@ func (h *ConversationHistory) Clear() { log.Warn().Msg("conversation history cleared completely") } +const ( + LOG_REQUEST_MESSAGES = "log request messages" + LOG_RESPONSE_MESSAGE = "log response message" +) + func logRequest(messages ConversationHistory) { msgs := make(ConversationHistory, 0, len(messages)) for _, message := range messages { @@ -99,7 +104,7 @@ func logRequest(messages ConversationHistory) { } msgs = append(msgs, msg) } - log.Debug().Interface("messages", msgs).Msg("log request messages") + log.Debug().Interface("messages", msgs).Msg(LOG_REQUEST_MESSAGES) } func logResponse(message *schema.Message) { @@ -126,5 +131,5 @@ func logResponse(message *schema.Message) { if message.Extra != nil { logger = logger.Interface("extra", message.Extra) } - logger.Msg("log response message") + logger.Msg(LOG_RESPONSE_MESSAGE) } diff --git a/uixt/ai/utils.go b/uixt/ai/utils.go index 572b705e..1a6b1748 100644 --- a/uixt/ai/utils.go +++ b/uixt/ai/utils.go @@ -2,6 +2,7 @@ package ai import ( "context" + "fmt" "regexp" "strings" "time" @@ -11,9 +12,57 @@ import ( "github.com/cloudwego/eino/schema" "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/pkg/errors" ) +// PlanningJSONResponse represents the JSON response structure for planning +type PlanningJSONResponse struct { + Actions []Action `json:"actions"` + Thought string `json:"thought"` + Error string `json:"error"` +} + +// parseStructuredResponse parses model response into structured format with error recovery +func parseStructuredResponse(content string, result interface{}) error { + // Clean and validate UTF-8 content first + cleanContent := sanitizeUTF8Content(content) + + // Extract JSON content from response + jsonContent := extractJSONFromContent(cleanContent) + if jsonContent == "" { + // If JSON extraction failed, try parsing the content directly as a fallback + jsonContent = cleanContent + } + + // Parse JSON response with error recovery + return parseJSONWithFallback(jsonContent, result) +} + +// sanitizeUTF8Content cleans invalid UTF-8 characters from content +func sanitizeUTF8Content(content string) string { + if utf8.ValidString(content) { + return content + } + + // Convert to bytes and filter out invalid UTF-8 sequences + bytes := []byte(content) + var validBytes []byte + + for len(bytes) > 0 { + r, size := utf8.DecodeRune(bytes) + if r != utf8.RuneError { + // Valid rune, keep it + validBytes = append(validBytes, bytes[:size]...) + } + // Skip invalid bytes (including RuneError) + bytes = bytes[size:] + } + + return string(validBytes) +} + // extractJSONFromContent extracts JSON content from various formats in the response // This function handles multiple formats: // 1. ```json ... ``` markdown code blocks @@ -111,6 +160,294 @@ func extractJSONFromContent(content string) string { return "" } +// parseJSONWithFallback attempts to parse JSON with multiple strategies for any struct type +func parseJSONWithFallback(jsonContent string, result interface{}) error { + // Strategy 1: Direct JSON unmarshaling + if err := json.Unmarshal([]byte(jsonContent), result); err == nil { + // For specific types, ensure required fields have default values even after successful parsing + switch v := result.(type) { + case *QueryResult: + // Ensure QueryResult has meaningful defaults for empty fields + if v.Content == "" && v.Thought == "" { + v.Content = "Empty response content" + v.Thought = "No content extracted from response" + } else if v.Content == "" { + v.Content = "No content extracted" + } else if v.Thought == "" { + v.Thought = "Successfully parsed structured response" + } + case *AssertionResult: + // Ensure AssertionResult has meaningful defaults + if v.Thought == "" { + v.Thought = "Successfully parsed assertion response" + } + } + return nil + } + + // Strategy 2: Try cleaning JSON content and parse again + cleanedJSON := cleanJSONContent(jsonContent) + if err := json.Unmarshal([]byte(cleanedJSON), result); err == nil { + // Apply the same default value logic for cleaned JSON + switch v := result.(type) { + case *QueryResult: + if v.Content == "" && v.Thought == "" { + v.Content = "Empty response content" + v.Thought = "No content extracted from response" + } else if v.Content == "" { + v.Content = "No content extracted" + } else if v.Thought == "" { + v.Thought = "Successfully parsed structured response" + } + case *AssertionResult: + if v.Thought == "" { + v.Thought = "Successfully parsed assertion response" + } + } + return nil + } + + // Strategy 3: For specific types, try manual extraction or content analysis + switch v := result.(type) { + case *AssertionResult: + if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Final fallback for assertions: content analysis + *v = *analyzeContentForAssertion(jsonContent) + return nil + + case *QueryResult: + // For QueryResult, try basic field extraction + if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Fallback to treating content as plain text + *v = QueryResult{ + Content: jsonContent, + Thought: "Failed to parse as JSON, returning raw content", + } + return nil + + case *PlanningJSONResponse: + // For PlanningJSONResponse, try basic field extraction + if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Fallback with empty actions but preserve any recognizable thought content + *v = PlanningJSONResponse{ + Actions: []Action{}, + Thought: "Failed to parse structured response", + Error: "JSON parsing failed, returning minimal structure", + } + return nil + } + + return errors.New("failed to parse JSON with all strategies") +} + +// extractAssertionFieldsManually extracts pass and thought fields from text +func extractAssertionFieldsManually(content string) (*AssertionResult, error) { + result := &AssertionResult{} + + // Try to extract "pass" field + if strings.Contains(strings.ToLower(content), `"pass":true`) || + strings.Contains(strings.ToLower(content), `"pass": true`) { + result.Pass = true + } else if strings.Contains(strings.ToLower(content), `"pass":false`) || + strings.Contains(strings.ToLower(content), `"pass": false`) { + result.Pass = false + } else { + return nil, errors.New("cannot extract pass field") + } + + // Try to extract "thought" field + thoughtStart := strings.Index(content, `"thought"`) + if thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + colonIndex := strings.Index(thoughtSection, ":") + if colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + // Find the matching closing quote, handling escaped quotes + thoughtContent := extractQuotedString(afterColon) + result.Thought = thoughtContent + } + } + } + + return result, nil +} + +// extractQuotedString extracts content from a quoted string, handling escaped quotes +func extractQuotedString(s string) string { + if !strings.HasPrefix(s, `"`) { + return "" + } + + s = s[1:] // Remove opening quote + var result strings.Builder + escaped := false + + for _, r := range s { + if escaped { + result.WriteRune(r) + escaped = false + continue + } + + if r == '\\' { + escaped = true + continue + } + + if r == '"' { + // Found closing quote + return result.String() + } + + result.WriteRune(r) + } + + return result.String() +} + +// cleanJSONContent removes common JSON formatting issues +func cleanJSONContent(content string) string { + // Remove any non-printable characters + cleaned := strings.Map(func(r rune) rune { + if r >= 32 && r < 127 || r > 127 { // Keep printable ASCII and Unicode + return r + } + return -1 // Remove non-printable characters + }, content) + + // Remove any trailing commas before closing braces/brackets + cleaned = strings.ReplaceAll(cleaned, ",}", "}") + cleaned = strings.ReplaceAll(cleaned, ",]", "]") + + return cleaned +} + +// analyzeContentForAssertion creates a fallback result by analyzing content +func analyzeContentForAssertion(content string) *AssertionResult { + content = strings.ToLower(content) + + // Simple heuristic: look for positive/negative indicators + positiveIndicators := []string{"true", "pass", "success", "correct", "valid", "match"} + negativeIndicators := []string{"false", "fail", "error", "incorrect", "invalid", "mismatch"} + + positiveCount := 0 + negativeCount := 0 + + for _, indicator := range positiveIndicators { + if strings.Contains(content, indicator) { + positiveCount++ + } + } + + for _, indicator := range negativeIndicators { + if strings.Contains(content, indicator) { + negativeCount++ + } + } + + pass := positiveCount > negativeCount + thought := fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)", + positiveCount, negativeCount) + + return &AssertionResult{ + Pass: pass, + Thought: thought, + } +} + +// extractQueryFieldsManually extracts content and thought fields for QueryResult +func extractQueryFieldsManually(content string) (*QueryResult, error) { + result := &QueryResult{} + + // Try to extract "content" field + if contentStart := strings.Index(content, `"content"`); contentStart != -1 { + contentSection := content[contentStart:] + if colonIndex := strings.Index(contentSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(contentSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Content = extractQuotedString(afterColon) + } + } + } + + // Try to extract "thought" field + if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Thought = extractQuotedString(afterColon) + } + } + } + + // If we couldn't extract any fields, return error + if result.Content == "" && result.Thought == "" { + return nil, errors.New("cannot extract content or thought fields") + } + + // Set defaults for missing fields (ALWAYS set defaults if any field was extracted) + if result.Content == "" { + result.Content = "Extracted partial information" + } + if result.Thought == "" { + result.Thought = "Partial extraction from malformed response" + } + + return result, nil +} + +// extractPlanningFieldsManually extracts thought and error fields for PlanningJSONResponse +func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) { + result := &PlanningJSONResponse{ + Actions: []Action{}, // Default to empty actions + } + + // Try to extract "thought" field + if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Thought = extractQuotedString(afterColon) + } + } + } + + // Try to extract "error" field + if errorStart := strings.Index(content, `"error"`); errorStart != -1 { + errorSection := content[errorStart:] + if colonIndex := strings.Index(errorSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(errorSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Error = extractQuotedString(afterColon) + } + } + } + + // If we couldn't extract any meaningful fields, return error + if result.Thought == "" && result.Error == "" { + return nil, errors.New("cannot extract thought or error fields") + } + + // Set defaults for missing fields + if result.Thought == "" { + result.Thought = "Partial extraction from malformed response" + } + + return result, nil +} + // callModelWithLogging is a common function to call model with logging and timing // It handles the common pattern of: // 1. Log request diff --git a/uixt/ai/utils_test.go b/uixt/ai/utils_test.go index 6a6a38ea..0a43c044 100644 --- a/uixt/ai/utils_test.go +++ b/uixt/ai/utils_test.go @@ -4,195 +4,701 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestExtractJSONFromContent(t *testing.T) { tests := []struct { name string - content string + input string expected string }{ { - name: "simple JSON", - content: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [371, 235, 425, 270] - } - } - ], - "thought": "点击桌面上的抖音应用图标以启动抖音", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [371, 235, 425, 270] - } - } - ], - "thought": "点击桌面上的抖音应用图标以启动抖音", - "error": null -}`, + name: "simple JSON object", + input: `{"key": "value"}`, + expected: `{"key": "value"}`, }, { - name: "JSON with Chinese characters in strings", - content: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "2048经典" - } - } - ], - "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "2048经典" - } - } - ], - "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。", - "error": null -}`, + name: "JSON in markdown code block", + input: "```json\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, }, { - name: "JSON with markdown wrapper", - content: "```json\n" + `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250] - } - } - ], - "thought": "点击按钮", - "error": null -}` + "\n```", - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250] - } - } - ], - "thought": "点击按钮", - "error": null -}`, + name: "JSON in code block without language", + input: "```\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, }, { - name: "JSON embedded in text with Chinese", - content: `这是一个包含中文的响应:{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "测试内容" - } - } - ], - "thought": "这是一个测试思路", - "error": null -} 后面还有一些文本`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "测试内容" - } - } - ], - "thought": "这是一个测试思路", - "error": null -}`, + name: "JSON with surrounding text", + input: `Here is the result: {"key": "value"} and some more text`, + expected: `{"key": "value"}`, }, { - name: "JSON with escaped quotes and Chinese", - content: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "他说:\"你好,世界!\"" - } - } - ], - "thought": "输入包含引号的中文文本", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "他说:\"你好,世界!\"" - } - } - ], - "thought": "输入包含引号的中文文本", - "error": null -}`, + name: "multiple JSON objects", + input: `{"first": "object"} and {"second": "object"}`, + expected: `{"first": "object"}`, }, { - name: "no JSON content", - content: "这只是一些普通的文本,没有JSON内容", + name: "nested JSON in markdown", + input: "```json\n{\"data\": {\"nested\": \"value\"}}\n```", + expected: `{"data": {"nested": "value"}}`, + }, + { + name: "JSON array", + input: `[{"item": 1}, {"item": 2}]`, + expected: `[{"item": 1}, {"item": 2}]`, + }, + { + name: "JSON array in markdown", + input: "```json\n[{\"item\": 1}, {\"item\": 2}]\n```", + expected: `[{"item": 1}, {"item": 2}]`, + }, + { + name: "text without JSON", + input: "This is just plain text without any JSON", expected: "", }, { - name: "nested JSON objects with Chinese", - content: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250], - "metadata": { - "description": "点击操作", - "target": "按钮" - } - } - } - ], - "thought": "执行嵌套对象的点击操作", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250], - "metadata": { - "description": "点击操作", - "target": "按钮" - } - } - } - ], - "thought": "执行嵌套对象的点击操作", - "error": null -}`, + name: "malformed JSON", + input: `{"key": "value"`, + expected: `{"key": "value"`, + }, + { + name: "JSON with unicode", + input: `{"message": "测试消息"}`, + expected: `{"message": "测试消息"}`, + }, + { + name: "multiple code blocks, select first JSON", + input: "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```", + expected: `{"first": true}`, + }, + { + name: "mixed language code blocks", + input: "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, + }, + { + name: "JSON with special characters", + input: `{"special": "chars: @#$%^&*()"}`, + expected: `{"special": "chars: @#$%^&*()"}`, + }, + { + name: "empty JSON object", + input: `{}`, + expected: `{}`, + }, + { + name: "empty JSON array", + input: `[]`, + expected: `[]`, + }, + { + name: "JSON with line breaks", + input: "{\n \"key\": \"value\",\n \"number\": 123\n}", + expected: "{\n \"key\": \"value\",\n \"number\": 123\n}", + }, + { + name: "markdown with extra whitespace", + input: " ```json \n {\"key\": \"value\"} \n ``` ", + expected: `{"key": "value"}`, + }, + { + name: "code block with tildes", + input: "~~~json\n{\"key\": \"value\"}\n~~~", + expected: `{"key": "value"}`, + }, + { + name: "JSON after other text patterns", + input: `The response should be formatted as: {"status": "success"}`, + expected: `{"status": "success"}`, + }, + { + name: "JSON in mixed content", + input: `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`, + expected: `{"analysis": "positive", "confidence": 0.95}`, + }, + { + name: "complex nested JSON", + input: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`, + expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`, + }, + { + name: "JSON with escaped quotes", + input: `{"message": "He said \"Hello\" to me"}`, + expected: `{"message": "He said \"Hello\" to me"}`, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := extractJSONFromContent(tt.content) + result := extractJSONFromContent(tt.input) assert.Equal(t, tt.expected, result) }) } } + +func TestSanitizeUTF8Content(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "valid UTF-8", + input: "Hello 世界", + expected: "Hello 世界", + }, + { + name: "invalid UTF-8 with replacement characters", + input: "Hello \ufffd\ufffd World", + expected: "Hello World", + }, + { + name: "mixed valid and invalid", + input: "测试\ufffd消息\ufffd", + expected: "测试消息", + }, + { + name: "only replacement characters", + input: "\ufffd\ufffd\ufffd", + expected: "", + }, + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "ASCII only", + input: "Hello World 123", + expected: "Hello World 123", + }, + { + name: "JSON with UTF-8 issues", + input: `{"message": "搜索框\ufffd\ufffd显示"}`, + expected: `{"message": "搜索框显示"}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizeUTF8Content(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParseJSONWithFallback(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedPass bool + expectedThought string + }{ + { + name: "valid JSON", + input: `{"pass": true, "thought": "test passed"}`, + expectedValid: true, + expectedPass: true, + expectedThought: "test passed", + }, + { + name: "valid JSON with false", + input: `{"pass": false, "thought": "test failed"}`, + expectedValid: true, + expectedPass: false, + expectedThought: "test failed", + }, + { + name: "malformed JSON with extractable fields", + input: `malformed start {"pass": true, "thought": "extracted"} end`, + expectedValid: true, + expectedPass: true, + expectedThought: "extracted", + }, + { + name: "content analysis fallback - positive", + input: `The test was successful and passed with true result`, + expectedValid: true, + expectedPass: true, + expectedThought: "Fallback analysis of malformed response (positive: 3, negative: 0)", + }, + { + name: "content analysis fallback - negative", + input: `The test failed with false result and error occurred`, + expectedValid: true, + expectedPass: false, + expectedThought: "Fallback analysis of malformed response (positive: 0, negative: 3)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result AssertionResult + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.Equal(t, tt.expectedThought, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestExtractAssertionFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedPass bool + expectedThought string + shouldError bool + }{ + { + name: "pass true", + input: `{"pass": true, "thought": "manual test"}`, + expectedPass: true, + expectedThought: "manual test", + shouldError: false, + }, + { + name: "pass false", + input: `{"pass": false, "thought": "manual fail"}`, + expectedPass: false, + expectedThought: "manual fail", + shouldError: false, + }, + { + name: "no pass field", + input: `{"thought": "no pass field"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractAssertionFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.Equal(t, tt.expectedThought, result.Thought) + } + }) + } +} + +func TestExtractQuotedString(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "simple quoted string", + input: `"hello world"`, + expected: "hello world", + }, + { + name: "quoted string with escaped quotes", + input: `"He said \"Hello\""`, + expected: `He said "Hello"`, + }, + { + name: "quoted string with escaped backslash", + input: `"path\\to\\file"`, + expected: `path\to\file`, + }, + { + name: "empty quoted string", + input: `""`, + expected: "", + }, + { + name: "quoted string with unicode", + input: `"测试消息"`, + expected: "测试消息", + }, + { + name: "not a quoted string", + input: "hello world", + expected: "", + }, + { + name: "unclosed quoted string", + input: `"unclosed string`, + expected: "unclosed string", + }, + { + name: "quoted string with extra content after", + input: `"content" and more`, + expected: "content", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractQuotedString(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestCleanJSONContent(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "remove trailing comma in object", + input: `{"key": "value",}`, + expected: `{"key": "value"}`, + }, + { + name: "remove trailing comma in array", + input: `["item1", "item2",]`, + expected: `["item1", "item2"]`, + }, + { + name: "clean non-printable characters", + input: "{\n\"key\": \"value\"\u0000\u0001}", + expected: "{\n\"key\": \"value\"}", + }, + { + name: "preserve unicode characters", + input: `{"message": "测试消息"}`, + expected: `{"message": "测试消息"}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := cleanJSONContent(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestAnalyzeContentForAssertion(t *testing.T) { + tests := []struct { + name string + input string + expectedPass bool + }{ + { + name: "positive indicators", + input: "The test was successful and passed", + expectedPass: true, + }, + { + name: "negative indicators", + input: "The test failed with error", + expectedPass: false, + }, + { + name: "mixed with more positive", + input: "Some errors occurred but overall test passed successfully", + expectedPass: true, + }, + { + name: "no clear indicators", + input: "This is just plain text", + expectedPass: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := analyzeContentForAssertion(tt.input) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.NotEmpty(t, result.Thought) + }) + } +} + +func TestParseStructuredResponse(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid AssertionResult JSON", + input: `{"pass": true, "thought": "test passed"}`, + shouldSucceed: true, + }, + { + name: "malformed JSON with extractable fields", + input: `malformed start {"pass": false, "thought": "extracted thought"} end`, + shouldSucceed: true, + }, + { + name: "UTF-8 issues with JSON", + input: "测试结果:\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}", + shouldSucceed: true, + }, + { + name: "content analysis fallback", + input: "The assertion was successful and passed correctly", + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result AssertionResult + err := parseStructuredResponse(tt.input, &result) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotEmpty(t, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +// Add more test cases for different struct types +func TestParseJSONWithFallback_QueryResult(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedContent string + expectedThought string + }{ + { + name: "valid QueryResult JSON", + input: `{"content": "extracted info", "thought": "analysis complete"}`, + expectedValid: true, + expectedContent: "extracted info", + expectedThought: "analysis complete", + }, + { + name: "malformed QueryResult with extractable fields", + input: `malformed { "content": "partial info", "thought": "partial analysis" } more text`, + expectedValid: true, + expectedContent: "partial info", + expectedThought: "partial analysis", + }, + { + name: "completely malformed QueryResult", + input: `This is just plain text with no structure`, + expectedValid: true, + expectedContent: "This is just plain text with no structure", + expectedThought: "Failed to parse as JSON, returning raw content", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result QueryResult + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedContent, result.Content) + assert.Equal(t, tt.expectedThought, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestParseJSONWithFallback_PlanningResponse(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedThought string + expectedError string + expectedActions int + }{ + { + name: "valid PlanningJSONResponse", + input: `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`, + expectedValid: true, + expectedThought: "planning complete", + expectedError: "", + expectedActions: 1, + }, + { + name: "malformed PlanningResponse with extractable thought", + input: `malformed { "thought": "partial planning" } more text`, + expectedValid: true, + expectedThought: "partial planning", + expectedActions: 0, + }, + { + name: "completely malformed PlanningResponse", + input: `This is just plain text with no structure`, + expectedValid: true, + expectedThought: "Failed to parse structured response", + expectedError: "JSON parsing failed, returning minimal structure", + expectedActions: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result PlanningJSONResponse + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedThought, result.Thought) + assert.Equal(t, tt.expectedError, result.Error) + assert.Len(t, result.Actions, tt.expectedActions) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestExtractQueryFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedContent string + expectedThought string + shouldError bool + }{ + { + name: "both content and thought", + input: `{"content": "test content", "thought": "test thought"}`, + expectedContent: "test content", + expectedThought: "test thought", + shouldError: false, + }, + { + name: "only content", + input: `{"content": "only content"}`, + expectedContent: "only content", + expectedThought: "Partial extraction from malformed response", + shouldError: false, + }, + { + name: "only thought", + input: `{"thought": "only thought"}`, + expectedContent: "Extracted partial information", + expectedThought: "only thought", + shouldError: false, + }, + { + name: "no extractable fields", + input: `{"other": "data"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractQueryFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedContent, result.Content) + assert.Equal(t, tt.expectedThought, result.Thought) + } + }) + } +} + +func TestExtractPlanningFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedThought string + expectedError string + shouldError bool + }{ + { + name: "both thought and error", + input: `{"thought": "test planning", "error": "test error"}`, + expectedThought: "test planning", + expectedError: "test error", + shouldError: false, + }, + { + name: "only thought", + input: `{"thought": "only planning"}`, + expectedThought: "only planning", + expectedError: "", + shouldError: false, + }, + { + name: "only error", + input: `{"error": "only error"}`, + expectedThought: "Partial extraction from malformed response", + expectedError: "only error", + shouldError: false, + }, + { + name: "no extractable fields", + input: `{"other": "data"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractPlanningFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedThought, result.Thought) + assert.Equal(t, tt.expectedError, result.Error) + assert.NotNil(t, result.Actions) // Should always be initialized + } + }) + } +} + +// Test the integrated parseStructuredResponse with QueryResult +func TestParseStructuredResponse_QueryResult(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid QueryResult JSON", + input: `{"content": "extracted data", "thought": "processing complete"}`, + shouldSucceed: true, + }, + { + name: "QueryResult with UTF-8 issues", + input: "extracted data: 搜索框,里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}", + shouldSucceed: true, + }, + { + name: "malformed QueryResult", + input: `malformed start {"content": "partial info"} end`, + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result QueryResult + err := parseStructuredResponse(tt.input, &result) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotEmpty(t, result.Content, "Content should not be empty") + assert.NotEmpty(t, result.Thought, "Thought should not be empty") + } else { + assert.Error(t, err) + } + }) + } +} diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index dba564d3..72dc58f9 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -59,7 +59,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op ModelName: "", Error: err.Error(), }, - StartTime: planningStartTime.Unix(), + StartTime: planningStartTime.UnixMilli(), Elapsed: time.Since(planningStartTime).Milliseconds(), } allPlannings = append(allPlannings, errorResult) @@ -67,7 +67,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op } // Set planning execution timing - planningResult.StartTime = planningStartTime.Unix() + planningResult.StartTime = planningStartTime.UnixMilli() planningResult.SubActions = []*SubActionResult{} // Check if task is finished BEFORE executing actions @@ -96,7 +96,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op subActionResult := &SubActionResult{ ActionName: toolCall.Function.Name, Arguments: toolCall.Function.Arguments, - StartTime: subActionStartTime.Unix(), + StartTime: subActionStartTime.UnixMilli(), } // Use defer to ensure sub-action is always processed and added to results @@ -164,7 +164,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. // Step 1: Take screenshot screenshotStartTime := time.Now() // Use GetScreenResult to handle screenshot capture, save, and session tracking - screenResult, err := dExt.GetScreenResult( + screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), ) screenshotElapsed := time.Since(screenshotStartTime).Milliseconds() diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index e9329f78..ae3e4238 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -50,27 +50,25 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts { } // GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size +// Also saves the screenshot to session for report display func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) { - compressBufSource, err := getScreenShotBuffer(dExt) + // Create screenshot with session saving, minimal CV processing for AI operations + screenResult, err := dExt.createScreenshotWithSession( + option.WithScreenShotFileName("screenshot_base64"), + ) if err != nil { return "", types.Size{}, err } // convert buffer to base64 string screenShotBase64 := "data:image/jpeg;base64," + - base64.StdEncoding.EncodeToString(compressBufSource.Bytes()) + base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes()) - // get screen size - size, err = dExt.IDriver.WindowSize() - if err != nil { - return "", types.Size{}, errors.Wrap(err, "get window size failed") - } - - return screenShotBase64, size, nil + return screenShotBase64, screenResult.Resolution, nil } -// GetScreenResult takes a screenshot, returns the image recognition result -func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) { +// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session +func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) { // get compressed screenshot buffer compressBufSource, err := getScreenShotBuffer(dExt.IDriver) if err != nil { @@ -105,34 +103,40 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult return nil, errors.Wrap(code.DeviceGetInfoError, err.Error()) } - // read image from buffer with CV + // create basic screen result screenResult = &ScreenResult{ bufSource: compressBufSource, ImagePath: imagePath, Tags: nil, Resolution: windowSize, } - imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...) - if err != nil { - log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed") - return nil, err - } - if imageResult != nil { - screenResult.Texts = imageResult.OCRResult.ToOCRTexts() - screenResult.UploadedURL = imageResult.URL - screenResult.Icons = imageResult.UIResult - if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil { - screenResult.Popup = &PopupInfo{ - ClosePopupsResult: imageResult.ClosePopupsResult, - PicName: imagePath, - PicURL: imageResult.URL, - } + logger := log.Debug().Str("imagePath", imagePath) + // perform CV processing if any CV-related option is enabled + if needsCVProcessing(screenshotOptions) { + imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...) + if err != nil { + log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed") + return nil, err + } + if imageResult != nil { + screenResult.Texts = imageResult.OCRResult.ToOCRTexts() + screenResult.UploadedURL = imageResult.URL + screenResult.Icons = imageResult.UIResult - closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"}) - for _, closeArea := range closeAreas { - screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center()) + if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil { + screenResult.Popup = &PopupInfo{ + ClosePopupsResult: imageResult.ClosePopupsResult, + PicName: imagePath, + PicURL: imageResult.URL, + } + + closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"}) + for _, closeArea := range closeAreas { + screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center()) + } } + logger.Str("imageUrl", screenResult.UploadedURL) } } @@ -140,13 +144,28 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult session := dExt.GetSession() session.screenResults = append(session.screenResults, screenResult) - log.Debug(). - Str("imagePath", imagePath). - Str("imageUrl", screenResult.UploadedURL). - Msg("log screenshot") + logger.Msg("log screenshot") return screenResult, nil } +// needsCVProcessing determines if CV service processing is required based on screenshot options +func needsCVProcessing(options *option.ActionOptions) bool { + return options.ScreenShotWithOCR || + options.ScreenShotWithUpload || + options.ScreenShotWithLiveType || + options.ScreenShotWithLivePopularity || + len(options.ScreenShotWithUITypes) > 0 || + options.ScreenShotWithClosePopups || + options.ScreenShotWithOCRCluster != "" +} + +// GetScreenResult takes a screenshot, returns the image recognition result +func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) { + // Enable OCR processing for GetScreenResult + opts = append(opts, option.WithScreenShotOCR(true)) + return dExt.createScreenshotWithSession(opts...) +} + func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) { options := option.NewActionOptions(opts...) if options.ScreenShotFileName == "" { diff --git a/uixt/sdk.go b/uixt/sdk.go index c871c975..cdcbf65b 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -132,7 +132,7 @@ func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAct subActionResult := &SubActionResult{ ActionName: string(action.Method), Arguments: action.Params, - StartTime: subActionStartTime.Unix(), + StartTime: subActionStartTime.UnixMilli(), } // Execute via MCP tool