Merge branch 'merge-wings' into 'master'

HTML report 中新增展示 AI Query 和 AI Assert 的完整内容

See merge request iesqa/httprunner!101
2026-07-02 21:11:38 +08:00 · 2025-06-19 02:56:24 +00:00
parent d20504f41f c568be5dc2
commit 54b17de98f
24 changed files with 1693 additions and 647 deletions
--- a/internal/sdk/ga4.go
+++ b/internal/sdk/ga4.go
@@ -206,6 +206,6 @@ func SendGA4Event(name string, params map[string]interface{}) {
 	}
 	err := ga4Client.SendEvent(event)
 	if err != nil {
-		log.Error().Err(err).Msg("send GA4 event failed")
+		log.Warn().Err(err).Msg("send GA4 event failed")
 	}
 }
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2506171946
+v5.0.0-beta-2506191048
--- a/report.go
+++ b/report.go
@@ -8,6 +8,7 @@ import (
 	"html/template"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"time"

@@ -43,10 +44,11 @@ type HTMLReportGenerator struct {

 // LogEntry represents a single log entry
 type LogEntry struct {
-	Time    string         `json:"time"`
-	Level   string         `json:"level"`
-	Message string         `json:"message"`
-	Fields  map[string]any `json:"-"` // Store all other fields
+	Time     string         `json:"time"`
+	Level    string         `json:"level"`
+	Message  string         `json:"message"`
+	Fields   map[string]any `json:"-"` // Store all other fields
+	LogIndex int            `json:"-"` // Original index to maintain order for same timestamps
 }

 // NewHTMLReportGenerator creates a new HTML report generator
@@ -126,6 +128,7 @@ func (g *HTMLReportGenerator) loadLogData() error {
 	defer file.Close()

 	scanner := bufio.NewScanner(file)
+	logIndex := 0 // Track original order
 	for scanner.Scan() {
 		line := strings.TrimSpace(scanner.Text())
 		if line == "" {
@@ -141,8 +144,10 @@ func (g *HTMLReportGenerator) loadLogData() error {

 		// Create LogEntry with basic fields
 		logEntry := LogEntry{
-			Fields: make(map[string]any),
+			Fields:   make(map[string]any),
+			LogIndex: logIndex, // Store original order
 		}
+		logIndex++

 		// Extract standard fields
 		if time, ok := rawEntry["time"].(string); ok {
@@ -168,36 +173,63 @@ func (g *HTMLReportGenerator) loadLogData() error {
 	return scanner.Err()
 }

-// getStepLogs filters log entries for a specific test step based on time range
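+// getStepLogs returns the log entries between the RUN_STEP_START and RUN_STEP_END markers whose "step" field equals stepName (markers included); startTime and elapsed are no longer used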
 func (g *HTMLReportGenerator) getStepLogs(stepName string, startTime int64, elapsed int64) []LogEntry {
 	if len(g.LogData) == 0 {
 		return nil
 	}

 	var stepLogs []LogEntry
+	var inCurrentStep bool = false

-	// startTime is in seconds, elapsed is in milliseconds
-	// Calculate end time (startTime in seconds + elapsed in milliseconds converted to seconds)
-	endTime := startTime + elapsed/1000
-
-	// Convert Unix timestamps to time.Time for comparison
-	startTimeObj := time.Unix(startTime, 0)
-	endTimeObj := time.Unix(endTime, 0)
-
+	// Simple approach: use step start/end markers for precise boundaries
 	for _, logEntry := range g.LogData {
-		// Parse log entry time
-		logTime, err := g.parseLogTime(logEntry.Time)
-		if err != nil {
-			continue
+		// Check for step boundaries to control inclusion
+		if logEntry.Message == RUN_STEP_START {
+			if stepFieldValue, exists := logEntry.Fields["step"]; exists {
+				if stepFieldValue == stepName {
+					inCurrentStep = true
+					stepLogs = append(stepLogs, logEntry)
+					continue
+				} else if inCurrentStep {
+					// This is a different step starting, we're done
+					break
+				}
+			}
 		}

-		// Check if log entry falls within step time range
-		if (logTime.Equal(startTimeObj) || logTime.After(startTimeObj)) &&
-			(logTime.Equal(endTimeObj) || logTime.Before(endTimeObj)) {
+		if logEntry.Message == RUN_STEP_END {
+			if stepFieldValue, exists := logEntry.Fields["step"]; exists {
+				if stepFieldValue == stepName {
+					stepLogs = append(stepLogs, logEntry)
+					inCurrentStep = false
+					continue
+				}
+			}
+		}
+
+		// Only include logs when we're in the current step
+		if inCurrentStep {
 			stepLogs = append(stepLogs, logEntry)
 		}
 	}

+	// Sort logs by timestamp; fall back to the original log index when timestamps are equal or either one fails to parse
+	sort.Slice(stepLogs, func(i, j int) bool {
+		timeI, errI := g.parseLogTime(stepLogs[i].Time)
+		timeJ, errJ := g.parseLogTime(stepLogs[j].Time)
+
+		if errI != nil || errJ != nil {
+			return stepLogs[i].LogIndex < stepLogs[j].LogIndex
+		}
+
+		if timeI.Equal(timeJ) {
+			// For same timestamps, use original log index to maintain order
+			return stepLogs[i].LogIndex < stepLogs[j].LogIndex
+		}
+		return timeI.Before(timeJ)
+	})
+
 	return stepLogs
 }

@@ -293,79 +325,39 @@ func (g *HTMLReportGenerator) getStepLogsForTemplate(step *StepResult) []LogEntr

 // calculateTotalActions calculates the total number of actions across all test cases
 func (g *HTMLReportGenerator) calculateTotalActions() int {
-	total := 0
-	if g.SummaryData == nil || g.SummaryData.Details == nil {
-		return total
-	}
-
-	for _, testCase := range g.SummaryData.Details {
-		if testCase.Records == nil {
-			continue
-		}
-		for _, step := range testCase.Records {
-			if step.Actions != nil {
-				total += len(step.Actions)
-			}
-		}
-	}
-	return total
+	return g.iterateTestData(func(action *ActionResult) int {
+		return 1 // Count each action
+	})
 }

 // calculateTotalSubActions calculates the total number of sub-actions across all test cases
 func (g *HTMLReportGenerator) calculateTotalSubActions() int {
-	total := 0
-	if g.SummaryData == nil || g.SummaryData.Details == nil {
-		return total
-	}
-
-	for _, testCase := range g.SummaryData.Details {
-		if testCase.Records == nil {
-			continue
+	return g.iterateTestData(func(action *ActionResult) int {
+		total := 0
+		// Count sub-actions from regular actions
+		if action.SubActions != nil {
+			total += len(action.SubActions)
 		}
-		for _, step := range testCase.Records {
-			if step.Actions != nil {
-				for _, action := range step.Actions {
-					// Count sub-actions from regular actions
-					if action.SubActions != nil {
-						total += len(action.SubActions)
-					}
-					// Count sub-actions from planning results
-					if action.Plannings != nil {
-						for _, planning := range action.Plannings {
-							if planning.SubActions != nil {
-								total += len(planning.SubActions)
-							}
-						}
-					}
+		// Count sub-actions from planning results
+		if action.Plannings != nil {
+			for _, planning := range action.Plannings {
+				if planning.SubActions != nil {
+					total += len(planning.SubActions)
 				}
 			}
 		}
-	}
-	return total
+		return total
+	})
 }

 // calculateTotalPlannings calculates the total number of planning results across all test cases
 func (g *HTMLReportGenerator) calculateTotalPlannings() int {
-	total := 0
-	if g.SummaryData == nil || g.SummaryData.Details == nil {
-		return total
-	}
-
-	for _, testCase := range g.SummaryData.Details {
-		if testCase.Records == nil {
-			continue
+	return g.iterateTestData(func(action *ActionResult) int {
+		if action.Plannings != nil {
+			return len(action.Plannings)
 		}
-		for _, step := range testCase.Records {
-			if step.Actions != nil {
-				for _, action := range step.Actions {
-					if action.Plannings != nil {
-						total += len(action.Plannings)
-					}
-				}
-			}
-		}
-	}
-	return total
+		return 0
+	})
 }

 // calculateTotalUsage calculates the total token usage across all test cases
@@ -406,6 +398,28 @@ func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} {
 	return totalUsage
 }

+// iterateTestData sums countFunc over every action of every step record in g.SummaryData.Details; it returns 0 when SummaryData or its Details is nil
+func (g *HTMLReportGenerator) iterateTestData(countFunc func(*ActionResult) int) int {
+	total := 0
+	if g.SummaryData == nil || g.SummaryData.Details == nil {
+		return total
+	}
+
+	for _, testCase := range g.SummaryData.Details {
+		if testCase.Records == nil {
+			continue
+		}
+		for _, step := range testCase.Records {
+			if step.Actions != nil {
+				for _, action := range step.Actions {
+					total += countFunc(action)
+				}
+			}
+		}
+	}
+	return total
+}
+
 // GenerateReport generates the complete HTML test report
 func (g *HTMLReportGenerator) GenerateReport(outputFile string) error {
 	if outputFile == "" {
@@ -438,13 +452,23 @@ func (g *HTMLReportGenerator) GenerateReport(outputFile string) error {
 			result := buf.String()
 			return strings.TrimSpace(result)
 		},
-		"mul":   func(a, b float64) float64 { return a * b },
 		"add":   func(a, b int) int { return a + b },
-		"sub":   func(a, b int) int { return a - b },
-		"lt":    func(a, b int) bool { return a < b },
-		"gt":    func(a, b int) bool { return a > b },
 		"base":  filepath.Base,
 		"index": func(m map[string]any, key string) any { return m[key] },
+		"extractThought": func(content string) string {
+			if content == "" {
+				return ""
+			}
+			// Try to parse as JSON to extract thought field
+			var data map[string]interface{}
+			if err := json.Unmarshal([]byte(content), &data); err == nil {
+				if thought, ok := data["thought"].(string); ok {
+					return thought
+				}
+			}
+			// If not JSON or no thought field, return original content
+			return content
+		},
 	}

 	// Parse template
@@ -683,8 +707,6 @@ const htmlTemplate = `<!DOCTYPE html>
            word-break: break-all;
        }

-
-
        .test-cases {
            margin-top: 20px;
        }
@@ -1262,25 +1284,7 @@ const htmlTemplate = `<!DOCTYPE html>
            }
        }

-        .raw-content {
-            margin-top: 10px;
-        }

-        .raw-content pre {
-            background: #f1f3f4;
-            border: 1px solid #dadce0;
-            border-radius: 4px;
-            padding: 8px;
-            font-size: 0.8em;
-            max-height: 150px;
-            overflow-y: auto;
-            white-space: pre-wrap;
-            word-wrap: break-word;
-        }
-
-        .step-screenshots {
-            margin-top: 10px;
-        }

        .action-details {
            display: flex;
@@ -1303,12 +1307,6 @@ const htmlTemplate = `<!DOCTYPE html>
            font-size: 0.9em;
        }

-
-
-
-
-
-
        .thought {
            background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);
            border: 2px solid #2196f3;
@@ -1333,31 +1331,7 @@ const htmlTemplate = `<!DOCTYPE html>
            line-height: 1;
        }

-        .model-name-container {
-            background: #f8f9fa;
-            border: 1px solid #e9ecef;
-            border-radius: 6px;
-            padding: 8px 12px;
-            margin: 8px 0;
-            font-size: 0.9em;
-            display: flex;
-            align-items: center;
-            gap: 8px;
-        }

-        .model-label {
-            font-weight: 600;
-            color: #495057;
-        }
-
-        .model-value {
-            font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
-            background: #e9ecef;
-            padding: 2px 6px;
-            border-radius: 4px;
-            color: #495057;
-            font-size: 0.85em;
-        }

        .arguments {
            background: #f8f9fa;
@@ -1369,92 +1343,7 @@ const htmlTemplate = `<!DOCTYPE html>
            font-size: 0.9em;
        }

-        .requests {
-            margin-top: 15px;
-        }

-        .requests-toggle {
-            background: #6c757d;
-            color: white;
-            border: none;
-            padding: 6px 12px;
-            border-radius: 4px;
-            cursor: pointer;
-            font-size: 0.8em;
-            margin-bottom: 10px;
-            transition: background-color 0.3s;
-        }
-
-        .requests-toggle:hover {
-            background: #5a6268;
-        }
-
-        .requests-content {
-            display: none;
-        }
-
-        .requests-content.show {
-            display: block;
-        }
-
-        .request-item {
-            background: #f1f3f4;
-            border: 1px solid #dadce0;
-            border-radius: 4px;
-            padding: 8px;
-            margin: 6px 0;
-        }
-
-        .request-header {
-            display: flex;
-            align-items: center;
-            gap: 10px;
-            margin-bottom: 6px;
-        }
-
-        .method {
-            background: #007bff;
-            color: white;
-            padding: 2px 6px;
-            border-radius: 4px;
-            font-size: 0.8em;
-            font-weight: bold;
-        }
-
-        .url {
-            color: #495057;
-            font-family: monospace;
-            font-size: 0.9em;
-        }
-
-        .status {
-            padding: 2px 6px;
-            border-radius: 4px;
-            font-size: 0.8em;
-            font-weight: bold;
-        }
-
-        .status.success {
-            background: #d4edda;
-            color: #155724;
-        }
-
-        .status.failure {
-            background: #f8d7da;
-            color: #721c24;
-        }
-
-        .request-body, .response-body {
-            background: #ffffff;
-            border: 1px solid #e9ecef;
-            border-radius: 4px;
-            padding: 6px;
-            margin: 4px 0;
-            font-family: monospace;
-            font-size: 0.8em;
-            max-height: 100px;
-            overflow-y: auto;
-        }

        .screenshots-section {
            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
@@ -1477,6 +1366,30 @@ const htmlTemplate = `<!DOCTYPE html>
            gap: 10px;
        }

+        .screenshots-horizontal {
+            display: flex;
+            gap: 15px;
+            overflow-x: auto;
+            padding: 10px 0;
+        }
+
+        .screenshots-horizontal .screenshot-item {
+            flex: 0 0 auto;
+            min-width: 200px;
+            max-width: 300px;
+            margin-bottom: 0;
+        }
+
+        .screenshots-horizontal .screenshot-image {
+            min-height: 200px;
+            padding: 10px 0;
+        }
+
+        .screenshots-horizontal .screenshot-image img {
+            max-height: 250px;
+            width: auto;
+        }
+
        .screenshot-item {
            background: white;
            border: 1px solid #dee2e6;
@@ -1573,8 +1486,18 @@ const htmlTemplate = `<!DOCTYPE html>
        .validator-header {
            display: flex;
            align-items: center;
-            gap: 10px;
-            margin-bottom: 8px;
+            gap: 15px;
+            margin-bottom: 15px;
+            padding: 12px 15px;
+            border-radius: 8px;
+            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+            border: 1px solid #dee2e6;
+        }
+
+        .validator-header strong {
+            color: #007bff;
+            font-size: 1.1em;
+            font-weight: 600;
        }

        .check-type, .assert-type {
@@ -1589,9 +1512,84 @@ const htmlTemplate = `<!DOCTYPE html>
            font-weight: bold;
        }

-        .validator-expect,         .validator-message {
-            margin: 4px 0;
+        .validator-expect, .validator-message {
+            margin: 8px 0;
            font-size: 0.9em;
+            padding: 8px 12px;
+            background: #f8f9fa;
+            border: 1px solid #dee2e6;
+            border-radius: 4px;
+        }
+
+        .validator-ai-content {
+            margin-top: 15px;
+            padding: 15px;
+            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+            border: 1px solid #dee2e6;
+            border-radius: 12px;
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+        }
+
+        .validator-ai-layout {
+            display: flex;
+            gap: 20px;
+            margin: 15px 0;
+        }
+
+        .validator-column-screenshot {
+            flex: 0.9;
+            min-width: 250px;
+            max-width: 35%;
+        }
+
+        .validator-column-analysis {
+            flex: 1.6;
+            min-width: 350px;
+        }
+
+        .validator-step-compact {
+            background: white;
+            border: 1px solid #dee2e6;
+            border-radius: 8px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            height: fit-content;
+        }
+
+        .validator-ai-details {
+            padding: 12px;
+        }
+
+        .validator-thought {
+            background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);
+            border: 2px solid #2196f3;
+            border-radius: 12px;
+            padding: 15px;
+            margin: 10px 0;
+            font-style: italic;
+            color: #1565c0;
+            font-size: 1.0em;
+            font-weight: 500;
+            box-shadow: 0 2px 8px rgba(33, 150, 243, 0.15);
+            white-space: pre-wrap;
+            word-wrap: break-word;
+        }
+
+        @media screen and (max-width: 768px) {
+            .validator-ai-layout {
+                flex-direction: column;
+                gap: 15px;
+            }
+
+            .validator-column-screenshot {
+                flex: none;
+                min-width: auto;
+                max-width: none;
+            }
+
+            .validator-column-analysis {
+                flex: none;
+                min-width: auto;
+            }
        }

        .logs-section {
@@ -1965,6 +1963,18 @@ const htmlTemplate = `<!DOCTYPE html>
                gap: 10px;
            }

+            .screenshots-horizontal {
+                flex-direction: column;
+                overflow-x: visible;
+            }
+
+            .screenshots-horizontal .screenshot-item {
+                flex: none;
+                min-width: auto;
+                max-width: none;
+                width: 100%;
+            }
+
            .screenshot-image {
                min-height: 250px;
                padding: 15px 0;
@@ -2182,7 +2192,6 @@ const htmlTemplate = `<!DOCTYPE html>
                                <div class="action-content">

                                {{if $action.Plannings}}
-                                <div class="planning-results">
                                    {{range $planningIndex, $planning := $action.Plannings}}
                                    <div class="planning-item">
                                        <div class="planning-header">
@@ -2301,81 +2310,94 @@ const htmlTemplate = `<!DOCTYPE html>
                                        {{/* SubActions are now displayed in the right panel, so we don't show them here */}}
                                    </div>
                                    {{end}}
-                                </div>
                                {{end}}

+                                {{/* Handle special case: ai_query needs enhanced display even when not in planning */}}
                                {{if $action.SubActions}}
-                                <div class="sub-actions">
                                    {{range $subAction := $action.SubActions}}
-                                    <div class="sub-action-item">
-                                        <div class="sub-action-header">
-                                            <span class="action-name">{{$subAction.ActionName}}</span>
-                                            <span class="duration">{{formatDuration $subAction.Elapsed}}</span>
-                                        </div>
-
-                                        <div class="sub-action-content">
-                                            <div class="sub-action-left">
-                                                {{if $subAction.Arguments}}
-                                                <div class="arguments">Arguments: {{safeHTML (toJSON $subAction.Arguments)}}</div>
-                                                {{end}}
-
-                                                {{if $subAction.Requests}}
-                                                <div class="requests">
-                                                    <button class="requests-toggle" onclick="toggleRequests(this)">
-                                                        📡 Show Requests ({{len $subAction.Requests}})
-                                                    </button>
-                                                    <div class="requests-content">
-                                                        {{range $request := $subAction.Requests}}
-                                                        <div class="request-item">
-                                                            <div class="request-header">
-                                                                <span class="method">{{$request.RequestMethod}}</span>
-                                                                <span class="url">{{$request.RequestUrl}}</span>
-                                                                <span class="status {{if $request.Success}}success{{else}}failure{{end}}">Status: {{$request.ResponseStatus}}</span>
-                                                                <span class="duration">{{formatDuration $request.ResponseDuration}}</span>
-                                                            </div>
-                                                            {{if $request.RequestBody}}
-                                                            <div class="request-body">Request: {{$request.RequestBody}}</div>
-                                                            {{end}}
-                                                            {{if $request.ResponseBody}}
-                                                            <div class="response-body">Response: {{$request.ResponseBody}}</div>
-                                                            {{end}}
-                                                        </div>
+                                        {{if eq $subAction.ActionName "ai_query"}}
+                                        <div class="sub-action-item">
+                                            <!-- Enhanced AI Query Display -->
+                                            <div class="validator-ai-content">
+                                                <!-- Extract AI query details from step logs -->
+                                                {{$stepLogs := getStepLogs $step}}
+                                                {{$queryThought := ""}}
+                                                {{$queryModel := ""}}
+                                                {{$queryUsage := ""}}
+                                                {{$queryScreenshot := ""}}
+                                                {{$queryResult := ""}}
+                                                {{range $logEntry := $stepLogs}}
+                                                    {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}}
+                                                        {{$content := index $logEntry.Fields "content"}}
+                                                        {{if $content}}
+                                                            {{$queryResult = $content}}
                                                        {{end}}
-                                                    </div>
-                                                </div>
+                                                    {{end}}
+                                                    {{if and (eq $logEntry.Message "call model service for query") (index $logEntry.Fields "model")}}
+                                                        {{$queryModel = index $logEntry.Fields "model"}}
+                                                    {{end}}
+                                                    {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}}
+                                                        {{$inputTokens := index $logEntry.Fields "input_tokens"}}
+                                                        {{$outputTokens := index $logEntry.Fields "output_tokens"}}
+                                                        {{$totalTokens := index $logEntry.Fields "total_tokens"}}
+                                                        {{$queryUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}}
+                                                    {{end}}
+                                                    {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}}
+                                                        {{$queryScreenshot = index $logEntry.Fields "imagePath"}}
+                                                    {{end}}
                                                {{end}}
-                                            </div>

-                                            {{if $subAction.ScreenResults}}
-                                            <div class="sub-action-right">
-                                                <div class="sub-action-screenshots">
-                                                    <h5>📸 Screenshots</h5>
-                                                    <div class="screenshots-grid">
-                                                        {{range $screenshot := $subAction.ScreenResults}}
-                                                        {{$base64Image := encodeImageBase64 $screenshot.ImagePath}}
-                                                        {{if $base64Image}}
-                                                        <div class="screenshot-item small">
-                                                            <div class="screenshot-info">
-                                                                <span class="filename">{{base $screenshot.ImagePath}}</span>
-                                                                {{if $screenshot.Resolution}}
-                                                                <span class="resolution">{{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}</span>
+                                                <!-- Display AI Query Result at the top -->
+                                                {{if $queryResult}}
+                                                <div class="thought">{{$queryResult}}</div>
+                                                {{end}}
+
+                                                <!-- AI Query Layout - similar to validator layout -->
+                                                <div class="validator-ai-layout">
+                                                    <!-- Left column: Screenshot -->
+                                                    {{if $queryScreenshot}}
+                                                    <div class="validator-column-screenshot">
+                                                        <div class="validator-step-compact">
+                                                            <div class="step-header-compact">
+                                                                <span class="step-name">📸 Query Screenshot</span>
+                                                            </div>
+                                                            <div class="screenshot-display">
+                                                                {{$base64Image := encodeImageBase64 $queryScreenshot}}
+                                                                {{if $base64Image}}
+                                                                <div class="screenshot-item-compact">
+                                                                    <div class="screenshot-image">
+                                                                        <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Query Screenshot" onclick="openImageModal(this.src)" />
+                                                                    </div>
+                                                                </div>
                                                                {{end}}
                                                            </div>
-                                                            <div class="screenshot-image">
-                                                                <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Screenshot" onclick="openImageModal(this.src)" />
+                                                        </div>
+                                                    </div>
+                                                    {{end}}
+
+                                                    <!-- Right column: AI Query -->
+                                                    <div class="validator-column-analysis">
+                                                        <div class="validator-step-compact">
+                                                            <div class="step-header-compact">
+                                                                <span class="step-name">🤖 AI Query</span>
+                                                            </div>
+                                                            <div class="validator-ai-details">
+                                                                {{if $queryModel}}
+                                                                <div class="model-info">🤖 Model: {{$queryModel}}</div>
+                                                                {{end}}
+                                                                {{if $queryUsage}}
+                                                                <div class="usage-info">{{$queryUsage}}</div>
+                                                                {{end}}
                                                            </div>
                                                        </div>
-                                                        {{end}}
-                                                        {{end}}
                                                    </div>
                                                </div>
                                            </div>
-                                            {{end}}
                                        </div>
-                                    </div>
-                                    {{end}}
-                                    </div>
+                                        {{end}}
                                    {{end}}
+                                {{end}}
+                                {{/* Other SubActions (non-ai_query) are displayed in the Planning section's right panel to avoid duplication */}}
                                </div>
                            </div>
                            {{end}}
@@ -2385,18 +2407,97 @@ const htmlTemplate = `<!DOCTYPE html>
                        <!-- Validators -->
                        {{if and $step.Data $step.Data.validators}}
                        <div class="validators-section">
-                            <h4>Validators</h4>
-                            {{range $validator := $step.Data.validators}}
+                            <h4>🔍 Validators</h4>
+                            {{range $validatorIndex, $validator := $step.Data.validators}}
                            <div class="validator-item {{if eq $validator.check_result "pass"}}success{{else}}failure{{end}}">
                                <div class="validator-header">
-                                    <span class="check-type">{{$validator.check}}</span>
-                                    <span class="assert-type">{{$validator.assert}}</span>
-                                    <span class="result">{{$validator.check_result}}</span>
+                                    <strong>{{$validator.check}} - {{$validator.assert}}</strong>
+                                    <span class="status-badge {{if eq $validator.check_result "pass"}}success{{else}}failure{{end}}">
+                                        {{if eq $validator.check_result "pass"}}✓ PASS{{else}}✗ FAIL{{end}}
+                                    </span>
                                </div>
                                <div class="validator-expect">Expected: {{$validator.expect}}</div>
                                {{if and $validator.msg (ne $validator.check_result "pass")}}
                                    <div class="validator-message">{{$validator.msg}}</div>
                                {{end}}
+
+                                <!-- Enhanced AI Validator Display -->
+                                {{if or (eq $validator.check "ui_ai") (eq $validator.assert "ai_assert")}}
+                                <div class="validator-ai-content">
+                                    <!-- Extract AI validation details from step logs -->
+                                    {{$stepLogs := getStepLogs $step}}
+                                    {{$validationThought := ""}}
+                                    {{$validationModel := ""}}
+                                    {{$validationUsage := ""}}
+                                    {{$validationScreenshot := ""}}
+                                    {{range $logEntry := $stepLogs}}
+                                        {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}}
+                                            {{$content := index $logEntry.Fields "content"}}
+                                            {{if $content}}
+                                                {{$validationThought = $content}}
+                                            {{end}}
+                                        {{end}}
+                                        {{if and (eq $logEntry.Message "call model service for assertion") (index $logEntry.Fields "model")}}
+                                            {{$validationModel = index $logEntry.Fields "model"}}
+                                        {{end}}
+                                        {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}}
+                                            {{$inputTokens := index $logEntry.Fields "input_tokens"}}
+                                            {{$outputTokens := index $logEntry.Fields "output_tokens"}}
+                                            {{$totalTokens := index $logEntry.Fields "total_tokens"}}
+                                            {{$validationUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}}
+                                        {{end}}
+                                        {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}}
+                                            {{$validationScreenshot = index $logEntry.Fields "imagePath"}}
+                                        {{end}}
+                                    {{end}}
+
+                                    <!-- Display AI Thought at the top, same as planning -->
+                                    {{if $validationThought}}
+                                    <div class="thought">{{extractThought $validationThought}}</div>
+                                    {{end}}
+
+                                    <!-- AI Validation Layout - similar to planning layout -->
+                                    <div class="validator-ai-layout">
+                                        <!-- Left column: Screenshot -->
+                                        {{if $validationScreenshot}}
+                                        <div class="validator-column-screenshot">
+                                            <div class="validator-step-compact">
+                                                <div class="step-header-compact">
+                                                    <span class="step-name">📸 Validation Screenshot</span>
+                                                </div>
+                                                <div class="screenshot-display">
+                                                    {{$base64Image := encodeImageBase64 $validationScreenshot}}
+                                                    {{if $base64Image}}
+                                                    <div class="screenshot-item-compact">
+                                                        <div class="screenshot-image">
+                                                            <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Validation Screenshot" onclick="openImageModal(this.src)" />
+                                                        </div>
+                                                    </div>
+                                                    {{end}}
+                                                </div>
+                                            </div>
+                                        </div>
+                                        {{end}}
+
+                                        <!-- Right column: AI Analysis -->
+                                        <div class="validator-column-analysis">
+                                            <div class="validator-step-compact">
+                                                <div class="step-header-compact">
+                                                    <span class="step-name">🤖 AI Analysis</span>
+                                                </div>
+                                                <div class="validator-ai-details">
+                                                    {{if $validationModel}}
+                                                    <div class="model-info">🤖 Model: {{$validationModel}}</div>
+                                                    {{end}}
+                                                    {{if $validationUsage}}
+                                                    <div class="usage-info">{{$validationUsage}}</div>
+                                                    {{end}}
+                                                </div>
+                                            </div>
+                                        </div>
+                                    </div>
+                                </div>
+                                {{end}}
                            </div>
                            {{end}}
                        </div>
@@ -2409,22 +2510,35 @@ const htmlTemplate = `<!DOCTYPE html>
                        {{if index $attachments "screen_results"}}
                        <div class="screenshots-section">
                            <h4>Screenshots</h4>
-                            {{range $screenshot := index $attachments "screen_results"}}
-                            {{$base64Image := encodeImageBase64 $screenshot.ImagePath}}
-                            {{if $base64Image}}
-                            <div class="screenshot-item">
-                                <div class="screenshot-info">
-                                    <span class="filename">{{base $screenshot.ImagePath}}</span>
-                                    {{if $screenshot.Resolution}}
-                                    <span class="resolution">{{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}</span>
-                                    {{end}}
-                                </div>
-                                <div class="screenshot-image">
-                                    <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Screenshot" onclick="openImageModal(this.src)" />
+                            <div class="screenshots-horizontal">
+                                {{range $screenshot := index $attachments "screen_results"}}
+                                {{$imagePath := ""}}
+                                {{if $screenshot.ImagePath}}
+                                    {{$imagePath = $screenshot.ImagePath}}
+                                {{else if index $screenshot "image_path"}}
+                                    {{$imagePath = index $screenshot "image_path"}}
+                                {{end}}
+                                {{if $imagePath}}
+                                {{$base64Image := encodeImageBase64 $imagePath}}
+                                {{if $base64Image}}
+                                <div class="screenshot-item">
+                                    <div class="screenshot-info">
+                                        <span class="filename">{{base $imagePath}}</span>
+                                        {{if $screenshot.Resolution}}
+                                        <span class="resolution">{{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}</span>
+                                        {{else if index $screenshot "resolution"}}
+                                        {{$resolution := index $screenshot "resolution"}}
+                                        <span class="resolution">{{index $resolution "width"}}x{{index $resolution "height"}}</span>
+                                        {{end}}
+                                    </div>
+                                    <div class="screenshot-image">
+                                        <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Screenshot" onclick="openImageModal(this.src)" />
+                                    </div>
                                </div>
+                                {{end}}
+                                {{end}}
+                                {{end}}
                            </div>
-                            {{end}}
-                            {{end}}
                        </div>
                        {{end}}
                        {{end}}
@@ -2569,19 +2683,6 @@ const htmlTemplate = `<!DOCTYPE html>
            }
        }

-                function toggleRequests(buttonElement) {
-            const requestsDiv = buttonElement.parentElement;
-            const requestsContent = requestsDiv.querySelector('.requests-content');
-
-            if (requestsContent.classList.contains('show')) {
-                requestsContent.classList.remove('show');
-                buttonElement.textContent = buttonElement.textContent.replace('Hide', 'Show');
-            } else {
-                requestsContent.classList.add('show');
-                buttonElement.textContent = buttonElement.textContent.replace('Show', 'Hide');
-            }
-        }
-
        function toggleRequestsCompact(buttonElement) {
            const requestsDiv = buttonElement.parentElement;
            const requestsContent = requestsDiv.querySelector('.requests-content-compact');
@@ -2595,8 +2696,6 @@ const htmlTemplate = `<!DOCTYPE html>
            }
        }

-
-
        function openImageModal(src) {
            const modal = document.getElementById('imageModal');
            const modalImg = document.getElementById('modalImage');
@@ -2616,8 +2715,6 @@ const htmlTemplate = `<!DOCTYPE html>
            }
        }

-
-
        // Auto-expand all steps on load to show actions
        document.addEventListener('DOMContentLoaded', function() {
            // Expand all steps to show the actions list
--- a/runner.go
+++ b/runner.go
@@ -729,6 +729,11 @@ func (r *SessionRunner) Start(givenVars map[string]interface{}) (summary *TestCa
 	return summary, nil
 }

+const (
+	RUN_STEP_START = "run step start"
+	RUN_STEP_END   = "run step end"
+)
+
 func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) {
 	// check for interrupt signal before running step
 	select {
@@ -748,7 +753,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)

 	stepName := step.Name()
 	stepType := string(step.Type())
-	log.Info().Str("step", stepName).Str("type", stepType).Msg("run step start")
+	log.Info().Str("step", stepName).Str("type", stepType).Msg(RUN_STEP_START)

 	// run times of step
 	loopTimes := step.Config().Loops
@@ -785,7 +790,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)
 				Bool("success", true).
 				Int64("elapsed(ms)", stepResult.Elapsed).
 				Interface("exportVars", stepResult.ExportVars).
-				Msg("run step end")
+				Msg(RUN_STEP_END)
 			continue
 		}
 		// run step failed
@@ -793,7 +798,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)
 			Str("type", stepType).
 			Bool("success", false).
 			Int64("elapsed(ms)", stepResult.Elapsed).
-			Msg("run step end")
+			Msg(RUN_STEP_END)
 		return stepResult, err
 	}

--- a/step.go
+++ b/step.go
@@ -58,7 +58,7 @@ type TStep struct {
 // one step contains one or multiple actions
 type ActionResult struct {
 	option.MobileAction `json:",inline"`
-	StartTime           int64                           `json:"start_time"`            // action start time
+	StartTime           int64                           `json:"start_time"`            // action start time in milliseconds (ms)
 	Elapsed             int64                           `json:"elapsed_ms"`            // action elapsed time(ms)
 	Error               error                           `json:"error"`                 // action execution result
 	Plannings           []*uixt.PlanningExecutionResult `json:"plannings,omitempty"`   // store planning results for start_to_goal actions
@@ -69,7 +69,7 @@ type ActionResult struct {
 type StepResult struct {
 	Name        string                 `json:"name" yaml:"name"`                                     // step name
 	Identifier  string                 `json:"identifier,omitempty" yaml:"identifier,omitempty"`     // step identifier
-	StartTime   int64                  `json:"start_time" yaml:"time"`                               // step start time
+	StartTime   int64                  `json:"start_time" yaml:"time"`                               // step start time in milliseconds (ms)
 	StepType    StepType               `json:"step_type" yaml:"step_type"`                           // step type, testcase/request/transaction/rendezvous
 	Success     bool                   `json:"success" yaml:"success"`                               // step execution result
 	Elapsed     int64                  `json:"elapsed_ms" yaml:"elapsed_ms"`                         // step execution time in millisecond(ms)
--- a/step_function.go
+++ b/step_function.go
@@ -27,10 +27,7 @@ func (s *StepFunction) Type() StepType {
 }

 func (s *StepFunction) Config() *StepConfig {
-	return &StepConfig{
-		StepName:  s.StepName,
-		Variables: s.Variables,
-	}
+	return &s.StepConfig
 }

 func (s *StepFunction) Run(r *SessionRunner) (*StepResult, error) {
@@ -57,7 +54,7 @@ func runStepFunction(r *SessionRunner, step IStep) (stepResult *StepResult, err
 		StepType:    step.Type(),
 		Success:     false,
 		ContentSize: 0,
-		StartTime:   start.Unix(),
+		StartTime:   start.UnixMilli(),
 	}
 	defer func() {
 		attachments := uixt.Attachments{}
--- a/step_rendezvous.go
+++ b/step_rendezvous.go
@@ -26,10 +26,7 @@ func (s *StepRendezvous) Type() StepType {
 }

 func (s *StepRendezvous) Config() *StepConfig {
-	return &StepConfig{
-		StepName:  s.StepName,
-		Variables: s.Variables,
-	}
+	return &s.StepConfig
 }

 func (s *StepRendezvous) Run(r *SessionRunner) (*StepResult, error) {
--- a/step_request.go
+++ b/step_request.go
@@ -285,7 +285,7 @@ func runStepRequest(r *SessionRunner, step IStep) (stepResult *StepResult, err e
 		StepType:    step.Type(),
 		Success:     false,
 		ContentSize: 0,
-		StartTime:   start.Unix(),
+		StartTime:   start.UnixMilli(),
 	}

 	defer func() {
--- a/step_shell.go
+++ b/step_shell.go
@@ -30,10 +30,7 @@ func (s *StepShell) Type() StepType {
 }

 func (s *StepShell) Config() *StepConfig {
-	return &StepConfig{
-		StepName:  s.StepName,
-		Variables: s.Variables,
-	}
+	return &s.StepConfig
 }

 func (s *StepShell) Run(r *SessionRunner) (*StepResult, error) {
@@ -63,10 +60,7 @@ func (s *StepShellValidation) Type() StepType {
 }

 func (s *StepShellValidation) Config() *StepConfig {
-	return &StepConfig{
-		StepName:  s.StepName,
-		Variables: s.Variables,
-	}
+	return &s.StepConfig
 }

 func (s *StepShellValidation) Run(r *SessionRunner) (*StepResult, error) {
@@ -101,7 +95,7 @@ func runStepShell(r *SessionRunner, step IStep) (stepResult *StepResult, err err
 		StepType:    step.Type(),
 		Success:     false,
 		ContentSize: 0,
-		StartTime:   start.Unix(),
+		StartTime:   start.UnixMilli(),
 	}
 	defer func() {
 		stepResult.Elapsed = time.Since(start).Milliseconds()
--- a/step_testcase.go
+++ b/step_testcase.go
@@ -51,7 +51,7 @@ func (s *StepTestCaseWithOptionalArgs) Run(r *SessionRunner) (stepResult *StepRe
 		Name:      s.Name(),
 		StepType:  s.Type(),
 		Success:   false,
-		StartTime: start.Unix(),
+		StartTime: start.UnixMilli(),
 	}

 	defer func() {
--- a/step_ui.go
+++ b/step_ui.go
@@ -691,11 +691,11 @@ func (s *StepMobileUIValidation) Type() StepType {
 }

 func (s *StepMobileUIValidation) Config() *StepConfig {
-	return &StepConfig{
-		StepName:   s.StepName,
-		Variables:  s.Variables,
-		Validators: s.Validators,
-	}
+	// Get the original StepConfig from embedded StepMobile
+	config := &s.StepMobile.StepConfig
+	// Sync validators to the StepConfig
+	config.Validators = s.Validators
+	return config
 }

 func (s *StepMobileUIValidation) Run(r *SessionRunner) (*StepResult, error) {
@@ -709,7 +709,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 		StepType:    step.Type(),
 		Success:     false,
 		ContentSize: 0,
-		StartTime:   start.Unix(),
+		StartTime:   start.UnixMilli(),
 	}

 	var stepVariables map[string]interface{}
@@ -781,7 +781,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 					Method: option.ACTION_GetForegroundApp,
 					Params: "[ForDebug] check foreground app",
 				},
-				StartTime: startTime.Unix(),
+				StartTime: startTime.UnixMilli(),
 			}
 			subActionResults, err1 := uiDriver.ExecuteAction(
 				context.Background(), actionResult.MobileAction)
@@ -793,6 +793,16 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 			stepResult.Actions = append(stepResult.Actions, actionResult)
 		}

+		// Get session data and add to attachments, clear session for next step
+		if uiDriver != nil {
+			sessionData := uiDriver.GetSession().GetData(true) // clear session after getting data
+			if len(sessionData.ScreenResults) > 0 {
+				attachments["screen_results"] = sessionData.ScreenResults
+				log.Debug().Int("count", len(sessionData.ScreenResults)).
+					Str("step", step.Name()).Msg("added screen results to step attachments")
+			}
+		}
+
 		var config *TConfig
 		if s.caseRunner != nil && s.caseRunner.Config != nil {
 			config = s.caseRunner.Config.Get()
@@ -815,7 +825,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 					Method: option.ACTION_ClosePopups,
 					Params: "[ForDebug] close popups handler",
 				},
-				StartTime: startTime.Unix(),
+				StartTime: startTime.UnixMilli(),
 			}
 			subActionResults, err2 := uiDriver.ExecuteAction(
 				context.Background(), actionResult.MobileAction)
@@ -842,10 +852,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 			return stepResult, errors.Wrap(code.InterruptError, "mobile UI runner interrupted")
 		default:
 			actionStartTime := time.Now()
-			actionResult := &ActionResult{
-				MobileAction: action,
-				StartTime:    actionStartTime.Unix(), // action 开始时间
-			}
+			// Parse action params first for variable substitution
 			if action.Params, err = s.caseRunner.parser.Parse(action.Params, stepVariables); err != nil {
 				if !code.IsErrorPredefined(err) {
 					err = errors.Wrap(code.ParseError,
@@ -854,6 +861,12 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 				return stepResult, err
 			}

+			// Create ActionResult with parsed params for accurate reporting
+			actionResult := &ActionResult{
+				MobileAction: action,                      // Now contains parsed params
+				StartTime:    actionStartTime.UnixMilli(), // action start time
+			}
+
 			// Apply global configuration from testcase config
 			if s.caseRunner != nil && s.caseRunner.Config != nil {
 				config := s.caseRunner.Config.Get()
@@ -951,55 +964,111 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
 	}

 	// validate
-	validateResults, err := validateUI(uiDriver, stepValidators)
-	if err != nil {
-		if !code.IsErrorPredefined(err) {
-			err = errors.Wrap(code.MobileUIValidationError, err.Error())
-		}
-		return
-	}
+	validateResults, err := validateUI(uiDriver, stepValidators, s.caseRunner.parser, stepVariables)
 	if len(validateResults) > 0 {
+		// Always save validation results if any exist, regardless of success or failure
 		sessionData := &SessionData{
 			Validators: validateResults,
 		}
 		stepResult.Data = sessionData
 	}
+	if err != nil {
+		// Handle validation error after saving results
+		if !code.IsErrorPredefined(err) {
+			err = errors.Wrap(code.MobileUIValidationError, err.Error())
+		}
+		return stepResult, err
+	}
+
 	stepResult.Success = true
 	return stepResult, nil
 }

-func validateUI(ud *uixt.XTDriver, iValidators []interface{}) (validateResults []*ValidationResult, err error) {
+func validateUI(ud *uixt.XTDriver, iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) (validateResults []*ValidationResult, err error) {
+	// Parse all validators for variable substitution
+	parsedValidators, err := parseStepValidators(iValidators, parser, stepVariables)
+	if err != nil {
+		return nil, err
+	}
+
+	// Execute validation for each parsed validator
+	for _, validator := range parsedValidators {
+		// Debug: print validator details
+		log.Debug().
+			Str("check", validator.Check).
+			Str("assert", validator.Assert).
+			Interface("expect", validator.Expect).
+			Str("message", validator.Message).
+			Msg("processing validator")
+
+		validationResult := &ValidationResult{
+			Validator:   validator, // Use parsed validator for accurate reporting
+			CheckResult: "fail",
+		}
+
+		// Skip validators that are neither UI checks ("ui_" prefix) nor ai_assert assertions
+		if !strings.HasPrefix(validator.Check, "ui_") && validator.Assert != "ai_assert" {
+			validationResult.CheckResult = "skip"
+			log.Warn().Interface("validator", validator).Msg("skip validator")
+			validateResults = append(validateResults, validationResult)
+			continue
+		}
+
+		// Validate expected value type
+		expected, ok := validator.Expect.(string)
+		if !ok {
+			return nil, errors.New("validator expect should be string")
+		}
+
+		// Perform validation
+		err = ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message)
+		if err != nil {
+			// Add the failed validation result to the list before returning error
+			validateResults = append(validateResults, validationResult)
+			return validateResults, errors.Wrap(err, "step validation failed")
+		}
+
+		validationResult.CheckResult = "pass"
+		validateResults = append(validateResults, validationResult)
+	}
+
+	return validateResults, nil
+}
+
+// parseStepValidators returns a copy of each validator with variables substituted in its Expect (when it is a string) and Message fields
+func parseStepValidators(iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) ([]Validator, error) {
+	var parsedValidators []Validator
+
 	for _, iValidator := range iValidators {
 		validator, ok := iValidator.(Validator)
 		if !ok {
 			return nil, errors.New("validator type error")
 		}

-		validataResult := &ValidationResult{
-			Validator:   validator,
-			CheckResult: "fail",
+		parsedValidator := validator
+
+		// Parse Expect field for variable substitution
+		if expectedStr, ok := validator.Expect.(string); ok {
+			if parsedExpected, err := parser.Parse(expectedStr, stepVariables); err != nil {
+				return nil, errors.Wrap(err, "failed to parse validator expect field")
+			} else {
+				parsedValidator.Expect = parsedExpected
+			}
 		}

-		// parse check value
-		if !strings.HasPrefix(validator.Check, "ui_") {
-			validataResult.CheckResult = "skip"
-			log.Warn().Interface("validator", validator).Msg("skip validator")
-			validateResults = append(validateResults, validataResult)
-			continue
+		// Parse Message field for variable substitution
+		if validator.Message != "" {
+			if parsedMessage, err := parser.Parse(validator.Message, stepVariables); err != nil {
+				return nil, errors.Wrap(err, "failed to parse validator message field")
+			} else {
+				if msgStr, ok := parsedMessage.(string); ok {
+					parsedValidator.Message = msgStr
+				}
+			}
 		}

-		expected, ok := validator.Expect.(string)
-		if !ok {
-			return nil, errors.New("validator expect should be string")
-		}
-
-		err := ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message)
-		if err != nil {
-			return validateResults, errors.Wrap(err, "step validation failed")
-		}
-
-		validataResult.CheckResult = "pass"
-		validateResults = append(validateResults, validataResult)
+		parsedValidators = append(parsedValidators, parsedValidator)
 	}
-	return validateResults, nil
+
+	return parsedValidators, nil
 }
--- a/step_websocket.go
+++ b/step_websocket.go
@@ -381,7 +381,7 @@ func runStepWebSocket(r *SessionRunner, step IStep) (stepResult *StepResult, err
 		StepType:    step.Type(),
 		Success:     false,
 		ContentSize: 0,
-		StartTime:   start.Unix(),
+		StartTime:   start.UnixMilli(),
 	}

 	defer func() {
--- a/uixt/ai/asserter.go
+++ b/uixt/ai/asserter.go
@@ -10,10 +10,10 @@ import (
 	"github.com/cloudwego/eino/schema"
 	"github.com/getkin/kin-openapi/openapi3gen"
 	"github.com/httprunner/httprunner/v5/code"
-	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
 	"github.com/httprunner/httprunner/v5/uixt/types"
 	"github.com/pkg/errors"
+	"github.com/rs/zerolog/log"
 )

 // IAsserter interface defines the contract for assertion operations
@@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error {

 // parseAssertionResult parses the model response into AssertionResponse
 func parseAssertionResult(content string) (*AssertionResult, error) {
-	// Extract JSON content from response
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		return nil, errors.New("could not extract JSON from response")
-	}
-
-	// Parse JSON response
 	var result AssertionResult
-	if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
+
+	// Use the generic structured response parser
+	if err := parseStructuredResponse(content, &result); err != nil {
+		log.Warn().
+			Interface("original_content", content).
+			Msg("parse assertion result failed")
 		return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
 	}

--- a/uixt/ai/asserter_test.go
+++ b/uixt/ai/asserter_test.go
@@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) {
 		})
 	}
 }
+
+// TestParseAssertionResult checks that parseAssertionResult succeeds on clean JSON, JSON embedded in noisy text, and plain-text responses
+func TestParseAssertionResult(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid JSON response",
+			input:         `{"pass": true, "thought": "Assertion passed"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "response with UTF-8 replacement characters",
+			input:         "浅蓝色的搜索框，里面显示着输入的\"ma\"，而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}",
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed JSON with extraction",
+			input:         `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "completely malformed but analyzable",
+			input:         "This assertion test passed and was successful",
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := parseAssertionResult(tt.input)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotNil(t, result)
+				assert.NotEmpty(t, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
--- a/uixt/ai/parser_default.go
+++ b/uixt/ai/parser_default.go
@@ -5,7 +5,6 @@ import (
 	"strings"

 	"github.com/cloudwego/eino/schema"
-	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
 	"github.com/httprunner/httprunner/v5/uixt/types"
 	"github.com/pkg/errors"
@@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string {
 func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) {
 	content = strings.TrimSpace(content)

-	// Extract JSON content from markdown code blocks
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		return nil, fmt.Errorf("no valid JSON content found in response")
-	}
-
-	// Define a temporary struct to parse the expected JSON format
-	var jsonResponse struct {
-		Actions []Action `json:"actions"`
-		Thought string   `json:"thought"`
-		Error   string   `json:"error"`
-	}
-
-	if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil {
+	// Use the generic structured response parser
+	var jsonResponse PlanningJSONResponse
+	if err := parseStructuredResponse(content, &jsonResponse); err != nil {
 		return nil, fmt.Errorf("failed to parse VLM response: %v", err)
 	}

--- a/uixt/ai/planner_prompts.go
+++ b/uixt/ai/planner_prompts.go
@@ -41,6 +41,8 @@ var doubao_1_5_ui_tars_action_mapping = map[string]option.ActionName{
 	"type":         option.ACTION_Input,
 	"scroll":       option.ACTION_Swipe, // swipe up/down/left/right
 	"wait":         option.ACTION_Sleep,
+	"press_home":   option.ACTION_Home,
+	"press_back":   option.ACTION_Back,
 	"finished":     option.ACTION_Finished,
 }

@@ -138,5 +140,7 @@ var doubao_1_5_thinking_vision_pro_action_mapping = map[string]option.ActionName
 	"type":         option.ACTION_Input,
 	"scroll":       option.ACTION_Swipe, // swipe up/down/left/right
 	"wait":         option.ACTION_Sleep,
+	"press_home":   option.ACTION_Home,
+	"press_back":   option.ACTION_Back,
 	"finished":     option.ACTION_Finished,
 }
--- a/uixt/ai/querier.go
+++ b/uixt/ai/querier.go
@@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error {

 // parseQueryResult parses the model response into QueryResult
 func parseQueryResult(content string) (*QueryResult, error) {
-	// Extract JSON content from response
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		// If no JSON found, treat the entire content as the result
-		// This handles cases where the model returns plain text instead of JSON
-		return &QueryResult{
-			Content: content,
-			Thought: "Direct response from model",
-		}, nil
-	}
-
-	// Parse JSON response
 	var result QueryResult
-	if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
-		// If JSON parsing fails, treat the content as plain text result
+
+	// Use the generic structured response parser with enhanced error recovery
+	if err := parseStructuredResponse(content, &result); err != nil {
+		// If parseStructuredResponse fails completely, treat content as plain text
 		return &QueryResult{
 			Content: content,
-			Thought: "Failed to parse as JSON, returning raw content",
+			Thought: "Failed to parse response, returning raw content",
 		}, nil
 	}

--- a/uixt/ai/querier_test.go
+++ b/uixt/ai/querier_test.go
@@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) {
 		expected *QueryResult
 	}{
 		{
-			name: "valid JSON response",
-			content: `{
-				"content": "这是一个14行8列的连连看游戏界面，包含25种不同的图案",
-				"thought": "通过分析图片，我识别出了游戏界面的结构和图案类型"
-			}`,
+			name:    "valid JSON response",
+			content: `{"content": "extracted information", "thought": "analysis complete"}`,
 			expected: &QueryResult{
-				Content: "这是一个14行8列的连连看游戏界面，包含25种不同的图案",
-				Thought: "通过分析图片，我识别出了游戏界面的结构和图案类型",
+				Content: "extracted information",
+				Thought: "analysis complete",
 			},
 		},
 		{
 			name:    "JSON in markdown",
-			content: "```json\n{\n  \"content\": \"游戏界面分析结果\",\n  \"thought\": \"分析过程\"\n}\n```",
+			content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```",
 			expected: &QueryResult{
-				Content: "游戏界面分析结果",
-				Thought: "分析过程",
+				Content: "data from markdown",
+				Thought: "parsed from code block",
 			},
 		},
 		{
 			name:    "plain text response",
-			content: "这是一个连连看游戏界面，包含多种图案。",
+			content: "This is just plain text without JSON structure",
 			expected: &QueryResult{
-				Content: "这是一个连连看游戏界面，包含多种图案。",
-				Thought: "Direct response from model",
+				Content: "This is just plain text without JSON structure",
+				Thought: "Failed to parse as JSON, returning raw content",
 			},
 		},
 		{
 			name:    "invalid JSON",
 			content: `{"content": "incomplete json", "missing_closing_brace": true`,
 			expected: &QueryResult{
-				Content: `{"content": "incomplete json", "missing_closing_brace": true`,
-				Thought: "Direct response from model",
+				Content: "incomplete json",
+				Thought: "Partial extraction from malformed response",
 			},
 		},
 	}
--- a/uixt/ai/session.go
+++ b/uixt/ai/session.go
@@ -73,6 +73,11 @@ func (h *ConversationHistory) Clear() {
 	log.Warn().Msg("conversation history cleared completely")
 }

+const (
+	LOG_REQUEST_MESSAGES = "log request messages" // message text of the debug entry emitted by logRequest
+	LOG_RESPONSE_MESSAGE = "log response message" // message text of the entry emitted by logResponse
+)
+
 func logRequest(messages ConversationHistory) {
 	msgs := make(ConversationHistory, 0, len(messages))
 	for _, message := range messages {
@@ -99,7 +104,7 @@ func logRequest(messages ConversationHistory) {
 		}
 		msgs = append(msgs, msg)
 	}
-	log.Debug().Interface("messages", msgs).Msg("log request messages")
+	log.Debug().Interface("messages", msgs).Msg(LOG_REQUEST_MESSAGES)
 }

 func logResponse(message *schema.Message) {
@@ -126,5 +131,5 @@ func logResponse(message *schema.Message) {
 	if message.Extra != nil {
 		logger = logger.Interface("extra", message.Extra)
 	}
-	logger.Msg("log response message")
+	logger.Msg(LOG_RESPONSE_MESSAGE)
 }
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -2,6 +2,7 @@ package ai

 import (
 	"context"
+	"fmt"
 	"regexp"
 	"strings"
 	"time"
@@ -11,9 +12,57 @@ import (
 	"github.com/cloudwego/eino/schema"
 	"github.com/rs/zerolog/log"

+	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
+	"github.com/pkg/errors"
 )

+// PlanningJSONResponse is the decode target for a planner reply: a JSON object with "actions", "thought" and "error" keys.
+type PlanningJSONResponse struct {
+	Actions []Action `json:"actions"` // decoded from the "actions" array
+	Thought string   `json:"thought"` // decoded from the "thought" string
+	Error   string   `json:"error"`   // decoded from the "error" string; parseJSONWithFallback also writes its own failure note here
+}
+
+// parseStructuredResponse sanitizes content, extracts its JSON payload and decodes it into result (a pointer) via parseJSONWithFallback, whose error it returns unchanged.
+func parseStructuredResponse(content string, result interface{}) error {
+	// Strip invalid UTF-8 first so the extraction and decoding steps below see clean text
+	cleanContent := sanitizeUTF8Content(content)
+
+	// Extract JSON content from response
+	jsonContent := extractJSONFromContent(cleanContent)
+	if jsonContent == "" {
+		// Nothing was extracted: hand the whole sanitized text to the parser so its fallback strategies can still run
+		jsonContent = cleanContent
+	}
+
+	// Parse JSON response with error recovery
+	return parseJSONWithFallback(jsonContent, result)
+}
+
+// sanitizeUTF8Content returns content with every invalid UTF-8 byte and every U+FFFD replacement character removed.
+func sanitizeUTF8Content(content string) string {
+	// utf8.ValidString accepts a correctly encoded U+FFFD, so it cannot be the
+	// fast-path check on its own: replacement characters left behind by an
+	// upstream decoder have to be stripped as well.
+	if utf8.ValidString(content) && !strings.ContainsRune(content, utf8.RuneError) {
+		return content
+	}
+
+	var cleaned strings.Builder
+	cleaned.Grow(len(content))
+	// Ranging over a string yields utf8.RuneError both for an undecodable byte and
+	// for a literal U+FFFD, so a single comparison filters out both cases.
+	for _, r := range content {
+		if r == utf8.RuneError {
+			continue
+		}
+		cleaned.WriteRune(r)
+	}
+
+	return cleaned.String()
+}
+
 // extractJSONFromContent extracts JSON content from various formats in the response
 // This function handles multiple formats:
 // 1. ```json ... ``` markdown code blocks
@@ -111,6 +160,294 @@ func extractJSONFromContent(content string) string {
 	return ""
 }

+// parseJSONWithFallback decodes jsonContent into result (a pointer): first as-is, then after cleanJSONContent, then via type-specific recovery; for *AssertionResult, *QueryResult and *PlanningJSONResponse it always returns nil, and only other result types reach the final error.
+func parseJSONWithFallback(jsonContent string, result interface{}) error {
+	// Strategy 1: Direct JSON unmarshaling
+	if err := json.Unmarshal([]byte(jsonContent), result); err == nil {
+		// For specific types, ensure required fields have default values even after successful parsing
+		switch v := result.(type) {
+		case *QueryResult:
+			// Ensure QueryResult has meaningful defaults for empty fields
+			if v.Content == "" && v.Thought == "" {
+				v.Content = "Empty response content"
+				v.Thought = "No content extracted from response"
+			} else if v.Content == "" {
+				v.Content = "No content extracted"
+			} else if v.Thought == "" {
+				v.Thought = "Successfully parsed structured response"
+			}
+		case *AssertionResult:
+			// Ensure AssertionResult has meaningful defaults
+			if v.Thought == "" {
+				v.Thought = "Successfully parsed assertion response"
+			}
+		}
+		return nil
+	}
+
+	// Strategy 2: Try cleaning JSON content and parse again
+	cleanedJSON := cleanJSONContent(jsonContent)
+	if err := json.Unmarshal([]byte(cleanedJSON), result); err == nil {
+		// Apply the same default value logic for cleaned JSON
+		switch v := result.(type) {
+		case *QueryResult:
+			if v.Content == "" && v.Thought == "" {
+				v.Content = "Empty response content"
+				v.Thought = "No content extracted from response"
+			} else if v.Content == "" {
+				v.Content = "No content extracted"
+			} else if v.Thought == "" {
+				v.Thought = "Successfully parsed structured response"
+			}
+		case *AssertionResult:
+			if v.Thought == "" {
+				v.Thought = "Successfully parsed assertion response"
+			}
+		}
+		return nil
+	}
+
+	// Strategy 3: type-specific recovery on the original (uncleaned) jsonContent; each known type overwrites *v wholesale
+	switch v := result.(type) {
+	case *AssertionResult:
+		if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Final fallback for assertions: keyword heuristic, so Pass is a guess rather than a parsed value
+		*v = *analyzeContentForAssertion(jsonContent)
+		return nil
+
+	case *QueryResult:
+		// For QueryResult, try basic field extraction
+		if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback to treating content as plain text
+		*v = QueryResult{
+			Content: jsonContent,
+			Thought: "Failed to parse as JSON, returning raw content",
+		}
+		return nil
+
+	case *PlanningJSONResponse:
+		// For PlanningJSONResponse, try basic field extraction
+		if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback with empty actions and fixed placeholder thought/error text (nothing from jsonContent is carried over)
+		*v = PlanningJSONResponse{
+			Actions: []Action{},
+			Thought: "Failed to parse structured response",
+			Error:   "JSON parsing failed, returning minimal structure",
+		}
+		return nil
+	}
+
+	return errors.New("failed to parse JSON with all strategies")
+}
+
+// extractAssertionFieldsManually recovers an AssertionResult by substring search: it needs a literal "pass":true or "pass":false (case-insensitive, zero or one space after the colon) and errors otherwise; Thought is filled only when the "thought" value is a quoted string.
+func extractAssertionFieldsManually(content string) (*AssertionResult, error) {
+	result := &AssertionResult{}
+
+	// Try to extract "pass" field; when both spellings occur, true wins because it is checked first
+	if strings.Contains(strings.ToLower(content), `"pass":true`) ||
+		strings.Contains(strings.ToLower(content), `"pass": true`) {
+		result.Pass = true
+	} else if strings.Contains(strings.ToLower(content), `"pass":false`) ||
+		strings.Contains(strings.ToLower(content), `"pass": false`) {
+		result.Pass = false
+	} else {
+		return nil, errors.New("cannot extract pass field")
+	}
+
+	// Try to extract "thought" field (only the first occurrence of the key is examined)
+	thoughtStart := strings.Index(content, `"thought"`)
+	if thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		colonIndex := strings.Index(thoughtSection, ":")
+		if colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				// Find the matching closing quote, handling escaped quotes
+				thoughtContent := extractQuotedString(afterColon)
+				result.Thought = thoughtContent
+			}
+		}
+	}
+
+	return result, nil
+}
+
+// extractQuotedString returns the text between the leading '"' of s and the next unescaped '"' ("" if s does not start with a quote, everything remaining if the string is never closed); a backslash is dropped and the rune after it is copied verbatim, so \n yields "n" rather than a newline.
+func extractQuotedString(s string) string {
+	if !strings.HasPrefix(s, `"`) {
+		return ""
+	}
+
+	s = s[1:] // Remove opening quote
+	var result strings.Builder
+	escaped := false
+
+	for _, r := range s {
+		if escaped {
+			result.WriteRune(r)
+			escaped = false
+			continue
+		}
+
+		if r == '\\' {
+			escaped = true
+			continue
+		}
+
+		if r == '"' {
+			// Found closing quote
+			return result.String()
+		}
+
+		result.WriteRune(r)
+	}
+
+	return result.String()
+}
+
+// cleanJSONContent drops every rune below 0x20 (tab, line feed and carriage return included) plus DEL, then removes a comma that directly precedes '}' or ']'.
+func cleanJSONContent(content string) string {
+	// Remove any non-printable characters
+	cleaned := strings.Map(func(r rune) rune {
+		if r >= 32 && r < 127 || r > 127 { // Keep printable ASCII and Unicode
+			return r
+		}
+		return -1 // Remove non-printable characters
+	}, content)
+
+	// Remove trailing commas before closing braces/brackets (plain substring match: ", }" is left alone, and ",}" inside a string value is rewritten too)
+	cleaned = strings.ReplaceAll(cleaned, ",}", "}")
+	cleaned = strings.ReplaceAll(cleaned, ",]", "]")
+
+	return cleaned
+}
+
+// analyzeContentForAssertion guesses a verdict from keywords when nothing could be parsed: each indicator counts at most once, and Pass is true only when strictly more positive than negative indicators occur (a tie or no match yields false).
+func analyzeContentForAssertion(content string) *AssertionResult {
+	content = strings.ToLower(content)
+
+	// Simple heuristic: look for positive/negative indicators. NOTE(review): matching is by substring, so "incorrect", "invalid" and "mismatch" also hit "correct", "valid" and "match".
+	positiveIndicators := []string{"true", "pass", "success", "correct", "valid", "match"}
+	negativeIndicators := []string{"false", "fail", "error", "incorrect", "invalid", "mismatch"}
+
+	positiveCount := 0
+	negativeCount := 0
+
+	for _, indicator := range positiveIndicators {
+		if strings.Contains(content, indicator) {
+			positiveCount++
+		}
+	}
+
+	for _, indicator := range negativeIndicators {
+		if strings.Contains(content, indicator) {
+			negativeCount++
+		}
+	}
+
+	pass := positiveCount > negativeCount
+	thought := fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)",
+		positiveCount, negativeCount)
+
+	return &AssertionResult{
+		Pass:    pass,
+		Thought: thought,
+	}
+}
+
+// extractQueryFieldsManually recovers a QueryResult by looking at the first occurrence of the "content" and "thought" keys and reading each value if it is a quoted string; it errors when neither yields text and substitutes a placeholder for whichever one is missing.
+func extractQueryFieldsManually(content string) (*QueryResult, error) {
+	result := &QueryResult{}
+
+	// Try to extract "content" field
+	if contentStart := strings.Index(content, `"content"`); contentStart != -1 {
+		contentSection := content[contentStart:]
+		if colonIndex := strings.Index(contentSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(contentSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Content = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// Try to extract "thought" field
+	if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Thought = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// If we couldn't extract any fields, return error (an extracted empty string counts as not extracted)
+	if result.Content == "" && result.Thought == "" {
+		return nil, errors.New("cannot extract content or thought fields")
+	}
+
+	// Set defaults for missing fields (ALWAYS set defaults if any field was extracted)
+	if result.Content == "" {
+		result.Content = "Extracted partial information"
+	}
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
+// extractPlanningFieldsManually recovers only the "thought" and "error" strings of a planner reply; Actions is always returned empty, and an error is returned when neither string could be read.
+func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) {
+	result := &PlanningJSONResponse{
+		Actions: []Action{}, // Default to empty actions
+	}
+
+	// Try to extract "thought" field
+	if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Thought = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// Try to extract "error" field
+	if errorStart := strings.Index(content, `"error"`); errorStart != -1 {
+		errorSection := content[errorStart:]
+		if colonIndex := strings.Index(errorSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(errorSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Error = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// If we couldn't extract any meaningful fields, return error
+	if result.Thought == "" && result.Error == "" {
+		return nil, errors.New("cannot extract thought or error fields")
+	}
+
+	// Set defaults for missing fields (only Thought gets a placeholder; Error stays empty)
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
 // callModelWithLogging is a common function to call model with logging and timing
 // It handles the common pattern of:
 // 1. Log request
--- a/uixt/ai/utils_test.go
+++ b/uixt/ai/utils_test.go
@@ -4,195 +4,701 @@ import (
 	"testing"

 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )

 func TestExtractJSONFromContent(t *testing.T) {
 	tests := []struct {
 		name     string
-		content  string
+		input    string
 		expected string
 	}{
 		{
-			name: "simple JSON",
-			content: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [371, 235, 425, 270]
-      }
-    }
-  ],
-  "thought": "点击桌面上的抖音应用图标以启动抖音",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [371, 235, 425, 270]
-      }
-    }
-  ],
-  "thought": "点击桌面上的抖音应用图标以启动抖音",
-  "error": null
-}`,
+			name:     "simple JSON object",
+			input:    `{"key": "value"}`,
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with Chinese characters in strings",
-			content: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "2048经典"
-      }
-    }
-  ],
-  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "2048经典"
-      }
-    }
-  ],
-  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
-  "error": null
-}`,
+			name:     "JSON in markdown code block",
+			input:    "```json\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with markdown wrapper",
-			content: "```json\n" + `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250]
-      }
-    }
-  ],
-  "thought": "点击按钮",
-  "error": null
-}` + "\n```",
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250]
-      }
-    }
-  ],
-  "thought": "点击按钮",
-  "error": null
-}`,
+			name:     "JSON in code block without language",
+			input:    "```\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON embedded in text with Chinese",
-			content: `这是一个包含中文的响应：{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "测试内容"
-      }
-    }
-  ],
-  "thought": "这是一个测试思路",
-  "error": null
-} 后面还有一些文本`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "测试内容"
-      }
-    }
-  ],
-  "thought": "这是一个测试思路",
-  "error": null
-}`,
+			name:     "JSON with surrounding text",
+			input:    `Here is the result: {"key": "value"} and some more text`,
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with escaped quotes and Chinese",
-			content: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "他说：\"你好，世界！\""
-      }
-    }
-  ],
-  "thought": "输入包含引号的中文文本",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "他说：\"你好，世界！\""
-      }
-    }
-  ],
-  "thought": "输入包含引号的中文文本",
-  "error": null
-}`,
+			name:     "multiple JSON objects",
+			input:    `{"first": "object"} and {"second": "object"}`,
+			expected: `{"first": "object"}`,
 		},
 		{
-			name:     "no JSON content",
-			content:  "这只是一些普通的文本，没有JSON内容",
+			name:     "nested JSON in markdown",
+			input:    "```json\n{\"data\": {\"nested\": \"value\"}}\n```",
+			expected: `{"data": {"nested": "value"}}`,
+		},
+		{
+			name:     "JSON array",
+			input:    `[{"item": 1}, {"item": 2}]`,
+			expected: `[{"item": 1}, {"item": 2}]`,
+		},
+		{
+			name:     "JSON array in markdown",
+			input:    "```json\n[{\"item\": 1}, {\"item\": 2}]\n```",
+			expected: `[{"item": 1}, {"item": 2}]`,
+		},
+		{
+			name:     "text without JSON",
+			input:    "This is just plain text without any JSON",
 			expected: "",
 		},
 		{
-			name: "nested JSON objects with Chinese",
-			content: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250],
-        "metadata": {
-          "description": "点击操作",
-          "target": "按钮"
-        }
-      }
-    }
-  ],
-  "thought": "执行嵌套对象的点击操作",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250],
-        "metadata": {
-          "description": "点击操作",
-          "target": "按钮"
-        }
-      }
-    }
-  ],
-  "thought": "执行嵌套对象的点击操作",
-  "error": null
-}`,
+			name:     "malformed JSON",
+			input:    `{"key": "value"`,
+			expected: `{"key": "value"`,
+		},
+		{
+			name:     "JSON with unicode",
+			input:    `{"message": "测试消息"}`,
+			expected: `{"message": "测试消息"}`,
+		},
+		{
+			name:     "multiple code blocks, select first JSON",
+			input:    "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```",
+			expected: `{"first": true}`,
+		},
+		{
+			name:     "mixed language code blocks",
+			input:    "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "JSON with special characters",
+			input:    `{"special": "chars: @#$%^&*()"}`,
+			expected: `{"special": "chars: @#$%^&*()"}`,
+		},
+		{
+			name:     "empty JSON object",
+			input:    `{}`,
+			expected: `{}`,
+		},
+		{
+			name:     "empty JSON array",
+			input:    `[]`,
+			expected: `[]`,
+		},
+		{
+			name:     "JSON with line breaks",
+			input:    "{\n  \"key\": \"value\",\n  \"number\": 123\n}",
+			expected: "{\n  \"key\": \"value\",\n  \"number\": 123\n}",
+		},
+		{
+			name:     "markdown with extra whitespace",
+			input:    "  ```json  \n  {\"key\": \"value\"}  \n  ```  ",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "code block with tildes",
+			input:    "~~~json\n{\"key\": \"value\"}\n~~~",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "JSON after other text patterns",
+			input:    `The response should be formatted as: {"status": "success"}`,
+			expected: `{"status": "success"}`,
+		},
+		{
+			name:     "JSON in mixed content",
+			input:    `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`,
+			expected: `{"analysis": "positive", "confidence": 0.95}`,
+		},
+		{
+			name:     "complex nested JSON",
+			input:    `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+			expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+		},
+		{
+			name:     "JSON with escaped quotes",
+			input:    `{"message": "He said \"Hello\" to me"}`,
+			expected: `{"message": "He said \"Hello\" to me"}`,
 		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := extractJSONFromContent(tt.content)
+			result := extractJSONFromContent(tt.input)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
 }
+
+func TestSanitizeUTF8Content(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "valid UTF-8",
+			input:    "Hello 世界",
+			expected: "Hello 世界",
+		},
+		{
+			name:     "invalid UTF-8 with replacement characters",
+			input:    "Hello \ufffd\ufffd World",
+			expected: "Hello  World",
+		},
+		{
+			name:     "mixed valid and invalid",
+			input:    "测试\ufffd消息\ufffd",
+			expected: "测试消息",
+		},
+		{
+			name:     "only replacement characters",
+			input:    "\ufffd\ufffd\ufffd",
+			expected: "",
+		},
+		{
+			name:     "empty string",
+			input:    "",
+			expected: "",
+		},
+		{
+			name:     "ASCII only",
+			input:    "Hello World 123",
+			expected: "Hello World 123",
+		},
+		{
+			name:     "JSON with UTF-8 issues",
+			input:    `{"message": "搜索框\ufffd\ufffd显示"}`,
+			expected: `{"message": "搜索框显示"}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := sanitizeUTF8Content(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestParseJSONWithFallback(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedValid   bool
+		expectedPass    bool
+		expectedThought string
+	}{
+		{
+			name:            "valid JSON",
+			input:           `{"pass": true, "thought": "test passed"}`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "test passed",
+		},
+		{
+			name:            "valid JSON with false",
+			input:           `{"pass": false, "thought": "test failed"}`,
+			expectedValid:   true,
+			expectedPass:    false,
+			expectedThought: "test failed",
+		},
+		{
+			name:            "malformed JSON with extractable fields",
+			input:           `malformed start {"pass": true, "thought": "extracted"} end`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "extracted",
+		},
+		{
+			name:            "content analysis fallback - positive",
+			input:           `The test was successful and passed with true result`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "Fallback analysis of malformed response (positive: 3, negative: 0)",
+		},
+		{
+			name:            "content analysis fallback - negative",
+			input:           `The test failed with false result and error occurred`,
+			expectedValid:   true,
+			expectedPass:    false,
+			expectedThought: "Fallback analysis of malformed response (positive: 0, negative: 3)",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result AssertionResult
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedPass, result.Pass)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestExtractAssertionFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedPass    bool
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "pass true",
+			input:           `{"pass": true, "thought": "manual test"}`,
+			expectedPass:    true,
+			expectedThought: "manual test",
+			shouldError:     false,
+		},
+		{
+			name:            "pass false",
+			input:           `{"pass": false, "thought": "manual fail"}`,
+			expectedPass:    false,
+			expectedThought: "manual fail",
+			shouldError:     false,
+		},
+		{
+			name:        "no pass field",
+			input:       `{"thought": "no pass field"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractAssertionFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedPass, result.Pass)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			}
+		})
+	}
+}
+
+func TestExtractQuotedString(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "simple quoted string",
+			input:    `"hello world"`,
+			expected: "hello world",
+		},
+		{
+			name:     "quoted string with escaped quotes",
+			input:    `"He said \"Hello\""`,
+			expected: `He said "Hello"`,
+		},
+		{
+			name:     "quoted string with escaped backslash",
+			input:    `"path\\to\\file"`,
+			expected: `path\to\file`,
+		},
+		{
+			name:     "empty quoted string",
+			input:    `""`,
+			expected: "",
+		},
+		{
+			name:     "quoted string with unicode",
+			input:    `"测试消息"`,
+			expected: "测试消息",
+		},
+		{
+			name:     "not a quoted string",
+			input:    "hello world",
+			expected: "",
+		},
+		{
+			name:     "unclosed quoted string",
+			input:    `"unclosed string`,
+			expected: "unclosed string",
+		},
+		{
+			name:     "quoted string with extra content after",
+			input:    `"content" and more`,
+			expected: "content",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := extractQuotedString(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestCleanJSONContent(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "remove trailing comma in object",
+			input:    `{"key": "value",}`,
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "remove trailing comma in array",
+			input:    `["item1", "item2",]`,
+			expected: `["item1", "item2"]`,
+		},
+		{
+			name:     "clean non-printable characters",
+			input:    "{\n\"key\": \"value\"\u0000\u0001}",
+			expected: "{\"key\": \"value\"}", // cleanJSONContent drops every rune below 0x20, so the '\n' is removed as well
+		},
+		{
+			name:     "preserve unicode characters",
+			input:    `{"message": "测试消息"}`,
+			expected: `{"message": "测试消息"}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := cleanJSONContent(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestAnalyzeContentForAssertion(t *testing.T) {
+	tests := []struct {
+		name         string
+		input        string
+		expectedPass bool
+	}{
+		{
+			name:         "positive indicators",
+			input:        "The test was successful and passed",
+			expectedPass: true,
+		},
+		{
+			name:         "negative indicators",
+			input:        "The test failed with error",
+			expectedPass: false,
+		},
+		{
+			name:         "mixed with more positive",
+			input:        "Some errors occurred but overall test passed successfully",
+			expectedPass: true,
+		},
+		{
+			name:         "no clear indicators",
+			input:        "This is just plain text",
+			expectedPass: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := analyzeContentForAssertion(tt.input)
+			assert.Equal(t, tt.expectedPass, result.Pass)
+			assert.NotEmpty(t, result.Thought)
+		})
+	}
+}
+
+func TestParseStructuredResponse(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid AssertionResult JSON",
+			input:         `{"pass": true, "thought": "test passed"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed JSON with extractable fields",
+			input:         `malformed start {"pass": false, "thought": "extracted thought"} end`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "UTF-8 issues with JSON",
+			input:         "测试结果：\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}",
+			shouldSucceed: true,
+		},
+		{
+			name:          "content analysis fallback",
+			input:         "The assertion was successful and passed correctly",
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result AssertionResult
+			err := parseStructuredResponse(tt.input, &result)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotEmpty(t, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+// TestParseJSONWithFallback_QueryResult runs parseJSONWithFallback on valid, partially malformed, and plain-text input decoded into a QueryResult.
+func TestParseJSONWithFallback_QueryResult(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedValid   bool
+		expectedContent string
+		expectedThought string
+	}{
+		{
+			name:            "valid QueryResult JSON",
+			input:           `{"content": "extracted info", "thought": "analysis complete"}`,
+			expectedValid:   true,
+			expectedContent: "extracted info",
+			expectedThought: "analysis complete",
+		},
+		{
+			name:            "malformed QueryResult with extractable fields",
+			input:           `malformed { "content": "partial info", "thought": "partial analysis" } more text`,
+			expectedValid:   true,
+			expectedContent: "partial info",
+			expectedThought: "partial analysis",
+		},
+		{
+			name:            "completely malformed QueryResult",
+			input:           `This is just plain text with no structure`,
+			expectedValid:   true, // plain text is still expected to parse without error; Content should equal the raw input
+			expectedContent: "This is just plain text with no structure",
+			expectedThought: "Failed to parse as JSON, returning raw content",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result QueryResult
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedContent, result.Content)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			} else { // not reached by the cases above: all of them set expectedValid to true
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestParseJSONWithFallback_PlanningResponse(t *testing.T) { // checks parseJSONWithFallback when the target is a PlanningJSONResponse
+	tests := []struct { // each case: raw input plus the Thought, Error and number of Actions expected after parsing
+		name            string
+		input           string
+		expectedValid   bool
+		expectedThought string
+		expectedError   string
+		expectedActions int
+	}{
+		{
+			name:            "valid PlanningJSONResponse",
+			input:           `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`,
+			expectedValid:   true,
+			expectedThought: "planning complete",
+			expectedError:   "",
+			expectedActions: 1,
+		},
+		{
+			name:            "malformed PlanningResponse with extractable thought",
+			input:           `malformed { "thought": "partial planning" } more text`,
+			expectedValid:   true,
+			expectedThought: "partial planning",
+			expectedActions: 0, // the input has no "actions" key; expectedError is left at its zero value ""
+		},
+		{
+			name:            "completely malformed PlanningResponse",
+			input:           `This is just plain text with no structure`,
+			expectedValid:   true, // plain text is expected to yield a placeholder response rather than an error
+			expectedThought: "Failed to parse structured response",
+			expectedError:   "JSON parsing failed, returning minimal structure",
+			expectedActions: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result PlanningJSONResponse
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+				assert.Equal(t, tt.expectedError, result.Error)
+				assert.Len(t, result.Actions, tt.expectedActions)
+			} else { // not reached by the cases above: all of them set expectedValid to true
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestExtractQueryFieldsManually(t *testing.T) { // checks extractQueryFieldsManually on inputs holding both, one, or neither of "content" and "thought"
+	tests := []struct {
+		name            string
+		input           string
+		expectedContent string
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "both content and thought",
+			input:           `{"content": "test content", "thought": "test thought"}`,
+			expectedContent: "test content",
+			expectedThought: "test thought",
+			shouldError:     false,
+		},
+		{
+			name:            "only content",
+			input:           `{"content": "only content"}`,
+			expectedContent: "only content",
+			expectedThought: "Partial extraction from malformed response", // placeholder expected when "thought" is missing
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only thought"}`,
+			expectedContent: "Extracted partial information", // placeholder expected when "content" is missing
+			expectedThought: "only thought",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true, // neither "content" nor "thought" is present
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractQueryFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedContent, result.Content)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			}
+		})
+	}
+}
+
+func TestExtractPlanningFieldsManually(t *testing.T) { // checks extractPlanningFieldsManually on inputs holding both, one, or neither of "thought" and "error"
+	tests := []struct {
+		name            string
+		input           string
+		expectedThought string
+		expectedError   string
+		shouldError     bool
+	}{
+		{
+			name:            "both thought and error",
+			input:           `{"thought": "test planning", "error": "test error"}`,
+			expectedThought: "test planning",
+			expectedError:   "test error",
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only planning"}`,
+			expectedThought: "only planning",
+			expectedError:   "", // the input has no "error" key
+			shouldError:     false,
+		},
+		{
+			name:            "only error",
+			input:           `{"error": "only error"}`,
+			expectedThought: "Partial extraction from malformed response", // placeholder expected when "thought" is missing
+			expectedError:   "only error",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true, // neither "thought" nor "error" is present
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractPlanningFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+				assert.Equal(t, tt.expectedError, result.Error)
+				assert.NotNil(t, result.Actions) // Actions is expected to be non-nil although no input here contains actions
+			}
+		})
+	}
+}
+
+// TestParseStructuredResponse_QueryResult runs parseStructuredResponse with a QueryResult target on clean, noisy, and malformed input.
+func TestParseStructuredResponse_QueryResult(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid QueryResult JSON",
+			input:         `{"content": "extracted data", "thought": "processing complete"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "QueryResult with UTF-8 issues",
+			input:         "extracted data: 搜索框，里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}", // \ufffd is U+FFFD, the Unicode replacement character
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed QueryResult",
+			input:         `malformed start {"content": "partial info"} end`, // no "thought" key, yet Thought is asserted non-empty below
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result QueryResult
+			err := parseStructuredResponse(tt.input, &result)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotEmpty(t, result.Content, "Content should not be empty")
+				assert.NotEmpty(t, result.Thought, "Thought should not be empty")
+			} else { // not reached by the cases above: all of them set shouldSucceed to true
+				assert.Error(t, err)
+			}
+		})
+	}
+}
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -59,7 +59,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 					ModelName: "",
 					Error:     err.Error(),
 				},
-				StartTime: planningStartTime.Unix(),
+				StartTime: planningStartTime.UnixMilli(),
 				Elapsed:   time.Since(planningStartTime).Milliseconds(),
 			}
 			allPlannings = append(allPlannings, errorResult)
@@ -67,7 +67,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 		}

 		// Set planning execution timing
-		planningResult.StartTime = planningStartTime.Unix()
+		planningResult.StartTime = planningStartTime.UnixMilli()
 		planningResult.SubActions = []*SubActionResult{}

 		// Check if task is finished BEFORE executing actions
@@ -96,7 +96,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 				subActionResult := &SubActionResult{
 					ActionName: toolCall.Function.Name,
 					Arguments:  toolCall.Function.Arguments,
-					StartTime:  subActionStartTime.Unix(),
+					StartTime:  subActionStartTime.UnixMilli(),
 				}

 				// Use defer to ensure sub-action is always processed and added to results
@@ -164,7 +164,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 	// Step 1: Take screenshot
 	screenshotStartTime := time.Now()
 	// Use GetScreenResult to handle screenshot capture, save, and session tracking
-	screenResult, err := dExt.GetScreenResult(
+	screenResult, err := dExt.createScreenshotWithSession(
 		option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
 	)
 	screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
--- a/uixt/driver_ext_screenshot.go
+++ b/uixt/driver_ext_screenshot.go
@@ -50,27 +50,25 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
 }

 // GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size
+// It also records the screenshot in the session (through createScreenshotWithSession) so the report can display it.
 func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) {
-	compressBufSource, err := getScreenShotBuffer(dExt)
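+	// Capture through createScreenshotWithSession so the screenshot is also recorded in the session; only a file name option is passed, no CV-related one. NOTE(review): the file name is a fixed string — confirm repeated calls do not overwrite earlier files.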
+	screenResult, err := dExt.createScreenshotWithSession(
+		option.WithScreenShotFileName("screenshot_base64"),
+	)
 	if err != nil {
 		return "", types.Size{}, err
 	}

 	// convert buffer to base64 string
 	screenShotBase64 := "data:image/jpeg;base64," +
-		base64.StdEncoding.EncodeToString(compressBufSource.Bytes())
+		base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())

-	// get screen size
-	size, err = dExt.IDriver.WindowSize()
-	if err != nil {
-		return "", types.Size{}, errors.Wrap(err, "get window size failed")
-	}
-
-	return screenShotBase64, size, nil
+	return screenShotBase64, screenResult.Resolution, nil
 }

-// GetScreenResult takes a screenshot, returns the image recognition result
-func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
+// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session
+func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
 	// get compressed screenshot buffer
 	compressBufSource, err := getScreenShotBuffer(dExt.IDriver)
 	if err != nil {
@@ -105,34 +103,40 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult
 		return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
 	}

-	// read image from buffer with CV
+	// create basic screen result
 	screenResult = &ScreenResult{
 		bufSource:  compressBufSource,
 		ImagePath:  imagePath,
 		Tags:       nil,
 		Resolution: windowSize,
 	}
-	imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...)
-	if err != nil {
-		log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed")
-		return nil, err
-	}
-	if imageResult != nil {
-		screenResult.Texts = imageResult.OCRResult.ToOCRTexts()
-		screenResult.UploadedURL = imageResult.URL
-		screenResult.Icons = imageResult.UIResult

-		if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil {
-			screenResult.Popup = &PopupInfo{
-				ClosePopupsResult: imageResult.ClosePopupsResult,
-				PicName:           imagePath,
-				PicURL:            imageResult.URL,
-			}
+	logger := log.Debug().Str("imagePath", imagePath)
+	// perform CV processing if any CV-related option is enabled
+	if needsCVProcessing(screenshotOptions) {
+		imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...)
+		if err != nil {
+			log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed")
+			return nil, err
+		}
+		if imageResult != nil {
+			screenResult.Texts = imageResult.OCRResult.ToOCRTexts()
+			screenResult.UploadedURL = imageResult.URL
+			screenResult.Icons = imageResult.UIResult

-			closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"})
-			for _, closeArea := range closeAreas {
-				screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center())
+			if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil {
+				screenResult.Popup = &PopupInfo{
+					ClosePopupsResult: imageResult.ClosePopupsResult,
+					PicName:           imagePath,
+					PicURL:            imageResult.URL,
+				}
+
+				closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"})
+				for _, closeArea := range closeAreas {
+					screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center())
+				}
 			}
+			logger.Str("imageUrl", screenResult.UploadedURL)
 		}
 	}

@@ -140,13 +144,28 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult
 	session := dExt.GetSession()
 	session.screenResults = append(session.screenResults, screenResult)

-	log.Debug().
-		Str("imagePath", imagePath).
-		Str("imageUrl", screenResult.UploadedURL).
-		Msg("log screenshot")
+	logger.Msg("log screenshot")
 	return screenResult, nil
 }

+// needsCVProcessing reports whether at least one CV-related screenshot option (OCR, upload, live type, live popularity, UI types, close popups, OCR cluster) is set.
+func needsCVProcessing(options *option.ActionOptions) bool {
+	return options.ScreenShotWithOCR || // options is dereferenced without a nil check; callers must pass a non-nil value
+		options.ScreenShotWithUpload ||
+		options.ScreenShotWithLiveType ||
+		options.ScreenShotWithLivePopularity ||
+		len(options.ScreenShotWithUITypes) > 0 ||
+		options.ScreenShotWithClosePopups ||
+		options.ScreenShotWithOCRCluster != ""
+}
+
+// GetScreenResult takes a screenshot with the OCR option enabled and returns the ScreenResult built by createScreenshotWithSession.
+func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
+	// The OCR option is appended after the caller's options — presumably so it wins over any earlier OCR setting (TODO confirm option ordering semantics).
+	opts = append(opts, option.WithScreenShotOCR(true))
+	return dExt.createScreenshotWithSession(opts...)
+}
+
 func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) {
 	options := option.NewActionOptions(opts...)
 	if options.ScreenShotFileName == "" {
--- a/uixt/sdk.go
+++ b/uixt/sdk.go
@@ -132,7 +132,7 @@ func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAct
 	subActionResult := &SubActionResult{
 		ActionName: string(action.Method),
 		Arguments:  action.Params,
-		StartTime:  subActionStartTime.Unix(),
+		StartTime:  subActionStartTime.UnixMilli(),
 	}

 	// Execute via MCP tool