feat: implement structured response parsing with enhanced error recovery and UTF-8 sanitization

2026-06-27 02:21:23 +08:00 · 2025-06-18 16:59:35 +08:00
parent 6965cf9fe9
commit 1f3366453e
8 changed files with 1076 additions and 217 deletions
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2506181613
+v5.0.0-beta-2506181659
--- a/uixt/ai/asserter.go
+++ b/uixt/ai/asserter.go
@@ -10,10 +10,10 @@ import (
 	"github.com/cloudwego/eino/schema"
 	"github.com/getkin/kin-openapi/openapi3gen"
 	"github.com/httprunner/httprunner/v5/code"
-	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
 	"github.com/httprunner/httprunner/v5/uixt/types"
 	"github.com/pkg/errors"
+	"github.com/rs/zerolog/log"
 )

 // IAsserter interface defines the contract for assertion operations
@@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error {

 // parseAssertionResult parses the model response into AssertionResponse
 func parseAssertionResult(content string) (*AssertionResult, error) {
-	// Extract JSON content from response
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		return nil, errors.New("could not extract JSON from response")
-	}
-
-	// Parse JSON response
 	var result AssertionResult
-	if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
+
+	// Use the generic structured response parser
+	if err := parseStructuredResponse(content, &result); err != nil {
+		log.Warn().
+			Interface("original_content", content).
+			Msg("parse assertion result failed")
 		return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
 	}

--- a/uixt/ai/asserter_test.go
+++ b/uixt/ai/asserter_test.go
@@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) {
 		})
 	}
 }
+
+// Test the main parseAssertionResult function with problematic input
+func TestParseAssertionResult(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid JSON response",
+			input:         `{"pass": true, "thought": "Assertion passed"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "response with UTF-8 replacement characters",
+			input:         "浅蓝色的搜索框，里面显示着输入的\"ma\"，而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}",
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed JSON with extraction",
+			input:         `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "completely malformed but analyzable",
+			input:         "This assertion test passed and was successful",
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := parseAssertionResult(tt.input)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotNil(t, result)
+				assert.NotEmpty(t, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
--- a/uixt/ai/parser_default.go
+++ b/uixt/ai/parser_default.go
@@ -5,7 +5,6 @@ import (
 	"strings"

 	"github.com/cloudwego/eino/schema"
-	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
 	"github.com/httprunner/httprunner/v5/uixt/types"
 	"github.com/pkg/errors"
@@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string {
 func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) {
 	content = strings.TrimSpace(content)

-	// Extract JSON content from markdown code blocks
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		return nil, fmt.Errorf("no valid JSON content found in response")
-	}
-
-	// Define a temporary struct to parse the expected JSON format
-	var jsonResponse struct {
-		Actions []Action `json:"actions"`
-		Thought string   `json:"thought"`
-		Error   string   `json:"error"`
-	}
-
-	if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil {
+	// Use the generic structured response parser
+	var jsonResponse PlanningJSONResponse
+	if err := parseStructuredResponse(content, &jsonResponse); err != nil {
 		return nil, fmt.Errorf("failed to parse VLM response: %v", err)
 	}

--- a/uixt/ai/querier.go
+++ b/uixt/ai/querier.go
@@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error {

 // parseQueryResult parses the model response into QueryResult
 func parseQueryResult(content string) (*QueryResult, error) {
-	// Extract JSON content from response
-	jsonContent := extractJSONFromContent(content)
-	if jsonContent == "" {
-		// If no JSON found, treat the entire content as the result
-		// This handles cases where the model returns plain text instead of JSON
-		return &QueryResult{
-			Content: content,
-			Thought: "Direct response from model",
-		}, nil
-	}
-
-	// Parse JSON response
 	var result QueryResult
-	if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
-		// If JSON parsing fails, treat the content as plain text result
+
+	// Use the generic structured response parser with enhanced error recovery
+	if err := parseStructuredResponse(content, &result); err != nil {
+		// If parseStructuredResponse fails completely, treat content as plain text
 		return &QueryResult{
 			Content: content,
-			Thought: "Failed to parse as JSON, returning raw content",
+			Thought: "Failed to parse response, returning raw content",
 		}, nil
 	}

--- a/uixt/ai/querier_test.go
+++ b/uixt/ai/querier_test.go
@@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) {
 		expected *QueryResult
 	}{
 		{
-			name: "valid JSON response",
-			content: `{
-				"content": "这是一个14行8列的连连看游戏界面，包含25种不同的图案",
-				"thought": "通过分析图片，我识别出了游戏界面的结构和图案类型"
-			}`,
+			name:    "valid JSON response",
+			content: `{"content": "extracted information", "thought": "analysis complete"}`,
 			expected: &QueryResult{
-				Content: "这是一个14行8列的连连看游戏界面，包含25种不同的图案",
-				Thought: "通过分析图片，我识别出了游戏界面的结构和图案类型",
+				Content: "extracted information",
+				Thought: "analysis complete",
 			},
 		},
 		{
 			name:    "JSON in markdown",
-			content: "```json\n{\n  \"content\": \"游戏界面分析结果\",\n  \"thought\": \"分析过程\"\n}\n```",
+			content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```",
 			expected: &QueryResult{
-				Content: "游戏界面分析结果",
-				Thought: "分析过程",
+				Content: "data from markdown",
+				Thought: "parsed from code block",
 			},
 		},
 		{
 			name:    "plain text response",
-			content: "这是一个连连看游戏界面，包含多种图案。",
+			content: "This is just plain text without JSON structure",
 			expected: &QueryResult{
-				Content: "这是一个连连看游戏界面，包含多种图案。",
-				Thought: "Direct response from model",
+				Content: "This is just plain text without JSON structure",
+				Thought: "Failed to parse as JSON, returning raw content",
 			},
 		},
 		{
 			name:    "invalid JSON",
 			content: `{"content": "incomplete json", "missing_closing_brace": true`,
 			expected: &QueryResult{
-				Content: `{"content": "incomplete json", "missing_closing_brace": true`,
-				Thought: "Direct response from model",
+				Content: "incomplete json",
+				Thought: "Partial extraction from malformed response",
 			},
 		},
 	}
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -2,6 +2,7 @@ package ai

 import (
 	"context"
+	"fmt"
 	"regexp"
 	"strings"
 	"time"
@@ -11,9 +12,18 @@ import (
 	"github.com/cloudwego/eino/schema"
 	"github.com/rs/zerolog/log"

+	"github.com/httprunner/httprunner/v5/internal/json"
 	"github.com/httprunner/httprunner/v5/uixt/option"
+	"github.com/pkg/errors"
 )

+// PlanningJSONResponse represents the JSON response structure for planning
+type PlanningJSONResponse struct {
+	Actions []Action `json:"actions"`
+	Thought string   `json:"thought"`
+	Error   string   `json:"error"`
+}
+
 // extractJSONFromContent extracts JSON content from various formats in the response
 // This function handles multiple formats:
 // 1. ```json ... ``` markdown code blocks
@@ -111,6 +121,333 @@ func extractJSONFromContent(content string) string {
 	return ""
 }

+// sanitizeUTF8Content cleans invalid UTF-8 characters from content
+func sanitizeUTF8Content(content string) string {
+	if utf8.ValidString(content) {
+		return content
+	}
+
+	// Convert to bytes and filter out invalid UTF-8 sequences
+	bytes := []byte(content)
+	var validBytes []byte
+
+	for len(bytes) > 0 {
+		r, size := utf8.DecodeRune(bytes)
+		if r != utf8.RuneError {
+			// Valid rune, keep it
+			validBytes = append(validBytes, bytes[:size]...)
+		}
+		// Skip invalid bytes (including RuneError)
+		bytes = bytes[size:]
+	}
+
+	return string(validBytes)
+}
+
+// parseJSONWithFallback attempts to parse JSON with multiple strategies for any struct type
+func parseJSONWithFallback(jsonContent string, result interface{}) error {
+	// Strategy 1: Direct JSON unmarshaling
+	if err := json.Unmarshal([]byte(jsonContent), result); err == nil {
+		// For specific types, ensure required fields have default values even after successful parsing
+		switch v := result.(type) {
+		case *QueryResult:
+			// Ensure QueryResult has meaningful defaults for empty fields
+			if v.Content == "" && v.Thought == "" {
+				v.Content = "Empty response content"
+				v.Thought = "No content extracted from response"
+			} else if v.Content == "" {
+				v.Content = "No content extracted"
+			} else if v.Thought == "" {
+				v.Thought = "Successfully parsed structured response"
+			}
+		case *AssertionResult:
+			// Ensure AssertionResult has meaningful defaults
+			if v.Thought == "" {
+				v.Thought = "Successfully parsed assertion response"
+			}
+		}
+		return nil
+	}
+
+	// Strategy 2: Try cleaning JSON content and parse again
+	cleanedJSON := cleanJSONContent(jsonContent)
+	if err := json.Unmarshal([]byte(cleanedJSON), result); err == nil {
+		// Apply the same default value logic for cleaned JSON
+		switch v := result.(type) {
+		case *QueryResult:
+			if v.Content == "" && v.Thought == "" {
+				v.Content = "Empty response content"
+				v.Thought = "No content extracted from response"
+			} else if v.Content == "" {
+				v.Content = "No content extracted"
+			} else if v.Thought == "" {
+				v.Thought = "Successfully parsed structured response"
+			}
+		case *AssertionResult:
+			if v.Thought == "" {
+				v.Thought = "Successfully parsed assertion response"
+			}
+		}
+		return nil
+	}
+
+	// Strategy 3: For specific types, try manual extraction or content analysis
+	switch v := result.(type) {
+	case *AssertionResult:
+		if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Final fallback for assertions: content analysis
+		*v = *analyzeContentForAssertion(jsonContent)
+		return nil
+
+	case *QueryResult:
+		// For QueryResult, try basic field extraction
+		if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback to treating content as plain text
+		*v = QueryResult{
+			Content: jsonContent,
+			Thought: "Failed to parse as JSON, returning raw content",
+		}
+		return nil
+
+	case *PlanningJSONResponse:
+		// For PlanningJSONResponse, try basic field extraction
+		if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback with empty actions but preserve any recognizable thought content
+		*v = PlanningJSONResponse{
+			Actions: []Action{},
+			Thought: "Failed to parse structured response",
+			Error:   "JSON parsing failed, returning minimal structure",
+		}
+		return nil
+	}
+
+	return errors.New("failed to parse JSON with all strategies")
+}
+
+// extractAssertionFieldsManually extracts pass and thought fields from text
+func extractAssertionFieldsManually(content string) (*AssertionResult, error) {
+	result := &AssertionResult{}
+
+	// Try to extract "pass" field
+	if strings.Contains(strings.ToLower(content), `"pass":true`) ||
+		strings.Contains(strings.ToLower(content), `"pass": true`) {
+		result.Pass = true
+	} else if strings.Contains(strings.ToLower(content), `"pass":false`) ||
+		strings.Contains(strings.ToLower(content), `"pass": false`) {
+		result.Pass = false
+	} else {
+		return nil, errors.New("cannot extract pass field")
+	}
+
+	// Try to extract "thought" field
+	thoughtStart := strings.Index(content, `"thought"`)
+	if thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		colonIndex := strings.Index(thoughtSection, ":")
+		if colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				// Find the matching closing quote, handling escaped quotes
+				thoughtContent := extractQuotedString(afterColon)
+				result.Thought = thoughtContent
+			}
+		}
+	}
+
+	return result, nil
+}
+
+// extractQuotedString extracts content from a quoted string, handling escaped quotes
+func extractQuotedString(s string) string {
+	if !strings.HasPrefix(s, `"`) {
+		return ""
+	}
+
+	s = s[1:] // Remove opening quote
+	var result strings.Builder
+	escaped := false
+
+	for _, r := range s {
+		if escaped {
+			result.WriteRune(r)
+			escaped = false
+			continue
+		}
+
+		if r == '\\' {
+			escaped = true
+			continue
+		}
+
+		if r == '"' {
+			// Found closing quote
+			return result.String()
+		}
+
+		result.WriteRune(r)
+	}
+
+	return result.String()
+}
+
+// cleanJSONContent removes common JSON formatting issues
+func cleanJSONContent(content string) string {
+	// Remove any non-printable characters
+	cleaned := strings.Map(func(r rune) rune {
+		if r >= 32 && r < 127 || r > 127 { // Keep printable ASCII and Unicode
+			return r
+		}
+		return -1 // Remove non-printable characters
+	}, content)
+
+	// Remove any trailing commas before closing braces/brackets
+	cleaned = strings.ReplaceAll(cleaned, ",}", "}")
+	cleaned = strings.ReplaceAll(cleaned, ",]", "]")
+
+	return cleaned
+}
+
+// analyzeContentForAssertion creates a fallback result by analyzing content
+func analyzeContentForAssertion(content string) *AssertionResult {
+	content = strings.ToLower(content)
+
+	// Simple heuristic: look for positive/negative indicators
+	positiveIndicators := []string{"true", "pass", "success", "correct", "valid", "match"}
+	negativeIndicators := []string{"false", "fail", "error", "incorrect", "invalid", "mismatch"}
+
+	positiveCount := 0
+	negativeCount := 0
+
+	for _, indicator := range positiveIndicators {
+		if strings.Contains(content, indicator) {
+			positiveCount++
+		}
+	}
+
+	for _, indicator := range negativeIndicators {
+		if strings.Contains(content, indicator) {
+			negativeCount++
+		}
+	}
+
+	pass := positiveCount > negativeCount
+	thought := fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)",
+		positiveCount, negativeCount)
+
+	return &AssertionResult{
+		Pass:    pass,
+		Thought: thought,
+	}
+}
+
+// extractQueryFieldsManually extracts content and thought fields for QueryResult
+func extractQueryFieldsManually(content string) (*QueryResult, error) {
+	result := &QueryResult{}
+
+	// Try to extract "content" field
+	if contentStart := strings.Index(content, `"content"`); contentStart != -1 {
+		contentSection := content[contentStart:]
+		if colonIndex := strings.Index(contentSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(contentSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Content = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// Try to extract "thought" field
+	if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Thought = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// If we couldn't extract any fields, return error
+	if result.Content == "" && result.Thought == "" {
+		return nil, errors.New("cannot extract content or thought fields")
+	}
+
+	// Set defaults for missing fields (ALWAYS set defaults if any field was extracted)
+	if result.Content == "" {
+		result.Content = "Extracted partial information"
+	}
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
+// extractPlanningFieldsManually extracts thought and error fields for PlanningJSONResponse
+func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) {
+	result := &PlanningJSONResponse{
+		Actions: []Action{}, // Default to empty actions
+	}
+
+	// Try to extract "thought" field
+	if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 {
+		thoughtSection := content[thoughtStart:]
+		if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Thought = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// Try to extract "error" field
+	if errorStart := strings.Index(content, `"error"`); errorStart != -1 {
+		errorSection := content[errorStart:]
+		if colonIndex := strings.Index(errorSection, ":"); colonIndex != -1 {
+			afterColon := strings.TrimSpace(errorSection[colonIndex+1:])
+			if strings.HasPrefix(afterColon, `"`) {
+				result.Error = extractQuotedString(afterColon)
+			}
+		}
+	}
+
+	// If we couldn't extract any meaningful fields, return error
+	if result.Thought == "" && result.Error == "" {
+		return nil, errors.New("cannot extract thought or error fields")
+	}
+
+	// Set defaults for missing fields
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
+// parseStructuredResponse parses model response into structured format with error recovery
+func parseStructuredResponse(content string, result interface{}) error {
+	// Clean and validate UTF-8 content first
+	cleanContent := sanitizeUTF8Content(content)
+
+	// Extract JSON content from response
+	jsonContent := extractJSONFromContent(cleanContent)
+	if jsonContent == "" {
+		// If JSON extraction failed, try parsing the content directly as a fallback
+		jsonContent = cleanContent
+	}
+
+	// Parse JSON response with error recovery
+	return parseJSONWithFallback(jsonContent, result)
+}
+
 // callModelWithLogging is a common function to call model with logging and timing
 // It handles the common pattern of:
 // 1. Log request
--- a/uixt/ai/utils_test.go
+++ b/uixt/ai/utils_test.go
@@ -4,195 +4,701 @@ import (
 	"testing"

 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )

 func TestExtractJSONFromContent(t *testing.T) {
 	tests := []struct {
 		name     string
-		content  string
+		input    string
 		expected string
 	}{
 		{
-			name: "simple JSON",
-			content: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [371, 235, 425, 270]
-      }
-    }
-  ],
-  "thought": "点击桌面上的抖音应用图标以启动抖音",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [371, 235, 425, 270]
-      }
-    }
-  ],
-  "thought": "点击桌面上的抖音应用图标以启动抖音",
-  "error": null
-}`,
+			name:     "simple JSON object",
+			input:    `{"key": "value"}`,
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with Chinese characters in strings",
-			content: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "2048经典"
-      }
-    }
-  ],
-  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "2048经典"
-      }
-    }
-  ],
-  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
-  "error": null
-}`,
+			name:     "JSON in markdown code block",
+			input:    "```json\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with markdown wrapper",
-			content: "```json\n" + `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250]
-      }
-    }
-  ],
-  "thought": "点击按钮",
-  "error": null
-}` + "\n```",
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250]
-      }
-    }
-  ],
-  "thought": "点击按钮",
-  "error": null
-}`,
+			name:     "JSON in code block without language",
+			input:    "```\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON embedded in text with Chinese",
-			content: `这是一个包含中文的响应：{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "测试内容"
-      }
-    }
-  ],
-  "thought": "这是一个测试思路",
-  "error": null
-} 后面还有一些文本`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "测试内容"
-      }
-    }
-  ],
-  "thought": "这是一个测试思路",
-  "error": null
-}`,
+			name:     "JSON with surrounding text",
+			input:    `Here is the result: {"key": "value"} and some more text`,
+			expected: `{"key": "value"}`,
 		},
 		{
-			name: "JSON with escaped quotes and Chinese",
-			content: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "他说：\"你好，世界！\""
-      }
-    }
-  ],
-  "thought": "输入包含引号的中文文本",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "type",
-      "action_inputs": {
-        "content": "他说：\"你好，世界！\""
-      }
-    }
-  ],
-  "thought": "输入包含引号的中文文本",
-  "error": null
-}`,
+			name:     "multiple JSON objects",
+			input:    `{"first": "object"} and {"second": "object"}`,
+			expected: `{"first": "object"}`,
 		},
 		{
-			name:     "no JSON content",
-			content:  "这只是一些普通的文本，没有JSON内容",
+			name:     "nested JSON in markdown",
+			input:    "```json\n{\"data\": {\"nested\": \"value\"}}\n```",
+			expected: `{"data": {"nested": "value"}}`,
+		},
+		{
+			name:     "JSON array",
+			input:    `[{"item": 1}, {"item": 2}]`,
+			expected: `[{"item": 1}, {"item": 2}]`,
+		},
+		{
+			name:     "JSON array in markdown",
+			input:    "```json\n[{\"item\": 1}, {\"item\": 2}]\n```",
+			expected: `[{"item": 1}, {"item": 2}]`,
+		},
+		{
+			name:     "text without JSON",
+			input:    "This is just plain text without any JSON",
 			expected: "",
 		},
 		{
-			name: "nested JSON objects with Chinese",
-			content: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250],
-        "metadata": {
-          "description": "点击操作",
-          "target": "按钮"
-        }
-      }
-    }
-  ],
-  "thought": "执行嵌套对象的点击操作",
-  "error": null
-}`,
-			expected: `{
-  "actions": [
-    {
-      "action_type": "click",
-      "action_inputs": {
-        "start_box": [100, 200, 150, 250],
-        "metadata": {
-          "description": "点击操作",
-          "target": "按钮"
-        }
-      }
-    }
-  ],
-  "thought": "执行嵌套对象的点击操作",
-  "error": null
-}`,
+			name:     "malformed JSON",
+			input:    `{"key": "value"`,
+			expected: `{"key": "value"`,
+		},
+		{
+			name:     "JSON with unicode",
+			input:    `{"message": "测试消息"}`,
+			expected: `{"message": "测试消息"}`,
+		},
+		{
+			name:     "multiple code blocks, select first JSON",
+			input:    "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```",
+			expected: `{"first": true}`,
+		},
+		{
+			name:     "mixed language code blocks",
+			input:    "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "JSON with special characters",
+			input:    `{"special": "chars: @#$%^&*()"}`,
+			expected: `{"special": "chars: @#$%^&*()"}`,
+		},
+		{
+			name:     "empty JSON object",
+			input:    `{}`,
+			expected: `{}`,
+		},
+		{
+			name:     "empty JSON array",
+			input:    `[]`,
+			expected: `[]`,
+		},
+		{
+			name:     "JSON with line breaks",
+			input:    "{\n  \"key\": \"value\",\n  \"number\": 123\n}",
+			expected: "{\n  \"key\": \"value\",\n  \"number\": 123\n}",
+		},
+		{
+			name:     "markdown with extra whitespace",
+			input:    "  ```json  \n  {\"key\": \"value\"}  \n  ```  ",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "code block with tildes",
+			input:    "~~~json\n{\"key\": \"value\"}\n~~~",
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "JSON after other text patterns",
+			input:    `The response should be formatted as: {"status": "success"}`,
+			expected: `{"status": "success"}`,
+		},
+		{
+			name:     "JSON in mixed content",
+			input:    `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`,
+			expected: `{"analysis": "positive", "confidence": 0.95}`,
+		},
+		{
+			name:     "complex nested JSON",
+			input:    `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+			expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+		},
+		{
+			name:     "JSON with escaped quotes",
+			input:    `{"message": "He said \"Hello\" to me"}`,
+			expected: `{"message": "He said \"Hello\" to me"}`,
 		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := extractJSONFromContent(tt.content)
+			result := extractJSONFromContent(tt.input)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
 }
+
+func TestSanitizeUTF8Content(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "valid UTF-8",
+			input:    "Hello 世界",
+			expected: "Hello 世界",
+		},
+		{
+			name:     "invalid UTF-8 with replacement characters",
+			input:    "Hello \ufffd\ufffd World",
+			expected: "Hello  World",
+		},
+		{
+			name:     "mixed valid and invalid",
+			input:    "测试\ufffd消息\ufffd",
+			expected: "测试消息",
+		},
+		{
+			name:     "only replacement characters",
+			input:    "\ufffd\ufffd\ufffd",
+			expected: "",
+		},
+		{
+			name:     "empty string",
+			input:    "",
+			expected: "",
+		},
+		{
+			name:     "ASCII only",
+			input:    "Hello World 123",
+			expected: "Hello World 123",
+		},
+		{
+			name:     "JSON with UTF-8 issues",
+			input:    `{"message": "搜索框\ufffd\ufffd显示"}`,
+			expected: `{"message": "搜索框显示"}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := sanitizeUTF8Content(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestParseJSONWithFallback(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedValid   bool
+		expectedPass    bool
+		expectedThought string
+	}{
+		{
+			name:            "valid JSON",
+			input:           `{"pass": true, "thought": "test passed"}`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "test passed",
+		},
+		{
+			name:            "valid JSON with false",
+			input:           `{"pass": false, "thought": "test failed"}`,
+			expectedValid:   true,
+			expectedPass:    false,
+			expectedThought: "test failed",
+		},
+		{
+			name:            "malformed JSON with extractable fields",
+			input:           `malformed start {"pass": true, "thought": "extracted"} end`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "extracted",
+		},
+		{
+			name:            "content analysis fallback - positive",
+			input:           `The test was successful and passed with true result`,
+			expectedValid:   true,
+			expectedPass:    true,
+			expectedThought: "Fallback analysis of malformed response (positive: 3, negative: 0)",
+		},
+		{
+			name:            "content analysis fallback - negative",
+			input:           `The test failed with false result and error occurred`,
+			expectedValid:   true,
+			expectedPass:    false,
+			expectedThought: "Fallback analysis of malformed response (positive: 0, negative: 3)",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result AssertionResult
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedPass, result.Pass)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestExtractAssertionFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedPass    bool
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "pass true",
+			input:           `{"pass": true, "thought": "manual test"}`,
+			expectedPass:    true,
+			expectedThought: "manual test",
+			shouldError:     false,
+		},
+		{
+			name:            "pass false",
+			input:           `{"pass": false, "thought": "manual fail"}`,
+			expectedPass:    false,
+			expectedThought: "manual fail",
+			shouldError:     false,
+		},
+		{
+			name:        "no pass field",
+			input:       `{"thought": "no pass field"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractAssertionFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedPass, result.Pass)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			}
+		})
+	}
+}
+
+func TestExtractQuotedString(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "simple quoted string",
+			input:    `"hello world"`,
+			expected: "hello world",
+		},
+		{
+			name:     "quoted string with escaped quotes",
+			input:    `"He said \"Hello\""`,
+			expected: `He said "Hello"`,
+		},
+		{
+			name:     "quoted string with escaped backslash",
+			input:    `"path\\to\\file"`,
+			expected: `path\to\file`,
+		},
+		{
+			name:     "empty quoted string",
+			input:    `""`,
+			expected: "",
+		},
+		{
+			name:     "quoted string with unicode",
+			input:    `"测试消息"`,
+			expected: "测试消息",
+		},
+		{
+			name:     "not a quoted string",
+			input:    "hello world",
+			expected: "",
+		},
+		{
+			name:     "unclosed quoted string",
+			input:    `"unclosed string`,
+			expected: "unclosed string",
+		},
+		{
+			name:     "quoted string with extra content after",
+			input:    `"content" and more`,
+			expected: "content",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := extractQuotedString(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestCleanJSONContent(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "remove trailing comma in object",
+			input:    `{"key": "value",}`,
+			expected: `{"key": "value"}`,
+		},
+		{
+			name:     "remove trailing comma in array",
+			input:    `["item1", "item2",]`,
+			expected: `["item1", "item2"]`,
+		},
+		{
+			name:     "clean non-printable characters",
+			input:    "{\n\"key\": \"value\"\u0000\u0001}",
+			expected: "{\n\"key\": \"value\"}",
+		},
+		{
+			name:     "preserve unicode characters",
+			input:    `{"message": "测试消息"}`,
+			expected: `{"message": "测试消息"}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := cleanJSONContent(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestAnalyzeContentForAssertion(t *testing.T) {
+	tests := []struct {
+		name         string
+		input        string
+		expectedPass bool
+	}{
+		{
+			name:         "positive indicators",
+			input:        "The test was successful and passed",
+			expectedPass: true,
+		},
+		{
+			name:         "negative indicators",
+			input:        "The test failed with error",
+			expectedPass: false,
+		},
+		{
+			name:         "mixed with more positive",
+			input:        "Some errors occurred but overall test passed successfully",
+			expectedPass: true,
+		},
+		{
+			name:         "no clear indicators",
+			input:        "This is just plain text",
+			expectedPass: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := analyzeContentForAssertion(tt.input)
+			assert.Equal(t, tt.expectedPass, result.Pass)
+			assert.NotEmpty(t, result.Thought)
+		})
+	}
+}
+
+func TestParseStructuredResponse(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid AssertionResult JSON",
+			input:         `{"pass": true, "thought": "test passed"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed JSON with extractable fields",
+			input:         `malformed start {"pass": false, "thought": "extracted thought"} end`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "UTF-8 issues with JSON",
+			input:         "测试结果：\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}",
+			shouldSucceed: true,
+		},
+		{
+			name:          "content analysis fallback",
+			input:         "The assertion was successful and passed correctly",
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result AssertionResult
+			err := parseStructuredResponse(tt.input, &result)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotEmpty(t, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+// Add more test cases for different struct types
+func TestParseJSONWithFallback_QueryResult(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedValid   bool
+		expectedContent string
+		expectedThought string
+	}{
+		{
+			name:            "valid QueryResult JSON",
+			input:           `{"content": "extracted info", "thought": "analysis complete"}`,
+			expectedValid:   true,
+			expectedContent: "extracted info",
+			expectedThought: "analysis complete",
+		},
+		{
+			name:            "malformed QueryResult with extractable fields",
+			input:           `malformed { "content": "partial info", "thought": "partial analysis" } more text`,
+			expectedValid:   true,
+			expectedContent: "partial info",
+			expectedThought: "partial analysis",
+		},
+		{
+			name:            "completely malformed QueryResult",
+			input:           `This is just plain text with no structure`,
+			expectedValid:   true,
+			expectedContent: "This is just plain text with no structure",
+			expectedThought: "Failed to parse as JSON, returning raw content",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result QueryResult
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedContent, result.Content)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestParseJSONWithFallback_PlanningResponse(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedValid   bool
+		expectedThought string
+		expectedError   string
+		expectedActions int
+	}{
+		{
+			name:            "valid PlanningJSONResponse",
+			input:           `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`,
+			expectedValid:   true,
+			expectedThought: "planning complete",
+			expectedError:   "",
+			expectedActions: 1,
+		},
+		{
+			name:            "malformed PlanningResponse with extractable thought",
+			input:           `malformed { "thought": "partial planning" } more text`,
+			expectedValid:   true,
+			expectedThought: "partial planning",
+			expectedActions: 0,
+		},
+		{
+			name:            "completely malformed PlanningResponse",
+			input:           `This is just plain text with no structure`,
+			expectedValid:   true,
+			expectedThought: "Failed to parse structured response",
+			expectedError:   "JSON parsing failed, returning minimal structure",
+			expectedActions: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result PlanningJSONResponse
+			err := parseJSONWithFallback(tt.input, &result)
+
+			if tt.expectedValid {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+				assert.Equal(t, tt.expectedError, result.Error)
+				assert.Len(t, result.Actions, tt.expectedActions)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}
+
+func TestExtractQueryFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedContent string
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "both content and thought",
+			input:           `{"content": "test content", "thought": "test thought"}`,
+			expectedContent: "test content",
+			expectedThought: "test thought",
+			shouldError:     false,
+		},
+		{
+			name:            "only content",
+			input:           `{"content": "only content"}`,
+			expectedContent: "only content",
+			expectedThought: "Partial extraction from malformed response",
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only thought"}`,
+			expectedContent: "Extracted partial information",
+			expectedThought: "only thought",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractQueryFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedContent, result.Content)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+			}
+		})
+	}
+}
+
+func TestExtractPlanningFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedThought string
+		expectedError   string
+		shouldError     bool
+	}{
+		{
+			name:            "both thought and error",
+			input:           `{"thought": "test planning", "error": "test error"}`,
+			expectedThought: "test planning",
+			expectedError:   "test error",
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only planning"}`,
+			expectedThought: "only planning",
+			expectedError:   "",
+			shouldError:     false,
+		},
+		{
+			name:            "only error",
+			input:           `{"error": "only error"}`,
+			expectedThought: "Partial extraction from malformed response",
+			expectedError:   "only error",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractPlanningFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expectedThought, result.Thought)
+				assert.Equal(t, tt.expectedError, result.Error)
+				assert.NotNil(t, result.Actions) // Should always be initialized
+			}
+		})
+	}
+}
+
+// Test the integrated parseStructuredResponse with QueryResult
+func TestParseStructuredResponse_QueryResult(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		shouldSucceed bool
+	}{
+		{
+			name:          "valid QueryResult JSON",
+			input:         `{"content": "extracted data", "thought": "processing complete"}`,
+			shouldSucceed: true,
+		},
+		{
+			name:          "QueryResult with UTF-8 issues",
+			input:         "extracted data: 搜索框，里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}",
+			shouldSucceed: true,
+		},
+		{
+			name:          "malformed QueryResult",
+			input:         `malformed start {"content": "partial info"} end`,
+			shouldSucceed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var result QueryResult
+			err := parseStructuredResponse(tt.input, &result)
+			if tt.shouldSucceed {
+				require.NoError(t, err)
+				assert.NotEmpty(t, result.Content, "Content should not be empty")
+				assert.NotEmpty(t, result.Thought, "Thought should not be empty")
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
+}