diff --git a/internal/version/VERSION b/internal/version/VERSION index 67bd96f4..f82d6beb 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506181613 +v5.0.0-beta-2506181659 diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go index 8fd0ddf0..58880c51 100644 --- a/uixt/ai/asserter.go +++ b/uixt/ai/asserter.go @@ -10,10 +10,10 @@ import ( "github.com/cloudwego/eino/schema" "github.com/getkin/kin-openapi/openapi3gen" "github.com/httprunner/httprunner/v5/code" - "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" + "github.com/rs/zerolog/log" ) // IAsserter interface defines the contract for assertion operations @@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error { // parseAssertionResult parses the model response into AssertionResponse func parseAssertionResult(content string) (*AssertionResult, error) { - // Extract JSON content from response - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - return nil, errors.New("could not extract JSON from response") - } - - // Parse JSON response var result AssertionResult - if err := json.Unmarshal([]byte(jsonContent), &result); err != nil { + + // Use the generic structured response parser + if err := parseStructuredResponse(content, &result); err != nil { + log.Warn(). + Interface("original_content", content). + Msg("parse assertion result failed") return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error()) } diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go index 9012260a..d9bdaaba 100644 --- a/uixt/ai/asserter_test.go +++ b/uixt/ai/asserter_test.go @@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) { }) } } + +// Test the main parseAssertionResult function with problematic input +func TestParseAssertionResult(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid JSON response", + input: `{"pass": true, "thought": "Assertion passed"}`, + shouldSucceed: true, + }, + { + name: "response with UTF-8 replacement characters", + input: "浅蓝色的搜索框,里面显示着输入的\"ma\",而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}", + shouldSucceed: true, + }, + { + name: "malformed JSON with extraction", + input: `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`, + shouldSucceed: true, + }, + { + name: "completely malformed but analyzable", + input: "This assertion test passed and was successful", + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseAssertionResult(tt.input) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotNil(t, result) + assert.NotEmpty(t, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} diff --git a/uixt/ai/parser_default.go b/uixt/ai/parser_default.go index 69dcb4ab..5169dc1a 100644 --- a/uixt/ai/parser_default.go +++ b/uixt/ai/parser_default.go @@ -5,7 +5,6 @@ import ( "strings" "github.com/cloudwego/eino/schema" - "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" @@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string { func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) { content = strings.TrimSpace(content) - // Extract JSON content from markdown code blocks - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - return nil, fmt.Errorf("no valid JSON content found in response") - } - - // Define a temporary struct to parse the expected JSON format - var jsonResponse struct { - Actions []Action `json:"actions"` - Thought string `json:"thought"` - Error string `json:"error"` - } - - if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil { + // Use the generic structured response parser + var jsonResponse PlanningJSONResponse + if err := parseStructuredResponse(content, &jsonResponse); err != nil { return nil, fmt.Errorf("failed to parse VLM response: %v", err) } diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go index 02f3676e..75d5dbc3 100644 --- a/uixt/ai/querier.go +++ b/uixt/ai/querier.go @@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error { // parseQueryResult parses the model response into QueryResult func parseQueryResult(content string) (*QueryResult, error) { - // Extract JSON content from response - jsonContent := extractJSONFromContent(content) - if jsonContent == "" { - // If no JSON found, treat the entire content as the result - // This handles cases where the model returns plain text instead of JSON - return &QueryResult{ - Content: content, - Thought: "Direct response from model", - }, nil - } - - // Parse JSON response var result QueryResult - if err := json.Unmarshal([]byte(jsonContent), &result); err != nil { - // If JSON parsing fails, treat the content as plain text result + + // Use the generic structured response parser with enhanced error recovery + if err := parseStructuredResponse(content, &result); err != nil { + // If parseStructuredResponse fails completely, treat content as plain text return &QueryResult{ Content: content, - Thought: "Failed to parse as JSON, returning raw content", + Thought: "Failed to parse response, returning raw content", }, nil } diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go index 38ecdc00..3793f7bf 100644 --- a/uixt/ai/querier_test.go +++ b/uixt/ai/querier_test.go @@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) { expected *QueryResult }{ { - name: "valid JSON response", - content: `{ - "content": "这是一个14行8列的连连看游戏界面,包含25种不同的图案", - "thought": "通过分析图片,我识别出了游戏界面的结构和图案类型" - }`, + name: "valid JSON response", + content: `{"content": "extracted information", "thought": "analysis complete"}`, expected: &QueryResult{ - Content: "这是一个14行8列的连连看游戏界面,包含25种不同的图案", - Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型", + Content: "extracted information", + Thought: "analysis complete", }, }, { name: "JSON in markdown", - content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```", + content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```", expected: &QueryResult{ - Content: "游戏界面分析结果", - Thought: "分析过程", + Content: "data from markdown", + Thought: "parsed from code block", }, }, { name: "plain text response", - content: "这是一个连连看游戏界面,包含多种图案。", + content: "This is just plain text without JSON structure", expected: &QueryResult{ - Content: "这是一个连连看游戏界面,包含多种图案。", - Thought: "Direct response from model", + Content: "This is just plain text without JSON structure", + Thought: "Failed to parse as JSON, returning raw content", }, }, { name: "invalid JSON", content: `{"content": "incomplete json", "missing_closing_brace": true`, expected: &QueryResult{ - Content: `{"content": "incomplete json", "missing_closing_brace": true`, - Thought: "Direct response from model", + Content: "incomplete json", + Thought: "Partial extraction from malformed response", }, }, } diff --git a/uixt/ai/utils.go b/uixt/ai/utils.go index 572b705e..ddc0b014 100644 --- a/uixt/ai/utils.go +++ b/uixt/ai/utils.go @@ -2,6 +2,7 @@ package ai import ( "context" + "fmt" "regexp" "strings" "time" @@ -11,9 +12,18 @@ import ( "github.com/cloudwego/eino/schema" "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/pkg/errors" ) +// PlanningJSONResponse represents the JSON response structure for planning +type PlanningJSONResponse struct { + Actions []Action `json:"actions"` + Thought string `json:"thought"` + Error string `json:"error"` +} + // extractJSONFromContent extracts JSON content from various formats in the response // This function handles multiple formats: // 1. ```json ... ``` markdown code blocks @@ -111,6 +121,333 @@ func extractJSONFromContent(content string) string { return "" } +// sanitizeUTF8Content cleans invalid UTF-8 characters from content +func sanitizeUTF8Content(content string) string { + if utf8.ValidString(content) { + return content + } + + // Convert to bytes and filter out invalid UTF-8 sequences + bytes := []byte(content) + var validBytes []byte + + for len(bytes) > 0 { + r, size := utf8.DecodeRune(bytes) + if r != utf8.RuneError { + // Valid rune, keep it + validBytes = append(validBytes, bytes[:size]...) + } + // Skip invalid bytes (including RuneError) + bytes = bytes[size:] + } + + return string(validBytes) +} + +// parseJSONWithFallback attempts to parse JSON with multiple strategies for any struct type +func parseJSONWithFallback(jsonContent string, result interface{}) error { + // Strategy 1: Direct JSON unmarshaling + if err := json.Unmarshal([]byte(jsonContent), result); err == nil { + // For specific types, ensure required fields have default values even after successful parsing + switch v := result.(type) { + case *QueryResult: + // Ensure QueryResult has meaningful defaults for empty fields + if v.Content == "" && v.Thought == "" { + v.Content = "Empty response content" + v.Thought = "No content extracted from response" + } else if v.Content == "" { + v.Content = "No content extracted" + } else if v.Thought == "" { + v.Thought = "Successfully parsed structured response" + } + case *AssertionResult: + // Ensure AssertionResult has meaningful defaults + if v.Thought == "" { + v.Thought = "Successfully parsed assertion response" + } + } + return nil + } + + // Strategy 2: Try cleaning JSON content and parse again + cleanedJSON := cleanJSONContent(jsonContent) + if err := json.Unmarshal([]byte(cleanedJSON), result); err == nil { + // Apply the same default value logic for cleaned JSON + switch v := result.(type) { + case *QueryResult: + if v.Content == "" && v.Thought == "" { + v.Content = "Empty response content" + v.Thought = "No content extracted from response" + } else if v.Content == "" { + v.Content = "No content extracted" + } else if v.Thought == "" { + v.Thought = "Successfully parsed structured response" + } + case *AssertionResult: + if v.Thought == "" { + v.Thought = "Successfully parsed assertion response" + } + } + return nil + } + + // Strategy 3: For specific types, try manual extraction or content analysis + switch v := result.(type) { + case *AssertionResult: + if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Final fallback for assertions: content analysis + *v = *analyzeContentForAssertion(jsonContent) + return nil + + case *QueryResult: + // For QueryResult, try basic field extraction + if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Fallback to treating content as plain text + *v = QueryResult{ + Content: jsonContent, + Thought: "Failed to parse as JSON, returning raw content", + } + return nil + + case *PlanningJSONResponse: + // For PlanningJSONResponse, try basic field extraction + if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil { + *v = *fallbackResult + return nil + } + // Fallback with empty actions but preserve any recognizable thought content + *v = PlanningJSONResponse{ + Actions: []Action{}, + Thought: "Failed to parse structured response", + Error: "JSON parsing failed, returning minimal structure", + } + return nil + } + + return errors.New("failed to parse JSON with all strategies") +} + +// extractAssertionFieldsManually extracts pass and thought fields from text +func extractAssertionFieldsManually(content string) (*AssertionResult, error) { + result := &AssertionResult{} + + // Try to extract "pass" field + if strings.Contains(strings.ToLower(content), `"pass":true`) || + strings.Contains(strings.ToLower(content), `"pass": true`) { + result.Pass = true + } else if strings.Contains(strings.ToLower(content), `"pass":false`) || + strings.Contains(strings.ToLower(content), `"pass": false`) { + result.Pass = false + } else { + return nil, errors.New("cannot extract pass field") + } + + // Try to extract "thought" field + thoughtStart := strings.Index(content, `"thought"`) + if thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + colonIndex := strings.Index(thoughtSection, ":") + if colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + // Find the matching closing quote, handling escaped quotes + thoughtContent := extractQuotedString(afterColon) + result.Thought = thoughtContent + } + } + } + + return result, nil +} + +// extractQuotedString extracts content from a quoted string, handling escaped quotes +func extractQuotedString(s string) string { + if !strings.HasPrefix(s, `"`) { + return "" + } + + s = s[1:] // Remove opening quote + var result strings.Builder + escaped := false + + for _, r := range s { + if escaped { + result.WriteRune(r) + escaped = false + continue + } + + if r == '\\' { + escaped = true + continue + } + + if r == '"' { + // Found closing quote + return result.String() + } + + result.WriteRune(r) + } + + return result.String() +} + +// cleanJSONContent removes common JSON formatting issues +func cleanJSONContent(content string) string { + // Remove any non-printable characters + cleaned := strings.Map(func(r rune) rune { + if r >= 32 && r < 127 || r > 127 { // Keep printable ASCII and Unicode + return r + } + return -1 // Remove non-printable characters + }, content) + + // Remove any trailing commas before closing braces/brackets + cleaned = strings.ReplaceAll(cleaned, ",}", "}") + cleaned = strings.ReplaceAll(cleaned, ",]", "]") + + return cleaned +} + +// analyzeContentForAssertion creates a fallback result by analyzing content +func analyzeContentForAssertion(content string) *AssertionResult { + content = strings.ToLower(content) + + // Simple heuristic: look for positive/negative indicators + positiveIndicators := []string{"true", "pass", "success", "correct", "valid", "match"} + negativeIndicators := []string{"false", "fail", "error", "incorrect", "invalid", "mismatch"} + + positiveCount := 0 + negativeCount := 0 + + for _, indicator := range positiveIndicators { + if strings.Contains(content, indicator) { + positiveCount++ + } + } + + for _, indicator := range negativeIndicators { + if strings.Contains(content, indicator) { + negativeCount++ + } + } + + pass := positiveCount > negativeCount + thought := fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)", + positiveCount, negativeCount) + + return &AssertionResult{ + Pass: pass, + Thought: thought, + } +} + +// extractQueryFieldsManually extracts content and thought fields for QueryResult +func extractQueryFieldsManually(content string) (*QueryResult, error) { + result := &QueryResult{} + + // Try to extract "content" field + if contentStart := strings.Index(content, `"content"`); contentStart != -1 { + contentSection := content[contentStart:] + if colonIndex := strings.Index(contentSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(contentSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Content = extractQuotedString(afterColon) + } + } + } + + // Try to extract "thought" field + if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Thought = extractQuotedString(afterColon) + } + } + } + + // If we couldn't extract any fields, return error + if result.Content == "" && result.Thought == "" { + return nil, errors.New("cannot extract content or thought fields") + } + + // Set defaults for missing fields (ALWAYS set defaults if any field was extracted) + if result.Content == "" { + result.Content = "Extracted partial information" + } + if result.Thought == "" { + result.Thought = "Partial extraction from malformed response" + } + + return result, nil +} + +// extractPlanningFieldsManually extracts thought and error fields for PlanningJSONResponse +func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) { + result := &PlanningJSONResponse{ + Actions: []Action{}, // Default to empty actions + } + + // Try to extract "thought" field + if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 { + thoughtSection := content[thoughtStart:] + if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Thought = extractQuotedString(afterColon) + } + } + } + + // Try to extract "error" field + if errorStart := strings.Index(content, `"error"`); errorStart != -1 { + errorSection := content[errorStart:] + if colonIndex := strings.Index(errorSection, ":"); colonIndex != -1 { + afterColon := strings.TrimSpace(errorSection[colonIndex+1:]) + if strings.HasPrefix(afterColon, `"`) { + result.Error = extractQuotedString(afterColon) + } + } + } + + // If we couldn't extract any meaningful fields, return error + if result.Thought == "" && result.Error == "" { + return nil, errors.New("cannot extract thought or error fields") + } + + // Set defaults for missing fields + if result.Thought == "" { + result.Thought = "Partial extraction from malformed response" + } + + return result, nil +} + +// parseStructuredResponse parses model response into structured format with error recovery +func parseStructuredResponse(content string, result interface{}) error { + // Clean and validate UTF-8 content first + cleanContent := sanitizeUTF8Content(content) + + // Extract JSON content from response + jsonContent := extractJSONFromContent(cleanContent) + if jsonContent == "" { + // If JSON extraction failed, try parsing the content directly as a fallback + jsonContent = cleanContent + } + + // Parse JSON response with error recovery + return parseJSONWithFallback(jsonContent, result) +} + // callModelWithLogging is a common function to call model with logging and timing // It handles the common pattern of: // 1. Log request diff --git a/uixt/ai/utils_test.go b/uixt/ai/utils_test.go index 6a6a38ea..0a43c044 100644 --- a/uixt/ai/utils_test.go +++ b/uixt/ai/utils_test.go @@ -4,195 +4,701 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestExtractJSONFromContent(t *testing.T) { tests := []struct { name string - content string + input string expected string }{ { - name: "simple JSON", - content: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [371, 235, 425, 270] - } - } - ], - "thought": "点击桌面上的抖音应用图标以启动抖音", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [371, 235, 425, 270] - } - } - ], - "thought": "点击桌面上的抖音应用图标以启动抖音", - "error": null -}`, + name: "simple JSON object", + input: `{"key": "value"}`, + expected: `{"key": "value"}`, }, { - name: "JSON with Chinese characters in strings", - content: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "2048经典" - } - } - ], - "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "2048经典" - } - } - ], - "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。", - "error": null -}`, + name: "JSON in markdown code block", + input: "```json\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, }, { - name: "JSON with markdown wrapper", - content: "```json\n" + `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250] - } - } - ], - "thought": "点击按钮", - "error": null -}` + "\n```", - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250] - } - } - ], - "thought": "点击按钮", - "error": null -}`, + name: "JSON in code block without language", + input: "```\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, }, { - name: "JSON embedded in text with Chinese", - content: `这是一个包含中文的响应:{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "测试内容" - } - } - ], - "thought": "这是一个测试思路", - "error": null -} 后面还有一些文本`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "测试内容" - } - } - ], - "thought": "这是一个测试思路", - "error": null -}`, + name: "JSON with surrounding text", + input: `Here is the result: {"key": "value"} and some more text`, + expected: `{"key": "value"}`, }, { - name: "JSON with escaped quotes and Chinese", - content: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "他说:\"你好,世界!\"" - } - } - ], - "thought": "输入包含引号的中文文本", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "type", - "action_inputs": { - "content": "他说:\"你好,世界!\"" - } - } - ], - "thought": "输入包含引号的中文文本", - "error": null -}`, + name: "multiple JSON objects", + input: `{"first": "object"} and {"second": "object"}`, + expected: `{"first": "object"}`, }, { - name: "no JSON content", - content: "这只是一些普通的文本,没有JSON内容", + name: "nested JSON in markdown", + input: "```json\n{\"data\": {\"nested\": \"value\"}}\n```", + expected: `{"data": {"nested": "value"}}`, + }, + { + name: "JSON array", + input: `[{"item": 1}, {"item": 2}]`, + expected: `[{"item": 1}, {"item": 2}]`, + }, + { + name: "JSON array in markdown", + input: "```json\n[{\"item\": 1}, {\"item\": 2}]\n```", + expected: `[{"item": 1}, {"item": 2}]`, + }, + { + name: "text without JSON", + input: "This is just plain text without any JSON", expected: "", }, { - name: "nested JSON objects with Chinese", - content: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250], - "metadata": { - "description": "点击操作", - "target": "按钮" - } - } - } - ], - "thought": "执行嵌套对象的点击操作", - "error": null -}`, - expected: `{ - "actions": [ - { - "action_type": "click", - "action_inputs": { - "start_box": [100, 200, 150, 250], - "metadata": { - "description": "点击操作", - "target": "按钮" - } - } - } - ], - "thought": "执行嵌套对象的点击操作", - "error": null -}`, + name: "malformed JSON", + input: `{"key": "value"`, + expected: `{"key": "value"`, + }, + { + name: "JSON with unicode", + input: `{"message": "测试消息"}`, + expected: `{"message": "测试消息"}`, + }, + { + name: "multiple code blocks, select first JSON", + input: "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```", + expected: `{"first": true}`, + }, + { + name: "mixed language code blocks", + input: "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```", + expected: `{"key": "value"}`, + }, + { + name: "JSON with special characters", + input: `{"special": "chars: @#$%^&*()"}`, + expected: `{"special": "chars: @#$%^&*()"}`, + }, + { + name: "empty JSON object", + input: `{}`, + expected: `{}`, + }, + { + name: "empty JSON array", + input: `[]`, + expected: `[]`, + }, + { + name: "JSON with line breaks", + input: "{\n \"key\": \"value\",\n \"number\": 123\n}", + expected: "{\n \"key\": \"value\",\n \"number\": 123\n}", + }, + { + name: "markdown with extra whitespace", + input: " ```json \n {\"key\": \"value\"} \n ``` ", + expected: `{"key": "value"}`, + }, + { + name: "code block with tildes", + input: "~~~json\n{\"key\": \"value\"}\n~~~", + expected: `{"key": "value"}`, + }, + { + name: "JSON after other text patterns", + input: `The response should be formatted as: {"status": "success"}`, + expected: `{"status": "success"}`, + }, + { + name: "JSON in mixed content", + input: `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`, + expected: `{"analysis": "positive", "confidence": 0.95}`, + }, + { + name: "complex nested JSON", + input: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`, + expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`, + }, + { + name: "JSON with escaped quotes", + input: `{"message": "He said \"Hello\" to me"}`, + expected: `{"message": "He said \"Hello\" to me"}`, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := extractJSONFromContent(tt.content) + result := extractJSONFromContent(tt.input) assert.Equal(t, tt.expected, result) }) } } + +func TestSanitizeUTF8Content(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "valid UTF-8", + input: "Hello 世界", + expected: "Hello 世界", + }, + { + name: "invalid UTF-8 with replacement characters", + input: "Hello \ufffd\ufffd World", + expected: "Hello World", + }, + { + name: "mixed valid and invalid", + input: "测试\ufffd消息\ufffd", + expected: "测试消息", + }, + { + name: "only replacement characters", + input: "\ufffd\ufffd\ufffd", + expected: "", + }, + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "ASCII only", + input: "Hello World 123", + expected: "Hello World 123", + }, + { + name: "JSON with UTF-8 issues", + input: `{"message": "搜索框\ufffd\ufffd显示"}`, + expected: `{"message": "搜索框显示"}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizeUTF8Content(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParseJSONWithFallback(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedPass bool + expectedThought string + }{ + { + name: "valid JSON", + input: `{"pass": true, "thought": "test passed"}`, + expectedValid: true, + expectedPass: true, + expectedThought: "test passed", + }, + { + name: "valid JSON with false", + input: `{"pass": false, "thought": "test failed"}`, + expectedValid: true, + expectedPass: false, + expectedThought: "test failed", + }, + { + name: "malformed JSON with extractable fields", + input: `malformed start {"pass": true, "thought": "extracted"} end`, + expectedValid: true, + expectedPass: true, + expectedThought: "extracted", + }, + { + name: "content analysis fallback - positive", + input: `The test was successful and passed with true result`, + expectedValid: true, + expectedPass: true, + expectedThought: "Fallback analysis of malformed response (positive: 3, negative: 0)", + }, + { + name: "content analysis fallback - negative", + input: `The test failed with false result and error occurred`, + expectedValid: true, + expectedPass: false, + expectedThought: "Fallback analysis of malformed response (positive: 0, negative: 3)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result AssertionResult + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.Equal(t, tt.expectedThought, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestExtractAssertionFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedPass bool + expectedThought string + shouldError bool + }{ + { + name: "pass true", + input: `{"pass": true, "thought": "manual test"}`, + expectedPass: true, + expectedThought: "manual test", + shouldError: false, + }, + { + name: "pass false", + input: `{"pass": false, "thought": "manual fail"}`, + expectedPass: false, + expectedThought: "manual fail", + shouldError: false, + }, + { + name: "no pass field", + input: `{"thought": "no pass field"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractAssertionFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.Equal(t, tt.expectedThought, result.Thought) + } + }) + } +} + +func TestExtractQuotedString(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "simple quoted string", + input: `"hello world"`, + expected: "hello world", + }, + { + name: "quoted string with escaped quotes", + input: `"He said \"Hello\""`, + expected: `He said "Hello"`, + }, + { + name: "quoted string with escaped backslash", + input: `"path\\to\\file"`, + expected: `path\to\file`, + }, + { + name: "empty quoted string", + input: `""`, + expected: "", + }, + { + name: "quoted string with unicode", + input: `"测试消息"`, + expected: "测试消息", + }, + { + name: "not a quoted string", + input: "hello world", + expected: "", + }, + { + name: "unclosed quoted string", + input: `"unclosed string`, + expected: "unclosed string", + }, + { + name: "quoted string with extra content after", + input: `"content" and more`, + expected: "content", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractQuotedString(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestCleanJSONContent(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "remove trailing comma in object", + input: `{"key": "value",}`, + expected: `{"key": "value"}`, + }, + { + name: "remove trailing comma in array", + input: `["item1", "item2",]`, + expected: `["item1", "item2"]`, + }, + { + name: "clean non-printable characters", + input: "{\n\"key\": \"value\"\u0000\u0001}", + expected: "{\n\"key\": \"value\"}", + }, + { + name: "preserve unicode characters", + input: `{"message": "测试消息"}`, + expected: `{"message": "测试消息"}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := cleanJSONContent(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestAnalyzeContentForAssertion(t *testing.T) { + tests := []struct { + name string + input string + expectedPass bool + }{ + { + name: "positive indicators", + input: "The test was successful and passed", + expectedPass: true, + }, + { + name: "negative indicators", + input: "The test failed with error", + expectedPass: false, + }, + { + name: "mixed with more positive", + input: "Some errors occurred but overall test passed successfully", + expectedPass: true, + }, + { + name: "no clear indicators", + input: "This is just plain text", + expectedPass: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := analyzeContentForAssertion(tt.input) + assert.Equal(t, tt.expectedPass, result.Pass) + assert.NotEmpty(t, result.Thought) + }) + } +} + +func TestParseStructuredResponse(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid AssertionResult JSON", + input: `{"pass": true, "thought": "test passed"}`, + shouldSucceed: true, + }, + { + name: "malformed JSON with extractable fields", + input: `malformed start {"pass": false, "thought": "extracted thought"} end`, + shouldSucceed: true, + }, + { + name: "UTF-8 issues with JSON", + input: "测试结果:\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}", + shouldSucceed: true, + }, + { + name: "content analysis fallback", + input: "The assertion was successful and passed correctly", + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result AssertionResult + err := parseStructuredResponse(tt.input, &result) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotEmpty(t, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +// Add more test cases for different struct types +func TestParseJSONWithFallback_QueryResult(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedContent string + expectedThought string + }{ + { + name: "valid QueryResult JSON", + input: `{"content": "extracted info", "thought": "analysis complete"}`, + expectedValid: true, + expectedContent: "extracted info", + expectedThought: "analysis complete", + }, + { + name: "malformed QueryResult with extractable fields", + input: `malformed { "content": "partial info", "thought": "partial analysis" } more text`, + expectedValid: true, + expectedContent: "partial info", + expectedThought: "partial analysis", + }, + { + name: "completely malformed QueryResult", + input: `This is just plain text with no structure`, + expectedValid: true, + expectedContent: "This is just plain text with no structure", + expectedThought: "Failed to parse as JSON, returning raw content", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result QueryResult + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedContent, result.Content) + assert.Equal(t, tt.expectedThought, result.Thought) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestParseJSONWithFallback_PlanningResponse(t *testing.T) { + tests := []struct { + name string + input string + expectedValid bool + expectedThought string + expectedError string + expectedActions int + }{ + { + name: "valid PlanningJSONResponse", + input: `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`, + expectedValid: true, + expectedThought: "planning complete", + expectedError: "", + expectedActions: 1, + }, + { + name: "malformed PlanningResponse with extractable thought", + input: `malformed { "thought": "partial planning" } more text`, + expectedValid: true, + expectedThought: "partial planning", + expectedActions: 0, + }, + { + name: "completely malformed PlanningResponse", + input: `This is just plain text with no structure`, + expectedValid: true, + expectedThought: "Failed to parse structured response", + expectedError: "JSON parsing failed, returning minimal structure", + expectedActions: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result PlanningJSONResponse + err := parseJSONWithFallback(tt.input, &result) + + if tt.expectedValid { + assert.NoError(t, err) + assert.Equal(t, tt.expectedThought, result.Thought) + assert.Equal(t, tt.expectedError, result.Error) + assert.Len(t, result.Actions, tt.expectedActions) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestExtractQueryFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedContent string + expectedThought string + shouldError bool + }{ + { + name: "both content and thought", + input: `{"content": "test content", "thought": "test thought"}`, + expectedContent: "test content", + expectedThought: "test thought", + shouldError: false, + }, + { + name: "only content", + input: `{"content": "only content"}`, + expectedContent: "only content", + expectedThought: "Partial extraction from malformed response", + shouldError: false, + }, + { + name: "only thought", + input: `{"thought": "only thought"}`, + expectedContent: "Extracted partial information", + expectedThought: "only thought", + shouldError: false, + }, + { + name: "no extractable fields", + input: `{"other": "data"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractQueryFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedContent, result.Content) + assert.Equal(t, tt.expectedThought, result.Thought) + } + }) + } +} + +func TestExtractPlanningFieldsManually(t *testing.T) { + tests := []struct { + name string + input string + expectedThought string + expectedError string + shouldError bool + }{ + { + name: "both thought and error", + input: `{"thought": "test planning", "error": "test error"}`, + expectedThought: "test planning", + expectedError: "test error", + shouldError: false, + }, + { + name: "only thought", + input: `{"thought": "only planning"}`, + expectedThought: "only planning", + expectedError: "", + shouldError: false, + }, + { + name: "only error", + input: `{"error": "only error"}`, + expectedThought: "Partial extraction from malformed response", + expectedError: "only error", + shouldError: false, + }, + { + name: "no extractable fields", + input: `{"other": "data"}`, + shouldError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractPlanningFieldsManually(tt.input) + if tt.shouldError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedThought, result.Thought) + assert.Equal(t, tt.expectedError, result.Error) + assert.NotNil(t, result.Actions) // Should always be initialized + } + }) + } +} + +// Test the integrated parseStructuredResponse with QueryResult +func TestParseStructuredResponse_QueryResult(t *testing.T) { + tests := []struct { + name string + input string + shouldSucceed bool + }{ + { + name: "valid QueryResult JSON", + input: `{"content": "extracted data", "thought": "processing complete"}`, + shouldSucceed: true, + }, + { + name: "QueryResult with UTF-8 issues", + input: "extracted data: 搜索框,里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}", + shouldSucceed: true, + }, + { + name: "malformed QueryResult", + input: `malformed start {"content": "partial info"} end`, + shouldSucceed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var result QueryResult + err := parseStructuredResponse(tt.input, &result) + if tt.shouldSucceed { + require.NoError(t, err) + assert.NotEmpty(t, result.Content, "Content should not be empty") + assert.NotEmpty(t, result.Thought, "Thought should not be empty") + } else { + assert.Error(t, err) + } + }) + } +}