feat: implement structured response parsing with enhanced error recovery and UTF-8 sanitization

This commit is contained in:
lilong.129
2025-06-18 16:59:35 +08:00
parent 6965cf9fe9
commit 1f3366453e
8 changed files with 1076 additions and 217 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2506181613
v5.0.0-beta-2506181659

View File

@@ -10,10 +10,10 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/getkin/kin-openapi/openapi3gen"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
)
// IAsserter interface defines the contract for assertion operations
@@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error {
// parseAssertionResult parses the model response into AssertionResponse
func parseAssertionResult(content string) (*AssertionResult, error) {
// Extract JSON content from response
jsonContent := extractJSONFromContent(content)
if jsonContent == "" {
return nil, errors.New("could not extract JSON from response")
}
// Parse JSON response
var result AssertionResult
if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
// Use the generic structured response parser
if err := parseStructuredResponse(content, &result); err != nil {
log.Warn().
Interface("original_content", content).
Msg("parse assertion result failed")
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}

View File

@@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) {
})
}
}
// TestParseAssertionResult drives parseAssertionResult end to end with clean,
// noisy and JSON-free model responses and checks that a usable result with a
// non-empty Thought comes back whenever success is expected.
func TestParseAssertionResult(t *testing.T) {
	cases := []struct {
		name          string
		input         string
		shouldSucceed bool
	}{
		{
			name:          "valid JSON response",
			input:         `{"pass": true, "thought": "Assertion passed"}`,
			shouldSucceed: true,
		},
		{
			name:          "response with UTF-8 replacement characters",
			input:         "浅蓝色的搜索框,里面显示着输入的\"ma\",而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}",
			shouldSucceed: true,
		},
		{
			name:          "malformed JSON with extraction",
			input:         `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`,
			shouldSucceed: true,
		},
		{
			name:          "completely malformed but analyzable",
			input:         "This assertion test passed and was successful",
			shouldSucceed: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := parseAssertionResult(tc.input)
			if !tc.shouldSucceed {
				assert.Error(t, err)
				return
			}
			// require stops the subtest before got is dereferenced below.
			require.NoError(t, err)
			assert.NotNil(t, got)
			assert.NotEmpty(t, got.Thought)
		})
	}
}

View File

@@ -5,7 +5,6 @@ import (
"strings"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/pkg/errors"
@@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string {
func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) {
content = strings.TrimSpace(content)
// Extract JSON content from markdown code blocks
jsonContent := extractJSONFromContent(content)
if jsonContent == "" {
return nil, fmt.Errorf("no valid JSON content found in response")
}
// Define a temporary struct to parse the expected JSON format
var jsonResponse struct {
Actions []Action `json:"actions"`
Thought string `json:"thought"`
Error string `json:"error"`
}
if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil {
// Use the generic structured response parser
var jsonResponse PlanningJSONResponse
if err := parseStructuredResponse(content, &jsonResponse); err != nil {
return nil, fmt.Errorf("failed to parse VLM response: %v", err)
}

View File

@@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error {
// parseQueryResult parses the model response into QueryResult
func parseQueryResult(content string) (*QueryResult, error) {
// Extract JSON content from response
jsonContent := extractJSONFromContent(content)
if jsonContent == "" {
// If no JSON found, treat the entire content as the result
// This handles cases where the model returns plain text instead of JSON
return &QueryResult{
Content: content,
Thought: "Direct response from model",
}, nil
}
// Parse JSON response
var result QueryResult
if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
// If JSON parsing fails, treat the content as plain text result
// Use the generic structured response parser with enhanced error recovery
if err := parseStructuredResponse(content, &result); err != nil {
// If parseStructuredResponse fails completely, treat content as plain text
return &QueryResult{
Content: content,
Thought: "Failed to parse as JSON, returning raw content",
Thought: "Failed to parse response, returning raw content",
}, nil
}

View File

@@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) {
expected *QueryResult
}{
{
name: "valid JSON response",
content: `{
"content": "这是一个14行8列的连连看游戏界面包含25种不同的图案",
"thought": "通过分析图片,我识别出了游戏界面的结构和图案类型"
}`,
name: "valid JSON response",
content: `{"content": "extracted information", "thought": "analysis complete"}`,
expected: &QueryResult{
Content: "这是一个14行8列的连连看游戏界面包含25种不同的图案",
Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型",
Content: "extracted information",
Thought: "analysis complete",
},
},
{
name: "JSON in markdown",
content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```",
content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```",
expected: &QueryResult{
Content: "游戏界面分析结果",
Thought: "分析过程",
Content: "data from markdown",
Thought: "parsed from code block",
},
},
{
name: "plain text response",
content: "这是一个连连看游戏界面,包含多种图案。",
content: "This is just plain text without JSON structure",
expected: &QueryResult{
Content: "这是一个连连看游戏界面,包含多种图案。",
Thought: "Direct response from model",
Content: "This is just plain text without JSON structure",
Thought: "Failed to parse as JSON, returning raw content",
},
},
{
name: "invalid JSON",
content: `{"content": "incomplete json", "missing_closing_brace": true`,
expected: &QueryResult{
Content: `{"content": "incomplete json", "missing_closing_brace": true`,
Thought: "Direct response from model",
Content: "incomplete json",
Thought: "Partial extraction from malformed response",
},
},
}

View File

@@ -2,6 +2,7 @@ package ai
import (
"context"
"fmt"
"regexp"
"strings"
"time"
@@ -11,9 +12,18 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/pkg/errors"
)
// PlanningJSONResponse represents the JSON response structure for planning.
// Each field is bound to the JSON key named in its struct tag.
type PlanningJSONResponse struct {
// Actions is decoded from the "actions" key.
Actions []Action `json:"actions"`
// Thought is decoded from the "thought" key.
Thought string `json:"thought"`
// Error is decoded from the "error" key.
Error string `json:"error"`
}
// extractJSONFromContent extracts JSON content from various formats in the response
// This function handles multiple formats:
// 1. ```json ... ``` markdown code blocks
@@ -111,6 +121,333 @@ func extractJSONFromContent(content string) string {
return ""
}
// sanitizeUTF8Content returns content with every undecodable byte sequence
// and every U+FFFD replacement character removed.
//
// U+FFFD is dropped even when it is correctly encoded: upstream decoders
// substitute it for bytes they could not decode, so by the time the text
// reaches this function the damage usually appears as a perfectly valid
// "\ufffd" rune rather than as raw invalid bytes. Checking only
// utf8.ValidString would therefore let those markers through.
//
// Strings that need no cleaning are returned unchanged without allocating.
func sanitizeUTF8Content(content string) string {
	// Fast path: well-formed input that carries no replacement characters.
	if utf8.ValidString(content) && !strings.Contains(content, "\uFFFD") {
		return content
	}

	var cleaned strings.Builder
	cleaned.Grow(len(content))
	for i := 0; i < len(content); {
		// DecodeRuneInString reports RuneError both for invalid bytes
		// (size 1) and for an encoded U+FFFD (size 3); either way the
		// sequence is skipped by advancing past it without copying.
		r, size := utf8.DecodeRuneInString(content[i:])
		if r != utf8.RuneError {
			cleaned.WriteString(content[i : i+size])
		}
		i += size
	}
	return cleaned.String()
}
// parseJSONWithFallback attempts to parse JSON with multiple strategies for any struct type
func parseJSONWithFallback(jsonContent string, result interface{}) error {
// Strategy 1: Direct JSON unmarshaling
if err := json.Unmarshal([]byte(jsonContent), result); err == nil {
// For specific types, ensure required fields have default values even after successful parsing
switch v := result.(type) {
case *QueryResult:
// Ensure QueryResult has meaningful defaults for empty fields
if v.Content == "" && v.Thought == "" {
v.Content = "Empty response content"
v.Thought = "No content extracted from response"
} else if v.Content == "" {
v.Content = "No content extracted"
} else if v.Thought == "" {
v.Thought = "Successfully parsed structured response"
}
case *AssertionResult:
// Ensure AssertionResult has meaningful defaults
if v.Thought == "" {
v.Thought = "Successfully parsed assertion response"
}
}
return nil
}
// Strategy 2: Try cleaning JSON content and parse again
cleanedJSON := cleanJSONContent(jsonContent)
if err := json.Unmarshal([]byte(cleanedJSON), result); err == nil {
// Apply the same default value logic for cleaned JSON
switch v := result.(type) {
case *QueryResult:
if v.Content == "" && v.Thought == "" {
v.Content = "Empty response content"
v.Thought = "No content extracted from response"
} else if v.Content == "" {
v.Content = "No content extracted"
} else if v.Thought == "" {
v.Thought = "Successfully parsed structured response"
}
case *AssertionResult:
if v.Thought == "" {
v.Thought = "Successfully parsed assertion response"
}
}
return nil
}
// Strategy 3: For specific types, try manual extraction or content analysis
switch v := result.(type) {
case *AssertionResult:
if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil {
*v = *fallbackResult
return nil
}
// Final fallback for assertions: content analysis
*v = *analyzeContentForAssertion(jsonContent)
return nil
case *QueryResult:
// For QueryResult, try basic field extraction
if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil {
*v = *fallbackResult
return nil
}
// Fallback to treating content as plain text
*v = QueryResult{
Content: jsonContent,
Thought: "Failed to parse as JSON, returning raw content",
}
return nil
case *PlanningJSONResponse:
// For PlanningJSONResponse, try basic field extraction
if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil {
*v = *fallbackResult
return nil
}
// Fallback with empty actions but preserve any recognizable thought content
*v = PlanningJSONResponse{
Actions: []Action{},
Thought: "Failed to parse structured response",
Error: "JSON parsing failed, returning minimal structure",
}
return nil
}
return errors.New("failed to parse JSON with all strategies")
}
// extractAssertionFieldsManually recovers the "pass" and "thought" fields from
// text that could not be decoded as JSON.
//
// The "pass" key is matched case-insensitively and any amount of JSON
// whitespace (space, tab, CR, LF) is accepted on either side of the colon, so
// `"pass":true`, `"pass" : true` and a value on the following line are all
// recognised. Every occurrence of the key is inspected; if any of them is
// followed by true the result passes, otherwise a false occurrence yields a
// failing result.
//
// It returns an error when no boolean "pass" value can be found. A missing or
// non-string "thought" is not an error; Thought is then left empty.
func extractAssertionFieldsManually(content string) (*AssertionResult, error) {
	const (
		passKey   = `"pass"`
		jsonSpace = " \t\r\n"
	)

	// Lower-case once; the copy is used only for detection, never for slicing
	// the original content.
	lowered := strings.ToLower(content)
	sawTrue, sawFalse := false, false
	for rest := lowered; ; {
		idx := strings.Index(rest, passKey)
		if idx == -1 {
			break
		}
		rest = rest[idx+len(passKey):]

		value := strings.TrimLeft(rest, jsonSpace)
		if !strings.HasPrefix(value, ":") {
			continue // the key text appeared somewhere that is not a member name
		}
		value = strings.TrimLeft(value[1:], jsonSpace)
		switch {
		case strings.HasPrefix(value, "true"):
			sawTrue = true
		case strings.HasPrefix(value, "false"):
			sawFalse = true
		}
	}
	if !sawTrue && !sawFalse {
		return nil, errors.New("cannot extract pass field")
	}

	// A true occurrence takes precedence over a false one.
	result := &AssertionResult{Pass: sawTrue}

	// Try to extract "thought" field: first colon after the key, then a
	// double-quoted value.
	if thoughtStart := strings.Index(content, `"thought"`); thoughtStart != -1 {
		thoughtSection := content[thoughtStart:]
		if colonIndex := strings.Index(thoughtSection, ":"); colonIndex != -1 {
			afterColon := strings.TrimSpace(thoughtSection[colonIndex+1:])
			if strings.HasPrefix(afterColon, `"`) {
				result.Thought = extractQuotedString(afterColon)
			}
		}
	}

	return result, nil
}
// extractQuotedString returns the decoded contents of the double-quoted string
// that s starts with. Anything after the closing quote is ignored.
//
// JSON escape sequences are decoded: \" \\ \/ \b \f \n \r \t and \uXXXX
// (including UTF-16 surrogate pairs). An unrecognised escape, or a \u that is
// not followed by four hex digits, yields the escaped character itself with
// the backslash removed.
//
// It returns "" when s does not begin with a double quote. When the closing
// quote is missing, everything decoded up to the end of s is returned; a
// dangling trailing backslash is dropped.
func extractQuotedString(s string) string {
	if !strings.HasPrefix(s, `"`) {
		return ""
	}
	s = s[1:] // Remove opening quote

	// hex4 parses exactly four hexadecimal digits at the start of t.
	hex4 := func(t string) (rune, bool) {
		if len(t) < 4 {
			return 0, false
		}
		var v rune
		for i := 0; i < 4; i++ {
			c := t[i]
			switch {
			case c >= '0' && c <= '9':
				v = v<<4 | rune(c-'0')
			case c >= 'a' && c <= 'f':
				v = v<<4 | rune(c-'a'+10)
			case c >= 'A' && c <= 'F':
				v = v<<4 | rune(c-'A'+10)
			default:
				return 0, false
			}
		}
		return v, true
	}

	var out strings.Builder
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size

		if r == '"' {
			// Found closing quote
			return out.String()
		}
		if r != '\\' {
			out.WriteRune(r)
			continue
		}
		if i >= len(s) {
			break // dangling backslash at end of input
		}

		esc, escSize := utf8.DecodeRuneInString(s[i:])
		i += escSize
		switch esc {
		case 'n':
			out.WriteByte('\n')
		case 't':
			out.WriteByte('\t')
		case 'r':
			out.WriteByte('\r')
		case 'b':
			out.WriteByte('\b')
		case 'f':
			out.WriteByte('\f')
		case 'u':
			cp, ok := hex4(s[i:])
			if !ok {
				out.WriteRune(esc) // malformed \u: keep the letter
				continue
			}
			i += 4
			// A high surrogate followed by an escaped low surrogate encodes
			// one code point outside the Basic Multilingual Plane.
			if cp >= 0xD800 && cp < 0xDC00 && strings.HasPrefix(s[i:], `\u`) {
				if lo, ok := hex4(s[i+2:]); ok && lo >= 0xDC00 && lo < 0xE000 {
					cp = 0x10000 + (cp-0xD800)<<10 + (lo - 0xDC00)
					i += 6
				}
			}
			// WriteRune emits U+FFFD for an unpaired surrogate.
			out.WriteRune(cp)
		default:
			// Covers \" \\ \/ and any unknown escape.
			out.WriteRune(esc)
		}
	}
	return out.String()
}
// cleanJSONContent repairs two common defects in model-produced JSON text:
//
//   - control characters (anything below U+0020, plus DEL) are removed, with
//     the exception of tab, line feed and carriage return, which are legal
//     JSON whitespace and are kept so that formatted payloads stay intact;
//   - a trailing comma before a closing '}' or ']' is removed, whether the
//     closer follows immediately or after whitespace.
//
// The function works on the raw text and does not track string literals, so
// a ",}" or ",]" sequence inside a string value is rewritten as well.
func cleanJSONContent(content string) string {
	// Remove non-printable characters but keep JSON whitespace and all
	// non-ASCII runes.
	cleaned := strings.Map(func(r rune) rune {
		switch {
		case r == '\t' || r == '\n' || r == '\r':
			return r
		case r < 32 || r == 127:
			return -1
		}
		return r
	}, content)

	// Drop commas whose next non-whitespace byte closes an object or array.
	// Scanning bytes is safe: ',' '}' ']' and whitespace are ASCII and never
	// occur inside a multi-byte UTF-8 sequence.
	var out strings.Builder
	out.Grow(len(cleaned))
	for i := 0; i < len(cleaned); i++ {
		if cleaned[i] == ',' {
			j := i + 1
			for j < len(cleaned) && strings.IndexByte(" \t\r\n", cleaned[j]) >= 0 {
				j++
			}
			if j < len(cleaned) && (cleaned[j] == '}' || cleaned[j] == ']') {
				continue // skip the comma; the whitespace after it is kept
			}
		}
		out.WriteByte(cleaned[i])
	}
	return out.String()
}
// analyzeContentForAssertion builds a best-effort AssertionResult from text
// that carries no recoverable JSON fields.
//
// The text is lower-cased and searched for a fixed list of positive and
// negative keywords; each keyword scores at most once no matter how often it
// occurs. Pass is true only when strictly more positive than negative
// keywords were found, so a tie (including no keyword at all) fails. Thought
// records both counts.
//
// Several positive keywords are substrings of negative ones ("valid" in
// "invalid", "correct" in "incorrect", "match" in "mismatch"). Negative
// keywords are therefore blanked out before the positive search, so that a
// word such as "invalid" does not score for both sides.
func analyzeContentForAssertion(content string) *AssertionResult {
	lowered := strings.ToLower(content)

	// Simple heuristic: look for positive/negative indicators
	positiveIndicators := []string{"true", "pass", "success", "correct", "valid", "match"}
	negativeIndicators := []string{"false", "fail", "error", "incorrect", "invalid", "mismatch"}

	negativeCount := 0
	masked := lowered
	for _, indicator := range negativeIndicators {
		if strings.Contains(lowered, indicator) {
			negativeCount++
			// Replace with a space rather than deleting so the neighbouring
			// text cannot fuse into a new keyword.
			masked = strings.ReplaceAll(masked, indicator, " ")
		}
	}

	positiveCount := 0
	for _, indicator := range positiveIndicators {
		if strings.Contains(masked, indicator) {
			positiveCount++
		}
	}

	thought := fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)",
		positiveCount, negativeCount)

	return &AssertionResult{
		Pass:    positiveCount > negativeCount,
		Thought: thought,
	}
}
// extractQueryFieldsManually recovers the "content" and "thought" string
// fields of a QueryResult from text that is not valid JSON.
//
// For each key it takes the first occurrence, the first colon after it, and
// the double-quoted value that follows. It returns an error when neither
// field yields a non-empty value; when only one is found the other receives a
// fixed placeholder.
func extractQueryFieldsManually(content string) (*QueryResult, error) {
	// quotedValueOf returns the quoted value following key, or "" when the
	// key, the colon or the opening quote is missing.
	quotedValueOf := func(key string) string {
		keyPos := strings.Index(content, key)
		if keyPos == -1 {
			return ""
		}
		tail := content[keyPos:]
		colonPos := strings.Index(tail, ":")
		if colonPos == -1 {
			return ""
		}
		value := strings.TrimSpace(tail[colonPos+1:])
		if !strings.HasPrefix(value, `"`) {
			return ""
		}
		return extractQuotedString(value)
	}

	extracted := &QueryResult{
		Content: quotedValueOf(`"content"`),
		Thought: quotedValueOf(`"thought"`),
	}

	// Nothing usable at all: let the caller fall back further.
	if extracted.Content == "" && extracted.Thought == "" {
		return nil, errors.New("cannot extract content or thought fields")
	}

	// At least one field was recovered; fill in whichever is still missing.
	if extracted.Content == "" {
		extracted.Content = "Extracted partial information"
	}
	if extracted.Thought == "" {
		extracted.Thought = "Partial extraction from malformed response"
	}
	return extracted, nil
}
// extractPlanningFieldsManually recovers the "thought" and "error" string
// fields of a PlanningJSONResponse from text that is not valid JSON.
//
// Actions are never recovered here; the returned value always carries an
// empty, non-nil Actions slice. An error is returned when neither field
// yields a non-empty value. A missing thought is replaced by a placeholder,
// a missing error stays empty.
func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) {
	// quotedValueOf returns the quoted value following the first occurrence
	// of key, or "" when the key, the colon or the opening quote is missing.
	quotedValueOf := func(key string) string {
		keyPos := strings.Index(content, key)
		if keyPos == -1 {
			return ""
		}
		tail := content[keyPos:]
		colonPos := strings.Index(tail, ":")
		if colonPos == -1 {
			return ""
		}
		value := strings.TrimSpace(tail[colonPos+1:])
		if !strings.HasPrefix(value, `"`) {
			return ""
		}
		return extractQuotedString(value)
	}

	recovered := &PlanningJSONResponse{
		Actions: []Action{}, // Default to empty actions
		Thought: quotedValueOf(`"thought"`),
		Error:   quotedValueOf(`"error"`),
	}

	// Nothing meaningful could be pulled out of the text.
	if recovered.Thought == "" && recovered.Error == "" {
		return nil, errors.New("cannot extract thought or error fields")
	}

	if recovered.Thought == "" {
		recovered.Thought = "Partial extraction from malformed response"
	}
	return recovered, nil
}
// parseStructuredResponse turns a raw model response into result.
//
// The response is first passed through sanitizeUTF8Content, then
// extractJSONFromContent looks for a JSON payload inside it. The payload, or
// the whole sanitized text when no payload was found, is handed to
// parseJSONWithFallback, whose error is returned unchanged.
func parseStructuredResponse(content string, result interface{}) error {
	sanitized := sanitizeUTF8Content(content)

	if payload := extractJSONFromContent(sanitized); payload != "" {
		return parseJSONWithFallback(payload, result)
	}

	// No JSON could be isolated; let the fallback strategies work on the
	// complete text instead.
	return parseJSONWithFallback(sanitized, result)
}
// callModelWithLogging is a common function to call model with logging and timing
// It handles the common pattern of:
// 1. Log request

View File

@@ -4,195 +4,701 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestExtractJSONFromContent(t *testing.T) {
tests := []struct {
name string
content string
input string
expected string
}{
{
name: "simple JSON",
content: `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [371, 235, 425, 270]
}
}
],
"thought": "点击桌面上的抖音应用图标以启动抖音",
"error": null
}`,
expected: `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [371, 235, 425, 270]
}
}
],
"thought": "点击桌面上的抖音应用图标以启动抖音",
"error": null
}`,
name: "simple JSON object",
input: `{"key": "value"}`,
expected: `{"key": "value"}`,
},
{
name: "JSON with Chinese characters in strings",
content: `{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "2048经典"
}
}
],
"thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。",
"error": null
}`,
expected: `{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "2048经典"
}
}
],
"thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。",
"error": null
}`,
name: "JSON in markdown code block",
input: "```json\n{\"key\": \"value\"}\n```",
expected: `{"key": "value"}`,
},
{
name: "JSON with markdown wrapper",
content: "```json\n" + `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [100, 200, 150, 250]
}
}
],
"thought": "点击按钮",
"error": null
}` + "\n```",
expected: `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [100, 200, 150, 250]
}
}
],
"thought": "点击按钮",
"error": null
}`,
name: "JSON in code block without language",
input: "```\n{\"key\": \"value\"}\n```",
expected: `{"key": "value"}`,
},
{
name: "JSON embedded in text with Chinese",
content: `这是一个包含中文的响应:{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "测试内容"
}
}
],
"thought": "这是一个测试思路",
"error": null
} 后面还有一些文本`,
expected: `{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "测试内容"
}
}
],
"thought": "这是一个测试思路",
"error": null
}`,
name: "JSON with surrounding text",
input: `Here is the result: {"key": "value"} and some more text`,
expected: `{"key": "value"}`,
},
{
name: "JSON with escaped quotes and Chinese",
content: `{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "他说:\"你好,世界!\""
}
}
],
"thought": "输入包含引号的中文文本",
"error": null
}`,
expected: `{
"actions": [
{
"action_type": "type",
"action_inputs": {
"content": "他说:\"你好,世界!\""
}
}
],
"thought": "输入包含引号的中文文本",
"error": null
}`,
name: "multiple JSON objects",
input: `{"first": "object"} and {"second": "object"}`,
expected: `{"first": "object"}`,
},
{
name: "no JSON content",
content: "这只是一些普通的文本没有JSON内容",
name: "nested JSON in markdown",
input: "```json\n{\"data\": {\"nested\": \"value\"}}\n```",
expected: `{"data": {"nested": "value"}}`,
},
{
name: "JSON array",
input: `[{"item": 1}, {"item": 2}]`,
expected: `[{"item": 1}, {"item": 2}]`,
},
{
name: "JSON array in markdown",
input: "```json\n[{\"item\": 1}, {\"item\": 2}]\n```",
expected: `[{"item": 1}, {"item": 2}]`,
},
{
name: "text without JSON",
input: "This is just plain text without any JSON",
expected: "",
},
{
name: "nested JSON objects with Chinese",
content: `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [100, 200, 150, 250],
"metadata": {
"description": "点击操作",
"target": "按钮"
}
}
}
],
"thought": "执行嵌套对象的点击操作",
"error": null
}`,
expected: `{
"actions": [
{
"action_type": "click",
"action_inputs": {
"start_box": [100, 200, 150, 250],
"metadata": {
"description": "点击操作",
"target": "按钮"
}
}
}
],
"thought": "执行嵌套对象的点击操作",
"error": null
}`,
name: "malformed JSON",
input: `{"key": "value"`,
expected: `{"key": "value"`,
},
{
name: "JSON with unicode",
input: `{"message": "测试消息"}`,
expected: `{"message": "测试消息"}`,
},
{
name: "multiple code blocks, select first JSON",
input: "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```",
expected: `{"first": true}`,
},
{
name: "mixed language code blocks",
input: "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```",
expected: `{"key": "value"}`,
},
{
name: "JSON with special characters",
input: `{"special": "chars: @#$%^&*()"}`,
expected: `{"special": "chars: @#$%^&*()"}`,
},
{
name: "empty JSON object",
input: `{}`,
expected: `{}`,
},
{
name: "empty JSON array",
input: `[]`,
expected: `[]`,
},
{
name: "JSON with line breaks",
input: "{\n \"key\": \"value\",\n \"number\": 123\n}",
expected: "{\n \"key\": \"value\",\n \"number\": 123\n}",
},
{
name: "markdown with extra whitespace",
input: " ```json \n {\"key\": \"value\"} \n ``` ",
expected: `{"key": "value"}`,
},
{
name: "code block with tildes",
input: "~~~json\n{\"key\": \"value\"}\n~~~",
expected: `{"key": "value"}`,
},
{
name: "JSON after other text patterns",
input: `The response should be formatted as: {"status": "success"}`,
expected: `{"status": "success"}`,
},
{
name: "JSON in mixed content",
input: `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`,
expected: `{"analysis": "positive", "confidence": 0.95}`,
},
{
name: "complex nested JSON",
input: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
},
{
name: "JSON with escaped quotes",
input: `{"message": "He said \"Hello\" to me"}`,
expected: `{"message": "He said \"Hello\" to me"}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractJSONFromContent(tt.content)
result := extractJSONFromContent(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
// TestSanitizeUTF8Content checks that sanitizeUTF8Content leaves clean text
// alone and strips both U+FFFD replacement characters and raw bytes that are
// not valid UTF-8.
func TestSanitizeUTF8Content(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "valid UTF-8",
			input:    "Hello 世界",
			expected: "Hello 世界",
		},
		{
			name:  "invalid UTF-8 with replacement characters",
			input: "Hello \ufffd\ufffd World",
			// Only the two replacement characters are removed; the space
			// on each side of them remains.
			expected: "Hello  World",
		},
		{
			name:     "mixed valid and invalid",
			input:    "测试\ufffd消息\ufffd",
			expected: "测试消息",
		},
		{
			name:     "only replacement characters",
			input:    "\ufffd\ufffd\ufffd",
			expected: "",
		},
		{
			name:     "empty string",
			input:    "",
			expected: "",
		},
		{
			name:     "ASCII only",
			input:    "Hello World 123",
			expected: "Hello World 123",
		},
		{
			name:     "JSON with UTF-8 issues",
			input:    `{"message": "搜索框\ufffd\ufffd显示"}`,
			expected: `{"message": "搜索框显示"}`,
		},
		{
			// 0xFF can never appear in well-formed UTF-8.
			name:     "raw invalid byte",
			input:    "abc\xffdef",
			expected: "abcdef",
		},
		{
			// First two bytes of a three-byte sequence, cut short.
			name:     "truncated multi-byte sequence",
			input:    "ok\xe6\xb5",
			expected: "ok",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := sanitizeUTF8Content(tt.input)
			assert.Equal(t, tt.expected, result)
		})
	}
}
// TestParseJSONWithFallback exercises parseJSONWithFallback with an
// AssertionResult target: clean JSON, JSON buried in surrounding text, and
// plain sentences that only the keyword analysis can classify.
func TestParseJSONWithFallback(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedValid   bool
		expectedPass    bool
		expectedThought string
	}{
		{
			name:            "valid JSON",
			input:           `{"pass": true, "thought": "test passed"}`,
			expectedValid:   true,
			expectedPass:    true,
			expectedThought: "test passed",
		},
		{
			name:            "valid JSON with false",
			input:           `{"pass": false, "thought": "test failed"}`,
			expectedValid:   true,
			expectedPass:    false,
			expectedThought: "test failed",
		},
		{
			name:            "malformed JSON with extractable fields",
			input:           `malformed start {"pass": true, "thought": "extracted"} end`,
			expectedValid:   true,
			expectedPass:    true,
			expectedThought: "extracted",
		},
		{
			name:            "content analysis fallback - positive",
			input:           `The test was successful and passed with true result`,
			expectedValid:   true,
			expectedPass:    true,
			expectedThought: "Fallback analysis of malformed response (positive: 3, negative: 0)",
		},
		{
			name:            "content analysis fallback - negative",
			input:           `The test failed with false result and error occurred`,
			expectedValid:   true,
			expectedPass:    false,
			expectedThought: "Fallback analysis of malformed response (positive: 0, negative: 3)",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got AssertionResult
			err := parseJSONWithFallback(tc.input, &got)
			if !tc.expectedValid {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedPass, got.Pass)
			assert.Equal(t, tc.expectedThought, got.Thought)
		})
	}
}
// TestExtractAssertionFieldsManually verifies that the manual extractor reads
// the pass flag and thought text, and reports an error when no pass field is
// present.
func TestExtractAssertionFieldsManually(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedPass    bool
		expectedThought string
		shouldError     bool
	}{
		{
			name:            "pass true",
			input:           `{"pass": true, "thought": "manual test"}`,
			expectedPass:    true,
			expectedThought: "manual test",
			shouldError:     false,
		},
		{
			name:            "pass false",
			input:           `{"pass": false, "thought": "manual fail"}`,
			expectedPass:    false,
			expectedThought: "manual fail",
			shouldError:     false,
		},
		{
			name:        "no pass field",
			input:       `{"thought": "no pass field"}`,
			shouldError: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := extractAssertionFieldsManually(tc.input)
			if tc.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedPass, got.Pass)
			assert.Equal(t, tc.expectedThought, got.Thought)
		})
	}
}
func TestExtractQuotedString(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "simple quoted string",
input: `"hello world"`,
expected: "hello world",
},
{
name: "quoted string with escaped quotes",
input: `"He said \"Hello\""`,
expected: `He said "Hello"`,
},
{
name: "quoted string with escaped backslash",
input: `"path\\to\\file"`,
expected: `path\to\file`,
},
{
name: "empty quoted string",
input: `""`,
expected: "",
},
{
name: "quoted string with unicode",
input: `"测试消息"`,
expected: "测试消息",
},
{
name: "not a quoted string",
input: "hello world",
expected: "",
},
{
name: "unclosed quoted string",
input: `"unclosed string`,
expected: "unclosed string",
},
{
name: "quoted string with extra content after",
input: `"content" and more`,
expected: "content",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractQuotedString(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestCleanJSONContent(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "remove trailing comma in object",
input: `{"key": "value",}`,
expected: `{"key": "value"}`,
},
{
name: "remove trailing comma in array",
input: `["item1", "item2",]`,
expected: `["item1", "item2"]`,
},
{
name: "clean non-printable characters",
input: "{\n\"key\": \"value\"\u0000\u0001}",
expected: "{\n\"key\": \"value\"}",
},
{
name: "preserve unicode characters",
input: `{"message": "测试消息"}`,
expected: `{"message": "测试消息"}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := cleanJSONContent(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
// TestAnalyzeContentForAssertion checks the pass/fail verdict of the keyword
// heuristic for positive, negative, mixed and neutral sentences; Thought must
// always be populated.
func TestAnalyzeContentForAssertion(t *testing.T) {
	cases := []struct {
		name         string
		input        string
		expectedPass bool
	}{
		{
			name:         "positive indicators",
			input:        "The test was successful and passed",
			expectedPass: true,
		},
		{
			name:         "negative indicators",
			input:        "The test failed with error",
			expectedPass: false,
		},
		{
			name:         "mixed with more positive",
			input:        "Some errors occurred but overall test passed successfully",
			expectedPass: true,
		},
		{
			name:         "no clear indicators",
			input:        "This is just plain text",
			expectedPass: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := analyzeContentForAssertion(tc.input)
			assert.Equal(t, tc.expectedPass, got.Pass)
			assert.NotEmpty(t, got.Thought)
		})
	}
}
// TestParseStructuredResponse runs the full sanitize/extract/parse pipeline
// with an AssertionResult target and expects a non-empty Thought for every
// input that should succeed.
func TestParseStructuredResponse(t *testing.T) {
	cases := []struct {
		name          string
		input         string
		shouldSucceed bool
	}{
		{
			name:          "valid AssertionResult JSON",
			input:         `{"pass": true, "thought": "test passed"}`,
			shouldSucceed: true,
		},
		{
			name:          "malformed JSON with extractable fields",
			input:         `malformed start {"pass": false, "thought": "extracted thought"} end`,
			shouldSucceed: true,
		},
		{
			name:          "UTF-8 issues with JSON",
			input:         "测试结果:\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}",
			shouldSucceed: true,
		},
		{
			name:          "content analysis fallback",
			input:         "The assertion was successful and passed correctly",
			shouldSucceed: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got AssertionResult
			err := parseStructuredResponse(tc.input, &got)
			if !tc.shouldSucceed {
				assert.Error(t, err)
				return
			}
			require.NoError(t, err)
			assert.NotEmpty(t, got.Thought)
		})
	}
}
// TestParseJSONWithFallback_QueryResult exercises parseJSONWithFallback with a
// QueryResult target: clean JSON, JSON surrounded by noise, and plain text
// that ends up as raw content.
func TestParseJSONWithFallback_QueryResult(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedValid   bool
		expectedContent string
		expectedThought string
	}{
		{
			name:            "valid QueryResult JSON",
			input:           `{"content": "extracted info", "thought": "analysis complete"}`,
			expectedValid:   true,
			expectedContent: "extracted info",
			expectedThought: "analysis complete",
		},
		{
			name:            "malformed QueryResult with extractable fields",
			input:           `malformed { "content": "partial info", "thought": "partial analysis" } more text`,
			expectedValid:   true,
			expectedContent: "partial info",
			expectedThought: "partial analysis",
		},
		{
			name:            "completely malformed QueryResult",
			input:           `This is just plain text with no structure`,
			expectedValid:   true,
			expectedContent: "This is just plain text with no structure",
			expectedThought: "Failed to parse as JSON, returning raw content",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got QueryResult
			err := parseJSONWithFallback(tc.input, &got)
			if !tc.expectedValid {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedContent, got.Content)
			assert.Equal(t, tc.expectedThought, got.Thought)
		})
	}
}
// TestParseJSONWithFallback_PlanningResponse exercises parseJSONWithFallback
// with a PlanningJSONResponse target and checks thought, error and the number
// of decoded actions.
func TestParseJSONWithFallback_PlanningResponse(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedValid   bool
		expectedThought string
		expectedError   string
		expectedActions int
	}{
		{
			name:            "valid PlanningJSONResponse",
			input:           `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`,
			expectedValid:   true,
			expectedThought: "planning complete",
			expectedError:   "",
			expectedActions: 1,
		},
		{
			name:            "malformed PlanningResponse with extractable thought",
			input:           `malformed { "thought": "partial planning" } more text`,
			expectedValid:   true,
			expectedThought: "partial planning",
			expectedActions: 0,
		},
		{
			name:            "completely malformed PlanningResponse",
			input:           `This is just plain text with no structure`,
			expectedValid:   true,
			expectedThought: "Failed to parse structured response",
			expectedError:   "JSON parsing failed, returning minimal structure",
			expectedActions: 0,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got PlanningJSONResponse
			err := parseJSONWithFallback(tc.input, &got)
			if !tc.expectedValid {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedThought, got.Thought)
			assert.Equal(t, tc.expectedError, got.Error)
			assert.Len(t, got.Actions, tc.expectedActions)
		})
	}
}
// TestExtractQueryFieldsManually verifies field recovery and placeholder
// defaults of extractQueryFieldsManually, and that it errors when neither
// field is present.
func TestExtractQueryFieldsManually(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedContent string
		expectedThought string
		shouldError     bool
	}{
		{
			name:            "both content and thought",
			input:           `{"content": "test content", "thought": "test thought"}`,
			expectedContent: "test content",
			expectedThought: "test thought",
			shouldError:     false,
		},
		{
			name:            "only content",
			input:           `{"content": "only content"}`,
			expectedContent: "only content",
			expectedThought: "Partial extraction from malformed response",
			shouldError:     false,
		},
		{
			name:            "only thought",
			input:           `{"thought": "only thought"}`,
			expectedContent: "Extracted partial information",
			expectedThought: "only thought",
			shouldError:     false,
		},
		{
			name:        "no extractable fields",
			input:       `{"other": "data"}`,
			shouldError: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := extractQueryFieldsManually(tc.input)
			if tc.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedContent, got.Content)
			assert.Equal(t, tc.expectedThought, got.Thought)
		})
	}
}
// TestExtractPlanningFieldsManually verifies field recovery and the thought
// placeholder of extractPlanningFieldsManually, that Actions is always
// initialised, and that it errors when neither field is present.
func TestExtractPlanningFieldsManually(t *testing.T) {
	cases := []struct {
		name            string
		input           string
		expectedThought string
		expectedError   string
		shouldError     bool
	}{
		{
			name:            "both thought and error",
			input:           `{"thought": "test planning", "error": "test error"}`,
			expectedThought: "test planning",
			expectedError:   "test error",
			shouldError:     false,
		},
		{
			name:            "only thought",
			input:           `{"thought": "only planning"}`,
			expectedThought: "only planning",
			expectedError:   "",
			shouldError:     false,
		},
		{
			name:            "only error",
			input:           `{"error": "only error"}`,
			expectedThought: "Partial extraction from malformed response",
			expectedError:   "only error",
			shouldError:     false,
		},
		{
			name:        "no extractable fields",
			input:       `{"other": "data"}`,
			shouldError: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := extractPlanningFieldsManually(tc.input)
			if tc.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expectedThought, got.Thought)
			assert.Equal(t, tc.expectedError, got.Error)
			assert.NotNil(t, got.Actions) // Should always be initialized
		})
	}
}
// TestParseStructuredResponse_QueryResult runs the full pipeline with a
// QueryResult target and expects both Content and Thought to be populated for
// every input that should succeed.
func TestParseStructuredResponse_QueryResult(t *testing.T) {
	cases := []struct {
		name          string
		input         string
		shouldSucceed bool
	}{
		{
			name:          "valid QueryResult JSON",
			input:         `{"content": "extracted data", "thought": "processing complete"}`,
			shouldSucceed: true,
		},
		{
			name:          "QueryResult with UTF-8 issues",
			input:         "extracted data: 搜索框,里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}",
			shouldSucceed: true,
		},
		{
			name:          "malformed QueryResult",
			input:         `malformed start {"content": "partial info"} end`,
			shouldSucceed: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got QueryResult
			err := parseStructuredResponse(tc.input, &got)
			if !tc.shouldSucceed {
				assert.Error(t, err)
				return
			}
			require.NoError(t, err)
			assert.NotEmpty(t, got.Content, "Content should not be empty")
			assert.NotEmpty(t, got.Thought, "Thought should not be empty")
		})
	}
}