Files
httprunner/uixt/ai/utils.go
lilong.129 c322d7c36c fix: improve JSON extraction to handle UTF-8 Chinese characters properly
- Replace byte-based brace counting with UTF-8 aware rune iteration
- Add proper string state tracking to handle escaped quotes
- Add comprehensive test cases for Chinese character handling
- Fix parsing errors when JSON contains Chinese text like 2048经典
2025-06-10 16:09:50 +08:00

105 lines
2.6 KiB
Go

package ai
import (
"regexp"
"strings"
"unicode/utf8"
)
// jsonObjectFallbackRe lazily matches the span from the first "{" to the
// first following "}", optionally preceded by a "json" tag. It is compiled
// once at package scope so the fallback path of extractJSONFromContent does
// not recompile the pattern on every call.
var jsonObjectFallbackRe = regexp.MustCompile(`(?:json)?\s*({[\s\S]*?})\s*`)

// extractJSONFromContent extracts a JSON payload from a text response.
//
// The following strategies are tried in order and the first hit is returned:
//  1. a ```json ... ``` fenced block located anywhere in the content;
//  2. content wrapped entirely in a generic ``` ... ``` fence whose inner
//     text starts with "{" and ends with "}";
//  3. the first brace-balanced { ... } object embedded in the text;
//  4. a lazy regex match from the first "{" to the first "}" (last resort,
//     only reachable when the braces never balance).
//
// The returned text is whitespace-trimmed but is NOT validated as JSON.
// An empty string is returned when no strategy matches.
func extractJSONFromContent(content string) string {
	content = strings.TrimSpace(content)

	// Case 1: ```json ... ``` fenced block. If the closing fence is missing
	// we fall through to the other strategies instead of returning a
	// truncated payload.
	const jsonFence = "```json"
	if open := strings.Index(content, jsonFence); open != -1 {
		bodyStart := open + len(jsonFence)
		if end := strings.Index(content[bodyStart:], "```"); end != -1 {
			return strings.TrimSpace(content[bodyStart : bodyStart+end])
		}
	}

	// Case 2: the whole content is a generic ``` ... ``` fence. The first
	// and last lines carry the fence markers (and possibly a language tag),
	// so they are dropped before inspecting the inner text.
	if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
		lines := strings.Split(content, "\n")
		if len(lines) >= 3 {
			inner := strings.TrimSpace(strings.Join(lines[1:len(lines)-1], "\n"))
			if strings.HasPrefix(inner, "{") && strings.HasSuffix(inner, "}") {
				return inner
			}
		}
	}

	// Case 3: scan from the first "{" for its matching "}". Runes are
	// decoded one at a time so multi-byte characters (e.g. Chinese text)
	// are stepped over whole; braces inside string literals and characters
	// following a backslash inside a string are ignored.
	if start := strings.Index(content, "{"); start != -1 {
		depth := 0
		inString := false
		escaped := false
		for i := start; i < len(content); {
			r, size := utf8.DecodeRuneInString(content[i:])
			i += size // i now points just past the current rune
			switch {
			case escaped:
				// The previous rune was a backslash inside a string:
				// this rune is taken literally.
				escaped = false
			case inString && r == '\\':
				escaped = true
			case r == '"':
				inString = !inString
			case inString:
				// Ordinary string content; braces here do not count.
			case r == '{':
				depth++
			case r == '}':
				depth--
				if depth == 0 {
					return strings.TrimSpace(content[start:i])
				}
			}
		}
	}

	// Case 4: regex fallback. Any content that starts with "{" and ends
	// with "}" is necessarily matched here, so no separate "content itself
	// looks like JSON" check is needed afterwards.
	if m := jsonObjectFallbackRe.FindStringSubmatch(content); len(m) > 1 {
		return strings.TrimSpace(m[1])
	}

	return ""
}