mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-10 17:43:00 +08:00
- Replace byte-based brace counting with UTF-8 aware rune iteration - Add proper string state tracking to handle escaped quotes - Add comprehensive test cases for Chinese character handling - Fix parsing errors when JSON contains Chinese text like 2048经典
105 lines
2.6 KiB
Go
105 lines
2.6 KiB
Go
package ai
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// fallbackJSONObjectRe is the last-resort pattern used by
// extractJSONFromContent: an optional "json" tag, optional whitespace, then
// the span from a '{' to the nearest following '}'. It is compiled once at
// package scope rather than on every call.
var fallbackJSONObjectRe = regexp.MustCompile(`(?:json)?\s*({[\s\S]*?})\s*`)

// extractJSONFromContent extracts a JSON object from free-form response text.
//
// The content is trimmed and then probed in this order; the first probe that
// succeeds determines the result:
//  1. A ```json ... ``` fenced block anywhere in the content: the trimmed text
//     between the opening fence and the next ``` is returned as-is.
//  2. Content that is entirely wrapped in a generic ``` ... ``` fence: the
//     first and last lines are dropped and the remainder is returned if it
//     starts with '{' and ends with '}'.
//  3. A JSON object embedded in text (this also covers plain JSON content):
//     starting at the first '{', the matching '}' is located by brace counting
//     that ignores braces inside double-quoted strings and honours backslash
//     escapes.
//  4. A regular-expression fallback, reached only when the first '{' is never
//     closed: the span from a '{' to the nearest following '}'.
//
// It returns "" when none of the probes finds anything. The result is not
// validated as JSON; callers are expected to parse it themselves.
func extractJSONFromContent(content string) string {
	content = strings.TrimSpace(content)

	// Case 1: content containing a ```json ... ``` fenced block.
	const jsonFence = "```json"
	if start := strings.Index(content, jsonFence); start != -1 {
		start += len(jsonFence)
		if end := strings.Index(content[start:], "```"); end != -1 {
			return strings.TrimSpace(content[start : start+end])
		}
		// No closing fence: fall through to the more lenient probes.
	}

	// Case 2: content wrapped in ``` ... ``` (without the json specifier).
	if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
		lines := strings.Split(content, "\n")
		if len(lines) >= 3 {
			// Drop the first and last lines, which carry the ``` markers.
			inner := strings.TrimSpace(strings.Join(lines[1:len(lines)-1], "\n"))
			// Only accept the payload if it looks like a JSON object.
			if strings.HasPrefix(inner, "{") && strings.HasSuffix(inner, "}") {
				return inner
			}
		}
	}

	// Case 3: JSON object embedded in the content, found by brace matching.
	if start := strings.Index(content, "{"); start != -1 {
		if end := matchingBraceEnd(content, start); end != -1 {
			// The span starts with '{' and ends with '}', so no trimming is needed.
			return content[start:end]
		}
	}

	// Case 4: regex fallback for content whose first '{' is never balanced.
	// Any content that both starts with '{' and ends with '}' matches this
	// pattern, so no separate "content itself looks like JSON" check is needed.
	if m := fallbackJSONObjectRe.FindStringSubmatch(content); len(m) > 1 {
		return strings.TrimSpace(m[1])
	}

	return ""
}

// matchingBraceEnd returns the byte offset just past the '}' that closes the
// '{' located at content[start], or -1 if that brace is never closed.
// Braces inside double-quoted strings are ignored, and a backslash inside a
// string escapes the character that follows it.
func matchingBraceEnd(content string, start int) int {
	depth := 0
	inString := false
	escaped := false

	// Decode rune by rune while tracking byte offsets, so multi-byte characters
	// (e.g. Chinese text) are stepped over as a unit. Invalid bytes decode with
	// a width of 1, so the loop always advances.
	for i := start; i < len(content); {
		r, size := utf8.DecodeRuneInString(content[i:])
		i += size

		switch {
		case escaped:
			// The previous rune was a backslash inside a string; skip this one.
			escaped = false
		case inString && r == '\\':
			escaped = true
		case r == '"':
			inString = !inString
		case inString:
			// Braces inside string literals do not affect nesting depth.
		case r == '{':
			depth++
		case r == '}':
			depth--
			if depth == 0 {
				return i
			}
		}
	}

	return -1
}
|