fix: improve JSON extraction to handle UTF-8 Chinese characters properly

- Replace byte-based brace counting with UTF-8 aware rune iteration - Add proper string state tracking to handle escaped quotes - Add comprehensive test cases for Chinese character handling - Fix parsing errors when JSON contains Chinese text like 2048经典
2026-06-08 09:19:41 +08:00 · 2025-06-10 16:05:43 +08:00
parent 88ae8faee1
commit c322d7c36c
4 changed files with 193 additions and 47 deletions
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -3,6 +3,7 @@ package ai
 import (
 	"regexp"
 	"strings"
+	"unicode/utf8"
 )

 // extractJSONFromContent extracts JSON content from various formats in the response
@@ -42,21 +43,48 @@ func extractJSONFromContent(content string) string {
 		}
 	}

-	// Case 3: Look for JSON object in the content using brace counting (most reliable method)
+	// Case 3: Look for JSON object in the content using rune-based brace counting (most reliable method)
 	start := strings.Index(content, "{")
 	if start != -1 {
-		// Find the matching closing brace
+		// Find the matching closing brace using rune-based iteration to handle UTF-8 properly
 		braceCount := 0
-		for i := start; i < len(content); i++ {
-			if content[i] == '{' {
-				braceCount++
-			} else if content[i] == '}' {
-				braceCount--
-				if braceCount == 0 {
-					jsonContent := strings.TrimSpace(content[start : i+1])
-					return jsonContent
+		inString := false
+		escaped := false
+
+		// Use byte-based iteration but track string state properly
+		for i := start; i < len(content); {
+			r, size := utf8.DecodeRuneInString(content[i:])
+
+			if escaped {
+				escaped = false
+				i += size
+				continue
+			}
+
+			if r == '\\' && inString {
+				escaped = true
+				i += size
+				continue
+			}
+
+			if r == '"' {
+				inString = !inString
+				i += size
+				continue
+			}
+
+			if !inString {
+				if r == '{' {
+					braceCount++
+				} else if r == '}' {
+					braceCount--
+					if braceCount == 0 {
+						jsonContent := strings.TrimSpace(content[start : i+size])
+						return jsonContent
+					}
 				}
 			}
+			i += size
 		}
 	}