fix: improve JSON extraction to handle UTF-8 Chinese characters properly

- Replace byte-based brace counting with UTF-8-aware rune iteration
- Add proper string state tracking to handle escaped quotes
- Add comprehensive test cases for Chinese character handling
- Fix parsing errors when JSON contains Chinese text like 2048经典
2026-06-26 01:51:29 +08:00 · 2025-06-10 16:05:43 +08:00
parent 88ae8faee1
commit c322d7c36c
4 changed files with 193 additions and 47 deletions
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2506101556
+v5.0.0-beta-2506101609
--- a/uixt/ai/parser_default.go
+++ b/uixt/ai/parser_default.go
@@ -19,14 +19,13 @@ type LLMContentParser interface {
 }

 func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser {
-	switch modelType {
-	case option.DOUBAO_1_5_UI_TARS_250428:
+	if option.IS_UI_TARS(modelType) {
 		return &UITARSContentParser{
 			modelType:     modelType,
 			systemPrompt:  doubao_1_5_ui_tars_planning_prompt,
 			actionMapping: doubao_1_5_ui_tars_action_mapping,
 		}
-	default:
+	} else {
 		return &JSONContentParser{
 			modelType:     modelType,
 			systemPrompt:  doubao_1_5_thinking_vision_pro_planning_prompt,
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -3,6 +3,7 @@ package ai
 import (
 	"regexp"
 	"strings"
+	"unicode/utf8"
 )

 // extractJSONFromContent extracts JSON content from various formats in the response
@@ -42,21 +43,48 @@ func extractJSONFromContent(content string) string {
 		}
 	}

-	// Case 3: Look for JSON object in the content using brace counting (most reliable method)
+	// Case 3: Look for JSON object in the content using rune-based brace counting (most reliable method)
 	start := strings.Index(content, "{")
 	if start != -1 {
-		// Find the matching closing brace
+		// Find the matching closing brace using rune-based iteration to handle UTF-8 properly
 		braceCount := 0
-		for i := start; i < len(content); i++ {
-			if content[i] == '{' {
-				braceCount++
-			} else if content[i] == '}' {
-				braceCount--
-				if braceCount == 0 {
-					jsonContent := strings.TrimSpace(content[start : i+1])
-					return jsonContent
+		inString := false
+		escaped := false
+
+		// Use byte-based iteration but track string state properly
+		for i := start; i < len(content); {
+			r, size := utf8.DecodeRuneInString(content[i:])
+
+			if escaped {
+				escaped = false
+				i += size
+				continue
+			}
+
+			if r == '\\' && inString {
+				escaped = true
+				i += size
+				continue
+			}
+
+			if r == '"' {
+				inString = !inString
+				i += size
+				continue
+			}
+
+			if !inString {
+				if r == '{' {
+					braceCount++
+				} else if r == '}' {
+					braceCount--
+					if braceCount == 0 {
+						jsonContent := strings.TrimSpace(content[start : i+size])
+						return jsonContent
+					}
 				}
 			}
+			i += size
 		}
 	}

--- a/uixt/ai/utils_test.go
+++ b/uixt/ai/utils_test.go
@@ -9,39 +9,12 @@ import (
 func TestExtractJSONFromContent(t *testing.T) {
 	tests := []struct {
 		name     string
-		input    string
+		content  string
 		expected string
 	}{
 		{
-			name:     "valid JSON",
-			input:    `{"content": "test", "thought": "test"}`,
-			expected: `{"content": "test", "thought": "test"}`,
-		},
-		{
-			name:  "JSON in markdown",
-			input: "```json\n{\n  \"content\": \"test\"\n}\n```",
-			expected: `{
-  "content": "test"
-}`,
-		},
-		{
-			name:     "incomplete JSON without closing brace",
-			input:    `{"content": "incomplete json"`,
-			expected: "",
-		},
-		{
-			name:     "incomplete JSON with missing closing brace",
-			input:    `{"content": "incomplete json", "missing_closing_brace": true`,
-			expected: "",
-		},
-		{
-			name:     "plain text",
-			input:    "This is just plain text",
-			expected: "",
-		},
-		{
-			name: "complex nested JSON with arrays",
-			input: `{
+			name: "simple JSON",
+			content: `{
  "actions": [
    {
      "action_type": "click",
@@ -64,15 +37,161 @@ func TestExtractJSONFromContent(t *testing.T) {
  ],
  "thought": "点击桌面上的抖音应用图标以启动抖音",
  "error": null
+}`,
+		},
+		{
+			name: "JSON with Chinese characters in strings",
+			content: `{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "2048经典"
+      }
+    }
+  ],
+  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
+  "error": null
+}`,
+			expected: `{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "2048经典"
+      }
+    }
+  ],
+  "thought": "搜索框已经清空了，现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了，正好可以直接开始输入。这样一来，就能找到我们想要玩的那个小游戏了。",
+  "error": null
+}`,
+		},
+		{
+			name: "JSON with markdown wrapper",
+			content: "```json\n" + `{
+  "actions": [
+    {
+      "action_type": "click",
+      "action_inputs": {
+        "start_box": [100, 200, 150, 250]
+      }
+    }
+  ],
+  "thought": "点击按钮",
+  "error": null
+}` + "\n```",
+			expected: `{
+  "actions": [
+    {
+      "action_type": "click",
+      "action_inputs": {
+        "start_box": [100, 200, 150, 250]
+      }
+    }
+  ],
+  "thought": "点击按钮",
+  "error": null
+}`,
+		},
+		{
+			name: "JSON embedded in text with Chinese",
+			content: `这是一个包含中文的响应：{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "测试内容"
+      }
+    }
+  ],
+  "thought": "这是一个测试思路",
+  "error": null
+} 后面还有一些文本`,
+			expected: `{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "测试内容"
+      }
+    }
+  ],
+  "thought": "这是一个测试思路",
+  "error": null
+}`,
+		},
+		{
+			name: "JSON with escaped quotes and Chinese",
+			content: `{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "他说：\"你好，世界！\""
+      }
+    }
+  ],
+  "thought": "输入包含引号的中文文本",
+  "error": null
+}`,
+			expected: `{
+  "actions": [
+    {
+      "action_type": "type",
+      "action_inputs": {
+        "content": "他说：\"你好，世界！\""
+      }
+    }
+  ],
+  "thought": "输入包含引号的中文文本",
+  "error": null
+}`,
+		},
+		{
+			name:     "no JSON content",
+			content:  "这只是一些普通的文本，没有JSON内容",
+			expected: "",
+		},
+		{
+			name: "nested JSON objects with Chinese",
+			content: `{
+  "actions": [
+    {
+      "action_type": "click",
+      "action_inputs": {
+        "start_box": [100, 200, 150, 250],
+        "metadata": {
+          "description": "点击操作",
+          "target": "按钮"
+        }
+      }
+    }
+  ],
+  "thought": "执行嵌套对象的点击操作",
+  "error": null
+}`,
+			expected: `{
+  "actions": [
+    {
+      "action_type": "click",
+      "action_inputs": {
+        "start_box": [100, 200, 150, 250],
+        "metadata": {
+          "description": "点击操作",
+          "target": "按钮"
+        }
+      }
+    }
+  ],
+  "thought": "执行嵌套对象的点击操作",
+  "error": null
 }`,
 		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := extractJSONFromContent(tt.input)
-			t.Logf("Input: %s", tt.input)
-			t.Logf("Output: %s", result)
+			result := extractJSONFromContent(tt.content)
 			assert.Equal(t, tt.expected, result)
 		})
 	}