- {{range $screenshot := index $attachments "screen_results"}}
- {{$base64Image := encodeImageBase64 $screenshot.ImagePath}}
- {{if $base64Image}}
-
-
- {{base $screenshot.ImagePath}}
- {{if $screenshot.Resolution}}
- {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}
- {{end}}
-
-
-

+
+ {{range $screenshot := index $attachments "screen_results"}}
+ {{$imagePath := ""}}
+ {{if $screenshot.ImagePath}}
+ {{$imagePath = $screenshot.ImagePath}}
+ {{else if index $screenshot "image_path"}}
+ {{$imagePath = index $screenshot "image_path"}}
+ {{end}}
+ {{if $imagePath}}
+ {{$base64Image := encodeImageBase64 $imagePath}}
+ {{if $base64Image}}
+
+
+ {{base $imagePath}}
+ {{if $screenshot.Resolution}}
+ {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}
+ {{else if index $screenshot "resolution"}}
+ {{$resolution := index $screenshot "resolution"}}
+ {{index $resolution "width"}}x{{index $resolution "height"}}
+ {{end}}
+
+
+

+
+ {{end}}
+ {{end}}
+ {{end}}
- {{end}}
- {{end}}
{{end}}
{{end}}
@@ -2569,19 +2683,6 @@ const htmlTemplate = `
}
}
- function toggleRequests(buttonElement) {
- const requestsDiv = buttonElement.parentElement;
- const requestsContent = requestsDiv.querySelector('.requests-content');
-
- if (requestsContent.classList.contains('show')) {
- requestsContent.classList.remove('show');
- buttonElement.textContent = buttonElement.textContent.replace('Hide', 'Show');
- } else {
- requestsContent.classList.add('show');
- buttonElement.textContent = buttonElement.textContent.replace('Show', 'Hide');
- }
- }
-
function toggleRequestsCompact(buttonElement) {
const requestsDiv = buttonElement.parentElement;
const requestsContent = requestsDiv.querySelector('.requests-content-compact');
@@ -2595,8 +2696,6 @@ const htmlTemplate = `
}
}
-
-
function openImageModal(src) {
const modal = document.getElementById('imageModal');
const modalImg = document.getElementById('modalImage');
@@ -2616,8 +2715,6 @@ const htmlTemplate = `
}
}
-
-
// Auto-expand all steps on load to show actions
document.addEventListener('DOMContentLoaded', function() {
// Expand all steps to show the actions list
diff --git a/runner.go b/runner.go
index e565d628..73ac82b1 100644
--- a/runner.go
+++ b/runner.go
@@ -729,6 +729,11 @@ func (r *SessionRunner) Start(givenVars map[string]interface{}) (summary *TestCa
return summary, nil
}
+// Log message texts used by RunStep: RUN_STEP_START is logged before a step
+// runs, RUN_STEP_END is logged when a step run finishes (on success and on
+// failure). Keeping them as constants gives the text a single definition.
+const (
+ RUN_STEP_START = "run step start"
+ RUN_STEP_END = "run step end"
+)
+
func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error) {
// check for interrupt signal before running step
select {
@@ -748,7 +753,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)
stepName := step.Name()
stepType := string(step.Type())
- log.Info().Str("step", stepName).Str("type", stepType).Msg("run step start")
+ log.Info().Str("step", stepName).Str("type", stepType).Msg(RUN_STEP_START)
// run times of step
loopTimes := step.Config().Loops
@@ -785,7 +790,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)
Bool("success", true).
Int64("elapsed(ms)", stepResult.Elapsed).
Interface("exportVars", stepResult.ExportVars).
- Msg("run step end")
+ Msg(RUN_STEP_END)
continue
}
// run step failed
@@ -793,7 +798,7 @@ func (r *SessionRunner) RunStep(step IStep) (stepResult *StepResult, err error)
Str("type", stepType).
Bool("success", false).
Int64("elapsed(ms)", stepResult.Elapsed).
- Msg("run step end")
+ Msg(RUN_STEP_END)
return stepResult, err
}
diff --git a/step.go b/step.go
index 6b232cea..143abe24 100644
--- a/step.go
+++ b/step.go
@@ -58,7 +58,7 @@ type TStep struct {
// one step contains one or multiple actions
type ActionResult struct {
option.MobileAction `json:",inline"`
- StartTime int64 `json:"start_time"` // action start time
+ StartTime int64 `json:"start_time"` // action start time in millisecond(ms)
Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms)
Error error `json:"error"` // action execution result
Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions
@@ -69,7 +69,7 @@ type ActionResult struct {
type StepResult struct {
Name string `json:"name" yaml:"name"` // step name
Identifier string `json:"identifier,omitempty" yaml:"identifier,omitempty"` // step identifier
- StartTime int64 `json:"start_time" yaml:"time"` // step start time
+ StartTime int64 `json:"start_time" yaml:"time"` // step start time in millisecond(ms)
StepType StepType `json:"step_type" yaml:"step_type"` // step type, testcase/request/transaction/rendezvous
Success bool `json:"success" yaml:"success"` // step execution result
Elapsed int64 `json:"elapsed_ms" yaml:"elapsed_ms"` // step execution time in millisecond(ms)
diff --git a/step_function.go b/step_function.go
index 3aae4bce..d06c18cb 100644
--- a/step_function.go
+++ b/step_function.go
@@ -27,10 +27,7 @@ func (s *StepFunction) Type() StepType {
}
func (s *StepFunction) Config() *StepConfig {
- return &StepConfig{
- StepName: s.StepName,
- Variables: s.Variables,
- }
+ return &s.StepConfig
}
func (s *StepFunction) Run(r *SessionRunner) (*StepResult, error) {
@@ -57,7 +54,7 @@ func runStepFunction(r *SessionRunner, step IStep) (stepResult *StepResult, err
StepType: step.Type(),
Success: false,
ContentSize: 0,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
defer func() {
attachments := uixt.Attachments{}
diff --git a/step_rendezvous.go b/step_rendezvous.go
index c1c46518..545c4d33 100644
--- a/step_rendezvous.go
+++ b/step_rendezvous.go
@@ -26,10 +26,7 @@ func (s *StepRendezvous) Type() StepType {
}
func (s *StepRendezvous) Config() *StepConfig {
- return &StepConfig{
- StepName: s.StepName,
- Variables: s.Variables,
- }
+ return &s.StepConfig
}
func (s *StepRendezvous) Run(r *SessionRunner) (*StepResult, error) {
diff --git a/step_request.go b/step_request.go
index 6976b06a..25b88666 100644
--- a/step_request.go
+++ b/step_request.go
@@ -285,7 +285,7 @@ func runStepRequest(r *SessionRunner, step IStep) (stepResult *StepResult, err e
StepType: step.Type(),
Success: false,
ContentSize: 0,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
defer func() {
diff --git a/step_shell.go b/step_shell.go
index 52263757..ef52b1a2 100644
--- a/step_shell.go
+++ b/step_shell.go
@@ -30,10 +30,7 @@ func (s *StepShell) Type() StepType {
}
func (s *StepShell) Config() *StepConfig {
- return &StepConfig{
- StepName: s.StepName,
- Variables: s.Variables,
- }
+ return &s.StepConfig
}
func (s *StepShell) Run(r *SessionRunner) (*StepResult, error) {
@@ -63,10 +60,7 @@ func (s *StepShellValidation) Type() StepType {
}
func (s *StepShellValidation) Config() *StepConfig {
- return &StepConfig{
- StepName: s.StepName,
- Variables: s.Variables,
- }
+ return &s.StepConfig
}
func (s *StepShellValidation) Run(r *SessionRunner) (*StepResult, error) {
@@ -101,7 +95,7 @@ func runStepShell(r *SessionRunner, step IStep) (stepResult *StepResult, err err
StepType: step.Type(),
Success: false,
ContentSize: 0,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
defer func() {
stepResult.Elapsed = time.Since(start).Milliseconds()
diff --git a/step_testcase.go b/step_testcase.go
index 43df5bb4..480e4864 100644
--- a/step_testcase.go
+++ b/step_testcase.go
@@ -51,7 +51,7 @@ func (s *StepTestCaseWithOptionalArgs) Run(r *SessionRunner) (stepResult *StepRe
Name: s.Name(),
StepType: s.Type(),
Success: false,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
defer func() {
diff --git a/step_ui.go b/step_ui.go
index a7357ce9..da922435 100644
--- a/step_ui.go
+++ b/step_ui.go
@@ -691,11 +691,11 @@ func (s *StepMobileUIValidation) Type() StepType {
}
func (s *StepMobileUIValidation) Config() *StepConfig {
- return &StepConfig{
- StepName: s.StepName,
- Variables: s.Variables,
- Validators: s.Validators,
- }
+ // Get the original StepConfig from embedded StepMobile
+ config := &s.StepMobile.StepConfig
+ // Sync validators to the StepConfig
+ config.Validators = s.Validators
+ return config
}
func (s *StepMobileUIValidation) Run(r *SessionRunner) (*StepResult, error) {
@@ -709,7 +709,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
StepType: step.Type(),
Success: false,
ContentSize: 0,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
var stepVariables map[string]interface{}
@@ -781,7 +781,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
Method: option.ACTION_GetForegroundApp,
Params: "[ForDebug] check foreground app",
},
- StartTime: startTime.Unix(),
+ StartTime: startTime.UnixMilli(),
}
subActionResults, err1 := uiDriver.ExecuteAction(
context.Background(), actionResult.MobileAction)
@@ -793,6 +793,16 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
stepResult.Actions = append(stepResult.Actions, actionResult)
}
+ // Get session data and add to attachments, clear session for next step
+ if uiDriver != nil {
+ sessionData := uiDriver.GetSession().GetData(true) // clear session after getting data
+ if len(sessionData.ScreenResults) > 0 {
+ attachments["screen_results"] = sessionData.ScreenResults
+ log.Debug().Int("count", len(sessionData.ScreenResults)).
+ Str("step", step.Name()).Msg("added screen results to step attachments")
+ }
+ }
+
var config *TConfig
if s.caseRunner != nil && s.caseRunner.Config != nil {
config = s.caseRunner.Config.Get()
@@ -815,7 +825,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
Method: option.ACTION_ClosePopups,
Params: "[ForDebug] close popups handler",
},
- StartTime: startTime.Unix(),
+ StartTime: startTime.UnixMilli(),
}
subActionResults, err2 := uiDriver.ExecuteAction(
context.Background(), actionResult.MobileAction)
@@ -842,10 +852,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
return stepResult, errors.Wrap(code.InterruptError, "mobile UI runner interrupted")
default:
actionStartTime := time.Now()
- actionResult := &ActionResult{
- MobileAction: action,
- StartTime: actionStartTime.Unix(), // action 开始时间
- }
+ // Parse action params first for variable substitution
if action.Params, err = s.caseRunner.parser.Parse(action.Params, stepVariables); err != nil {
if !code.IsErrorPredefined(err) {
err = errors.Wrap(code.ParseError,
@@ -854,6 +861,12 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
return stepResult, err
}
+ // Create ActionResult with parsed params for accurate reporting
+ actionResult := &ActionResult{
+ MobileAction: action, // Now contains parsed params
+ StartTime: actionStartTime.UnixMilli(), // action start time
+ }
+
// Apply global configuration from testcase config
if s.caseRunner != nil && s.caseRunner.Config != nil {
config := s.caseRunner.Config.Get()
@@ -951,55 +964,111 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
}
// validate
- validateResults, err := validateUI(uiDriver, stepValidators)
- if err != nil {
- if !code.IsErrorPredefined(err) {
- err = errors.Wrap(code.MobileUIValidationError, err.Error())
- }
- return
- }
+ validateResults, err := validateUI(uiDriver, stepValidators, s.caseRunner.parser, stepVariables)
if len(validateResults) > 0 {
+ // Always save validation results if any exist, regardless of success or failure
sessionData := &SessionData{
Validators: validateResults,
}
stepResult.Data = sessionData
}
+ if err != nil {
+ // Handle validation error after saving results
+ if !code.IsErrorPredefined(err) {
+ err = errors.Wrap(code.MobileUIValidationError, err.Error())
+ }
+ return stepResult, err
+ }
+
stepResult.Success = true
return stepResult, nil
}
-func validateUI(ud *uixt.XTDriver, iValidators []interface{}) (validateResults []*ValidationResult, err error) {
+// validateUI parses the step validators (variable substitution) and runs each
+// UI / AI-assert validator against the driver.
+//
+// Validators whose Check does not start with "ui_" and whose Assert is not
+// "ai_assert" are recorded with CheckResult "skip". Validation stops at the
+// first failure. On failure the results collected so far, including the
+// failing validator (CheckResult "fail"), are returned together with the
+// error so the caller can still report them. Only a parse error yields nil
+// results.
+func validateUI(ud *uixt.XTDriver, iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) (validateResults []*ValidationResult, err error) {
+	// Parse all validators for variable substitution
+	parsedValidators, err := parseStepValidators(iValidators, parser, stepVariables)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, validator := range parsedValidators {
+		log.Debug().
+			Str("check", validator.Check).
+			Str("assert", validator.Assert).
+			Interface("expect", validator.Expect).
+			Str("message", validator.Message).
+			Msg("processing validator")
+
+		// Start pessimistic; flipped to "skip" or "pass" below.
+		validationResult := &ValidationResult{
+			Validator:   validator, // parsed validator, for accurate reporting
+			CheckResult: "fail",
+		}
+
+		// Only UI validators and AI assert validators are handled here.
+		if !strings.HasPrefix(validator.Check, "ui_") && validator.Assert != "ai_assert" {
+			validationResult.CheckResult = "skip"
+			log.Warn().Interface("validator", validator).Msg("skip validator")
+			validateResults = append(validateResults, validationResult)
+			continue
+		}
+
+		// The driver expects a string; keep the failed entry in the results
+		// (same as a DoValidation failure) instead of dropping everything.
+		expected, ok := validator.Expect.(string)
+		if !ok {
+			validateResults = append(validateResults, validationResult)
+			return validateResults, errors.New("validator expect should be string")
+		}
+
+		if err = ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message); err != nil {
+			validateResults = append(validateResults, validationResult)
+			return validateResults, errors.Wrap(err, "step validation failed")
+		}
+
+		validationResult.CheckResult = "pass"
+		validateResults = append(validateResults, validationResult)
+	}
+
+	return validateResults, nil
+}
+
+// parseStepValidators parses all validators for variable substitution
+func parseStepValidators(iValidators []interface{}, parser *Parser, stepVariables map[string]interface{}) ([]Validator, error) {
+ var parsedValidators []Validator
+
for _, iValidator := range iValidators {
validator, ok := iValidator.(Validator)
if !ok {
return nil, errors.New("validator type error")
}
- validataResult := &ValidationResult{
- Validator: validator,
- CheckResult: "fail",
+ parsedValidator := validator
+
+ // Parse Expect field for variable substitution
+ if expectedStr, ok := validator.Expect.(string); ok {
+ if parsedExpected, err := parser.Parse(expectedStr, stepVariables); err != nil {
+ return nil, errors.Wrap(err, "failed to parse validator expect field")
+ } else {
+ parsedValidator.Expect = parsedExpected
+ }
}
- // parse check value
- if !strings.HasPrefix(validator.Check, "ui_") {
- validataResult.CheckResult = "skip"
- log.Warn().Interface("validator", validator).Msg("skip validator")
- validateResults = append(validateResults, validataResult)
- continue
+ // Parse Message field for variable substitution
+ if validator.Message != "" {
+ if parsedMessage, err := parser.Parse(validator.Message, stepVariables); err != nil {
+ return nil, errors.Wrap(err, "failed to parse validator message field")
+ } else {
+ if msgStr, ok := parsedMessage.(string); ok {
+ parsedValidator.Message = msgStr
+ }
+ }
}
- expected, ok := validator.Expect.(string)
- if !ok {
- return nil, errors.New("validator expect should be string")
- }
-
- err := ud.DoValidation(validator.Check, validator.Assert, expected, validator.Message)
- if err != nil {
- return validateResults, errors.Wrap(err, "step validation failed")
- }
-
- validataResult.CheckResult = "pass"
- validateResults = append(validateResults, validataResult)
+ parsedValidators = append(parsedValidators, parsedValidator)
}
- return validateResults, nil
+
+ return parsedValidators, nil
}
diff --git a/step_websocket.go b/step_websocket.go
index 91702334..04dc925a 100644
--- a/step_websocket.go
+++ b/step_websocket.go
@@ -381,7 +381,7 @@ func runStepWebSocket(r *SessionRunner, step IStep) (stepResult *StepResult, err
StepType: step.Type(),
Success: false,
ContentSize: 0,
- StartTime: start.Unix(),
+ StartTime: start.UnixMilli(),
}
defer func() {
diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go
index 8fd0ddf0..58880c51 100644
--- a/uixt/ai/asserter.go
+++ b/uixt/ai/asserter.go
@@ -10,10 +10,10 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/getkin/kin-openapi/openapi3gen"
"github.com/httprunner/httprunner/v5/code"
- "github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/pkg/errors"
+ "github.com/rs/zerolog/log"
)
// IAsserter interface defines the contract for assertion operations
@@ -160,15 +160,13 @@ func validateAssertionInput(opts *AssertOptions) error {
// parseAssertionResult parses the model response into AssertionResponse
func parseAssertionResult(content string) (*AssertionResult, error) {
- // Extract JSON content from response
- jsonContent := extractJSONFromContent(content)
- if jsonContent == "" {
- return nil, errors.New("could not extract JSON from response")
- }
-
- // Parse JSON response
var result AssertionResult
- if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
+
+ // Use the generic structured response parser
+ if err := parseStructuredResponse(content, &result); err != nil {
+ log.Warn().
+ Interface("original_content", content).
+ Msg("parse assertion result failed")
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go
index 9012260a..d9bdaaba 100644
--- a/uixt/ai/asserter_test.go
+++ b/uixt/ai/asserter_test.go
@@ -104,3 +104,46 @@ func TestInvalidParameters(t *testing.T) {
})
}
}
+
+// Test the main parseAssertionResult function with problematic input
+func TestParseAssertionResult(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ shouldSucceed bool
+ }{
+ {
+ name: "valid JSON response",
+ input: `{"pass": true, "thought": "Assertion passed"}`,
+ shouldSucceed: true,
+ },
+ {
+ name: "response with UTF-8 replacement characters",
+ input: "浅蓝色的搜索框,里面显示着输入的\"ma\",而\ufffd\ufffd且在搜索框的右上角有一个喇叭 {\"pass\": true, \"thought\": \"found search box\"}",
+ shouldSucceed: true,
+ },
+ {
+ name: "malformed JSON with extraction",
+ input: `malformed start {"pass": true, "thought": "extracted successfully"} malformed end`,
+ shouldSucceed: true,
+ },
+ {
+ name: "completely malformed but analyzable",
+ input: "This assertion test passed and was successful",
+ shouldSucceed: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := parseAssertionResult(tt.input)
+ if tt.shouldSucceed {
+ require.NoError(t, err)
+ assert.NotNil(t, result)
+ assert.NotEmpty(t, result.Thought)
+ } else {
+ assert.Error(t, err)
+ }
+ })
+ }
+}
diff --git a/uixt/ai/parser_default.go b/uixt/ai/parser_default.go
index 69dcb4ab..5169dc1a 100644
--- a/uixt/ai/parser_default.go
+++ b/uixt/ai/parser_default.go
@@ -5,7 +5,6 @@ import (
"strings"
"github.com/cloudwego/eino/schema"
- "github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/pkg/errors"
@@ -48,20 +47,9 @@ func (p *JSONContentParser) SystemPrompt() string {
func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningResult, error) {
content = strings.TrimSpace(content)
- // Extract JSON content from markdown code blocks
- jsonContent := extractJSONFromContent(content)
- if jsonContent == "" {
- return nil, fmt.Errorf("no valid JSON content found in response")
- }
-
- // Define a temporary struct to parse the expected JSON format
- var jsonResponse struct {
- Actions []Action `json:"actions"`
- Thought string `json:"thought"`
- Error string `json:"error"`
- }
-
- if err := json.Unmarshal([]byte(jsonContent), &jsonResponse); err != nil {
+ // Use the generic structured response parser
+ var jsonResponse PlanningJSONResponse
+ if err := parseStructuredResponse(content, &jsonResponse); err != nil {
return nil, fmt.Errorf("failed to parse VLM response: %v", err)
}
diff --git a/uixt/ai/planner_prompts.go b/uixt/ai/planner_prompts.go
index 57c1704f..64d0b3f4 100644
--- a/uixt/ai/planner_prompts.go
+++ b/uixt/ai/planner_prompts.go
@@ -41,6 +41,8 @@ var doubao_1_5_ui_tars_action_mapping = map[string]option.ActionName{
"type": option.ACTION_Input,
"scroll": option.ACTION_Swipe, // swipe up/down/left/right
"wait": option.ACTION_Sleep,
+ "press_home": option.ACTION_Home,
+ "press_back": option.ACTION_Back,
"finished": option.ACTION_Finished,
}
@@ -138,5 +140,7 @@ var doubao_1_5_thinking_vision_pro_action_mapping = map[string]option.ActionName
"type": option.ACTION_Input,
"scroll": option.ACTION_Swipe, // swipe up/down/left/right
"wait": option.ACTION_Sleep,
+ "press_home": option.ACTION_Home,
+ "press_back": option.ACTION_Back,
"finished": option.ACTION_Finished,
}
diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go
index 02f3676e..75d5dbc3 100644
--- a/uixt/ai/querier.go
+++ b/uixt/ai/querier.go
@@ -169,24 +169,14 @@ func validateQueryInput(opts *QueryOptions) error {
// parseQueryResult parses the model response into QueryResult
func parseQueryResult(content string) (*QueryResult, error) {
- // Extract JSON content from response
- jsonContent := extractJSONFromContent(content)
- if jsonContent == "" {
- // If no JSON found, treat the entire content as the result
- // This handles cases where the model returns plain text instead of JSON
- return &QueryResult{
- Content: content,
- Thought: "Direct response from model",
- }, nil
- }
-
- // Parse JSON response
var result QueryResult
- if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
- // If JSON parsing fails, treat the content as plain text result
+
+ // Use the generic structured response parser with enhanced error recovery
+ if err := parseStructuredResponse(content, &result); err != nil {
+ // If parseStructuredResponse fails completely, treat content as plain text
return &QueryResult{
Content: content,
- Thought: "Failed to parse as JSON, returning raw content",
+ Thought: "Failed to parse response, returning raw content",
}, nil
}
diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go
index 38ecdc00..3793f7bf 100644
--- a/uixt/ai/querier_test.go
+++ b/uixt/ai/querier_test.go
@@ -95,38 +95,35 @@ func TestParseQueryResult(t *testing.T) {
expected *QueryResult
}{
{
- name: "valid JSON response",
- content: `{
- "content": "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
- "thought": "通过分析图片,我识别出了游戏界面的结构和图案类型"
- }`,
+ name: "valid JSON response",
+ content: `{"content": "extracted information", "thought": "analysis complete"}`,
expected: &QueryResult{
- Content: "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
- Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型",
+ Content: "extracted information",
+ Thought: "analysis complete",
},
},
{
name: "JSON in markdown",
- content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```",
+ content: "```json\n{\"content\": \"data from markdown\", \"thought\": \"parsed from code block\"}\n```",
expected: &QueryResult{
- Content: "游戏界面分析结果",
- Thought: "分析过程",
+ Content: "data from markdown",
+ Thought: "parsed from code block",
},
},
{
name: "plain text response",
- content: "这是一个连连看游戏界面,包含多种图案。",
+ content: "This is just plain text without JSON structure",
expected: &QueryResult{
- Content: "这是一个连连看游戏界面,包含多种图案。",
- Thought: "Direct response from model",
+ Content: "This is just plain text without JSON structure",
+ Thought: "Failed to parse as JSON, returning raw content",
},
},
{
name: "invalid JSON",
content: `{"content": "incomplete json", "missing_closing_brace": true`,
expected: &QueryResult{
- Content: `{"content": "incomplete json", "missing_closing_brace": true`,
- Thought: "Direct response from model",
+ Content: "incomplete json",
+ Thought: "Partial extraction from malformed response",
},
},
}
diff --git a/uixt/ai/session.go b/uixt/ai/session.go
index d5707fe3..ced09e09 100644
--- a/uixt/ai/session.go
+++ b/uixt/ai/session.go
@@ -73,6 +73,11 @@ func (h *ConversationHistory) Clear() {
log.Warn().Msg("conversation history cleared completely")
}
+// Log message texts used by logRequest and logResponse respectively; kept as
+// constants so the exact text has a single definition.
+const (
+ LOG_REQUEST_MESSAGES = "log request messages"
+ LOG_RESPONSE_MESSAGE = "log response message"
+)
+
func logRequest(messages ConversationHistory) {
msgs := make(ConversationHistory, 0, len(messages))
for _, message := range messages {
@@ -99,7 +104,7 @@ func logRequest(messages ConversationHistory) {
}
msgs = append(msgs, msg)
}
- log.Debug().Interface("messages", msgs).Msg("log request messages")
+ log.Debug().Interface("messages", msgs).Msg(LOG_REQUEST_MESSAGES)
}
func logResponse(message *schema.Message) {
@@ -126,5 +131,5 @@ func logResponse(message *schema.Message) {
if message.Extra != nil {
logger = logger.Interface("extra", message.Extra)
}
- logger.Msg("log response message")
+ logger.Msg(LOG_RESPONSE_MESSAGE)
}
diff --git a/uixt/ai/utils.go b/uixt/ai/utils.go
index 572b705e..1a6b1748 100644
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -2,6 +2,7 @@ package ai
import (
"context"
+ "fmt"
"regexp"
"strings"
"time"
@@ -11,9 +12,57 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/rs/zerolog/log"
+ "github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/option"
+ "github.com/pkg/errors"
)
+// PlanningJSONResponse represents the JSON response structure for planning.
+// Its fields are decoded from the "actions", "thought" and "error" keys of
+// the model reply.
+type PlanningJSONResponse struct {
+ // Actions is the list decoded from the "actions" key.
+ Actions []Action `json:"actions"`
+ // Thought is the text decoded from the "thought" key.
+ Thought string `json:"thought"`
+ // Error is the text decoded from the "error" key.
+ Error string `json:"error"`
+}
+
+// parseStructuredResponse decodes a model reply into result.
+//
+// The reply is first passed through sanitizeUTF8Content, then the JSON part is
+// located with extractJSONFromContent. When nothing is located, the whole
+// sanitized reply is used as the payload. Decoding (with its recovery
+// strategies) is delegated to parseJSONWithFallback, whose error is returned.
+func parseStructuredResponse(content string, result interface{}) error {
+	sanitized := sanitizeUTF8Content(content)
+
+	payload := extractJSONFromContent(sanitized)
+	if payload == "" {
+		// No JSON found: let the fallback parser look at the full text.
+		payload = sanitized
+	}
+
+	return parseJSONWithFallback(payload, result)
+}
+
+// sanitizeUTF8Content removes bytes that are not part of a valid UTF-8
+// encoding and returns the remaining text unchanged.
+//
+// Already-valid input is returned as is. A correctly encoded U+FFFD
+// (replacement character) is valid UTF-8 and is kept; only bytes that fail to
+// decode are dropped.
+func sanitizeUTF8Content(content string) string {
+	if utf8.ValidString(content) {
+		return content
+	}
+
+	var cleaned strings.Builder
+	cleaned.Grow(len(content))
+
+	for i := 0; i < len(content); {
+		r, size := utf8.DecodeRuneInString(content[i:])
+		// An undecodable byte is reported as (RuneError, 1). A real U+FFFD in
+		// the input decodes as (RuneError, 3) and must not be discarded.
+		if r == utf8.RuneError && size == 1 {
+			i++
+			continue
+		}
+		cleaned.WriteString(content[i : i+size])
+		i += size
+	}
+
+	return cleaned.String()
+}
+
// extractJSONFromContent extracts JSON content from various formats in the response
// This function handles multiple formats:
// 1. ```json ... ``` markdown code blocks
@@ -111,6 +160,294 @@ func extractJSONFromContent(content string) string {
return ""
}
+// parseJSONWithFallback decodes jsonContent into result, trying progressively
+// more lenient strategies:
+//
+//  1. plain json.Unmarshal;
+//  2. json.Unmarshal after cleanJSONContent;
+//  3. type-specific recovery for *AssertionResult, *QueryResult and
+//     *PlanningJSONResponse (manual field extraction, then a synthesized
+//     fallback value).
+//
+// For those three types the function always returns nil, because strategy 3
+// always produces a value. An error is returned only for other result types
+// when both unmarshal attempts fail.
+func parseJSONWithFallback(jsonContent string, result interface{}) error {
+	// Strategy 1: direct JSON unmarshaling
+	if err := json.Unmarshal([]byte(jsonContent), result); err == nil {
+		applyParsedDefaults(result)
+		return nil
+	}
+
+	// Strategy 2: clean the JSON content and parse again
+	if err := json.Unmarshal([]byte(cleanJSONContent(jsonContent)), result); err == nil {
+		applyParsedDefaults(result)
+		return nil
+	}
+
+	// Strategy 3: type-specific manual extraction or content analysis
+	switch v := result.(type) {
+	case *AssertionResult:
+		if fallbackResult, err := extractAssertionFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Final fallback for assertions: keyword analysis of the raw text
+		*v = *analyzeContentForAssertion(jsonContent)
+		return nil
+
+	case *QueryResult:
+		if fallbackResult, err := extractQueryFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback: hand the raw text back as the content
+		*v = QueryResult{
+			Content: jsonContent,
+			Thought: "Failed to parse as JSON, returning raw content",
+		}
+		return nil
+
+	case *PlanningJSONResponse:
+		if fallbackResult, err := extractPlanningFieldsManually(jsonContent); err == nil {
+			*v = *fallbackResult
+			return nil
+		}
+		// Fallback: a fixed minimal structure with no actions
+		*v = PlanningJSONResponse{
+			Actions: []Action{},
+			Thought: "Failed to parse structured response",
+			Error:   "JSON parsing failed, returning minimal structure",
+		}
+		return nil
+	}
+
+	return errors.New("failed to parse JSON with all strategies")
+}
+
+// applyParsedDefaults fills placeholder text into empty fields of a
+// successfully decoded *QueryResult or *AssertionResult. Other types are left
+// untouched.
+func applyParsedDefaults(result interface{}) {
+	switch v := result.(type) {
+	case *QueryResult:
+		switch {
+		case v.Content == "" && v.Thought == "":
+			v.Content = "Empty response content"
+			v.Thought = "No content extracted from response"
+		case v.Content == "":
+			v.Content = "No content extracted"
+		case v.Thought == "":
+			v.Thought = "Successfully parsed structured response"
+		}
+	case *AssertionResult:
+		if v.Thought == "" {
+			v.Thought = "Successfully parsed assertion response"
+		}
+	}
+}
+
+// extractAssertionFieldsManually recovers an AssertionResult from text that
+// could not be decoded as JSON.
+//
+// The pass flag is detected by a case-insensitive search for `"pass":true` /
+// `"pass": true` (or the false variants). When neither is present an error is
+// returned. The thought is taken from the first quoted value following a
+// `"thought"` key, and stays empty when no such value is found.
+func extractAssertionFieldsManually(content string) (*AssertionResult, error) {
+	parsed := &AssertionResult{}
+	lowered := strings.ToLower(content)
+
+	switch {
+	case strings.Contains(lowered, `"pass":true`), strings.Contains(lowered, `"pass": true`):
+		parsed.Pass = true
+	case strings.Contains(lowered, `"pass":false`), strings.Contains(lowered, `"pass": false`):
+		parsed.Pass = false
+	default:
+		return nil, errors.New("cannot extract pass field")
+	}
+
+	keyPos := strings.Index(content, `"thought"`)
+	if keyPos == -1 {
+		return parsed, nil
+	}
+	tail := content[keyPos:]
+	colonPos := strings.Index(tail, ":")
+	if colonPos == -1 {
+		return parsed, nil
+	}
+	if value := strings.TrimSpace(tail[colonPos+1:]); strings.HasPrefix(value, `"`) {
+		// Read up to the closing quote, honoring escaped quotes.
+		parsed.Thought = extractQuotedString(value)
+	}
+
+	return parsed, nil
+}
+
+// extractQuotedString returns the contents of the double-quoted string that s
+// starts with, reading up to the first unescaped closing quote (or the end of
+// s when the string is unterminated). It returns "" when s does not start
+// with a double quote.
+//
+// Backslash escapes are decoded: \n, \t, \r, \b and \f become the matching
+// control character; any other escaped character (such as \" \\ \/) is
+// emitted as the character itself. \uXXXX sequences are not decoded.
+func extractQuotedString(s string) string {
+	if !strings.HasPrefix(s, `"`) {
+		return ""
+	}
+
+	var result strings.Builder
+	escaped := false
+
+	for _, r := range s[1:] { // skip the opening quote
+		switch {
+		case escaped:
+			escaped = false
+			// Translate single-character JSON escapes instead of emitting
+			// the letter after the backslash.
+			switch r {
+			case 'n':
+				r = '\n'
+			case 't':
+				r = '\t'
+			case 'r':
+				r = '\r'
+			case 'b':
+				r = '\b'
+			case 'f':
+				r = '\f'
+			}
+			result.WriteRune(r)
+		case r == '\\':
+			escaped = true
+		case r == '"':
+			// Found the closing quote
+			return result.String()
+		default:
+			result.WriteRune(r)
+		}
+	}
+
+	return result.String()
+}
+
+// cleanJSONContent removes common JSON formatting issues
+func cleanJSONContent(content string) string {
+ // Remove any non-printable characters
+ cleaned := strings.Map(func(r rune) rune {
+ if r >= 32 && r < 127 || r > 127 { // Keep printable ASCII and Unicode
+ return r
+ }
+ return -1 // Remove non-printable characters
+ }, content)
+
+ // Remove any trailing commas before closing braces/brackets
+ cleaned = strings.ReplaceAll(cleaned, ",}", "}")
+ cleaned = strings.ReplaceAll(cleaned, ",]", "]")
+
+ return cleaned
+}
+
+// analyzeContentForAssertion builds a fallback AssertionResult from free text
+// by counting which positive and negative indicator words occur in it
+// (case-insensitive substring match, each word counted at most once).
+//
+// Pass is true only when more positive than negative indicators are present.
+// Note that "incorrect", "invalid" and "mismatch" also contain the positive
+// indicators "correct", "valid" and "match". The counts are reported in
+// Thought.
+func analyzeContentForAssertion(content string) *AssertionResult {
+	lowered := strings.ToLower(content)
+
+	// countHits reports how many of the given words occur in the text.
+	countHits := func(indicators ...string) int {
+		hits := 0
+		for _, word := range indicators {
+			if strings.Contains(lowered, word) {
+				hits++
+			}
+		}
+		return hits
+	}
+
+	positiveCount := countHits("true", "pass", "success", "correct", "valid", "match")
+	negativeCount := countHits("false", "fail", "error", "incorrect", "invalid", "mismatch")
+
+	return &AssertionResult{
+		Pass: positiveCount > negativeCount,
+		Thought: fmt.Sprintf("Fallback analysis of malformed response (positive: %d, negative: %d)",
+			positiveCount, negativeCount),
+	}
+}
+
+// extractJSONStringField returns the quoted string value that follows the
+// first occurrence of the JSON key `"key"` in content. It returns "" when the
+// key is absent, no colon follows it, or the value is not a quoted string.
+func extractJSONStringField(content, key string) string {
+	keyPos := strings.Index(content, `"`+key+`"`)
+	if keyPos == -1 {
+		return ""
+	}
+	section := content[keyPos:]
+	colonPos := strings.Index(section, ":")
+	if colonPos == -1 {
+		return ""
+	}
+	// extractQuotedString yields "" for values that do not start with a quote.
+	return extractQuotedString(strings.TrimSpace(section[colonPos+1:]))
+}
+
+// extractQueryFieldsManually recovers a QueryResult from text that could not
+// be decoded as JSON by reading the "content" and "thought" string values.
+//
+// It returns an error when neither value can be read. When only one is found,
+// the other is filled with a placeholder text.
+func extractQueryFieldsManually(content string) (*QueryResult, error) {
+	result := &QueryResult{
+		Content: extractJSONStringField(content, "content"),
+		Thought: extractJSONStringField(content, "thought"),
+	}
+
+	// If we couldn't extract any fields, return error
+	if result.Content == "" && result.Thought == "" {
+		return nil, errors.New("cannot extract content or thought fields")
+	}
+
+	// Set defaults for the field that is still missing
+	if result.Content == "" {
+		result.Content = "Extracted partial information"
+	}
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
+// extractPlanningFieldsManually recovers a PlanningJSONResponse from text
+// that could not be decoded as JSON by reading the "thought" and "error"
+// string values. Actions is always an empty, non-nil slice.
+//
+// It returns an error when neither value can be read. A missing thought is
+// replaced by a placeholder text, while a missing error stays empty.
+func extractPlanningFieldsManually(content string) (*PlanningJSONResponse, error) {
+	result := &PlanningJSONResponse{
+		Actions: []Action{}, // Default to empty actions
+		Thought: extractJSONStringField(content, "thought"),
+		Error:   extractJSONStringField(content, "error"),
+	}
+
+	// If we couldn't extract any meaningful fields, return error
+	if result.Thought == "" && result.Error == "" {
+		return nil, errors.New("cannot extract thought or error fields")
+	}
+
+	// Set defaults for missing fields
+	if result.Thought == "" {
+		result.Thought = "Partial extraction from malformed response"
+	}
+
+	return result, nil
+}
+
// callModelWithLogging is a common function to call model with logging and timing
// It handles the common pattern of:
// 1. Log request
diff --git a/uixt/ai/utils_test.go b/uixt/ai/utils_test.go
index 6a6a38ea..0a43c044 100644
--- a/uixt/ai/utils_test.go
+++ b/uixt/ai/utils_test.go
@@ -4,195 +4,701 @@ import (
"testing"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
)
func TestExtractJSONFromContent(t *testing.T) {
tests := []struct {
name string
- content string
+ input string
expected string
}{
{
- name: "simple JSON",
- content: `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [371, 235, 425, 270]
- }
- }
- ],
- "thought": "点击桌面上的抖音应用图标以启动抖音",
- "error": null
-}`,
- expected: `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [371, 235, 425, 270]
- }
- }
- ],
- "thought": "点击桌面上的抖音应用图标以启动抖音",
- "error": null
-}`,
+ name: "simple JSON object",
+ input: `{"key": "value"}`,
+ expected: `{"key": "value"}`,
},
{
- name: "JSON with Chinese characters in strings",
- content: `{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "2048经典"
- }
- }
- ],
- "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。",
- "error": null
-}`,
- expected: `{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "2048经典"
- }
- }
- ],
- "thought": "搜索框已经清空了,现在我要输入\"2048经典\"这个关键词。看到键盘已经弹出来了,正好可以直接开始输入。这样一来,就能找到我们想要玩的那个小游戏了。",
- "error": null
-}`,
+ name: "JSON in markdown code block",
+ input: "```json\n{\"key\": \"value\"}\n```",
+ expected: `{"key": "value"}`,
},
{
- name: "JSON with markdown wrapper",
- content: "```json\n" + `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [100, 200, 150, 250]
- }
- }
- ],
- "thought": "点击按钮",
- "error": null
-}` + "\n```",
- expected: `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [100, 200, 150, 250]
- }
- }
- ],
- "thought": "点击按钮",
- "error": null
-}`,
+ name: "JSON in code block without language",
+ input: "```\n{\"key\": \"value\"}\n```",
+ expected: `{"key": "value"}`,
},
{
- name: "JSON embedded in text with Chinese",
- content: `这是一个包含中文的响应:{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "测试内容"
- }
- }
- ],
- "thought": "这是一个测试思路",
- "error": null
-} 后面还有一些文本`,
- expected: `{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "测试内容"
- }
- }
- ],
- "thought": "这是一个测试思路",
- "error": null
-}`,
+ name: "JSON with surrounding text",
+ input: `Here is the result: {"key": "value"} and some more text`,
+ expected: `{"key": "value"}`,
},
{
- name: "JSON with escaped quotes and Chinese",
- content: `{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "他说:\"你好,世界!\""
- }
- }
- ],
- "thought": "输入包含引号的中文文本",
- "error": null
-}`,
- expected: `{
- "actions": [
- {
- "action_type": "type",
- "action_inputs": {
- "content": "他说:\"你好,世界!\""
- }
- }
- ],
- "thought": "输入包含引号的中文文本",
- "error": null
-}`,
+ name: "multiple JSON objects",
+ input: `{"first": "object"} and {"second": "object"}`,
+ expected: `{"first": "object"}`,
},
{
- name: "no JSON content",
- content: "这只是一些普通的文本,没有JSON内容",
+ name: "nested JSON in markdown",
+ input: "```json\n{\"data\": {\"nested\": \"value\"}}\n```",
+ expected: `{"data": {"nested": "value"}}`,
+ },
+ {
+ name: "JSON array",
+ input: `[{"item": 1}, {"item": 2}]`,
+ expected: `[{"item": 1}, {"item": 2}]`,
+ },
+ {
+ name: "JSON array in markdown",
+ input: "```json\n[{\"item\": 1}, {\"item\": 2}]\n```",
+ expected: `[{"item": 1}, {"item": 2}]`,
+ },
+ {
+ name: "text without JSON",
+ input: "This is just plain text without any JSON",
expected: "",
},
{
- name: "nested JSON objects with Chinese",
- content: `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [100, 200, 150, 250],
- "metadata": {
- "description": "点击操作",
- "target": "按钮"
- }
- }
- }
- ],
- "thought": "执行嵌套对象的点击操作",
- "error": null
-}`,
- expected: `{
- "actions": [
- {
- "action_type": "click",
- "action_inputs": {
- "start_box": [100, 200, 150, 250],
- "metadata": {
- "description": "点击操作",
- "target": "按钮"
- }
- }
- }
- ],
- "thought": "执行嵌套对象的点击操作",
- "error": null
-}`,
+ name: "malformed JSON",
+ input: `{"key": "value"`,
+ expected: `{"key": "value"`,
+ },
+ {
+ name: "JSON with unicode",
+ input: `{"message": "测试消息"}`,
+ expected: `{"message": "测试消息"}`,
+ },
+ {
+ name: "multiple code blocks, select first JSON",
+ input: "First block:\n```json\n{\"first\": true}\n```\nSecond block:\n```json\n{\"second\": true}\n```",
+ expected: `{"first": true}`,
+ },
+ {
+ name: "mixed language code blocks",
+ input: "```python\nprint('hello')\n```\n```json\n{\"key\": \"value\"}\n```",
+ expected: `{"key": "value"}`,
+ },
+ {
+ name: "JSON with special characters",
+ input: `{"special": "chars: @#$%^&*()"}`,
+ expected: `{"special": "chars: @#$%^&*()"}`,
+ },
+ {
+ name: "empty JSON object",
+ input: `{}`,
+ expected: `{}`,
+ },
+ {
+ name: "empty JSON array",
+ input: `[]`,
+ expected: `[]`,
+ },
+ {
+ name: "JSON with line breaks",
+ input: "{\n \"key\": \"value\",\n \"number\": 123\n}",
+ expected: "{\n \"key\": \"value\",\n \"number\": 123\n}",
+ },
+ {
+ name: "markdown with extra whitespace",
+ input: " ```json \n {\"key\": \"value\"} \n ``` ",
+ expected: `{"key": "value"}`,
+ },
+ {
+ name: "code block with tildes",
+ input: "~~~json\n{\"key\": \"value\"}\n~~~",
+ expected: `{"key": "value"}`,
+ },
+ {
+ name: "JSON after other text patterns",
+ input: `The response should be formatted as: {"status": "success"}`,
+ expected: `{"status": "success"}`,
+ },
+ {
+ name: "JSON in mixed content",
+ input: `Analysis complete. Result: {"analysis": "positive", "confidence": 0.95} - End of analysis.`,
+ expected: `{"analysis": "positive", "confidence": 0.95}`,
+ },
+ {
+ name: "complex nested JSON",
+ input: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+ expected: `{"outer": {"inner": {"deep": "value", "numbers": [1, 2, 3]}}}`,
+ },
+ {
+ name: "JSON with escaped quotes",
+ input: `{"message": "He said \"Hello\" to me"}`,
+ expected: `{"message": "He said \"Hello\" to me"}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- result := extractJSONFromContent(tt.content)
+ result := extractJSONFromContent(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
+
+// TestSanitizeUTF8Content pins the exact output of sanitizeUTF8Content for
+// plain text, text containing U+FFFD replacement characters, and empty input.
+func TestSanitizeUTF8Content(t *testing.T) {
+	cases := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{name: "valid UTF-8", input: "Hello 世界", want: "Hello 世界"},
+		{name: "invalid UTF-8 with replacement characters", input: "Hello \ufffd\ufffd World", want: "Hello World"},
+		{name: "mixed valid and invalid", input: "测试\ufffd消息\ufffd", want: "测试消息"},
+		{name: "only replacement characters", input: "\ufffd\ufffd\ufffd", want: ""},
+		{name: "empty string", input: "", want: ""},
+		{name: "ASCII only", input: "Hello World 123", want: "Hello World 123"},
+		{
+			// NOTE(review): this input is a raw string literal, so it carries the
+			// literal text \ufffd rather than the U+FFFD rune - confirm that
+			// sanitizeUTF8Content is expected to strip the escaped form too.
+			name:  "JSON with UTF-8 issues",
+			input: `{"message": "搜索框\ufffd\ufffd显示"}`,
+			want:  `{"message": "搜索框显示"}`,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := sanitizeUTF8Content(tc.input)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestParseJSONWithFallback decodes into an AssertionResult and checks the
+// resulting Pass and Thought for well-formed JSON, JSON wrapped in extra text,
+// and plain sentences that carry no JSON at all.
+func TestParseJSONWithFallback(t *testing.T) {
+	cases := []struct {
+		name        string
+		input       string
+		wantOK      bool
+		wantPass    bool
+		wantThought string
+	}{
+		{
+			name:        "valid JSON",
+			input:       `{"pass": true, "thought": "test passed"}`,
+			wantOK:      true,
+			wantPass:    true,
+			wantThought: "test passed",
+		},
+		{
+			name:        "valid JSON with false",
+			input:       `{"pass": false, "thought": "test failed"}`,
+			wantOK:      true,
+			wantPass:    false,
+			wantThought: "test failed",
+		},
+		{
+			name:        "malformed JSON with extractable fields",
+			input:       `malformed start {"pass": true, "thought": "extracted"} end`,
+			wantOK:      true,
+			wantPass:    true,
+			wantThought: "extracted",
+		},
+		{
+			name:        "content analysis fallback - positive",
+			input:       `The test was successful and passed with true result`,
+			wantOK:      true,
+			wantPass:    true,
+			wantThought: "Fallback analysis of malformed response (positive: 3, negative: 0)",
+		},
+		{
+			name:        "content analysis fallback - negative",
+			input:       `The test failed with false result and error occurred`,
+			wantOK:      true,
+			wantPass:    false,
+			wantThought: "Fallback analysis of malformed response (positive: 0, negative: 3)",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var got AssertionResult
+			err := parseJSONWithFallback(tc.input, &got)
+
+			// Cases flagged as invalid only need to fail.
+			if !tc.wantOK {
+				assert.Error(t, err)
+				return
+			}
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.wantPass, got.Pass)
+			assert.Equal(t, tc.wantThought, got.Thought)
+		})
+	}
+}
+
+// TestExtractAssertionFieldsManually checks the manual field scan for assertion
+// responses: Pass and Thought are read from the input, and an input without a
+// "pass" key must be rejected with an error.
+func TestExtractAssertionFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedPass    bool
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "pass true",
+			input:           `{"pass": true, "thought": "manual test"}`,
+			expectedPass:    true,
+			expectedThought: "manual test",
+			shouldError:     false,
+		},
+		{
+			name:            "pass false",
+			input:           `{"pass": false, "thought": "manual fail"}`,
+			expectedPass:    false,
+			expectedThought: "manual fail",
+			shouldError:     false,
+		},
+		{
+			name:        "no pass field",
+			input:       `{"thought": "no pass field"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractAssertionFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+				return
+			}
+			// require stops the subtest on an unexpected error, so the field
+			// checks below never touch a result that was not produced.
+			require.NoError(t, err)
+			assert.Equal(t, tt.expectedPass, result.Pass)
+			assert.Equal(t, tt.expectedThought, result.Thought)
+		})
+	}
+}
+
+// TestExtractQuotedString pins extractQuotedString's output for plain, escaped,
+// empty, unicode, unquoted, unterminated, and trailing-content inputs.
+func TestExtractQuotedString(t *testing.T) {
+	cases := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{name: "simple quoted string", input: `"hello world"`, want: "hello world"},
+		{name: "quoted string with escaped quotes", input: `"He said \"Hello\""`, want: `He said "Hello"`},
+		{name: "quoted string with escaped backslash", input: `"path\\to\\file"`, want: `path\to\file`},
+		{name: "empty quoted string", input: `""`, want: ""},
+		{name: "quoted string with unicode", input: `"测试消息"`, want: "测试消息"},
+		// Input that does not open with a quote yields the empty string.
+		{name: "not a quoted string", input: "hello world", want: ""},
+		// A missing closing quote still returns what was read.
+		{name: "unclosed quoted string", input: `"unclosed string`, want: "unclosed string"},
+		{name: "quoted string with extra content after", input: `"content" and more`, want: "content"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := extractQuotedString(tc.input)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestCleanJSONContent pins cleanJSONContent's output: trailing commas before a
+// closing brace or bracket are dropped, NUL/SOH control characters are removed,
+// and unicode text passes through unchanged.
+func TestCleanJSONContent(t *testing.T) {
+	cases := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{name: "remove trailing comma in object", input: `{"key": "value",}`, want: `{"key": "value"}`},
+		{name: "remove trailing comma in array", input: `["item1", "item2",]`, want: `["item1", "item2"]`},
+		{
+			// The newline is expected to survive; only \u0000 and \u0001 go.
+			name:  "clean non-printable characters",
+			input: "{\n\"key\": \"value\"\u0000\u0001}",
+			want:  "{\n\"key\": \"value\"}",
+		},
+		{name: "preserve unicode characters", input: `{"message": "测试消息"}`, want: `{"message": "测试消息"}`},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := cleanJSONContent(tc.input)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestAnalyzeContentForAssertion checks the Pass verdict that
+// analyzeContentForAssertion derives from free text, and that a Thought is
+// always filled in.
+func TestAnalyzeContentForAssertion(t *testing.T) {
+	cases := []struct {
+		name     string
+		input    string
+		wantPass bool
+	}{
+		{name: "positive indicators", input: "The test was successful and passed", wantPass: true},
+		{name: "negative indicators", input: "The test failed with error", wantPass: false},
+		{name: "mixed with more positive", input: "Some errors occurred but overall test passed successfully", wantPass: true},
+		// Text without any indicator is expected to count as not passed.
+		{name: "no clear indicators", input: "This is just plain text", wantPass: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := analyzeContentForAssertion(tc.input)
+			assert.Equal(t, tc.wantPass, got.Pass)
+			assert.NotEmpty(t, got.Thought)
+		})
+	}
+}
+
+// TestParseStructuredResponse runs parseStructuredResponse against an
+// AssertionResult target and expects every listed input to parse and to leave
+// a non-empty Thought.
+func TestParseStructuredResponse(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  string
+		wantOK bool
+	}{
+		{
+			name:   "valid AssertionResult JSON",
+			input:  `{"pass": true, "thought": "test passed"}`,
+			wantOK: true,
+		},
+		{
+			name:   "malformed JSON with extractable fields",
+			input:  `malformed start {"pass": false, "thought": "extracted thought"} end`,
+			wantOK: true,
+		},
+		{
+			name:   "UTF-8 issues with JSON",
+			input:  "测试结果:\ufffd\ufffd {\"pass\": true, \"thought\": \"处理完成\"}",
+			wantOK: true,
+		},
+		{
+			name:   "content analysis fallback",
+			input:  "The assertion was successful and passed correctly",
+			wantOK: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var got AssertionResult
+			err := parseStructuredResponse(tc.input, &got)
+
+			if !tc.wantOK {
+				assert.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
+			assert.NotEmpty(t, got.Thought)
+		})
+	}
+}
+
+// TestParseJSONWithFallback_QueryResult repeats the fallback-parsing checks
+// with a QueryResult target: Content and Thought are compared for valid JSON,
+// JSON wrapped in extra text, and unstructured text.
+func TestParseJSONWithFallback_QueryResult(t *testing.T) {
+	cases := []struct {
+		name        string
+		input       string
+		wantOK      bool
+		wantContent string
+		wantThought string
+	}{
+		{
+			name:        "valid QueryResult JSON",
+			input:       `{"content": "extracted info", "thought": "analysis complete"}`,
+			wantOK:      true,
+			wantContent: "extracted info",
+			wantThought: "analysis complete",
+		},
+		{
+			name:        "malformed QueryResult with extractable fields",
+			input:       `malformed { "content": "partial info", "thought": "partial analysis" } more text`,
+			wantOK:      true,
+			wantContent: "partial info",
+			wantThought: "partial analysis",
+		},
+		{
+			// Unstructured text is expected back verbatim as Content.
+			name:        "completely malformed QueryResult",
+			input:       `This is just plain text with no structure`,
+			wantOK:      true,
+			wantContent: "This is just plain text with no structure",
+			wantThought: "Failed to parse as JSON, returning raw content",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var got QueryResult
+			err := parseJSONWithFallback(tc.input, &got)
+
+			if !tc.wantOK {
+				assert.Error(t, err)
+				return
+			}
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.wantContent, got.Content)
+			assert.Equal(t, tc.wantThought, got.Thought)
+		})
+	}
+}
+
+// TestParseJSONWithFallback_PlanningResponse runs the fallback parser against
+// a PlanningJSONResponse target and compares Thought, Error, and the number of
+// Actions for valid JSON, JSON wrapped in extra text, and unstructured text.
+func TestParseJSONWithFallback_PlanningResponse(t *testing.T) {
+	cases := []struct {
+		name        string
+		input       string
+		wantOK      bool
+		wantThought string
+		wantError   string
+		wantActions int
+	}{
+		{
+			name:        "valid PlanningJSONResponse",
+			input:       `{"actions": [{"action_type": "click"}], "thought": "planning complete", "error": ""}`,
+			wantOK:      true,
+			wantThought: "planning complete",
+			wantError:   "",
+			wantActions: 1,
+		},
+		{
+			// wantError is left at its zero value: no error text is expected.
+			name:        "malformed PlanningResponse with extractable thought",
+			input:       `malformed { "thought": "partial planning" } more text`,
+			wantOK:      true,
+			wantThought: "partial planning",
+			wantActions: 0,
+		},
+		{
+			name:        "completely malformed PlanningResponse",
+			input:       `This is just plain text with no structure`,
+			wantOK:      true,
+			wantThought: "Failed to parse structured response",
+			wantError:   "JSON parsing failed, returning minimal structure",
+			wantActions: 0,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var got PlanningJSONResponse
+			err := parseJSONWithFallback(tc.input, &got)
+
+			if !tc.wantOK {
+				assert.Error(t, err)
+				return
+			}
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.wantThought, got.Thought)
+			assert.Equal(t, tc.wantError, got.Error)
+			assert.Len(t, got.Actions, tc.wantActions)
+		})
+	}
+}
+
+// TestExtractQueryFieldsManually checks the manual field scan for query
+// responses: Content and Thought are read from the input, a missing one is
+// replaced by its placeholder text, and input with neither key is rejected.
+func TestExtractQueryFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedContent string
+		expectedThought string
+		shouldError     bool
+	}{
+		{
+			name:            "both content and thought",
+			input:           `{"content": "test content", "thought": "test thought"}`,
+			expectedContent: "test content",
+			expectedThought: "test thought",
+			shouldError:     false,
+		},
+		{
+			name:            "only content",
+			input:           `{"content": "only content"}`,
+			expectedContent: "only content",
+			expectedThought: "Partial extraction from malformed response",
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only thought"}`,
+			expectedContent: "Extracted partial information",
+			expectedThought: "only thought",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractQueryFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+				return
+			}
+			// require stops the subtest on an unexpected error, so the field
+			// checks below never dereference a result that was not returned.
+			require.NoError(t, err)
+			assert.Equal(t, tt.expectedContent, result.Content)
+			assert.Equal(t, tt.expectedThought, result.Thought)
+		})
+	}
+}
+
+// TestExtractPlanningFieldsManually checks the manual field scan for planning
+// responses: Thought and Error are read from the input, a missing Thought is
+// replaced by its placeholder text, Actions is always non-nil, and input with
+// neither key is rejected.
+func TestExtractPlanningFieldsManually(t *testing.T) {
+	tests := []struct {
+		name            string
+		input           string
+		expectedThought string
+		expectedError   string
+		shouldError     bool
+	}{
+		{
+			name:            "both thought and error",
+			input:           `{"thought": "test planning", "error": "test error"}`,
+			expectedThought: "test planning",
+			expectedError:   "test error",
+			shouldError:     false,
+		},
+		{
+			name:            "only thought",
+			input:           `{"thought": "only planning"}`,
+			expectedThought: "only planning",
+			expectedError:   "",
+			shouldError:     false,
+		},
+		{
+			name:            "only error",
+			input:           `{"error": "only error"}`,
+			expectedThought: "Partial extraction from malformed response",
+			expectedError:   "only error",
+			shouldError:     false,
+		},
+		{
+			name:        "no extractable fields",
+			input:       `{"other": "data"}`,
+			shouldError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := extractPlanningFieldsManually(tt.input)
+			if tt.shouldError {
+				assert.Error(t, err)
+				return
+			}
+			// extractPlanningFieldsManually returns a nil pointer alongside an
+			// error; require stops the subtest here so the field checks below
+			// cannot dereference it.
+			require.NoError(t, err)
+			assert.Equal(t, tt.expectedThought, result.Thought)
+			assert.Equal(t, tt.expectedError, result.Error)
+			assert.NotNil(t, result.Actions) // Should always be initialized
+		})
+	}
+}
+
+// TestParseStructuredResponse_QueryResult runs parseStructuredResponse against
+// a QueryResult target and expects every listed input to parse and to leave
+// both Content and Thought non-empty.
+func TestParseStructuredResponse_QueryResult(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  string
+		wantOK bool
+	}{
+		{
+			name:   "valid QueryResult JSON",
+			input:  `{"content": "extracted data", "thought": "processing complete"}`,
+			wantOK: true,
+		},
+		{
+			name:   "QueryResult with UTF-8 issues",
+			input:  "extracted data: 搜索框,里面显示着\ufffd\ufffd {\"content\": \"search box found\", \"thought\": \"visual analysis\"}",
+			wantOK: true,
+		},
+		{
+			name:   "malformed QueryResult",
+			input:  `malformed start {"content": "partial info"} end`,
+			wantOK: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var got QueryResult
+			err := parseStructuredResponse(tc.input, &got)
+
+			if !tc.wantOK {
+				assert.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
+			assert.NotEmpty(t, got.Content, "Content should not be empty")
+			assert.NotEmpty(t, got.Thought, "Thought should not be empty")
+		})
+	}
+}
diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go
index dba564d3..72dc58f9 100644
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -59,7 +59,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
ModelName: "",
Error: err.Error(),
},
- StartTime: planningStartTime.Unix(),
+ StartTime: planningStartTime.UnixMilli(),
Elapsed: time.Since(planningStartTime).Milliseconds(),
}
allPlannings = append(allPlannings, errorResult)
@@ -67,7 +67,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}
// Set planning execution timing
- planningResult.StartTime = planningStartTime.Unix()
+ planningResult.StartTime = planningStartTime.UnixMilli()
planningResult.SubActions = []*SubActionResult{}
// Check if task is finished BEFORE executing actions
@@ -96,7 +96,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
subActionResult := &SubActionResult{
ActionName: toolCall.Function.Name,
Arguments: toolCall.Function.Arguments,
- StartTime: subActionStartTime.Unix(),
+ StartTime: subActionStartTime.UnixMilli(),
}
// Use defer to ensure sub-action is always processed and added to results
@@ -164,7 +164,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
// Step 1: Take screenshot
screenshotStartTime := time.Now()
// Use GetScreenResult to handle screenshot capture, save, and session tracking
- screenResult, err := dExt.GetScreenResult(
+ screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go
index e9329f78..ae3e4238 100644
--- a/uixt/driver_ext_screenshot.go
+++ b/uixt/driver_ext_screenshot.go
@@ -50,27 +50,25 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
}
// GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size
+// Also saves the screenshot to session for report display
func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) {
- compressBufSource, err := getScreenShotBuffer(dExt)
+ // Create screenshot with session saving, minimal CV processing for AI operations
+ screenResult, err := dExt.createScreenshotWithSession(
+ option.WithScreenShotFileName("screenshot_base64"),
+ )
if err != nil {
return "", types.Size{}, err
}
// convert buffer to base64 string
screenShotBase64 := "data:image/jpeg;base64," +
- base64.StdEncoding.EncodeToString(compressBufSource.Bytes())
+ base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
- // get screen size
- size, err = dExt.IDriver.WindowSize()
- if err != nil {
- return "", types.Size{}, errors.Wrap(err, "get window size failed")
- }
-
- return screenShotBase64, size, nil
+ return screenShotBase64, screenResult.Resolution, nil
}
-// GetScreenResult takes a screenshot, returns the image recognition result
-func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
+// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session
+func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// get compressed screenshot buffer
compressBufSource, err := getScreenShotBuffer(dExt.IDriver)
if err != nil {
@@ -105,34 +103,40 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
}
- // read image from buffer with CV
+ // create basic screen result
screenResult = &ScreenResult{
bufSource: compressBufSource,
ImagePath: imagePath,
Tags: nil,
Resolution: windowSize,
}
- imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...)
- if err != nil {
- log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed")
- return nil, err
- }
- if imageResult != nil {
- screenResult.Texts = imageResult.OCRResult.ToOCRTexts()
- screenResult.UploadedURL = imageResult.URL
- screenResult.Icons = imageResult.UIResult
- if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil {
- screenResult.Popup = &PopupInfo{
- ClosePopupsResult: imageResult.ClosePopupsResult,
- PicName: imagePath,
- PicURL: imageResult.URL,
- }
+ logger := log.Debug().Str("imagePath", imagePath)
+ // perform CV processing if any CV-related option is enabled
+ if needsCVProcessing(screenshotOptions) {
+ imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...)
+ if err != nil {
+ log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed")
+ return nil, err
+ }
+ if imageResult != nil {
+ screenResult.Texts = imageResult.OCRResult.ToOCRTexts()
+ screenResult.UploadedURL = imageResult.URL
+ screenResult.Icons = imageResult.UIResult
- closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"})
- for _, closeArea := range closeAreas {
- screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center())
+ if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil {
+ screenResult.Popup = &PopupInfo{
+ ClosePopupsResult: imageResult.ClosePopupsResult,
+ PicName: imagePath,
+ PicURL: imageResult.URL,
+ }
+
+ closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"})
+ for _, closeArea := range closeAreas {
+ screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center())
+ }
}
+ logger.Str("imageUrl", screenResult.UploadedURL)
}
}
@@ -140,13 +144,28 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult
session := dExt.GetSession()
session.screenResults = append(session.screenResults, screenResult)
- log.Debug().
- Str("imagePath", imagePath).
- Str("imageUrl", screenResult.UploadedURL).
- Msg("log screenshot")
+ logger.Msg("log screenshot")
return screenResult, nil
}
+// needsCVProcessing reports whether at least one CV-related screenshot option
+// is set: any of the boolean switches, a non-empty UI type list, or a non-empty
+// OCR cluster name.
+func needsCVProcessing(options *option.ActionOptions) bool {
+	switch {
+	case options.ScreenShotWithOCR,
+		options.ScreenShotWithUpload,
+		options.ScreenShotWithLiveType,
+		options.ScreenShotWithLivePopularity,
+		options.ScreenShotWithClosePopups:
+		// one of the boolean switches is on
+		return true
+	case len(options.ScreenShotWithUITypes) > 0:
+		return true
+	case options.ScreenShotWithOCRCluster != "":
+		return true
+	}
+	return false
+}
+
+// GetScreenResult takes a screenshot and returns the image recognition result.
+// It delegates to createScreenshotWithSession with the OCR option switched on;
+// that option is placed after the caller's options.
+func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
+	// Build a fresh slice instead of appending to opts: when the caller spreads
+	// a slice that has spare capacity (GetScreenResult(s...)), append would
+	// write the extra option into the caller's backing array.
+	withOCR := make([]option.ActionOption, 0, len(opts)+1)
+	withOCR = append(withOCR, opts...)
+	withOCR = append(withOCR, option.WithScreenShotOCR(true))
+	return dExt.createScreenshotWithSession(withOCR...)
+}
+
func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) {
options := option.NewActionOptions(opts...)
if options.ScreenShotFileName == "" {
diff --git a/uixt/sdk.go b/uixt/sdk.go
index c871c975..cdcbf65b 100644
--- a/uixt/sdk.go
+++ b/uixt/sdk.go
@@ -132,7 +132,7 @@ func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAct
subActionResult := &SubActionResult{
ActionName: string(action.Method),
Arguments: action.Params,
- StartTime: subActionStartTime.Unix(),
+ StartTime: subActionStartTime.UnixMilli(),
}
// Execute via MCP tool