+
+ {{$stepLogs := getStepLogs $step}}
+ {{$queryThought := ""}}
+ {{$queryModel := ""}}
+ {{$queryUsage := ""}}
+ {{$queryScreenshot := ""}}
+ {{$queryResult := ""}}
+ {{range $logEntry := $stepLogs}}
+ {{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}}
+ {{$content := index $logEntry.Fields "content"}}
+ {{if $content}}
+ {{$queryResult = $content}}
{{end}}
+ {{end}}
+ {{if and (eq $logEntry.Message "call model service for query") (index $logEntry.Fields "model")}}
+ {{$queryModel = index $logEntry.Fields "model"}}
+ {{end}}
+ {{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}}
+ {{$inputTokens := index $logEntry.Fields "input_tokens"}}
+ {{$outputTokens := index $logEntry.Fields "output_tokens"}}
+ {{$totalTokens := index $logEntry.Fields "total_tokens"}}
+ {{$queryUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}}
+ {{end}}
+ {{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}}
+ {{$queryScreenshot = index $logEntry.Fields "imagePath"}}
+ {{end}}
+ {{end}}
-
- {{if $queryResult}}
-
- {{end}}
+
+ {{if $queryResult}}
+
+ {{end}}
-
-
-
- {{if $queryScreenshot}}
-
-
-
-
- {{$base64Image := encodeImageBase64 $queryScreenshot}}
- {{if $base64Image}}
-
-
-
-
-
- {{end}}
-
-
+
+
+
+ {{if $queryScreenshot}}
+
+
+
- {{end}}
-
-
-
-
-
-
- {{if $queryModel}}
-
🤖 Model: {{$queryModel}}
- {{end}}
- {{if $queryUsage}}
-
{{$queryUsage}}
- {{end}}
+
+ {{$base64Image := encodeImageBase64 $queryScreenshot}}
+ {{if $base64Image}}
+
+
+
+ {{end}}
+
+
+
+ {{end}}
+
+
+
+
+
+
+ {{if $queryModel}}
+
🤖 Model: {{$queryModel}}
+ {{end}}
+ {{if $queryUsage}}
+
{{$queryUsage}}
+ {{end}}
+
+
+ {{end}}
+
+ {{/* Handle SessionData: display requests and screen results for non-planning actions */}}
+ {{if not $action.Plannings}}
+ {{if or $action.Requests $action.ScreenResults}}
+
+
+ {{if $action.Requests}}
+
+
+ 📡 {{len $action.Requests}} request(s)
+
+
+ {{range $request := $action.Requests}}
+
+
+ {{if $request.RequestBody}}
+
Request: {{$request.RequestBody}}
+ {{end}}
+ {{if $request.ResponseBody}}
+
Response: {{$request.ResponseBody}}
+ {{end}}
+
+ {{end}}
+
+
{{end}}
+
+
+ {{if $action.ScreenResults}}
+
+
📸 Screen Results ({{len $action.ScreenResults}})
+
+ {{range $screenshot := $action.ScreenResults}}
+ {{if $screenshot.ImagePath}}
+ {{$base64Image := encodeImageBase64 $screenshot.ImagePath}}
+ {{if $base64Image}}
+
+
+ {{base $screenshot.ImagePath}}
+ {{if $screenshot.Resolution}}
+ {{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}
+ {{end}}
+
+
+
+
+
+ {{end}}
+ {{end}}
+ {{end}}
+
+
+ {{end}}
+
{{end}}
{{end}}
- {{/* Other SubActions (non-ai_query) are displayed in the Planning section's right panel to avoid duplication */}}
{{end}}
diff --git a/runner_uixt.go b/runner_uixt.go
index 87c934ae..36e6078a 100644
--- a/runner_uixt.go
+++ b/runner_uixt.go
@@ -49,10 +49,9 @@ type UIXTConfig struct {
WDAPort int
WDAMjpegPort int
- OSType string // platform
- Serial string
- PackageName string
- LLMService option.LLMServiceType // LLM 服务类型
+ OSType string // platform
+ Serial string
+ LLMService option.LLMServiceType // LLM 服务类型
}
const (
diff --git a/step.go b/step.go
index 143abe24..3b2c866f 100644
--- a/step.go
+++ b/step.go
@@ -58,11 +58,11 @@ type TStep struct {
// one step contains one or multiple actions
type ActionResult struct {
option.MobileAction `json:",inline"`
- StartTime int64 `json:"start_time"` // action start time in millisecond(ms)
- Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms)
- Error error `json:"error"` // action execution result
- Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions
- SubActions []*uixt.SubActionResult `json:"sub_actions,omitempty"` // store sub-actions for other actions
+ StartTime int64 `json:"start_time"` // action start time as a Unix timestamp in milliseconds (set from time.Time.UnixMilli)
+ Elapsed int64 `json:"elapsed_ms"` // time spent executing the action, in milliseconds
+ Error string `json:"error,omitempty"` // error message of the action (err.Error()); left empty, and omitted from JSON, when no error was recorded
+ Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // planning results for start_to_goal actions; each planning contains multiple sub-actions
+ uixt.SessionData // embedded session data returned by ExecuteAction for actions other than start_to_goal
}
// one testcase contains one or multiple steps
diff --git a/step_ui.go b/step_ui.go
index da922435..426621cb 100644
--- a/step_ui.go
+++ b/step_ui.go
@@ -783,13 +783,14 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
},
StartTime: startTime.UnixMilli(),
}
- subActionResults, err1 := uiDriver.ExecuteAction(
+ sessionData, err1 := uiDriver.ExecuteAction(
context.Background(), actionResult.MobileAction)
if err1 != nil {
+ actionResult.Error = err1.Error()
log.Warn().Err(err1).Msg("get foreground app failed, ignore")
}
actionResult.Elapsed = time.Since(startTime).Milliseconds()
- actionResult.SubActions = subActionResults
+ actionResult.SessionData = sessionData
stepResult.Actions = append(stepResult.Actions, actionResult)
}
@@ -827,13 +828,14 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
},
StartTime: startTime.UnixMilli(),
}
- subActionResults, err2 := uiDriver.ExecuteAction(
+ sessionData, err2 := uiDriver.ExecuteAction(
context.Background(), actionResult.MobileAction)
if err2 != nil {
+ actionResult.Error = err2.Error()
log.Warn().Err(err2).Str("step", step.Name()).Msg("auto handle popup failed")
}
actionResult.Elapsed = time.Since(startTime).Milliseconds()
- actionResult.SubActions = subActionResults
+ actionResult.SessionData = sessionData
stepResult.Actions = append(stepResult.Actions, actionResult)
}
@@ -950,11 +952,12 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
}
// handle other actions
- subActionResults, err := uiDriver.ExecuteAction(ctx, action)
+ sessionData, err := uiDriver.ExecuteAction(ctx, action)
actionResult.Elapsed = time.Since(actionStartTime).Milliseconds()
- actionResult.SubActions = subActionResults
+ actionResult.SessionData = sessionData
stepResult.Actions = append(stepResult.Actions, actionResult)
if err != nil {
+ actionResult.Error = err.Error()
if !code.IsErrorPredefined(err) {
err = errors.Wrap(code.MobileUIDriverError, err.Error())
}
diff --git a/uixt/ai/cv_vedem.go b/uixt/ai/cv_vedem.go
index 532ea301..2cb4a833 100644
--- a/uixt/ai/cv_vedem.go
+++ b/uixt/ai/cv_vedem.go
@@ -63,6 +63,7 @@ func (s *vedemCVService) ReadFromPath(imagePath string, opts ...option.ActionOpt
func (s *vedemCVService) ReadFromBuffer(imageBuf *bytes.Buffer, opts ...option.ActionOption) (
imageResult *CVResult, err error) {
actionOptions := option.NewActionOptions(opts...)
+ log.Debug().Interface("options", actionOptions).Msg("vedem.ReadFromBuffer")
screenshotActions := actionOptions.List()
if len(screenshotActions) == 0 {
// skip
diff --git a/uixt/android_device.go b/uixt/android_device.go
index 52e2107d..e7cc774c 100644
--- a/uixt/android_device.go
+++ b/uixt/android_device.go
@@ -384,6 +384,9 @@ func (dev *AndroidDevice) getPackageVersion(packageName string) (string, error)
}
func (dev *AndroidDevice) getPackagePath(packageName string) (string, error) {
+ if packageName == "" {
+ return "", errors.Wrap(code.InvalidParamError, "packageName is empty")
+ }
output, err := dev.Device.RunShellCommand("pm", "path", packageName)
if err != nil {
return "", errors.Wrap(err, "get package path failed")
diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go
index 72dc58f9..773e4c1c 100644
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -125,7 +125,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
allPlannings = append(allPlannings, planningResult)
- if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes {
+ if options.MaxRetryTimes > 0 && attempt > options.MaxRetryTimes {
return allPlannings, errors.New("reached max retry times")
}
}
diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go
index ae3e4238..53ee9fa6 100644
--- a/uixt/driver_ext_screenshot.go
+++ b/uixt/driver_ext_screenshot.go
@@ -136,7 +136,9 @@ func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (
screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center())
}
}
- logger.Str("imageUrl", screenResult.UploadedURL)
+ if screenResult.UploadedURL != "" {
+ logger.Str("imageUrl", screenResult.UploadedURL)
+ }
}
}
diff --git a/uixt/driver_ext_tap.go b/uixt/driver_ext_tap.go
index dcb08753..0b36afcd 100644
--- a/uixt/driver_ext_tap.go
+++ b/uixt/driver_ext_tap.go
@@ -10,6 +10,7 @@ import (
func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error {
actionOptions := option.NewActionOptions(opts...)
+ log.Info().Str("text", text).Interface("options", actionOptions).Msg("TapByOCR")
if actionOptions.ScreenShotFileName == "" {
opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("tap_by_ocr_%s", text)))
}
@@ -36,7 +37,7 @@ func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error {
func (dExt *XTDriver) TapByCV(opts ...option.ActionOption) error {
actionOptions := option.NewActionOptions(opts...)
-
+ log.Info().Interface("options", actionOptions).Msg("TapByCV")
uiResult, err := dExt.FindUIResult(opts...)
if err != nil {
if actionOptions.IgnoreNotFoundError {
diff --git a/uixt/driver_utils.go b/uixt/driver_utils.go
index 1818d746..787f6f9b 100644
--- a/uixt/driver_utils.go
+++ b/uixt/driver_utils.go
@@ -1,6 +1,7 @@
package uixt
import (
+ "context"
"crypto/md5"
"fmt"
"io"
@@ -274,7 +275,8 @@ func getSimulationDuration(params []float64) (milliseconds int64) {
// sleepStrict sleeps strict duration with given params
// startTime is used to correct sleep duration caused by process time
-func sleepStrict(startTime time.Time, strictMilliseconds int64) {
+// ctx allows for cancellation during sleep
+func sleepStrict(ctx context.Context, startTime time.Time, strictMilliseconds int64) {
var elapsed int64
if !startTime.IsZero() {
elapsed = time.Since(startTime).Milliseconds()
@@ -294,7 +296,18 @@ func sleepStrict(startTime time.Time, strictMilliseconds int64) {
Int64("elapsed(ms)", elapsed).
Int64("strictSleep(ms)", strictMilliseconds).
Msg("sleep remaining duration time")
- time.Sleep(time.Duration(dur) * time.Millisecond)
+
+ // Use context-aware sleep instead of blocking time.Sleep
+ select {
+ case <-time.After(time.Duration(dur) * time.Millisecond):
+ // Normal completion
+ log.Debug().Int64("duration_ms", dur).Msg("strict sleep completed normally")
+ case <-ctx.Done():
+ // Interrupted by context cancellation (e.g., CTRL+C)
+ log.Info().Int64("planned_duration_ms", dur).
+ Msg("strict sleep interrupted by context cancellation")
+ return
+ }
}
// global file lock
diff --git a/uixt/driver_utils_test.go b/uixt/driver_utils_test.go
index 3a6311fa..ee81644c 100644
--- a/uixt/driver_utils_test.go
+++ b/uixt/driver_utils_test.go
@@ -1,6 +1,7 @@
package uixt
import (
+ "context"
"strings"
"testing"
"time"
@@ -30,8 +31,9 @@ func TestGetSimulationDuration(t *testing.T) {
}
func TestSleepStrict(t *testing.T) {
+ ctx := context.Background()
startTime := time.Now()
- sleepStrict(startTime, 1230)
+ sleepStrict(ctx, startTime, 1230)
dur := time.Since(startTime).Milliseconds()
t.Log(dur)
if dur < 1230 || dur > 1300 {
diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go
index 72221448..dd30ec4b 100644
--- a/uixt/mcp_server.go
+++ b/uixt/mcp_server.go
@@ -226,6 +226,68 @@ func extractActionOptionsToArguments(actionOptions []option.ActionOption, argume
if tempOptions.CVService != "" {
arguments["cv_service"] = tempOptions.CVService
}
+
+ // Add UI/CV related options
+ if len(tempOptions.ScreenShotWithUITypes) > 0 {
+ arguments["screenshot_with_ui_types"] = tempOptions.ScreenShotWithUITypes
+ }
+ if len(tempOptions.Scope) == 4 {
+ arguments["scope"] = tempOptions.Scope
+ }
+ if len(tempOptions.AbsScope) == 4 {
+ arguments["abs_scope"] = tempOptions.AbsScope
+ }
+
+ // Add other screenshot options
+ if tempOptions.ScreenShotWithOCR {
+ arguments["screenshot_with_ocr"] = true
+ }
+ if tempOptions.ScreenShotWithUpload {
+ arguments["screenshot_with_upload"] = true
+ }
+ if tempOptions.ScreenShotWithLiveType {
+ arguments["screenshot_with_live_type"] = true
+ }
+ if tempOptions.ScreenShotWithLivePopularity {
+ arguments["screenshot_with_live_popularity"] = true
+ }
+ if tempOptions.ScreenShotWithClosePopups {
+ arguments["screenshot_with_close_popups"] = true
+ }
+ if tempOptions.ScreenShotWithOCRCluster != "" {
+ arguments["screenshot_with_ocr_cluster"] = tempOptions.ScreenShotWithOCRCluster
+ }
+ if tempOptions.ScreenShotFileName != "" {
+ arguments["screenshot_file_name"] = tempOptions.ScreenShotFileName
+ }
+
+ // Add tap/swipe offset options
+ if len(tempOptions.TapOffset) == 2 {
+ arguments["tap_offset"] = tempOptions.TapOffset
+ }
+ if len(tempOptions.SwipeOffset) == 4 {
+ arguments["swipe_offset"] = tempOptions.SwipeOffset
+ }
+ if len(tempOptions.OffsetRandomRange) == 2 {
+ arguments["offset_random_range"] = tempOptions.OffsetRandomRange
+ }
+
+ // Add string options
+ if tempOptions.Text != "" {
+ arguments["text"] = tempOptions.Text
+ }
+ if tempOptions.ImagePath != "" {
+ arguments["image_path"] = tempOptions.ImagePath
+ }
+ if tempOptions.AppName != "" {
+ arguments["app_name"] = tempOptions.AppName
+ }
+ if tempOptions.PackageName != "" {
+ arguments["package_name"] = tempOptions.PackageName
+ }
+ if tempOptions.Selector != "" {
+ arguments["selector"] = tempOptions.Selector
+ }
}
func getFloat64ValueOrDefault(value float64, defaultValue float64) float64 {
diff --git a/uixt/mcp_tools_utility.go b/uixt/mcp_tools_utility.go
index 40699295..f7877394 100644
--- a/uixt/mcp_tools_utility.go
+++ b/uixt/mcp_tools_utility.go
@@ -68,7 +68,15 @@ func (t *ToolSleep) Implement() server.ToolHandlerFunc {
return nil, fmt.Errorf("unsupported sleep duration type: %T", v)
}
- time.Sleep(duration)
+ // Use context-aware sleep instead of blocking time.Sleep
+ select {
+ case <-time.After(duration):
+ // Normal completion
+ case <-ctx.Done():
+ // Interrupted by context cancellation (e.g., CTRL+C)
+ log.Warn().Msg("sleep interrupted by cancellation")
+ return nil, fmt.Errorf("sleep interrupted: %w", ctx.Err())
+ }
message := fmt.Sprintf("Successfully slept for %v seconds", actualSeconds)
returnData := ToolSleep{
@@ -120,7 +128,18 @@ func (t *ToolSleepMS) Implement() server.ToolHandlerFunc {
// Sleep MS action logic
log.Info().Int64("milliseconds", unifiedReq.Milliseconds).Msg("sleeping in milliseconds")
- time.Sleep(time.Duration(unifiedReq.Milliseconds) * time.Millisecond)
+
+ duration := time.Duration(unifiedReq.Milliseconds) * time.Millisecond
+
+ // Use context-aware sleep instead of blocking time.Sleep
+ select {
+ case <-time.After(duration):
+ // Normal completion
+ case <-ctx.Done():
+ // Interrupted by context cancellation (e.g., CTRL+C)
+ log.Warn().Msg("sleep interrupted by cancellation")
+ return nil, fmt.Errorf("sleep interrupted: %w", ctx.Err())
+ }
message := fmt.Sprintf("Successfully slept for %d milliseconds", unifiedReq.Milliseconds)
returnData := ToolSleepMS{Milliseconds: unifiedReq.Milliseconds}
@@ -170,8 +189,8 @@ func (t *ToolSleepRandom) Implement() server.ToolHandlerFunc {
return nil, err
}
- // Sleep random action logic
- sleepStrict(time.Now(), getSimulationDuration(unifiedReq.Params))
+ // Sleep random action logic with context support
+ sleepStrict(ctx, time.Now(), getSimulationDuration(unifiedReq.Params))
message := fmt.Sprintf("Successfully slept for random duration with params: %v", unifiedReq.Params)
returnData := ToolSleepRandom{Params: unifiedReq.Params}
diff --git a/uixt/option/action.go b/uixt/option/action.go
index 5007f61c..54107dad 100644
--- a/uixt/option/action.go
+++ b/uixt/option/action.go
@@ -440,9 +440,6 @@ func NewActionOptions(opts ...ActionOption) *ActionOptions {
for _, option := range opts {
option(actionOptions)
}
- if actionOptions.MaxRetryTimes == 0 {
- actionOptions.MaxRetryTimes = 1
- }
return actionOptions
}
diff --git a/uixt/sdk.go b/uixt/sdk.go
index cdcbf65b..5b7bf67f 100644
--- a/uixt/sdk.go
+++ b/uixt/sdk.go
@@ -4,7 +4,6 @@ import (
"context"
"fmt"
"strings"
- "time"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
@@ -113,34 +112,23 @@ func (c *MCPClient4XTDriver) GetToolByAction(actionName option.ActionName) Actio
return c.Server.GetToolByAction(actionName)
}
-func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAction) ([]*SubActionResult, error) {
- subActionStartTime := time.Now()
-
+func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAction) (SessionData, error) {
// Find the corresponding tool for this action method
tool := dExt.client.Server.GetToolByAction(action.Method)
if tool == nil {
- return nil, fmt.Errorf("no tool found for action method: %s", action.Method)
+ return SessionData{}, fmt.Errorf("no tool found for action method: %s", action.Method)
}
// Use the tool's own conversion method
req, err := tool.ConvertActionToCallToolRequest(action)
if err != nil {
- return nil, fmt.Errorf("failed to convert action to MCP tool call: %w", err)
- }
-
- // Create sub-action result
- subActionResult := &SubActionResult{
- ActionName: string(action.Method),
- Arguments: action.Params,
- StartTime: subActionStartTime.UnixMilli(),
+ return SessionData{}, fmt.Errorf("failed to convert action to MCP tool call: %w", err)
}
// Execute via MCP tool
result, err := dExt.client.CallTool(ctx, req)
- subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds()
if err != nil {
- subActionResult.Error = err
- return []*SubActionResult{subActionResult}, fmt.Errorf("MCP tool call failed: %w", err)
+ return SessionData{}, fmt.Errorf("MCP tool call failed: %w", err)
}
// Check if the tool execution had business logic errors
@@ -152,16 +140,15 @@ func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAct
errMsg = fmt.Sprintf("invoke tool %s failed", tool.Name())
}
err := errors.New(errMsg)
- subActionResult.Error = err
- return []*SubActionResult{subActionResult}, err
+ return SessionData{}, err
}
- // For regular actions, collect session data and return single sub-action result
- subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data
+ // For regular actions, collect session data and return it directly
+ sessionData := dExt.GetSession().GetData(true) // reset after getting data
log.Debug().Str("tool", string(tool.Name())).
- Msg("execute action via MCP tool")
- return []*SubActionResult{subActionResult}, nil
+ Msg("executed action via MCP tool")
+ return sessionData, nil
}
// NewDeviceWithDefault is a helper function to create a device with default options