From ed5d3127cbb986d243d07e0444cfba3397c5bd34 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Thu, 19 Jun 2025 21:51:38 +0800 Subject: [PATCH] fix: add missing action options --- internal/version/VERSION | 2 +- uixt/ai/cv_vedem.go | 1 + uixt/driver_ext_ai.go | 2 +- uixt/driver_ext_screenshot.go | 4 ++- uixt/driver_ext_tap.go | 3 +- uixt/mcp_server.go | 62 +++++++++++++++++++++++++++++++++++ uixt/option/action.go | 3 -- uixt/sdk.go | 2 +- 8 files changed, 71 insertions(+), 8 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 07532b93..dddcc207 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506191446 +v5.0.0-beta-2506192157 diff --git a/uixt/ai/cv_vedem.go b/uixt/ai/cv_vedem.go index 532ea301..2cb4a833 100644 --- a/uixt/ai/cv_vedem.go +++ b/uixt/ai/cv_vedem.go @@ -63,6 +63,7 @@ func (s *vedemCVService) ReadFromPath(imagePath string, opts ...option.ActionOpt func (s *vedemCVService) ReadFromBuffer(imageBuf *bytes.Buffer, opts ...option.ActionOption) ( imageResult *CVResult, err error) { actionOptions := option.NewActionOptions(opts...) + log.Debug().Interface("options", actionOptions).Msg("vedem.ReadFromBuffer") screenshotActions := actionOptions.List() if len(screenshotActions) == 0 { // skip diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 72dc58f9..9c5c3e36 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -125,7 +125,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op planningResult.Elapsed = time.Since(planningStartTime).Milliseconds() allPlannings = append(allPlannings, planningResult) - if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes { + if options.MaxRetryTimes > 0 && attempt >= options.MaxRetryTimes { return allPlannings, errors.New("reached max retry times") } } diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index ae3e4238..53ee9fa6 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -136,7 +136,9 @@ func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) ( screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center()) } } - logger.Str("imageUrl", screenResult.UploadedURL) + if screenResult.UploadedURL != "" { + logger.Str("imageUrl", screenResult.UploadedURL) + } } } diff --git a/uixt/driver_ext_tap.go b/uixt/driver_ext_tap.go index dcb08753..0b36afcd 100644 --- a/uixt/driver_ext_tap.go +++ b/uixt/driver_ext_tap.go @@ -10,6 +10,7 @@ import ( func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error { actionOptions := option.NewActionOptions(opts...) + log.Info().Str("text", text).Interface("options", actionOptions).Msg("TapByOCR") if actionOptions.ScreenShotFileName == "" { opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("tap_by_ocr_%s", text))) } @@ -36,7 +37,7 @@ func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error { func (dExt *XTDriver) TapByCV(opts ...option.ActionOption) error { actionOptions := option.NewActionOptions(opts...) - + log.Info().Interface("options", actionOptions).Msg("TapByCV") uiResult, err := dExt.FindUIResult(opts...) if err != nil { if actionOptions.IgnoreNotFoundError { diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go index 72221448..dd30ec4b 100644 --- a/uixt/mcp_server.go +++ b/uixt/mcp_server.go @@ -226,6 +226,68 @@ func extractActionOptionsToArguments(actionOptions []option.ActionOption, argume if tempOptions.CVService != "" { arguments["cv_service"] = tempOptions.CVService } + + // Add UI/CV related options + if len(tempOptions.ScreenShotWithUITypes) > 0 { + arguments["screenshot_with_ui_types"] = tempOptions.ScreenShotWithUITypes + } + if len(tempOptions.Scope) == 4 { + arguments["scope"] = tempOptions.Scope + } + if len(tempOptions.AbsScope) == 4 { + arguments["abs_scope"] = tempOptions.AbsScope + } + + // Add other screenshot options + if tempOptions.ScreenShotWithOCR { + arguments["screenshot_with_ocr"] = true + } + if tempOptions.ScreenShotWithUpload { + arguments["screenshot_with_upload"] = true + } + if tempOptions.ScreenShotWithLiveType { + arguments["screenshot_with_live_type"] = true + } + if tempOptions.ScreenShotWithLivePopularity { + arguments["screenshot_with_live_popularity"] = true + } + if tempOptions.ScreenShotWithClosePopups { + arguments["screenshot_with_close_popups"] = true + } + if tempOptions.ScreenShotWithOCRCluster != "" { + arguments["screenshot_with_ocr_cluster"] = tempOptions.ScreenShotWithOCRCluster + } + if tempOptions.ScreenShotFileName != "" { + arguments["screenshot_file_name"] = tempOptions.ScreenShotFileName + } + + // Add tap/swipe offset options + if len(tempOptions.TapOffset) == 2 { + arguments["tap_offset"] = tempOptions.TapOffset + } + if len(tempOptions.SwipeOffset) == 4 { + arguments["swipe_offset"] = tempOptions.SwipeOffset + } + if len(tempOptions.OffsetRandomRange) == 2 { + arguments["offset_random_range"] = tempOptions.OffsetRandomRange + } + + // Add string options + if tempOptions.Text != "" { + arguments["text"] = tempOptions.Text + } + if tempOptions.ImagePath != "" { + arguments["image_path"] = tempOptions.ImagePath + } + if tempOptions.AppName != "" { + arguments["app_name"] = tempOptions.AppName + } + if tempOptions.PackageName != "" { + arguments["package_name"] = tempOptions.PackageName + } + if tempOptions.Selector != "" { + arguments["selector"] = tempOptions.Selector + } } func getFloat64ValueOrDefault(value float64, defaultValue float64) float64 { diff --git a/uixt/option/action.go b/uixt/option/action.go index 5007f61c..54107dad 100644 --- a/uixt/option/action.go +++ b/uixt/option/action.go @@ -440,9 +440,6 @@ func NewActionOptions(opts ...ActionOption) *ActionOptions { for _, option := range opts { option(actionOptions) } - if actionOptions.MaxRetryTimes == 0 { - actionOptions.MaxRetryTimes = 1 - } return actionOptions } diff --git a/uixt/sdk.go b/uixt/sdk.go index cdcbf65b..2a0752ed 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -160,7 +160,7 @@ func (dExt *XTDriver) ExecuteAction(ctx context.Context, action option.MobileAct subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data log.Debug().Str("tool", string(tool.Name())). - Msg("execute action via MCP tool") + Msg("executed action via MCP tool") return []*SubActionResult{subActionResult}, nil }