diff --git a/internal/version/VERSION b/internal/version/VERSION index 2ff7b1e4..b0b62b0c 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2505250015 +v5.0.0-beta-2505250810 diff --git a/server/uixt.go b/server/uixt.go index 52f17764..71f11f0a 100644 --- a/server/uixt.go +++ b/server/uixt.go @@ -19,7 +19,7 @@ func (r *Router) uixtActionHandler(c *gin.Context) { return } - if err = dExt.DoAction(req); err != nil { + if err = dExt.ExecuteAction(req); err != nil { log.Err(err).Interface("action", req). Msg("exec uixt action failed") RenderError(c, err) @@ -42,7 +42,7 @@ func (r *Router) uixtActionsHandler(c *gin.Context) { } for _, action := range actions { - if err = dExt.DoAction(action); err != nil { + if err = dExt.ExecuteAction(action); err != nil { log.Err(err).Interface("action", action). Msg("exec uixt action failed") RenderError(c, err) diff --git a/step_ui.go b/step_ui.go index 9611f6dd..c4c49818 100644 --- a/step_ui.go +++ b/step_ui.go @@ -809,7 +809,7 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err continue } - err = uiDriver.DoAction(action) + err = uiDriver.ExecuteAction(action) actionResult.Elapsed = time.Since(actionStartTime).Milliseconds() stepResult.Actions = append(stepResult.Actions, actionResult) if err != nil { diff --git a/uixt/driver_action.go b/uixt/driver_action.go index f39cb1c2..16c7f9cc 100644 --- a/uixt/driver_action.go +++ b/uixt/driver_action.go @@ -1,16 +1,6 @@ package uixt import ( - "encoding/json" - "fmt" - "time" - - "github.com/pkg/errors" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - - "github.com/httprunner/httprunner/v5/code" - "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/uixt/option" ) @@ -31,261 +21,3 @@ func (ma MobileAction) GetOptions() []option.ActionOption { actionOptionList = append(actionOptionList, ma.ActionOptions.Options()...) 
return actionOptionList } - -// TODO: merge to uixt MCP Server -func (dExt *XTDriver) DoAction(action MobileAction) (err error) { - actionStartTime := time.Now() - defer func() { - var logger *zerolog.Event - if err != nil { - logger = log.Error().Bool("success", false).Err(err) - } else { - logger = log.Debug().Bool("success", true) - } - logger = logger. - Str("method", string(action.Method)). - Interface("params", action.Params). - Int64("elapsed(ms)", time.Since(actionStartTime).Milliseconds()) - logger.Msg("exec uixt action") - }() - - switch action.Method { - case option.ACTION_WebLoginNoneUI: - if len(action.Params.([]interface{})) == 4 { - driver, ok := dExt.IDriver.(*BrowserDriver) - if !ok { - return errors.New("invalid browser driver") - } - params := action.Params.([]interface{}) - _, err = driver.LoginNoneUI(params[0].(string), params[1].(string), params[2].(string), params[3].(string)) - return err - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_WebLoginNoneUI, action.Params) - case option.ACTION_AppInstall: - if app, ok := action.Params.(string); ok { - if err = dExt.GetDevice().Install(app, - option.WithRetryTimes(action.MaxRetryTimes)); err != nil { - return errors.Wrap(err, "failed to install app") - } - } - case option.ACTION_AppUninstall: - if packageName, ok := action.Params.(string); ok { - if err = dExt.GetDevice().Uninstall(packageName); err != nil { - return errors.Wrap(err, "failed to uninstall app") - } - } - case option.ACTION_AppClear: - if packageName, ok := action.Params.(string); ok { - if err = dExt.AppClear(packageName); err != nil { - return errors.Wrap(err, "failed to clear app") - } - } - case option.ACTION_AppLaunch: - if bundleId, ok := action.Params.(string); ok { - return dExt.AppLaunch(bundleId) - } - return fmt.Errorf("invalid %s params, should be bundleId(string), got %v", - option.ACTION_AppLaunch, action.Params) - case option.ACTION_SwipeToTapApp: - if appName, ok := action.Params.(string); ok { - return 
dExt.SwipeToTapApp(appName, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params, should be app name(string), got %v", - option.ACTION_SwipeToTapApp, action.Params) - case option.ACTION_SwipeToTapText: - if text, ok := action.Params.(string); ok { - return dExt.SwipeToTapTexts([]string{text}, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params, should be app text(string), got %v", - option.ACTION_SwipeToTapText, action.Params) - case option.ACTION_SwipeToTapTexts: - if texts, ok := action.Params.([]string); ok { - return dExt.SwipeToTapTexts(texts, action.GetOptions()...) - } - if texts, err := builtin.ConvertToStringSlice(action.Params); err == nil { - return dExt.SwipeToTapTexts(texts, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_SwipeToTapTexts, action.Params) - case option.ACTION_AppTerminate: - if bundleId, ok := action.Params.(string); ok { - success, err := dExt.AppTerminate(bundleId) - if err != nil { - return errors.Wrap(err, "failed to terminate app") - } - if !success { - log.Warn().Str("bundleId", bundleId).Msg("app was not running") - } - return nil - } - return fmt.Errorf("app_terminate params should be bundleId(string), got %v", action.Params) - case option.ACTION_Home: - return dExt.Home() - case option.ACTION_SecondaryClick: - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { - if len(params) != 2 { - return fmt.Errorf("invalid tap location params: %v", params) - } - x, y := params[0], params[1] - return dExt.SecondaryClick(x, y) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_SecondaryClick, action.Params) - case option.ACTION_HoverBySelector: - if selector, ok := action.Params.(string); ok { - return dExt.HoverBySelector(selector, action.GetOptions()...) 
- } - return fmt.Errorf("invalid %s params: %v", option.ACTION_HoverBySelector, action.Params) - case option.ACTION_TapBySelector: - if selector, ok := action.Params.(string); ok { - return dExt.TapBySelector(selector, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_TapBySelector, action.Params) - case option.ACTION_SecondaryClickBySelector: - if selector, ok := action.Params.(string); ok { - return dExt.SecondaryClickBySelector(selector, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_SecondaryClickBySelector, action.Params) - case option.ACTION_WebCloseTab: - if param, ok := action.Params.(json.Number); ok { - paramInt64, _ := param.Int64() - return dExt.IDriver.(*BrowserDriver).CloseTab(int(paramInt64)) - } else if param, ok := action.Params.(int64); ok { - return dExt.IDriver.(*BrowserDriver).CloseTab(int(param)) - } else { - return dExt.IDriver.(*BrowserDriver).CloseTab(action.Params.(int)) - } - // return fmt.Errorf("invalid %s params: %v", ACTION_WebCloseTab, action.Params) - case option.ACTION_SetIme: - if ime, ok := action.Params.(string); ok { - err = dExt.SetIme(ime) - if err != nil { - return errors.Wrap(err, "failed to set ime") - } - return nil - } - case option.ACTION_GetSource: - if packageName, ok := action.Params.(string); ok { - _, err = dExt.Source(option.WithProcessName(packageName)) - if err != nil { - return errors.Wrap(err, "failed to set ime") - } - return nil - } - case option.ACTION_TapXY: - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { - // relative x,y of window size: [0.5, 0.5] - if len(params) != 2 { - return fmt.Errorf("invalid tap location params: %v", params) - } - x, y := params[0], params[1] - return dExt.TapXY(x, y, action.GetOptions()...) 
- } - return fmt.Errorf("invalid %s params: %v", option.ACTION_TapXY, action.Params) - case option.ACTION_TapAbsXY: - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { - // absolute coordinates x,y of window size: [100, 300] - if len(params) != 2 { - return fmt.Errorf("invalid tap location params: %v", params) - } - x, y := params[0], params[1] - return dExt.TapAbsXY(x, y, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_TapAbsXY, action.Params) - case option.ACTION_TapByOCR: - if ocrText, ok := action.Params.(string); ok { - return dExt.TapByOCR(ocrText, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_TapByOCR, action.Params) - case option.ACTION_TapByCV: - actionOptions := option.NewActionOptions(action.GetOptions()...) - if len(actionOptions.ScreenShotWithUITypes) > 0 { - return dExt.TapByCV(action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_TapByCV, action.Params) - case option.ACTION_DoubleTapXY: - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { - // relative x,y of window size: [0.5, 0.5] - if len(params) != 2 { - return fmt.Errorf("invalid tap location params: %v", params) - } - x, y := params[0], params[1] - return dExt.DoubleTap(x, y) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_DoubleTapXY, action.Params) - case option.ACTION_Swipe: - params := action.Params - swipeAction := prepareSwipeAction(dExt, params, action.GetOptions()...) 
- return swipeAction(dExt) - case option.ACTION_Input: - // input text on current active element - // append \n to send text with enter - // send \b\b\b to delete 3 chars - param := fmt.Sprintf("%v", action.Params) - return dExt.Input(param) - case option.ACTION_Back: - return dExt.Back() - case option.ACTION_Sleep: - if param, ok := action.Params.(json.Number); ok { - seconds, _ := param.Float64() - time.Sleep(time.Duration(seconds*1000) * time.Millisecond) - return nil - } else if param, ok := action.Params.(float64); ok { - time.Sleep(time.Duration(param*1000) * time.Millisecond) - return nil - } else if param, ok := action.Params.(int64); ok { - time.Sleep(time.Duration(param) * time.Second) - return nil - } else if sd, ok := action.Params.(SleepConfig); ok { - sleepStrict(sd.StartTime, int64(sd.Seconds*1000)) - return nil - } else if param, ok := action.Params.(string); ok { - seconds, err := builtin.ConvertToFloat64(param) - if err != nil { - return errors.Wrapf(err, "invalid sleep params: %v(%T)", action.Params, action.Params) - } - time.Sleep(time.Duration(seconds*1000) * time.Millisecond) - return nil - } - return fmt.Errorf("invalid sleep params: %v(%T)", action.Params, action.Params) - case option.ACTION_SleepMS: - if param, ok := action.Params.(json.Number); ok { - milliseconds, _ := param.Int64() - time.Sleep(time.Duration(milliseconds) * time.Millisecond) - return nil - } else if param, ok := action.Params.(int64); ok { - time.Sleep(time.Duration(param) * time.Millisecond) - return nil - } else if sd, ok := action.Params.(SleepConfig); ok { - sleepStrict(sd.StartTime, sd.Milliseconds) - return nil - } - return fmt.Errorf("invalid sleep ms params: %v(%T)", action.Params, action.Params) - case option.ACTION_SleepRandom: - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { - sleepStrict(time.Now(), getSimulationDuration(params)) - return nil - } - return fmt.Errorf("invalid sleep random params: %v(%T)", action.Params, 
action.Params) - case option.ACTION_ScreenShot: - // take screenshot - log.Info().Msg("take screenshot for current screen") - _, err := dExt.GetScreenResult(action.GetScreenShotOptions()...) - return err - case option.ACTION_ClosePopups: - return dExt.ClosePopupsHandler() - case option.ACTION_CallFunction: - if funcDesc, ok := action.Params.(string); ok { - return dExt.Call(funcDesc, action.Fn, action.GetOptions()...) - } - return fmt.Errorf("invalid function description: %v", action.Params) - case option.ACTION_AIAction: - if prompt, ok := action.Params.(string); ok { - return dExt.AIAction(prompt, action.GetOptions()...) - } - return fmt.Errorf("invalid %s params: %v", option.ACTION_AIAction, action.Params) - default: - log.Warn().Str("action", string(action.Method)).Msg("action not implemented") - return errors.Wrapf(code.InvalidCaseError, - "UI action %v not implemented", action.Method) - } - return nil -} diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go index 7e6958a3..ee733ab8 100644 --- a/uixt/mcp_server.go +++ b/uixt/mcp_server.go @@ -6,17 +6,38 @@ import ( "fmt" "strings" "sync" + "time" "github.com/danielpaulus/go-ios/ios" "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/internal/version" "github.com/httprunner/httprunner/v5/pkg/gadb" "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/httprunner/httprunner/v5/uixt/types" ) +// MCPServer4XTDriver provides MCP (Model Context Protocol) interface for XTDriver. 
+// +// This implementation adopts a pure ActionTool-style architecture where: +// - Each MCP tool is implemented as a struct that implements the ActionTool interface +// - Operation logic is directly embedded in each tool's Implement() method +// - No intermediate action methods or coupling between tools +// - Complete decoupling from the original large switch-case DoAction method +// +// Architecture: +// MCP Request -> ActionTool.Implement() -> Direct Driver Method Call +// +// Benefits: +// - True ActionTool interface consistency across all tools +// - Complete decoupling with no method interdependencies +// - Unified code organization in a single file +// - Simplified error handling and logging per tool +// - Easy extensibility for new features + // NewMCPServer creates a new MCP server for XTDriver and registers all tools. func NewMCPServer() *MCPServer4XTDriver { mcpServer := server.NewMCPServer( @@ -104,6 +125,44 @@ func (ums *MCPServer4XTDriver) registerTools() { // ScreenShot Tool ums.registerTool(&ToolScreenShot{}) + + // Home Tool + ums.registerTool(&ToolHome{}) + + // Back Tool + ums.registerTool(&ToolBack{}) + + // Input Tool + ums.registerTool(&ToolInput{}) + + // Sleep Tool + ums.registerTool(&ToolSleep{}) + + // Register all missing tools from DoAction + ums.registerTool(&ToolWebLoginNoneUI{}) + ums.registerTool(&ToolAppInstall{}) + ums.registerTool(&ToolAppUninstall{}) + ums.registerTool(&ToolAppClear{}) + ums.registerTool(&ToolSwipeToTapApp{}) + ums.registerTool(&ToolSwipeToTapText{}) + ums.registerTool(&ToolSwipeToTapTexts{}) + ums.registerTool(&ToolSecondaryClick{}) + ums.registerTool(&ToolHoverBySelector{}) + ums.registerTool(&ToolTapBySelector{}) + ums.registerTool(&ToolSecondaryClickBySelector{}) + ums.registerTool(&ToolWebCloseTab{}) + ums.registerTool(&ToolSetIme{}) + ums.registerTool(&ToolGetSource{}) + ums.registerTool(&ToolTapAbsXY{}) + ums.registerTool(&ToolTapByOCR{}) + ums.registerTool(&ToolTapByCV{}) + 
ums.registerTool(&ToolDoubleTapXY{}) + ums.registerTool(&ToolSwipeAdvanced{}) + ums.registerTool(&ToolSleepMS{}) + ums.registerTool(&ToolSleepRandom{}) + ums.registerTool(&ToolClosePopups{}) + ums.registerTool(&ToolCallFunction{}) + ums.registerTool(&ToolAIAction{}) } func (ums *MCPServer4XTDriver) registerTool(tool ActionTool) { @@ -118,6 +177,7 @@ func (ums *MCPServer4XTDriver) registerTool(tool ActionTool) { log.Debug().Str("name", tool.Name()).Msg("register tool") } +// ActionTool interface defines the contract for MCP tools type ActionTool interface { Name() string Description() string @@ -218,7 +278,7 @@ func (t *ToolListPackages) Description() string { } func (t *ToolListPackages) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.TargetDeviceRequest{}) + return option.NewMCPOptions(option.TargetDeviceRequest{}) } func (t *ToolListPackages) Implement() toolCall { @@ -248,28 +308,33 @@ func (t *ToolLaunchApp) Description() string { } func (t *ToolLaunchApp) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.AppLaunchRequest{}) + return option.NewMCPOptions(option.AppLaunchRequest{}) } func (t *ToolLaunchApp) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return nil, err + return nil, fmt.Errorf("setup driver failed: %w", err) } + var appLaunchReq option.AppLaunchRequest if err := mapToStruct(request.Params.Arguments, &appLaunchReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("parse parameters error: %w", err) } - packageName := appLaunchReq.PackageName - if packageName == "" { - return mcp.NewToolResultError("package_name is required"), nil + + if appLaunchReq.PackageName == "" { + return nil, fmt.Errorf("package_name is required") } - err = driverExt.AppLaunch(packageName) + + // Launch app action logic + 
log.Info().Str("packageName", appLaunchReq.PackageName).Msg("launching app") + err = driverExt.AppLaunch(appLaunchReq.PackageName) if err != nil { - return mcp.NewToolResultError("Launch app failed: " + err.Error()), nil + return mcp.NewToolResultError(fmt.Sprintf("Launch app failed: %s", err.Error())), nil } - return mcp.NewToolResultText(fmt.Sprintf("Launched app success: %s", packageName)), nil + + return mcp.NewToolResultText(fmt.Sprintf("Successfully launched app: %s", appLaunchReq.PackageName)), nil } } @@ -285,28 +350,36 @@ func (t *ToolTerminateApp) Description() string { } func (t *ToolTerminateApp) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.AppTerminateRequest{}) + return option.NewMCPOptions(option.AppTerminateRequest{}) } func (t *ToolTerminateApp) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return nil, err + return nil, fmt.Errorf("setup driver failed: %w", err) } + var appTerminateReq option.AppTerminateRequest if err := mapToStruct(request.Params.Arguments, &appTerminateReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("parse parameters error: %w", err) } - packageName := appTerminateReq.PackageName - if packageName == "" { - return mcp.NewToolResultError("package_name is required"), nil + + if appTerminateReq.PackageName == "" { + return nil, fmt.Errorf("package_name is required") } - _, err = driverExt.AppTerminate(packageName) + + // Terminate app action logic + log.Info().Str("packageName", appTerminateReq.PackageName).Msg("terminating app") + success, err := driverExt.AppTerminate(appTerminateReq.PackageName) if err != nil { - return mcp.NewToolResultError("Terminate app failed: " + err.Error()), nil + return mcp.NewToolResultError(fmt.Sprintf("Terminate app failed: %s", err.Error())), nil } - return 
mcp.NewToolResultText(fmt.Sprintf("Terminated app success: %s", packageName)), nil + if !success { + log.Warn().Str("packageName", appTerminateReq.PackageName).Msg("app was not running") + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully terminated app: %s", appTerminateReq.PackageName)), nil } } @@ -322,14 +395,14 @@ func (t *ToolGetScreenSize) Description() string { } func (t *ToolGetScreenSize) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.TargetDeviceRequest{}) + return option.NewMCPOptions(option.TargetDeviceRequest{}) } func (t *ToolGetScreenSize) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return nil, err + return nil, fmt.Errorf("setup driver failed: %w", err) } screenSize, err := driverExt.IDriver.WindowSize() @@ -354,24 +427,29 @@ func (t *ToolPressButton) Description() string { } func (t *ToolPressButton) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.PressButtonRequest{}) + return option.NewMCPOptions(option.PressButtonRequest{}) } func (t *ToolPressButton) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return nil, err + return nil, fmt.Errorf("setup driver failed: %w", err) } + var pressButtonReq option.PressButtonRequest if err := mapToStruct(request.Params.Arguments, &pressButtonReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("parse parameters error: %w", err) } - err = driverExt.PressButton(pressButtonReq.Button) + + // Press button action logic + log.Info().Str("button", string(pressButtonReq.Button)).Msg("pressing button") + err = driverExt.PressButton(types.DeviceButton(pressButtonReq.Button)) if err != nil { - return 
mcp.NewToolResultError("Press button failed: " + err.Error()), nil + return mcp.NewToolResultError(fmt.Sprintf("Press button failed: %s", err.Error())), nil } - return mcp.NewToolResultText(fmt.Sprintf("Pressed button: %s", pressButtonReq.Button)), nil + + return mcp.NewToolResultText(fmt.Sprintf("Successfully pressed button: %s", pressButtonReq.Button)), nil } } @@ -387,28 +465,34 @@ func (t *ToolTapXY) Description() string { } func (t *ToolTapXY) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.TapRequest{}) + return option.NewMCPOptions(option.TapRequest{}) } func (t *ToolTapXY) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return mcp.NewToolResultError("Tap failed: " + err.Error()), nil + return nil, fmt.Errorf("setup driver failed: %w", err) } + var tapReq option.TapRequest if err := mapToStruct(request.Params.Arguments, &tapReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("parse parameters error: %w", err) } - err = driverExt.TapXY(tapReq.X, tapReq.Y, + + // Tap action logic + log.Info().Float64("x", tapReq.X).Float64("y", tapReq.Y).Msg("tapping at coordinates") + opts := []option.ActionOption{ option.WithDuration(tapReq.Duration), - option.WithPreMarkOperation(true)) - if err != nil { - return mcp.NewToolResultError("Tap failed: " + err.Error()), nil + option.WithPreMarkOperation(true), } - return mcp.NewToolResultText( - fmt.Sprintf("tap (%f,%f) success", tapReq.X, tapReq.Y), - ), nil + + err = driverExt.TapXY(tapReq.X, tapReq.Y, opts...) 
+ if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Tap failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", tapReq.X, tapReq.Y)), nil } } @@ -424,45 +508,62 @@ func (t *ToolSwipe) Description() string { } func (t *ToolSwipe) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.SwipeRequest{}) + return option.NewMCPOptions(option.SwipeRequest{}) } func (t *ToolSwipe) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return mcp.NewToolResultError("Swipe failed: " + err.Error()), nil - } - var swipeReq option.SwipeRequest - if err := mapToStruct(request.Params.Arguments, &swipeReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("setup driver failed: %w", err) } - options := []option.ActionOption{ + var swipeReq option.SwipeRequest + if err := mapToStruct(request.Params.Arguments, &swipeReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Swipe action logic + log.Info().Str("direction", swipeReq.Direction).Msg("performing swipe") + + // Validate direction + validDirections := []string{"up", "down", "left", "right"} + isValid := false + for _, validDir := range validDirections { + if swipeReq.Direction == validDir { + isValid = true + break + } + } + if !isValid { + return nil, fmt.Errorf("invalid swipe direction: %s, expected one of: %v", swipeReq.Direction, validDirections) + } + + opts := []option.ActionOption{ option.WithPreMarkOperation(true), option.WithDuration(swipeReq.Duration), option.WithPressDuration(swipeReq.PressDuration), } - // enum direction: up, down, left, right + // Convert direction to coordinates and perform swipe switch swipeReq.Direction { case "up": - err = driverExt.Swipe(0.5, 0.5, 0.5, 0.1, 
options...) + err = driverExt.Swipe(0.5, 0.5, 0.5, 0.1, opts...) case "down": - err = driverExt.Swipe(0.5, 0.5, 0.5, 0.9, options...) + err = driverExt.Swipe(0.5, 0.5, 0.5, 0.9, opts...) case "left": - err = driverExt.Swipe(0.5, 0.5, 0.1, 0.5, options...) + err = driverExt.Swipe(0.5, 0.5, 0.1, 0.5, opts...) case "right": - err = driverExt.Swipe(0.5, 0.5, 0.9, 0.5, options...) + err = driverExt.Swipe(0.5, 0.5, 0.9, 0.5, opts...) default: - return mcp.NewToolResultError(fmt.Sprintf("get unexpected swipe direction: %s", swipeReq.Direction)), nil + return mcp.NewToolResultError(fmt.Sprintf("Unexpected swipe direction: %s", swipeReq.Direction)), nil } + if err != nil { - return mcp.NewToolResultError("Swipe failed: " + err.Error()), nil + return mcp.NewToolResultError(fmt.Sprintf("Swipe failed: %s", err.Error())), nil } - return mcp.NewToolResultText( - fmt.Sprintf("swipe %s success", swipeReq.Direction), - ), nil + + return mcp.NewToolResultText(fmt.Sprintf("Successfully swiped %s", swipeReq.Direction)), nil } } @@ -478,32 +579,39 @@ func (t *ToolDrag) Description() string { } func (t *ToolDrag) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.DragRequest{}) + return option.NewMCPOptions(option.DragRequest{}) } func (t *ToolDrag) Implement() toolCall { return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { driverExt, err := setupXTDriver(ctx, request.Params.Arguments) if err != nil { - return nil, err + return nil, fmt.Errorf("setup driver failed: %w", err) } + var dragReq option.DragRequest if err := mapToStruct(request.Params.Arguments, &dragReq); err != nil { - return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil + return nil, fmt.Errorf("parse parameters error: %w", err) } - actionOptions := []option.ActionOption{} + + opts := []option.ActionOption{} if dragReq.Duration > 0 { - actionOptions = append(actionOptions, option.WithDuration(dragReq.Duration/1000.0)) + opts = append(opts, 
option.WithDuration(dragReq.Duration/1000.0)) } - err = driverExt.Swipe(dragReq.FromX, dragReq.FromY, - dragReq.ToX, dragReq.ToY, actionOptions...) + + // Drag action logic + log.Info(). + Float64("fromX", dragReq.FromX).Float64("fromY", dragReq.FromY). + Float64("toX", dragReq.ToX).Float64("toY", dragReq.ToY). + Msg("performing drag") + + err = driverExt.Swipe(dragReq.FromX, dragReq.FromY, dragReq.ToX, dragReq.ToY, opts...) if err != nil { - return mcp.NewToolResultError("Swipe failed: " + err.Error()), nil + return mcp.NewToolResultError(fmt.Sprintf("Drag failed: %s", err.Error())), nil } - return mcp.NewToolResultText( - fmt.Sprintf("swipe (%f,%f)->(%f,%f) success", - dragReq.FromX, dragReq.FromY, dragReq.ToX, dragReq.ToY), - ), nil + + return mcp.NewToolResultText(fmt.Sprintf("Successfully dragged from (%.2f, %.2f) to (%.2f, %.2f)", + dragReq.FromX, dragReq.FromY, dragReq.ToX, dragReq.ToY)), nil } } @@ -519,7 +627,7 @@ func (t *ToolScreenShot) Description() string { } func (t *ToolScreenShot) Options() []mcp.ToolOption { - return option.NewMCPOptions(&option.TargetDeviceRequest{}) + return option.NewMCPOptions(option.TargetDeviceRequest{}) } func (t *ToolScreenShot) Implement() toolCall { @@ -636,3 +744,1091 @@ func mapToStruct(m map[string]interface{}, out interface{}) error { } return json.Unmarshal(b, out) } + +// ToolHome implements the home tool call. 
+type ToolHome struct{}
+
+// Name reports the identifier this tool is registered under.
+func (t *ToolHome) Name() string { return "home" }
+
+// Description gives the human-readable summary of the tool.
+func (t *ToolHome) Description() string { return "Press the home button on the device" }
+
+// Options derives the MCP tool options from option.TargetDeviceRequest.
+func (t *ToolHome) Options() []mcp.ToolOption {
+	return option.NewMCPOptions(option.TargetDeviceRequest{})
+}
+
+// Implement returns the handler for the home tool: it obtains a driver via
+// setupXTDriver using the request arguments and then calls Home on it.
+func (t *ToolHome) Implement() toolCall {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		xtDriver, setupErr := setupXTDriver(ctx, request.Params.Arguments)
+		if setupErr != nil {
+			return nil, fmt.Errorf("setup driver failed: %w", setupErr)
+		}
+
+		log.Info().Msg("pressing home button")
+		if homeErr := xtDriver.Home(); homeErr != nil {
+			// A failed action is reported as an error tool result, not as a Go error.
+			return mcp.NewToolResultError("Home button press failed: " + homeErr.Error()), nil
+		}
+		return mcp.NewToolResultText("Successfully pressed home button"), nil
+	}
+}
+
+// ToolBack implements the back tool call.
+type ToolBack struct{}
+
+// Name reports the identifier this tool is registered under.
+func (t *ToolBack) Name() string { return "back" }
+
+// Description gives the human-readable summary of the tool.
+func (t *ToolBack) Description() string { return "Press the back button on the device" }
+
+// Options derives the MCP tool options from option.TargetDeviceRequest.
+func (t *ToolBack) Options() []mcp.ToolOption {
+	return option.NewMCPOptions(option.TargetDeviceRequest{})
+}
+
+// Implement returns the handler for the back tool: it obtains a driver via
+// setupXTDriver using the request arguments and then calls Back on it.
+func (t *ToolBack) Implement() toolCall {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		xtDriver, setupErr := setupXTDriver(ctx, request.Params.Arguments)
+		if setupErr != nil {
+			return nil, fmt.Errorf("setup driver failed: %w", setupErr)
+		}
+
+		log.Info().Msg("pressing back button")
+		if backErr := xtDriver.Back(); backErr != nil {
+			// A failed action is reported as an error tool result, not as a Go error.
+			return mcp.NewToolResultError("Back button press failed: " + backErr.Error()), nil
+		}
+		return mcp.NewToolResultText("Successfully pressed back button"), nil
+	}
+}
+
+// ToolInput implements the input tool call.
+type ToolInput struct{}
+
+// Name reports the identifier this tool is registered under.
+func (t *ToolInput) Name() string { return "input" }
+
+// Description gives the human-readable summary of the tool.
+func (t *ToolInput) Description() string { return "Input text on the current active element" }
+
+// Options derives the MCP tool options from option.InputRequest.
+func (t *ToolInput) Options() []mcp.ToolOption {
+	return option.NewMCPOptions(option.InputRequest{})
+}
+
+// Implement returns the handler for the input tool. The handler obtains a
+// driver via setupXTDriver, decodes the arguments into option.InputRequest,
+// rejects an empty Text, and passes the text to the driver's Input method.
+func (t *ToolInput) Implement() toolCall {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		args := request.Params.Arguments
+
+		xtDriver, setupErr := setupXTDriver(ctx, args)
+		if setupErr != nil {
+			return nil, fmt.Errorf("setup driver failed: %w", setupErr)
+		}
+
+		var req option.InputRequest
+		if parseErr := mapToStruct(args, &req); parseErr != nil {
+			return nil, fmt.Errorf("parse parameters error: %w", parseErr)
+		}
+
+		text := req.Text
+		if len(text) == 0 {
+			return nil, fmt.Errorf("text is required")
+		}
+
+		log.Info().Str("text", text).Msg("inputting text")
+		if inputErr := xtDriver.Input(text); inputErr != nil {
+			// A failed action is reported as an error tool result, not as a Go error.
+			return mcp.NewToolResultError("Input failed: " + inputErr.Error()), nil
+		}
+		return mcp.NewToolResultText("Successfully input text: " + text), nil
+	}
+}
+
+// ToolSleep implements the sleep tool call.
+type ToolSleep struct{}
+
+// Name returns the identifier this tool is registered under.
+func (t *ToolSleep) Name() string {
+	return "sleep"
+}
+
+// Description returns the human-readable summary of the tool.
+func (t *ToolSleep) Description() string {
+	return "Sleep for a specified number of seconds"
+}
+
+// Options declares the single numeric "seconds" argument of the tool.
+func (t *ToolSleep) Options() []mcp.ToolOption {
+	return []mcp.ToolOption{
+		mcp.WithNumber("seconds", mcp.Description("Number of seconds to sleep")),
+	}
+}
+
+// Implement returns the handler for the sleep tool.
+//
+// The "seconds" argument may be a float64, int, int64, json.Number or a
+// numeric string; fractional values are honoured with millisecond
+// resolution. The wait ends early with an error when ctx is cancelled, so a
+// cancelled request does not keep the handler blocked for the full duration.
+func (t *ToolSleep) Implement() toolCall {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		seconds, ok := request.Params.Arguments["seconds"]
+		if !ok {
+			return nil, fmt.Errorf("seconds parameter is required")
+		}
+
+		// Sleep action logic
+		log.Info().Interface("seconds", seconds).Msg("sleeping")
+
+		var duration time.Duration
+		switch v := seconds.(type) {
+		case float64:
+			duration = time.Duration(v*1000) * time.Millisecond
+		case int:
+			duration = time.Duration(v) * time.Second
+		case int64:
+			duration = time.Duration(v) * time.Second
+		case json.Number:
+			// Present when the arguments were decoded with UseNumber.
+			s, err := v.Float64()
+			if err != nil {
+				return nil, fmt.Errorf("invalid sleep duration: %v", v)
+			}
+			duration = time.Duration(s*1000) * time.Millisecond
+		case string:
+			s, err := builtin.ConvertToFloat64(v)
+			if err != nil {
+				return nil, fmt.Errorf("invalid sleep duration: %v", v)
+			}
+			duration = time.Duration(s*1000) * time.Millisecond
+		default:
+			return nil, fmt.Errorf("unsupported sleep duration type: %T", v)
+		}
+
+		// Wait on a timer instead of time.Sleep so cancellation is respected.
+		// A zero or negative duration makes the timer fire immediately.
+		timer := time.NewTimer(duration)
+		defer timer.Stop()
+		select {
+		case <-timer.C:
+		case <-ctx.Done():
+			return nil, fmt.Errorf("sleep interrupted: %w", ctx.Err())
+		}
+
+		return mcp.NewToolResultText(fmt.Sprintf("Successfully slept for %v seconds", seconds)), nil
+	}
+}
+
+// Additional ActionTool implementations for DoAction migration
+
+// ToolWebLoginNoneUI implements the web_login_none_ui tool call.
+type ToolWebLoginNoneUI struct{} + +func (t *ToolWebLoginNoneUI) Name() string { + return "web_login_none_ui" +} + +func (t *ToolWebLoginNoneUI) Description() string { + return "Perform login without UI interaction for web applications" +} + +func (t *ToolWebLoginNoneUI) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.WebLoginNoneUIRequest{}) +} + +func (t *ToolWebLoginNoneUI) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var loginReq option.WebLoginNoneUIRequest + if err := mapToStruct(request.Params.Arguments, &loginReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Web login none UI action logic + log.Info().Str("packageName", loginReq.PackageName).Msg("performing web login without UI") + driver, ok := driverExt.IDriver.(*BrowserDriver) + if !ok { + return nil, fmt.Errorf("invalid browser driver for web login") + } + + _, err = driver.LoginNoneUI(loginReq.PackageName, loginReq.PhoneNumber, loginReq.Captcha, loginReq.Password) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Web login failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText("Successfully performed web login without UI"), nil + } +} + +// ToolAppInstall implements the app_install tool call. 
+type ToolAppInstall struct{} + +func (t *ToolAppInstall) Name() string { + return "app_install" +} + +func (t *ToolAppInstall) Description() string { + return "Install an app on the device" +} + +func (t *ToolAppInstall) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.AppInstallRequest{}) +} + +func (t *ToolAppInstall) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var installReq option.AppInstallRequest + if err := mapToStruct(request.Params.Arguments, &installReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // App install action logic + log.Info().Str("appUrl", installReq.AppUrl).Msg("installing app") + err = driverExt.GetDevice().Install(installReq.AppUrl) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("App install failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully installed app from: %s", installReq.AppUrl)), nil + } +} + +// ToolAppUninstall implements the app_uninstall tool call. 
+type ToolAppUninstall struct{} + +func (t *ToolAppUninstall) Name() string { + return "app_uninstall" +} + +func (t *ToolAppUninstall) Description() string { + return "Uninstall an app from the device" +} + +func (t *ToolAppUninstall) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.AppUninstallRequest{}) +} + +func (t *ToolAppUninstall) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var uninstallReq option.AppUninstallRequest + if err := mapToStruct(request.Params.Arguments, &uninstallReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // App uninstall action logic + log.Info().Str("packageName", uninstallReq.PackageName).Msg("uninstalling app") + err = driverExt.GetDevice().Uninstall(uninstallReq.PackageName) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("App uninstall failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully uninstalled app: %s", uninstallReq.PackageName)), nil + } +} + +// ToolAppClear implements the app_clear tool call. 
+type ToolAppClear struct{} + +func (t *ToolAppClear) Name() string { + return "app_clear" +} + +func (t *ToolAppClear) Description() string { + return "Clear app data and cache" +} + +func (t *ToolAppClear) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.AppClearRequest{}) +} + +func (t *ToolAppClear) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var clearReq option.AppClearRequest + if err := mapToStruct(request.Params.Arguments, &clearReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // App clear action logic + log.Info().Str("packageName", clearReq.PackageName).Msg("clearing app") + err = driverExt.AppClear(clearReq.PackageName) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("App clear failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully cleared app: %s", clearReq.PackageName)), nil + } +} + +// ToolSwipeToTapApp implements the swipe_to_tap_app tool call. 
+type ToolSwipeToTapApp struct{} + +func (t *ToolSwipeToTapApp) Name() string { + return "swipe_to_tap_app" +} + +func (t *ToolSwipeToTapApp) Description() string { + return "Swipe to find and tap an app by name" +} + +func (t *ToolSwipeToTapApp) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SwipeToTapAppRequest{}) +} + +func (t *ToolSwipeToTapApp) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var swipeAppReq option.SwipeToTapAppRequest + if err := mapToStruct(request.Params.Arguments, &swipeAppReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Swipe to tap app action logic + log.Info().Str("appName", swipeAppReq.AppName).Msg("swipe to tap app") + err = driverExt.SwipeToTapApp(swipeAppReq.AppName) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Swipe to tap app failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully found and tapped app: %s", swipeAppReq.AppName)), nil + } +} + +// ToolSwipeToTapText implements the swipe_to_tap_text tool call. 
+type ToolSwipeToTapText struct{} + +func (t *ToolSwipeToTapText) Name() string { + return "swipe_to_tap_text" +} + +func (t *ToolSwipeToTapText) Description() string { + return "Swipe to find and tap text on screen" +} + +func (t *ToolSwipeToTapText) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SwipeToTapTextRequest{}) +} + +func (t *ToolSwipeToTapText) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var swipeTextReq option.SwipeToTapTextRequest + if err := mapToStruct(request.Params.Arguments, &swipeTextReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Swipe to tap text action logic + log.Info().Str("text", swipeTextReq.Text).Msg("swipe to tap text") + err = driverExt.SwipeToTapTexts([]string{swipeTextReq.Text}) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Swipe to tap text failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully found and tapped text: %s", swipeTextReq.Text)), nil + } +} + +// ToolSwipeToTapTexts implements the swipe_to_tap_texts tool call. 
+type ToolSwipeToTapTexts struct{} + +func (t *ToolSwipeToTapTexts) Name() string { + return "swipe_to_tap_texts" +} + +func (t *ToolSwipeToTapTexts) Description() string { + return "Swipe to find and tap one of multiple texts on screen" +} + +func (t *ToolSwipeToTapTexts) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SwipeToTapTextsRequest{}) +} + +func (t *ToolSwipeToTapTexts) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var swipeTextsReq option.SwipeToTapTextsRequest + if err := mapToStruct(request.Params.Arguments, &swipeTextsReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Swipe to tap texts action logic + log.Info().Strs("texts", swipeTextsReq.Texts).Msg("swipe to tap texts") + err = driverExt.SwipeToTapTexts(swipeTextsReq.Texts) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Swipe to tap texts failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully found and tapped one of texts: %v", swipeTextsReq.Texts)), nil + } +} + +// ToolSecondaryClick implements the secondary_click tool call. 
+type ToolSecondaryClick struct{} + +func (t *ToolSecondaryClick) Name() string { + return "secondary_click" +} + +func (t *ToolSecondaryClick) Description() string { + return "Perform secondary click (right click) at coordinates" +} + +func (t *ToolSecondaryClick) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SecondaryClickRequest{}) +} + +func (t *ToolSecondaryClick) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var clickReq option.SecondaryClickRequest + if err := mapToStruct(request.Params.Arguments, &clickReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Secondary click action logic + log.Info().Float64("x", clickReq.X).Float64("y", clickReq.Y).Msg("performing secondary click") + err = driverExt.SecondaryClick(clickReq.X, clickReq.Y) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Secondary click failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully performed secondary click at (%.2f, %.2f)", clickReq.X, clickReq.Y)), nil + } +} + +// ToolHoverBySelector implements the hover_by_selector tool call. 
+type ToolHoverBySelector struct{} + +func (t *ToolHoverBySelector) Name() string { + return "hover_by_selector" +} + +func (t *ToolHoverBySelector) Description() string { + return "Hover over an element selected by CSS selector or XPath" +} + +func (t *ToolHoverBySelector) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SelectorRequest{}) +} + +func (t *ToolHoverBySelector) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var selectorReq option.SelectorRequest + if err := mapToStruct(request.Params.Arguments, &selectorReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Hover by selector action logic + log.Info().Str("selector", selectorReq.Selector).Msg("hovering by selector") + err = driverExt.HoverBySelector(selectorReq.Selector) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Hover by selector failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully hovered over element with selector: %s", selectorReq.Selector)), nil + } +} + +// ToolTapBySelector implements the tap_by_selector tool call. 
+type ToolTapBySelector struct{} + +func (t *ToolTapBySelector) Name() string { + return "tap_by_selector" +} + +func (t *ToolTapBySelector) Description() string { + return "Tap an element selected by CSS selector or XPath" +} + +func (t *ToolTapBySelector) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SelectorRequest{}) +} + +func (t *ToolTapBySelector) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var selectorReq option.SelectorRequest + if err := mapToStruct(request.Params.Arguments, &selectorReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Tap by selector action logic + log.Info().Str("selector", selectorReq.Selector).Msg("tapping by selector") + err = driverExt.TapBySelector(selectorReq.Selector) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Tap by selector failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully tapped element with selector: %s", selectorReq.Selector)), nil + } +} + +// ToolSecondaryClickBySelector implements the secondary_click_by_selector tool call. 
+type ToolSecondaryClickBySelector struct{} + +func (t *ToolSecondaryClickBySelector) Name() string { + return "secondary_click_by_selector" +} + +func (t *ToolSecondaryClickBySelector) Description() string { + return "Perform secondary click on an element selected by CSS selector or XPath" +} + +func (t *ToolSecondaryClickBySelector) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SelectorRequest{}) +} + +func (t *ToolSecondaryClickBySelector) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var selectorReq option.SelectorRequest + if err := mapToStruct(request.Params.Arguments, &selectorReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Secondary click by selector action logic + log.Info().Str("selector", selectorReq.Selector).Msg("performing secondary click by selector") + err = driverExt.SecondaryClickBySelector(selectorReq.Selector) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Secondary click by selector failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully performed secondary click on element with selector: %s", selectorReq.Selector)), nil + } +} + +// ToolWebCloseTab implements the web_close_tab tool call. 
+type ToolWebCloseTab struct{} + +func (t *ToolWebCloseTab) Name() string { + return "web_close_tab" +} + +func (t *ToolWebCloseTab) Description() string { + return "Close a browser tab by index" +} + +func (t *ToolWebCloseTab) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.WebCloseTabRequest{}) +} + +func (t *ToolWebCloseTab) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var closeTabReq option.WebCloseTabRequest + if err := mapToStruct(request.Params.Arguments, &closeTabReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Web close tab action logic + log.Info().Int("tabIndex", closeTabReq.TabIndex).Msg("closing web tab") + browserDriver, ok := driverExt.IDriver.(*BrowserDriver) + if !ok { + return nil, fmt.Errorf("web close tab is only supported for browser drivers") + } + + err = browserDriver.CloseTab(closeTabReq.TabIndex) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Close tab failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully closed tab at index: %d", closeTabReq.TabIndex)), nil + } +} + +// ToolSetIme implements the set_ime tool call. 
+type ToolSetIme struct{} + +func (t *ToolSetIme) Name() string { + return "set_ime" +} + +func (t *ToolSetIme) Description() string { + return "Set the input method editor (IME) on the device" +} + +func (t *ToolSetIme) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SetImeRequest{}) +} + +func (t *ToolSetIme) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var imeReq option.SetImeRequest + if err := mapToStruct(request.Params.Arguments, &imeReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Set IME action logic + log.Info().Str("ime", imeReq.Ime).Msg("setting IME") + err = driverExt.SetIme(imeReq.Ime) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Set IME failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully set IME to: %s", imeReq.Ime)), nil + } +} + +// ToolGetSource implements the get_source tool call. 
+type ToolGetSource struct{} + +func (t *ToolGetSource) Name() string { + return "get_source" +} + +func (t *ToolGetSource) Description() string { + return "Get the source/hierarchy of the current screen" +} + +func (t *ToolGetSource) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.GetSourceRequest{}) +} + +func (t *ToolGetSource) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var sourceReq option.GetSourceRequest + if err := mapToStruct(request.Params.Arguments, &sourceReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Get source action logic + log.Info().Str("packageName", sourceReq.PackageName).Msg("getting source") + _, err = driverExt.Source(option.WithProcessName(sourceReq.PackageName)) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Get source failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully retrieved source for package: %s", sourceReq.PackageName)), nil + } +} + +// ToolTapAbsXY implements the tap_abs_xy tool call. 
+type ToolTapAbsXY struct{} + +func (t *ToolTapAbsXY) Name() string { + return "tap_abs_xy" +} + +func (t *ToolTapAbsXY) Description() string { + return "Tap at absolute pixel coordinates" +} + +func (t *ToolTapAbsXY) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.TapAbsXYRequest{}) +} + +func (t *ToolTapAbsXY) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var tapAbsReq option.TapAbsXYRequest + if err := mapToStruct(request.Params.Arguments, &tapAbsReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Tap absolute XY action logic + log.Info().Float64("x", tapAbsReq.X).Float64("y", tapAbsReq.Y).Msg("tapping at absolute coordinates") + opts := []option.ActionOption{} + if tapAbsReq.Duration > 0 { + opts = append(opts, option.WithDuration(tapAbsReq.Duration)) + } + + err = driverExt.TapAbsXY(tapAbsReq.X, tapAbsReq.Y, opts...) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Tap absolute XY failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully tapped at absolute coordinates (%.0f, %.0f)", tapAbsReq.X, tapAbsReq.Y)), nil + } +} + +// ToolTapByOCR implements the tap_by_ocr tool call. 
+type ToolTapByOCR struct{} + +func (t *ToolTapByOCR) Name() string { + return "tap_by_ocr" +} + +func (t *ToolTapByOCR) Description() string { + return "Tap on text found by OCR recognition" +} + +func (t *ToolTapByOCR) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.TapByOCRRequest{}) +} + +func (t *ToolTapByOCR) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var ocrReq option.TapByOCRRequest + if err := mapToStruct(request.Params.Arguments, &ocrReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Tap by OCR action logic + log.Info().Str("text", ocrReq.Text).Msg("tapping by OCR") + err = driverExt.TapByOCR(ocrReq.Text) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Tap by OCR failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully tapped on OCR text: %s", ocrReq.Text)), nil + } +} + +// ToolTapByCV implements the tap_by_cv tool call. 
+type ToolTapByCV struct{} + +func (t *ToolTapByCV) Name() string { + return "tap_by_cv" +} + +func (t *ToolTapByCV) Description() string { + return "Tap on element found by computer vision" +} + +func (t *ToolTapByCV) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.TapByCVRequest{}) +} + +func (t *ToolTapByCV) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var cvReq option.TapByCVRequest + if err := mapToStruct(request.Params.Arguments, &cvReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Tap by CV action logic + log.Info().Str("imagePath", cvReq.ImagePath).Msg("tapping by CV") + + // For TapByCV, we need to check if there are UI types in the options + // In the original DoAction, it requires ScreenShotWithUITypes to be set + // We'll add a basic implementation that triggers CV recognition + err = driverExt.TapByCV() + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Tap by CV failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText("Successfully tapped by computer vision"), nil + } +} + +// ToolDoubleTapXY implements the double_tap_xy tool call. 
+type ToolDoubleTapXY struct{} + +func (t *ToolDoubleTapXY) Name() string { + return "double_tap_xy" +} + +func (t *ToolDoubleTapXY) Description() string { + return "Double tap at given coordinates" +} + +func (t *ToolDoubleTapXY) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.DoubleTapXYRequest{}) +} + +func (t *ToolDoubleTapXY) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var doubleTapReq option.DoubleTapXYRequest + if err := mapToStruct(request.Params.Arguments, &doubleTapReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Double tap XY action logic + log.Info().Float64("x", doubleTapReq.X).Float64("y", doubleTapReq.Y).Msg("double tapping at coordinates") + err = driverExt.DoubleTap(doubleTapReq.X, doubleTapReq.Y) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Double tap failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully double tapped at (%.2f, %.2f)", doubleTapReq.X, doubleTapReq.Y)), nil + } +} + +// ToolSwipeAdvanced implements the swipe_advanced tool call. 
+type ToolSwipeAdvanced struct{} + +func (t *ToolSwipeAdvanced) Name() string { + return "swipe_advanced" +} + +func (t *ToolSwipeAdvanced) Description() string { + return "Perform advanced swipe with custom coordinates and timing" +} + +func (t *ToolSwipeAdvanced) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SwipeAdvancedRequest{}) +} + +func (t *ToolSwipeAdvanced) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var swipeAdvReq option.SwipeAdvancedRequest + if err := mapToStruct(request.Params.Arguments, &swipeAdvReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Advanced swipe action logic using prepareSwipeAction like the original DoAction + log.Info(). + Float64("fromX", swipeAdvReq.FromX).Float64("fromY", swipeAdvReq.FromY). + Float64("toX", swipeAdvReq.ToX).Float64("toY", swipeAdvReq.ToY). + Msg("performing advanced swipe") + + params := []float64{swipeAdvReq.FromX, swipeAdvReq.FromY, swipeAdvReq.ToX, swipeAdvReq.ToY} + opts := []option.ActionOption{} + if swipeAdvReq.Duration > 0 { + opts = append(opts, option.WithDuration(swipeAdvReq.Duration)) + } + if swipeAdvReq.PressDuration > 0 { + opts = append(opts, option.WithPressDuration(swipeAdvReq.PressDuration)) + } + + swipeAction := prepareSwipeAction(driverExt, params, opts...) + err = swipeAction(driverExt) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Advanced swipe failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully performed advanced swipe from (%.2f, %.2f) to (%.2f, %.2f)", + swipeAdvReq.FromX, swipeAdvReq.FromY, swipeAdvReq.ToX, swipeAdvReq.ToY)), nil + } +} + +// ToolSleepMS implements the sleep_ms tool call. 
+type ToolSleepMS struct{} + +func (t *ToolSleepMS) Name() string { + return "sleep_ms" +} + +func (t *ToolSleepMS) Description() string { + return "Sleep for specified milliseconds" +} + +func (t *ToolSleepMS) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SleepMSRequest{}) +} + +func (t *ToolSleepMS) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + var sleepReq option.SleepMSRequest + if err := mapToStruct(request.Params.Arguments, &sleepReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Sleep MS action logic + log.Info().Int64("milliseconds", sleepReq.Milliseconds).Msg("sleeping in milliseconds") + time.Sleep(time.Duration(sleepReq.Milliseconds) * time.Millisecond) + + return mcp.NewToolResultText(fmt.Sprintf("Successfully slept for %d milliseconds", sleepReq.Milliseconds)), nil + } +} + +// ToolSleepRandom implements the sleep_random tool call. +type ToolSleepRandom struct{} + +func (t *ToolSleepRandom) Name() string { + return "sleep_random" +} + +func (t *ToolSleepRandom) Description() string { + return "Sleep for a random duration based on parameters" +} + +func (t *ToolSleepRandom) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.SleepRandomRequest{}) +} + +func (t *ToolSleepRandom) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + var sleepRandomReq option.SleepRandomRequest + if err := mapToStruct(request.Params.Arguments, &sleepRandomReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Sleep random action logic + log.Info().Floats64("params", sleepRandomReq.Params).Msg("sleeping for random duration") + sleepStrict(time.Now(), getSimulationDuration(sleepRandomReq.Params)) + + return mcp.NewToolResultText(fmt.Sprintf("Successfully slept for random duration with params: %v", sleepRandomReq.Params)), nil + } 
+} + +// ToolClosePopups implements the close_popups tool call. +type ToolClosePopups struct{} + +func (t *ToolClosePopups) Name() string { + return "close_popups" +} + +func (t *ToolClosePopups) Description() string { + return "Close any popup windows or dialogs on screen" +} + +func (t *ToolClosePopups) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.TargetDeviceRequest{}) +} + +func (t *ToolClosePopups) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + // Close popups action logic + log.Info().Msg("closing popups") + err = driverExt.ClosePopupsHandler() + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Close popups failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText("Successfully closed popups"), nil + } +} + +// ToolCallFunction implements the call_function tool call. 
+type ToolCallFunction struct{} + +func (t *ToolCallFunction) Name() string { + return "call_function" +} + +func (t *ToolCallFunction) Description() string { + return "Call a custom function with description" +} + +func (t *ToolCallFunction) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.CallFunctionRequest{}) +} + +func (t *ToolCallFunction) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var funcReq option.CallFunctionRequest + if err := mapToStruct(request.Params.Arguments, &funcReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // Call function action logic + // Note: The function (fn) parameter is not available in MCP calls + // This is a simplified implementation that only logs the description + log.Info().Str("description", funcReq.Description).Msg("calling function") + err = driverExt.Call(funcReq.Description, nil) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("Call function failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully called function: %s", funcReq.Description)), nil + } +} + +// ToolAIAction implements the ai_action tool call. 
+type ToolAIAction struct{} + +func (t *ToolAIAction) Name() string { + return "ai_action" +} + +func (t *ToolAIAction) Description() string { + return "Perform actions using AI with a given prompt" +} + +func (t *ToolAIAction) Options() []mcp.ToolOption { + return option.NewMCPOptions(option.AIActionRequest{}) +} + +func (t *ToolAIAction) Implement() toolCall { + return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + driverExt, err := setupXTDriver(ctx, request.Params.Arguments) + if err != nil { + return nil, fmt.Errorf("setup driver failed: %w", err) + } + + var aiReq option.AIActionRequest + if err := mapToStruct(request.Params.Arguments, &aiReq); err != nil { + return nil, fmt.Errorf("parse parameters error: %w", err) + } + + // AI action logic + log.Info().Str("prompt", aiReq.Prompt).Msg("performing AI action") + err = driverExt.AIAction(aiReq.Prompt) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("AI action failed: %s", err.Error())), nil + } + + return mcp.NewToolResultText(fmt.Sprintf("Successfully performed AI action with prompt: %s", aiReq.Prompt)), nil + } +} diff --git a/uixt/mcp_server_test.go b/uixt/mcp_server_test.go new file mode 100644 index 00000000..23c6fba2 --- /dev/null +++ b/uixt/mcp_server_test.go @@ -0,0 +1,72 @@ +package uixt + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewMCPServer(t *testing.T) { + server := NewMCPServer() + assert.NotNil(t, server) + + // Check that tools are registered + tools := server.ListTools() + assert.Greater(t, len(tools), 0, "Should have at least one tool registered") + + // Check specific tools exist + expectedTools := []string{ + "list_available_devices", + "select_device", + "list_packages", + "launch_app", + "terminate_app", + "get_screen_size", + "press_button", + "tap_xy", + "swipe", + "drag", + "screenshot", + "home", + "back", + "input", + "sleep", + } + + registeredToolNames := make(map[string]bool) + for _, tool 
:= range tools { + registeredToolNames[tool.Name] = true + } + + for _, expectedTool := range expectedTools { + assert.True(t, registeredToolNames[expectedTool], "Tool %s should be registered", expectedTool) + } +} + +func TestToolInterfaces(t *testing.T) { + // Test that all tools implement the ActionTool interface correctly + tools := []ActionTool{ + &ToolListAvailableDevices{}, + &ToolSelectDevice{}, + &ToolListPackages{}, + &ToolLaunchApp{}, + &ToolTerminateApp{}, + &ToolGetScreenSize{}, + &ToolPressButton{}, + &ToolTapXY{}, + &ToolSwipe{}, + &ToolDrag{}, + &ToolScreenShot{}, + &ToolHome{}, + &ToolBack{}, + &ToolInput{}, + &ToolSleep{}, + } + + for _, tool := range tools { + assert.NotEmpty(t, tool.Name(), "Tool name should not be empty") + assert.NotEmpty(t, tool.Description(), "Tool description should not be empty") + assert.NotNil(t, tool.Options(), "Tool options should not be nil") + assert.NotNil(t, tool.Implement(), "Tool implementation should not be nil") + } +} diff --git a/uixt/option/request.go b/uixt/option/request.go index cdabd423..56576ff5 100644 --- a/uixt/option/request.go +++ b/uixt/option/request.go @@ -92,10 +92,125 @@ type PressButtonRequest struct { Button types.DeviceButton `json:"button" binding:"required" desc:"The button to press. 
Supported buttons: BACK (android only), HOME, VOLUME_UP, VOLUME_DOWN, ENTER."` } +// Additional requests for missing actions +type WebLoginNoneUIRequest struct { + TargetDeviceRequest + PackageName string `json:"packageName" binding:"required" desc:"Package name for the app to login"` + PhoneNumber string `json:"phoneNumber" binding:"required" desc:"Phone number for login"` + Captcha string `json:"captcha" binding:"required" desc:"Captcha code"` + Password string `json:"password" binding:"required" desc:"Password for login"` +} + +type SwipeToTapAppRequest struct { + TargetDeviceRequest + AppName string `json:"appName" binding:"required" desc:"App name to find and tap"` +} + +type SwipeToTapTextRequest struct { + TargetDeviceRequest + Text string `json:"text" binding:"required" desc:"Text to find and tap"` +} + +type SwipeToTapTextsRequest struct { + TargetDeviceRequest + Texts []string `json:"texts" binding:"required" desc:"List of texts to find and tap"` +} + +type SecondaryClickRequest struct { + TargetDeviceRequest + X float64 `json:"x" binding:"required" desc:"X coordinate (0.0~1.0 for percent, or absolute pixel value)"` + Y float64 `json:"y" binding:"required" desc:"Y coordinate (0.0~1.0 for percent, or absolute pixel value)"` +} + +type SelectorRequest struct { + TargetDeviceRequest + Selector string `json:"selector" binding:"required" desc:"CSS or XPath selector"` +} + +type WebCloseTabRequest struct { + TargetDeviceRequest + TabIndex int `json:"tabIndex" binding:"required" desc:"Index of the tab to close"` +} + +type SetImeRequest struct { + TargetDeviceRequest + Ime string `json:"ime" binding:"required" desc:"IME package name to set"` +} + +type GetSourceRequest struct { + TargetDeviceRequest + PackageName string `json:"packageName" binding:"required" desc:"Package name to get source from"` +} + +type TapAbsXYRequest struct { + TargetDeviceRequest + X float64 `json:"x" binding:"required" desc:"Absolute X coordinate in pixels"` + Y float64 `json:"y" 
binding:"required" desc:"Absolute Y coordinate in pixels"` + Duration float64 `json:"duration" desc:"Tap duration in seconds (optional)"` +} + +type TapByOCRRequest struct { + TargetDeviceRequest + Text string `json:"text" binding:"required" desc:"OCR text to find and tap"` +} + +type TapByCVRequest struct { + TargetDeviceRequest + ImagePath string `json:"imagePath" desc:"Path to reference image for CV recognition"` +} + +type DoubleTapXYRequest struct { + TargetDeviceRequest + X float64 `json:"x" binding:"required" desc:"X coordinate (0.0~1.0 for percent, or absolute pixel value)"` + Y float64 `json:"y" binding:"required" desc:"Y coordinate (0.0~1.0 for percent, or absolute pixel value)"` +} + +type SwipeAdvancedRequest struct { + TargetDeviceRequest + FromX float64 `json:"fromX" binding:"required" desc:"Starting X coordinate"` + FromY float64 `json:"fromY" binding:"required" desc:"Starting Y coordinate"` + ToX float64 `json:"toX" binding:"required" desc:"Ending X coordinate"` + ToY float64 `json:"toY" binding:"required" desc:"Ending Y coordinate"` + Duration float64 `json:"duration" desc:"Swipe duration in seconds (optional)"` + PressDuration float64 `json:"pressDuration" desc:"Press duration in seconds (optional)"` +} + +type SleepMSRequest struct { + TargetDeviceRequest + Milliseconds int64 `json:"milliseconds" binding:"required" desc:"Sleep duration in milliseconds"` +} + +type SleepRandomRequest struct { + TargetDeviceRequest + Params []float64 `json:"params" binding:"required" desc:"Random sleep parameters [min, max] or [min1, max1, weight1, ...]"` +} + +type CallFunctionRequest struct { + TargetDeviceRequest + Description string `json:"description" binding:"required" desc:"Function description"` +} + +type AIActionRequest struct { + TargetDeviceRequest + Prompt string `json:"prompt" binding:"required" desc:"AI action prompt"` +} + // NewMCPOptions generates mcp.NewTool parameters from a struct type. 
// It automatically generates mcp.NewTool parameters based on the struct fields and their desc tags. func NewMCPOptions(t interface{}) (options []mcp.ToolOption) { tType := reflect.TypeOf(t) + + // Handle pointer type by getting the element type + if tType.Kind() == reflect.Ptr { + tType = tType.Elem() + } + + // Ensure we have a struct type + if tType.Kind() != reflect.Struct { + log.Warn().Str("type", tType.String()).Msg("NewMCPOptions expects a struct or pointer to struct") + return options + } + for i := 0; i < tType.NumField(); i++ { field := tType.Field(i) jsonTag := field.Tag.Get("json") @@ -125,6 +240,23 @@ func NewMCPOptions(t interface{}) (options []mcp.ToolOption) { } else { options = append(options, mcp.WithBoolean(name, mcp.Description(desc))) } + case reflect.Slice: + // Handle slice types, especially []string and []float64 + if field.Type.Elem().Kind() == reflect.String { + // Array of strings + if required { + options = append(options, mcp.WithArray(name, mcp.Required(), mcp.Description(desc))) + } else { + options = append(options, mcp.WithArray(name, mcp.Description(desc))) + } + } else if field.Type.Elem().Kind() == reflect.Float64 { + // Array of numbers + if required { + options = append(options, mcp.WithArray(name, mcp.Required(), mcp.Description(desc))) + } else { + options = append(options, mcp.WithArray(name, mcp.Description(desc))) + } + } default: log.Warn().Str("field_type", field.Type.String()).Msg("Unsupported field type") } diff --git a/uixt/sdk.go b/uixt/sdk.go index 3bc557ed..cf58b5ce 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -2,8 +2,10 @@ package uixt import ( "context" + "encoding/json" "fmt" + "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/uixt/ai" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/mark3labs/mcp-go/client" @@ -78,28 +80,755 @@ func (c *MCPClient4XTDriver) Close() error { return nil } -func convertActionToCallToolRequest(action MobileAction) 
(mcp.CallToolRequest, error) { - // req := mcp.CallToolRequest{ - // Params: struct { - // Name string `json:"name"` - // Arguments map[string]any `json:"arguments,omitempty"` - // Meta *struct { - // ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` - // } `json:"_meta,omitempty"` - // }{ - // Name: action.Method, - // Arguments: action.Params, - // }, - // } - return mcp.CallToolRequest{}, nil -} - func (dExt *XTDriver) ExecuteAction(action MobileAction) (err error) { - // convert action to call tool request + // Convert action to MCP tool call req, err := convertActionToCallToolRequest(action) if err != nil { - return err + return fmt.Errorf("failed to convert action to MCP tool call: %w", err) + } + + // Execute via MCP tool + result, err := dExt.client.CallTool(context.Background(), req) + if err != nil { + return fmt.Errorf("MCP tool call failed: %w", err) + } + + // Check if the tool execution had business logic errors + if result.IsError { + if len(result.Content) > 0 { + return fmt.Errorf("tool execution failed: %s", result.Content[0]) + } + return fmt.Errorf("tool execution failed") + } + + log.Debug().Str("method", string(action.Method)).Msg("executed action via MCP tool") + return nil +} + +func convertActionToCallToolRequest(action MobileAction) (mcp.CallToolRequest, error) { + var arguments map[string]interface{} + + switch action.Method { + case option.ACTION_WebLoginNoneUI: + if params, ok := action.Params.([]interface{}); ok && len(params) == 4 { + arguments = map[string]interface{}{ + "packageName": params[0].(string), + "phoneNumber": params[1].(string), + "captcha": params[2].(string), + "password": params[3].(string), + } + } else if params, ok := action.Params.([]string); ok && len(params) == 4 { + arguments = map[string]interface{}{ + "packageName": params[0], + "phoneNumber": params[1], + "captcha": params[2], + "password": params[3], + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid web login params: %v", 
action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "web_login_none_ui", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AppInstall: + if app, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "appUrl": app, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid app install params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "app_install", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AppUninstall: + if packageName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "packageName": packageName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid app uninstall params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "app_uninstall", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AppClear: + if packageName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "packageName": packageName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid app clear params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + 
Name: "app_clear", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AppLaunch: + if packageName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "packageName": packageName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid app launch params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "launch_app", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SwipeToTapApp: + if appName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "appName": appName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid swipe to tap app params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "swipe_to_tap_app", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SwipeToTapText: + if text, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "text": text, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid swipe to tap text params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "swipe_to_tap_text", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SwipeToTapTexts: + var texts []string + if textsSlice, ok := action.Params.([]string); ok { + texts = textsSlice + } else if textsInterface, err := 
builtin.ConvertToStringSlice(action.Params); err == nil { + texts = textsInterface + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid swipe to tap texts params: %v", action.Params) + } + arguments = map[string]interface{}{ + "texts": texts, + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "swipe_to_tap_texts", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AppTerminate: + if packageName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "packageName": packageName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid app terminate params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "terminate_app", + Arguments: arguments, + }, + }, nil + + case option.ACTION_Home: + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "home", + Arguments: map[string]interface{}{}, + }, + }, nil + + case option.ACTION_SecondaryClick: + if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 { + arguments = map[string]interface{}{ + "x": params[0], + "y": params[1], + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid secondary click params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + 
ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "secondary_click", + Arguments: arguments, + }, + }, nil + + case option.ACTION_HoverBySelector: + if selector, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "selector": selector, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid hover by selector params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "hover_by_selector", + Arguments: arguments, + }, + }, nil + + case option.ACTION_TapBySelector: + if selector, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "selector": selector, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid tap by selector params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "tap_by_selector", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SecondaryClickBySelector: + if selector, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "selector": selector, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid secondary click by selector params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "secondary_click_by_selector", + Arguments: arguments, + }, + }, nil + + case option.ACTION_WebCloseTab: + var tabIndex 
int + if param, ok := action.Params.(json.Number); ok { + paramInt64, _ := param.Int64() + tabIndex = int(paramInt64) + } else if param, ok := action.Params.(int64); ok { + tabIndex = int(param) + } else if param, ok := action.Params.(int); ok { + tabIndex = param + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid web close tab params: %v", action.Params) + } + arguments = map[string]interface{}{ + "tabIndex": tabIndex, + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "web_close_tab", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SetIme: + if ime, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "ime": ime, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid set ime params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "set_ime", + Arguments: arguments, + }, + }, nil + + case option.ACTION_GetSource: + if packageName, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "packageName": packageName, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid get source params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "get_source", + Arguments: arguments, + }, + }, nil + + case option.ACTION_TapXY: + if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 
{ + x, y := params[0], params[1] + arguments = map[string]interface{}{ + "x": x, + "y": y, + } + // Add duration if available from action options + if actionOptions := action.GetOptions(); len(actionOptions) > 0 { + for _, opt := range actionOptions { + if opt != nil { + // Add options like duration + if duration := action.ActionOptions.Duration; duration > 0 { + arguments["duration"] = duration + } + } + } + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "tap_xy", + Arguments: arguments, + }, + }, nil + + case option.ACTION_TapAbsXY: + if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 { + x, y := params[0], params[1] + arguments = map[string]interface{}{ + "x": x, + "y": y, + } + // Add duration if available + if duration := action.ActionOptions.Duration; duration > 0 { + arguments["duration"] = duration + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid tap abs params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "tap_abs_xy", + Arguments: arguments, + }, + }, nil + + case option.ACTION_TapByOCR: + if text, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "text": text, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid tap by OCR params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + 
ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "tap_by_ocr", + Arguments: arguments, + }, + }, nil + + case option.ACTION_TapByCV: + // For TapByCV, the original action might not have params but relies on options + arguments = map[string]interface{}{ + "imagePath": "", // Will be handled by the tool based on UI types + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "tap_by_cv", + Arguments: arguments, + }, + }, nil + + case option.ACTION_DoubleTapXY: + if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 { + x, y := params[0], params[1] + arguments = map[string]interface{}{ + "x": x, + "y": y, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid double tap params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "double_tap_xy", + Arguments: arguments, + }, + }, nil + + case option.ACTION_Swipe: + // Handle different types of swipe params + switch params := action.Params.(type) { + case string: + // Direction swipe like "up", "down", "left", "right" + arguments = map[string]interface{}{ + "direction": params, + } + // Add duration and press duration from options + if duration := action.ActionOptions.Duration; duration > 0 { + arguments["duration"] = duration + } + if pressDuration := action.ActionOptions.PressDuration; pressDuration > 0 { + arguments["pressDuration"] = pressDuration + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any 
`json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "swipe", + Arguments: arguments, + }, + }, nil + default: + // Advanced swipe with coordinates + if paramSlice, err := builtin.ConvertToFloat64Slice(params); err == nil && len(paramSlice) == 4 { + arguments = map[string]interface{}{ + "fromX": paramSlice[0], + "fromY": paramSlice[1], + "toX": paramSlice[2], + "toY": paramSlice[3], + } + // Add duration and press duration from options + if duration := action.ActionOptions.Duration; duration > 0 { + arguments["duration"] = duration + } + if pressDuration := action.ActionOptions.PressDuration; pressDuration > 0 { + arguments["pressDuration"] = pressDuration + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "swipe_advanced", + Arguments: arguments, + }, + }, nil + } + } + return mcp.CallToolRequest{}, fmt.Errorf("invalid swipe params: %v", action.Params) + + case option.ACTION_Input: + text := fmt.Sprintf("%v", action.Params) + arguments = map[string]interface{}{ + "text": text, + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "input", + Arguments: arguments, + }, + }, nil + + case option.ACTION_Back: + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "back", + Arguments: map[string]interface{}{}, + }, + }, nil + + case option.ACTION_Sleep: + 
arguments = map[string]interface{}{ + "seconds": action.Params, + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "sleep", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SleepMS: + var milliseconds int64 + if param, ok := action.Params.(json.Number); ok { + milliseconds, _ = param.Int64() + } else if param, ok := action.Params.(int64); ok { + milliseconds = param + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid sleep ms params: %v", action.Params) + } + arguments = map[string]interface{}{ + "milliseconds": milliseconds, + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "sleep_ms", + Arguments: arguments, + }, + }, nil + + case option.ACTION_SleepRandom: + if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil { + arguments = map[string]interface{}{ + "params": params, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid sleep random params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "sleep_random", + Arguments: arguments, + }, + }, nil + + case option.ACTION_ScreenShot: + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "screenshot", + Arguments: 
map[string]interface{}{}, + }, + }, nil + + case option.ACTION_ClosePopups: + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "close_popups", + Arguments: map[string]interface{}{}, + }, + }, nil + + case option.ACTION_CallFunction: + if description, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "description": description, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid call function params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "call_function", + Arguments: arguments, + }, + }, nil + + case option.ACTION_AIAction: + if prompt, ok := action.Params.(string); ok { + arguments = map[string]interface{}{ + "prompt": prompt, + } + } else { + return mcp.CallToolRequest{}, fmt.Errorf("invalid AI action params: %v", action.Params) + } + return mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: "ai_action", + Arguments: arguments, + }, + }, nil + + default: + return mcp.CallToolRequest{}, fmt.Errorf("unsupported action method: %s", action.Method) } - _, err = dExt.client.CallTool(context.Background(), req) - return err }