diff --git a/internal/version/VERSION b/internal/version/VERSION
index 0fb5fd3b..781ed3db 100644
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2505211651
+v5.0.0-beta-2505211725
diff --git a/mcphost/mcp_server.go b/mcphost/mcp_server.go
index 1e4fb3a7..b8d211b0 100644
--- a/mcphost/mcp_server.go
+++ b/mcphost/mcp_server.go
@@ -143,9 +143,21 @@ func (ums *MCPServer4XTDriver) addTools() {
 	ums.tools = append(ums.tools, getScreenSizeTool)
 	ums.handlerMap[getScreenSizeTool.Name] = ums.handleGetScreenSize
 
+	// PressButton Tool
+	pressButtonParams := append(
+		[]mcp.ToolOption{mcp.WithDescription("Press a button on device")},
+		commonToolOptions...,
+	)
+	// Declare the request parameters the same way the TapXY tool below does, so the "button" argument is part of the tool definition.
+	pressButtonParams = append(pressButtonParams, generateMCPOptions(types.PressButtonRequest{})...)
+	pressButtonTool := mcp.NewTool("press_button", pressButtonParams...)
+	ums.mcpServer.AddTool(pressButtonTool, ums.handlePressButton)
+	ums.tools = append(ums.tools, pressButtonTool)
+	ums.handlerMap[pressButtonTool.Name] = ums.handlePressButton
+
 	// TapXY Tool
 	tapParams := append(
-		[]mcp.ToolOption{mcp.WithDescription("Taps on the device screen at the given coordinates.")},
+		[]mcp.ToolOption{mcp.WithDescription("Click on the screen at given x,y coordinates")},
 		commonToolOptions...,
 	)
 	tapParams = append(tapParams, generateMCPOptions(types.TapRequest{})...)
@@ -294,6 +306,27 @@ func (ums *MCPServer4XTDriver) handleGetScreenSize(ctx context.Context, request
 	), nil
 }
 
+// handlePressButton handles the press_button tool call.
+// It obtains a driver via setupXTDriver, decodes the request arguments into a
+// types.PressButtonRequest and presses the requested button. Decode and
+// press failures are returned as tool-result errors; a driver setup
+// failure is returned as a Go error.
+func (ums *MCPServer4XTDriver) handlePressButton(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	driverExt, err := ums.setupXTDriver(ctx, request.Params.Arguments)
+	if err != nil {
+		return nil, err
+	}
+	var pressButtonReq types.PressButtonRequest
+	if err := mapToStruct(request.Params.Arguments, &pressButtonReq); err != nil {
+		return mcp.NewToolResultError("parse parameters error: " + err.Error()), nil
+	}
+	err = driverExt.PressButton(pressButtonReq.Button)
+	if err != nil {
+		return mcp.NewToolResultError("Press button failed: " + err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("Pressed button: %s", pressButtonReq.Button)), nil
+}
+
 // handleTapXY handles the tap_xy tool call.
 func (ums *MCPServer4XTDriver) handleTapXY(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
 	driverExt, err := ums.setupXTDriver(ctx, request.Params.Arguments)
diff --git a/uixt/android_driver_adb.go b/uixt/android_driver_adb.go
index fbfe3d9b..bdc7035c 100644
--- a/uixt/android_driver_adb.go
+++ b/uixt/android_driver_adb.go
@@ -936,6 +936,28 @@ func (ad *ADBDriver) OpenUrl(url string) (err error) {
 	return
 }
 
+// androidButtonMap maps device buttons to the key names passed to
+// "input keyevent".
+var androidButtonMap = map[types.DeviceButton]string{
+	types.DeviceButtonBack:       "KEYCODE_BACK",
+	types.DeviceButtonHome:       "KEYCODE_HOME",
+	types.DeviceButtonEnter:      "KEYCODE_ENTER",
+	types.DeviceButtonVolumeUp:   "KEYCODE_VOLUME_UP",
+	types.DeviceButtonVolumeDown: "KEYCODE_VOLUME_DOWN",
+}
+
+// PressButton presses the given device button by running
+// "input keyevent KEYCODE" through runShellCommand. It returns an error
+// for buttons that are missing from androidButtonMap.
+func (ad *ADBDriver) PressButton(button types.DeviceButton) error {
+	buttonName, ok := androidButtonMap[button]
+	if !ok {
+		return fmt.Errorf("unsupported button: %s", button)
+	}
+	_, err := ad.runShellCommand("input", "keyevent", buttonName)
+	return err
+}
+
 func (ad *ADBDriver) PushImage(localPath string) error {
 	log.Info().Str("localPath", localPath).Msg("ADBDriver.PushImage")
 	remoteDir := "/sdcard/DCIM/Camera/"
diff --git a/uixt/browser_driver.go b/uixt/browser_driver.go
index a0e349ef..bdf09e5d 100644
--- a/uixt/browser_driver.go
+++ b/uixt/browser_driver.go
@@ -610,7 +610,8 @@ func (wd *BrowserDriver) PressBack(options ...option.ActionOption) error {
 	return err
 }
 
-func (wd *BrowserDriver) PressKeyCode(keyCode KeyCode) (err error) {
+// PressButton is not supported by the browser driver and always returns an error.
+func (wd *BrowserDriver) PressButton(button types.DeviceButton) error {
 	return errors.New("not support")
 }
 
diff --git a/uixt/driver.go b/uixt/driver.go
index 72d5ab06..3731bd3b 100644
--- a/uixt/driver.go
+++ b/uixt/driver.go
@@ -50,6 +50,8 @@ type IDriver interface {
 	Home() error
 	Unlock() error
 	Back() error
+	PressButton(button types.DeviceButton) error
+
 	// hover
 	HoverBySelector(selector string, opts ...option.ActionOption) error
 	// tap
diff --git a/uixt/harmony_driver_hdc.go b/uixt/harmony_driver_hdc.go
index cef8636d..8a578c01 100644
--- a/uixt/harmony_driver_hdc.go
+++ b/uixt/harmony_driver_hdc.go
@@ -231,6 +231,25 @@ func (hd *HDCDriver) PressHarmonyKeyCode(keyCode ghdc.KeyCode) (err error) {
 	return hd.uiDriver.PressKey(keyCode)
 }
 
+// harmonyButtonMap maps device buttons to ghdc key codes.
+var harmonyButtonMap = map[types.DeviceButton]ghdc.KeyCode{
+	types.DeviceButtonBack:       ghdc.KEYCODE_BACK,
+	types.DeviceButtonHome:       ghdc.KEYCODE_HOME,
+	types.DeviceButtonEnter:      ghdc.KEYCODE_ENTER,
+	types.DeviceButtonVolumeUp:   ghdc.KEYCODE_VOLUME_UP,
+	types.DeviceButtonVolumeDown: ghdc.KEYCODE_VOLUME_DOWN,
+}
+
+// PressButton presses the given device button through the UI driver.
+// It returns an error for buttons that are missing from harmonyButtonMap.
+func (hd *HDCDriver) PressButton(button types.DeviceButton) (err error) {
+	keyCode, ok := harmonyButtonMap[button]
+	if !ok {
+		return fmt.Errorf("unsupported button: %s", button)
+	}
+	return hd.uiDriver.PressKey(keyCode)
+}
+
 func (hd *HDCDriver) ScreenShot(opts ...option.ActionOption) (*bytes.Buffer, error) {
 	tempDir := os.TempDir()
 	screenshotPath := fmt.Sprintf("%s/screenshot_%d.png", tempDir, time.Now().Unix())
diff --git a/uixt/ios_driver_wda.go b/uixt/ios_driver_wda.go
index a0708ea9..3595a667 100644
--- a/uixt/ios_driver_wda.go
+++ b/uixt/ios_driver_wda.go
@@ -744,9 +744,16 @@ func (wd *WDADriver) Back() (err error) {
 	return wd.Swipe(0, 0.5, 0.6, 0.5)
 }
 
-func (wd *WDADriver) PressButton(devBtn types.DeviceButton) (err error) {
+// PressButton presses the given device button. Enter is sent as a "\n"
+// text input; any other button is posted to the WDA pressButton endpoint.
+func (wd *WDADriver) PressButton(button types.DeviceButton) (err error) {
 	// [[FBRoute POST:@"/wda/pressButton"] respondWithTarget:self action:@selector(handlePressButtonCommand:)]
-	data := map[string]interface{}{"name": devBtn}
+
+	if button == types.DeviceButtonEnter {
+		return wd.Input("\n")
+	}
+
+	data := map[string]interface{}{"name": button}
 	urlStr := fmt.Sprintf("/session/%s/wda/pressButton", wd.Session.ID)
 	_, err = wd.Session.POST(data, urlStr)
 	return
diff --git a/uixt/types/device.go b/uixt/types/device.go
index d5fa9e84..1e47c6a0 100644
--- a/uixt/types/device.go
+++ b/uixt/types/device.go
@@ -174,13 +174,15 @@ func (bs BatteryStatus) String() string {
 	}
 }
 
-// DeviceButton A physical button on an iOS device.
+// DeviceButton identifies a physical button on a device.
 type DeviceButton string
 
 const (
 	DeviceButtonHome       DeviceButton = "home"
 	DeviceButtonVolumeUp   DeviceButton = "volumeUp"
 	DeviceButtonVolumeDown DeviceButton = "volumeDown"
+	DeviceButtonEnter      DeviceButton = "enter" // use "\n" for ios
+	DeviceButtonBack       DeviceButton = "back"  // android and harmony (no iOS mapping)
 )
 
 type NotificationType string
diff --git a/uixt/types/request.go b/uixt/types/request.go
index 868f80d9..b9167f04 100644
--- a/uixt/types/request.go
+++ b/uixt/types/request.go
@@ -26,3 +26,8 @@ type AppLaunchRequest struct {
 type AppTerminateRequest struct {
 	PackageName string `json:"packageName" binding:"required" desc:"The package name of the app to terminate"`
 }
+
+// PressButtonRequest holds the arguments of a press-button request.
+type PressButtonRequest struct {
+	Button DeviceButton `json:"button" binding:"required" desc:"The button to press. Supported buttons: back (android and harmony only), home, volumeUp, volumeDown, enter."`
+}