feat: add uixt tool press_button

This commit is contained in:
lilong.129
2025-05-21 17:25:17 +08:00
parent 5c68760cca
commit 7724cf0062
9 changed files with 79 additions and 6 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2505211651 v5.0.0-beta-2505211725

View File

@@ -143,9 +143,19 @@ func (ums *MCPServer4XTDriver) addTools() {
ums.tools = append(ums.tools, getScreenSizeTool) ums.tools = append(ums.tools, getScreenSizeTool)
ums.handlerMap[getScreenSizeTool.Name] = ums.handleGetScreenSize ums.handlerMap[getScreenSizeTool.Name] = ums.handleGetScreenSize
// PressButton Tool
// The options generated from types.PressButtonRequest are appended (as the
// TapXY tool does with its request struct) so that the tool's schema
// advertises the "button" argument that handlePressButton reads.
pressButtonParams := append(
	[]mcp.ToolOption{mcp.WithDescription("Press a button on device")},
	commonToolOptions...,
)
pressButtonParams = append(pressButtonParams, generateMCPOptions(types.PressButtonRequest{})...)
pressButtonTool := mcp.NewTool("press_button", pressButtonParams...)
ums.mcpServer.AddTool(pressButtonTool, ums.handlePressButton)
ums.tools = append(ums.tools, pressButtonTool)
ums.handlerMap[pressButtonTool.Name] = ums.handlePressButton
// TapXY Tool // TapXY Tool
tapParams := append( tapParams := append(
[]mcp.ToolOption{mcp.WithDescription("Taps on the device screen at the given coordinates.")}, []mcp.ToolOption{mcp.WithDescription("Click on the screen at given x,y coordinates")},
commonToolOptions..., commonToolOptions...,
) )
tapParams = append(tapParams, generateMCPOptions(types.TapRequest{})...) tapParams = append(tapParams, generateMCPOptions(types.TapRequest{})...)
@@ -294,6 +304,23 @@ func (ums *MCPServer4XTDriver) handleGetScreenSize(ctx context.Context, request
), nil ), nil
} }
// handlePressButton handles the press_button tool call.
//
// It sets up the XT driver from the request arguments, decodes the same
// arguments into a types.PressButtonRequest and asks the driver to press the
// requested button. A driver setup failure is returned as a Go error; a
// parameter decoding failure or a failed press is reported to the caller as a
// tool error result with a nil Go error.
func (ums *MCPServer4XTDriver) handlePressButton(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	args := request.Params.Arguments

	driverExt, err := ums.setupXTDriver(ctx, args)
	if err != nil {
		return nil, err
	}

	var req types.PressButtonRequest
	if parseErr := mapToStruct(args, &req); parseErr != nil {
		return mcp.NewToolResultError("parse parameters error: " + parseErr.Error()), nil
	}

	if pressErr := driverExt.PressButton(req.Button); pressErr != nil {
		return mcp.NewToolResultError("Press button failed: " + pressErr.Error()), nil
	}

	return mcp.NewToolResultText(fmt.Sprintf("Pressed button: %s", req.Button)), nil
}
// handleTapXY handles the tap_xy tool call. // handleTapXY handles the tap_xy tool call.
func (ums *MCPServer4XTDriver) handleTapXY(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { func (ums *MCPServer4XTDriver) handleTapXY(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
driverExt, err := ums.setupXTDriver(ctx, request.Params.Arguments) driverExt, err := ums.setupXTDriver(ctx, request.Params.Arguments)

View File

@@ -936,6 +936,23 @@ func (ad *ADBDriver) OpenUrl(url string) (err error) {
return return
} }
// androidButtonMap maps each supported types.DeviceButton to the key name
// that ADBDriver.PressButton passes to the `input keyevent` shell command.
// Buttons without an entry here are rejected as unsupported.
var androidButtonMap = map[types.DeviceButton]string{
	types.DeviceButtonHome:       "KEYCODE_HOME",
	types.DeviceButtonBack:       "KEYCODE_BACK",
	types.DeviceButtonEnter:      "KEYCODE_ENTER",
	types.DeviceButtonVolumeDown: "KEYCODE_VOLUME_DOWN",
	types.DeviceButtonVolumeUp:   "KEYCODE_VOLUME_UP",
}
// PressButton presses the given device button by running
// `input keyevent <key name>` through the driver's shell.
//
// It returns an error when the button has no entry in androidButtonMap, or
// the error reported by the shell command.
func (ad *ADBDriver) PressButton(button types.DeviceButton) error {
	keyName, found := androidButtonMap[button]
	if !found {
		return fmt.Errorf("unsupported button: %s", button)
	}

	// Only the command's error matters here; its output is discarded.
	if _, err := ad.runShellCommand("input", "keyevent", keyName); err != nil {
		return err
	}
	return nil
}
func (ad *ADBDriver) PushImage(localPath string) error { func (ad *ADBDriver) PushImage(localPath string) error {
log.Info().Str("localPath", localPath).Msg("ADBDriver.PushImage") log.Info().Str("localPath", localPath).Msg("ADBDriver.PushImage")
remoteDir := "/sdcard/DCIM/Camera/" remoteDir := "/sdcard/DCIM/Camera/"

View File

@@ -610,7 +610,7 @@ func (wd *BrowserDriver) PressBack(options ...option.ActionOption) error {
return err return err
} }
func (wd *BrowserDriver) PressKeyCode(keyCode KeyCode) (err error) { func (wd *BrowserDriver) PressButton(button types.DeviceButton) error {
return errors.New("not support") return errors.New("not support")
} }

View File

@@ -50,6 +50,8 @@ type IDriver interface {
Home() error Home() error
Unlock() error Unlock() error
Back() error Back() error
PressButton(button types.DeviceButton) error
// hover // hover
HoverBySelector(selector string, opts ...option.ActionOption) error HoverBySelector(selector string, opts ...option.ActionOption) error
// tap // tap

View File

@@ -231,6 +231,22 @@ func (hd *HDCDriver) PressHarmonyKeyCode(keyCode ghdc.KeyCode) (err error) {
return hd.uiDriver.PressKey(keyCode) return hd.uiDriver.PressKey(keyCode)
} }
// harmonyButtonMap maps each supported types.DeviceButton to the ghdc key
// code that HDCDriver.PressButton sends through the UI driver. Buttons
// without an entry here are rejected as unsupported.
var harmonyButtonMap = map[types.DeviceButton]ghdc.KeyCode{
	types.DeviceButtonHome:       ghdc.KEYCODE_HOME,
	types.DeviceButtonBack:       ghdc.KEYCODE_BACK,
	types.DeviceButtonEnter:      ghdc.KEYCODE_ENTER,
	types.DeviceButtonVolumeDown: ghdc.KEYCODE_VOLUME_DOWN,
	types.DeviceButtonVolumeUp:   ghdc.KEYCODE_VOLUME_UP,
}
// PressButton presses the given device button by looking up its ghdc key
// code in harmonyButtonMap and sending it through the UI driver.
//
// It returns an error when the button has no entry in harmonyButtonMap, or
// the error reported by the UI driver's PressKey call.
func (hd *HDCDriver) PressButton(button types.DeviceButton) (err error) {
	code, found := harmonyButtonMap[button]
	if !found {
		err = fmt.Errorf("unsupported button: %s", button)
		return err
	}

	err = hd.uiDriver.PressKey(code)
	return err
}
func (hd *HDCDriver) ScreenShot(opts ...option.ActionOption) (*bytes.Buffer, error) { func (hd *HDCDriver) ScreenShot(opts ...option.ActionOption) (*bytes.Buffer, error) {
tempDir := os.TempDir() tempDir := os.TempDir()
screenshotPath := fmt.Sprintf("%s/screenshot_%d.png", tempDir, time.Now().Unix()) screenshotPath := fmt.Sprintf("%s/screenshot_%d.png", tempDir, time.Now().Unix())

View File

@@ -744,9 +744,14 @@ func (wd *WDADriver) Back() (err error) {
return wd.Swipe(0, 0.5, 0.6, 0.5) return wd.Swipe(0, 0.5, 0.6, 0.5)
} }
func (wd *WDADriver) PressButton(devBtn types.DeviceButton) (err error) { func (wd *WDADriver) PressButton(button types.DeviceButton) (err error) {
// [[FBRoute POST:@"/wda/pressButton"] respondWithTarget:self action:@selector(handlePressButtonCommand:)] // [[FBRoute POST:@"/wda/pressButton"] respondWithTarget:self action:@selector(handlePressButtonCommand:)]
data := map[string]interface{}{"name": devBtn}
if button == types.DeviceButtonEnter {
return wd.Input("\n")
}
data := map[string]interface{}{"name": button}
urlStr := fmt.Sprintf("/session/%s/wda/pressButton", wd.Session.ID) urlStr := fmt.Sprintf("/session/%s/wda/pressButton", wd.Session.ID)
_, err = wd.Session.POST(data, urlStr) _, err = wd.Session.POST(data, urlStr)
return return

View File

@@ -174,13 +174,15 @@ func (bs BatteryStatus) String() string {
} }
} }
// DeviceButton A physical button on an iOS device. // DeviceButton A physical button on a device.
type DeviceButton string type DeviceButton string
const ( const (
DeviceButtonHome DeviceButton = "home" DeviceButtonHome DeviceButton = "home"
DeviceButtonVolumeUp DeviceButton = "volumeUp" DeviceButtonVolumeUp DeviceButton = "volumeUp"
DeviceButtonVolumeDown DeviceButton = "volumeDown" DeviceButtonVolumeDown DeviceButton = "volumeDown"
DeviceButtonEnter DeviceButton = "enter" // use "\n" for ios
DeviceButtonBack DeviceButton = "back" // android only
) )
type NotificationType string type NotificationType string

View File

@@ -26,3 +26,7 @@ type AppLaunchRequest struct {
type AppTerminateRequest struct { type AppTerminateRequest struct {
PackageName string `json:"packageName" binding:"required" desc:"The package name of the app to terminate"` PackageName string `json:"packageName" binding:"required" desc:"The package name of the app to terminate"`
} }
// PressButtonRequest carries the arguments of the press_button tool.
//
// The names listed in the desc tag are the literal DeviceButton values
// ("back", "home", "volumeUp", "volumeDown", "enter"). The value supplied by
// the caller is handed to the driver unchanged, so the description must
// advertise exactly those strings.
type PressButtonRequest struct {
	Button DeviceButton `json:"button" binding:"required" desc:"The button to press. Supported buttons: back (android only), home, volumeUp, volumeDown, enter."`
}