diff --git a/internal/version/VERSION b/internal/version/VERSION
index 809297ca..62291b36 100644
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2505262232
+v5.0.0-beta-2505262239
diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go
index 5a979776..18bd0395 100644
--- a/uixt/mcp_server.go
+++ b/uixt/mcp_server.go
@@ -101,6 +101,7 @@ func (s *MCPServer4XTDriver) registerTools() {
 	s.registerTool(&ToolDoubleTapXY{}) // double tap xy
 
 	// Swipe Tool
+	s.registerTool(&ToolSwipe{})           // generic swipe, auto-detect direction or coordinate
 	s.registerTool(&ToolSwipeDirection{})  // swipe direction, up/down/left/right
 	s.registerTool(&ToolSwipeCoordinate{}) // swipe coordinate, [fromX, fromY, toX, toY]
 	s.registerTool(&ToolSwipeToTapApp{})
@@ -881,6 +882,148 @@ func (t *ToolPressButton) ConvertActionToCallToolRequest(action MobileAction) (m
 	return mcp.CallToolRequest{}, fmt.Errorf("invalid press button params: %v", action.Params)
 }
 
+// ToolSwipe implements the generic swipe tool call.
+// It performs a direction-based swipe when the request carries a "direction"
+// argument and a coordinate-based swipe otherwise.
+type ToolSwipe struct{}
+
+// toolSwipeDirectionEnds maps each supported direction to the relative end
+// point of the gesture; direction swipes always start at (0.5, 0.5).
+var toolSwipeDirectionEnds = map[string][2]float64{
+	"up":    {0.5, 0.1},
+	"down":  {0.5, 0.9},
+	"left":  {0.1, 0.5},
+	"right": {0.9, 0.5},
+}
+
+// Name returns the action method this tool handles.
+func (t *ToolSwipe) Name() option.ActionMethod {
+	return option.ACTION_Swipe
+}
+
+// Description returns the tool description.
+func (t *ToolSwipe) Description() string {
+	return "Swipe on the screen by direction (up/down/left/right) or coordinates [fromX, fromY, toX, toY]"
+}
+
+// Options returns the options of the direction swipe request followed by
+// those of the coordinate swipe request.
+func (t *ToolSwipe) Options() []mcp.ToolOption {
+	directionOptions := option.NewMCPOptions(option.SwipeRequest{})
+	coordinateOptions := option.NewMCPOptions(option.SwipeAdvancedRequest{})
+
+	// NOTE(review): an option declared by both request types ends up in the
+	// merged list twice - confirm that is harmless.
+	allOptions := make([]mcp.ToolOption, 0, len(directionOptions)+len(coordinateOptions))
+	allOptions = append(allOptions, directionOptions...)
+	allOptions = append(allOptions, coordinateOptions...)
+	return allOptions
+}
+
+// Implement returns the handler that executes the swipe.
+//
+// Invalid or missing parameters are returned as Go errors; a failing swipe is
+// reported as an MCP tool result error.
+func (t *ToolSwipe) Implement() server.ToolHandlerFunc {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		args := request.Params.Arguments
+		driverExt, err := setupXTDriver(ctx, args)
+		if err != nil {
+			return nil, fmt.Errorf("setup driver failed: %w", err)
+		}
+
+		// Direction-based swipe: selected by the presence of "direction".
+		if direction, exists := args["direction"]; exists {
+			directionStr, ok := direction.(string)
+			if !ok {
+				return nil, fmt.Errorf("direction parameter must be a string")
+			}
+			end, ok := toolSwipeDirectionEnds[directionStr]
+			if !ok {
+				return nil, fmt.Errorf("invalid swipe direction: %s, expected one of: [up down left right]", directionStr)
+			}
+
+			log.Info().Str("direction", directionStr).Msg("performing direction-based swipe")
+
+			opts := []option.ActionOption{option.WithPreMarkOperation(true)}
+			// Timing values that are not positive float64 numbers are ignored.
+			if duration, ok := args["duration"].(float64); ok && duration > 0 {
+				opts = append(opts, option.WithDuration(duration))
+			}
+			if pressDuration, ok := args["pressDuration"].(float64); ok && pressDuration > 0 {
+				opts = append(opts, option.WithPressDuration(pressDuration))
+			}
+
+			if err := driverExt.Swipe(0.5, 0.5, end[0], end[1], opts...); err != nil {
+				return mcp.NewToolResultError(fmt.Sprintf("Direction swipe failed: %s", err.Error())), nil
+			}
+			return mcp.NewToolResultText(fmt.Sprintf("Successfully swiped %s", directionStr)), nil
+		}
+
+		// Coordinate-based swipe: all four coordinates must be present; absent
+		// ones would presumably stay zero and swipe from (0, 0) to (0, 0).
+		for _, key := range []string{"fromX", "fromY", "toX", "toY"} {
+			if _, exists := args[key]; !exists {
+				return nil, fmt.Errorf("missing swipe parameter %q, expected direction or fromX/fromY/toX/toY", key)
+			}
+		}
+
+		var swipeAdvReq option.SwipeAdvancedRequest
+		if err := mapToStruct(args, &swipeAdvReq); err != nil {
+			return nil, fmt.Errorf("parse parameters error: %w", err)
+		}
+
+		log.Info().
+			Float64("fromX", swipeAdvReq.FromX).Float64("fromY", swipeAdvReq.FromY).
+			Float64("toX", swipeAdvReq.ToX).Float64("toY", swipeAdvReq.ToY).
+			Msg("performing coordinate-based swipe")
+
+		params := []float64{swipeAdvReq.FromX, swipeAdvReq.FromY, swipeAdvReq.ToX, swipeAdvReq.ToY}
+		var opts []option.ActionOption
+		if swipeAdvReq.Duration > 0 {
+			opts = append(opts, option.WithDuration(swipeAdvReq.Duration))
+		}
+		if swipeAdvReq.PressDuration > 0 {
+			opts = append(opts, option.WithPressDuration(swipeAdvReq.PressDuration))
+		}
+
+		swipeAction := prepareSwipeAction(driverExt, params, opts...)
+		if err := swipeAction(driverExt); err != nil {
+			return mcp.NewToolResultError(fmt.Sprintf("Coordinate swipe failed: %s", err.Error())), nil
+		}
+		return mcp.NewToolResultText(fmt.Sprintf("Successfully performed coordinate swipe from (%.2f, %.2f) to (%.2f, %.2f)",
+			swipeAdvReq.FromX, swipeAdvReq.FromY, swipeAdvReq.ToX, swipeAdvReq.ToY)), nil
+	}
+}
+
+// ConvertActionToCallToolRequest converts a swipe action into an MCP tool call.
+// String params select a direction swipe, params convertible to four float64
+// values a coordinate swipe; positive Duration and PressDuration are forwarded.
+func (t *ToolSwipe) ConvertActionToCallToolRequest(action MobileAction) (mcp.CallToolRequest, error) {
+	var arguments map[string]any
+	if direction, ok := action.Params.(string); ok {
+		arguments = map[string]any{"direction": direction}
+	} else if paramSlice, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(paramSlice) == 4 {
+		arguments = map[string]any{
+			"fromX": paramSlice[0],
+			"fromY": paramSlice[1],
+			"toX":   paramSlice[2],
+			"toY":   paramSlice[3],
+		}
+	} else {
+		return mcp.CallToolRequest{}, fmt.Errorf("invalid swipe params: %v, expected string direction or [fromX, fromY, toX, toY] coordinates", action.Params)
+	}
+
+	// Timing options are forwarded the same way for both swipe flavors.
+	if duration := action.ActionOptions.Duration; duration > 0 {
+		arguments["duration"] = duration
+	}
+	if pressDuration := action.ActionOptions.PressDuration; pressDuration > 0 {
+		arguments["pressDuration"] = pressDuration
+	}
+	return buildMCPCallToolRequest(t.Name(), arguments), nil
+}
+
 // ToolSwipeDirection implements the swipe tool call.
type ToolSwipeDirection struct{} diff --git a/uixt/mcp_server_test.go b/uixt/mcp_server_test.go index 909de39e..ade4b246 100644 --- a/uixt/mcp_server_test.go +++ b/uixt/mcp_server_test.go @@ -25,6 +25,7 @@ func TestNewMCPServer(t *testing.T) { "tap_ocr", "tap_cv", "double_tap_xy", + "swipe", "swipe_direction", "swipe_coordinate", "swipe_to_tap_app", @@ -79,6 +80,7 @@ func TestToolInterfaces(t *testing.T) { &ToolTapByOCR{}, &ToolTapByCV{}, &ToolDoubleTapXY{}, + &ToolSwipe{}, &ToolSwipeDirection{}, &ToolSwipeCoordinate{}, &ToolSwipeToTapApp{}, @@ -423,6 +425,72 @@ func TestToolDoubleTapXY(t *testing.T) { assert.Error(t, err) } +// TestToolSwipe tests the ToolSwipe implementation +func TestToolSwipe(t *testing.T) { + tool := &ToolSwipe{} + + // Test Name + assert.Equal(t, option.ACTION_Swipe, tool.Name()) + + // Test Description + assert.NotEmpty(t, tool.Description()) + + // Test Options + options := tool.Options() + assert.NotNil(t, options) + + // Test ConvertActionToCallToolRequest with direction params (string) + directionAction := MobileAction{ + Method: option.ACTION_Swipe, + Params: "up", + ActionOptions: option.ActionOptions{ + Duration: 1.5, + PressDuration: 0.5, + }, + } + request, err := tool.ConvertActionToCallToolRequest(directionAction) + assert.NoError(t, err) + assert.Equal(t, string(option.ACTION_Swipe), request.Params.Name) + assert.Equal(t, "up", request.Params.Arguments["direction"]) + assert.Equal(t, 1.5, request.Params.Arguments["duration"]) + assert.Equal(t, 0.5, request.Params.Arguments["pressDuration"]) + + // Test ConvertActionToCallToolRequest with coordinate params ([]float64) + coordinateAction := MobileAction{ + Method: option.ACTION_Swipe, + Params: []float64{0.1, 0.2, 0.8, 0.9}, + ActionOptions: option.ActionOptions{ + Duration: 2.0, + PressDuration: 1.0, + }, + } + request, err = tool.ConvertActionToCallToolRequest(coordinateAction) + assert.NoError(t, err) + assert.Equal(t, string(option.ACTION_Swipe), request.Params.Name) + 
assert.Equal(t, 0.1, request.Params.Arguments["fromX"]) + assert.Equal(t, 0.2, request.Params.Arguments["fromY"]) + assert.Equal(t, 0.8, request.Params.Arguments["toX"]) + assert.Equal(t, 0.9, request.Params.Arguments["toY"]) + assert.Equal(t, 2.0, request.Params.Arguments["duration"]) + assert.Equal(t, 1.0, request.Params.Arguments["pressDuration"]) + + // Test ConvertActionToCallToolRequest with invalid params + invalidAction := MobileAction{ + Method: option.ACTION_Swipe, + Params: 123, // should be string or []float64 + } + _, err = tool.ConvertActionToCallToolRequest(invalidAction) + assert.Error(t, err) + + // Test ConvertActionToCallToolRequest with incomplete coordinate params + incompleteAction := MobileAction{ + Method: option.ACTION_Swipe, + Params: []float64{0.1, 0.2}, // missing toX and toY + } + _, err = tool.ConvertActionToCallToolRequest(incompleteAction) + assert.Error(t, err) +} + // TestToolSwipeDirection tests the ToolSwipeDirection implementation func TestToolSwipeDirection(t *testing.T) { tool := &ToolSwipeDirection{} diff --git a/uixt/option/action.go b/uixt/option/action.go index f1d1691b..75385178 100644 --- a/uixt/option/action.go +++ b/uixt/option/action.go @@ -37,6 +37,7 @@ const ( ACTION_TapByOCR ActionMethod = "tap_ocr" ACTION_TapByCV ActionMethod = "tap_cv" ACTION_DoubleTapXY ActionMethod = "double_tap_xy" + ACTION_Swipe ActionMethod = "swipe" // swipe by direction or coordinates ACTION_SwipeDirection ActionMethod = "swipe_direction" // swipe by direction (up, down, left, right) ACTION_SwipeCoordinate ActionMethod = "swipe_coordinate" // swipe by coordinates (fromX, fromY, toX, toY) ACTION_Drag ActionMethod = "drag"