refactor: move server models to uixt/types

This commit is contained in:
lilong.129
2025-05-20 14:21:18 +08:00
parent 2f48a92f7f
commit 5066c64368
6 changed files with 52 additions and 29 deletions

View File

@@ -1,255 +0,0 @@
package server
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"reflect"
"strings"
"sync"
"github.com/httprunner/httprunner/v5/internal/version"
"github.com/httprunner/httprunner/v5/uixt"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/rs/zerolog/log"
)
// MCPServer4XTDriver wraps a MCPServer to expose XTDriver functionality via MCP protocol.
//
// mcpServer is the underlying MCP server the tools are registered on.
// driverCache is consulted by setupXTDriver before a new driver is created.
type MCPServer4XTDriver struct {
mcpServer *server.MCPServer
driverCache sync.Map // key is "<platform>_<serial>" as built in setupXTDriver, value is *uixt.XTDriver
}
// NewMCPServer builds an MCPServer4XTDriver around a fresh MCP server named
// "uixt" (versioned with version.GetVersionInfo) and registers every tool on
// it before returning.
func NewMCPServer() *MCPServer4XTDriver {
	ums := &MCPServer4XTDriver{
		mcpServer: server.NewMCPServer(
			"uixt",
			version.GetVersionInfo(),
			server.WithToolCapabilities(false),
		),
	}
	ums.addTools()
	return ums
}
// Start runs the MCP server (blocking).
//
// The server is served through server.ServeStdio; whatever error that call
// returns is handed back to the caller unchanged.
func (ums *MCPServer4XTDriver) Start() error {
	log.Info().Msg("Starting HttpRunner UIXT MCP Server...")
	return server.ServeStdio(ums.mcpServer)
}
// addTools registers all MCP tools (tap_xy, swipe, screenshot) on the
// underlying MCP server.
//
// Every tool is declared with the shared platform/serial options from
// commonToolOptions, because every handler resolves its driver through
// setupXTDriver, which rejects calls that lack those two arguments. This
// includes the screenshot tool, whose schema would otherwise not advertise
// the arguments its handler requires.
func (ums *MCPServer4XTDriver) addTools() {
	// register assembles the tool options in a fixed order (description,
	// common options, tool-specific options), creates the tool and binds it
	// to its handler.
	register := func(
		name, description string,
		handler func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error),
		specific ...mcp.ToolOption,
	) {
		// Build into a fresh slice so the shared commonToolOptions backing
		// array is never appended to.
		opts := make([]mcp.ToolOption, 0, 1+len(commonToolOptions)+len(specific))
		opts = append(opts, mcp.WithDescription(description))
		opts = append(opts, commonToolOptions...)
		opts = append(opts, specific...)

		tool := mcp.NewTool(name, opts...)
		ums.mcpServer.AddTool(tool, handler)
		log.Info().Str("name", tool.Name).Msg("Register tool")
	}

	// TapXY Tool
	register(
		"tap_xy",
		"Taps on the device screen at the given coordinates.",
		ums.handleTapXY,
		generateMCPOptions(TapRequest{})...,
	)

	// Swipe Tool
	register(
		"swipe",
		"Swipes on the device screen from one point to another.",
		ums.handleSwipe,
		generateMCPOptions(DragRequest{})...,
	)

	// ScreenShot Tool
	register(
		"screenshot",
		"Takes a screenshot of the device screen and returns it as a base64 encoded string.",
		ums.handleScreenShot,
	)
}
// handleTapXY serves the tap_xy tool.
//
// The driver is resolved from the call arguments first; a failure there is
// returned as a Go error. Argument decoding and tap failures are reported as
// MCP tool error results instead. When a positive duration is supplied the
// tap is performed as a drag whose start and end points are identical,
// otherwise a plain TapXY is issued.
func (ums *MCPServer4XTDriver) handleTapXY(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	args := request.Params.Arguments

	xtDriver, setupErr := ums.setupXTDriver(ctx, args)
	if setupErr != nil {
		return nil, setupErr
	}

	var params TapRequest
	if parseErr := mapToStruct(args, &params); parseErr != nil {
		return mcp.NewToolResultError("parse parameters error: " + parseErr.Error()), nil
	}

	// Pick the gesture once, then share the failure handling.
	var tapErr error
	if params.Duration > 0 {
		tapErr = xtDriver.Drag(params.X, params.Y, params.X, params.Y, option.WithDuration(params.Duration))
	} else {
		tapErr = xtDriver.TapXY(params.X, params.Y)
	}
	if tapErr != nil {
		return mcp.NewToolResultError("Tap failed: " + tapErr.Error()), nil
	}

	return mcp.NewToolResultText("Tap successful."), nil
}
// handleSwipe serves the swipe tool.
//
// The driver is resolved from the call arguments; a failure there is returned
// as a Go error. Decoding and swipe failures are reported as MCP tool error
// results. A positive duration is divided by 1000 before being passed on as
// an action option. PressDuration is decoded but not used by this handler.
func (ums *MCPServer4XTDriver) handleSwipe(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	args := request.Params.Arguments

	xtDriver, setupErr := ums.setupXTDriver(ctx, args)
	if setupErr != nil {
		return nil, setupErr
	}

	var params DragRequest
	if parseErr := mapToStruct(args, &params); parseErr != nil {
		return mcp.NewToolResultError("parse parameters error: " + parseErr.Error()), nil
	}

	swipeOpts := make([]option.ActionOption, 0, 1)
	if duration := params.Duration; duration > 0 {
		swipeOpts = append(swipeOpts, option.WithDuration(duration/1000.0))
	}

	if swipeErr := xtDriver.Swipe(params.FromX, params.FromY, params.ToX, params.ToY, swipeOpts...); swipeErr != nil {
		return mcp.NewToolResultError("Swipe failed: " + swipeErr.Error()), nil
	}
	return mcp.NewToolResultText("Swipe successful."), nil
}
// handleScreenShot serves the screenshot tool.
//
// The driver is resolved from the call arguments; a failure there is returned
// as a Go error. A failed or empty capture is logged and reported as an MCP
// tool error result. On success the raw image bytes are encoded with standard
// base64 and returned as a text result.
func (ums *MCPServer4XTDriver) handleScreenShot(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	log.Info().Msg("Executing ScreenShot")

	xtDriver, setupErr := ums.setupXTDriver(ctx, request.Params.Arguments)
	if setupErr != nil {
		return nil, setupErr
	}

	shot, shotErr := xtDriver.ScreenShot()
	switch {
	case shotErr != nil:
		log.Error().Err(shotErr).Msg("ScreenShot failed")
		return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", shotErr)), nil
	case shot == nil || shot.Len() == 0:
		log.Error().Msg("Screenshot buffer is nil or empty")
		return mcp.NewToolResultError("Screenshot returned empty buffer"), nil
	}

	raw := shot.Bytes()
	encoded := base64.StdEncoding.EncodeToString(raw)
	log.Info().
		Int("image_size_bytes", len(raw)).
		Int("base64_len", len(encoded)).
		Msg("Screenshot successful")
	return mcp.NewToolResultText(encoded), nil
}
// setupXTDriver returns the XTDriver for the device identified by the
// "platform" and "serial" entries of args, creating it on first use.
//
// Both entries must be non-empty strings. The platform is matched
// case-insensitively against "android", "ios" and "browser". A driver that
// was created successfully is stored in driverCache under
// "<lower-cased platform>_<serial>", so later calls for the same device reuse
// it instead of setting up the device and driver again.
//
// An error is returned when an argument is missing, the platform is unknown,
// or any device/driver initialisation step fails; nothing is cached then.
func (ums *MCPServer4XTDriver) setupXTDriver(_ context.Context, args map[string]interface{}) (*uixt.XTDriver, error) {
	// Missing or non-string values collapse to "" and are rejected below.
	platform, _ := args["platform"].(string)
	serial, _ := args["serial"].(string)
	if platform == "" || serial == "" {
		return nil, fmt.Errorf("platform and serial are required")
	}

	// The platform switch below is case-insensitive, so the cache key uses the
	// same normalised form; "Android" and "android" share one driver.
	normalizedPlatform := strings.ToLower(platform)

	// Check if driver exists in cache
	cacheKey := fmt.Sprintf("%s_%s", normalizedPlatform, serial)
	if cachedDriver, ok := ums.driverCache.Load(cacheKey); ok {
		if driverExt, ok := cachedDriver.(*uixt.XTDriver); ok {
			log.Info().Str("platform", platform).Str("serial", serial).Msg("Using cached driver")
			return driverExt, nil
		}
	}

	// init device
	var device uixt.IDevice
	var err error
	switch normalizedPlatform {
	case "android":
		device, err = uixt.NewAndroidDevice(option.WithSerialNumber(serial))
	case "ios":
		device, err = uixt.NewIOSDevice(
			option.WithUDID(serial),
			option.WithWDAPort(8700),
			option.WithWDAMjpegPort(8800),
			option.WithResetHomeOnStartup(false),
		)
	case "browser":
		device, err = uixt.NewBrowserDevice(option.WithBrowserID(serial))
	default:
		return nil, fmt.Errorf("invalid platform: %s", platform)
	}
	if err != nil {
		return nil, fmt.Errorf("init device failed: %w", err)
	}
	if err := device.Setup(); err != nil {
		return nil, fmt.Errorf("setup device failed: %w", err)
	}

	// init driver
	driver, err := device.NewDriver()
	if err != nil {
		return nil, fmt.Errorf("init driver failed: %w", err)
	}
	if err := driver.Setup(); err != nil {
		return nil, fmt.Errorf("setup driver failed: %w", err)
	}

	// init XTDriver
	driverExt, err := uixt.NewXTDriver(driver,
		option.WithCVService(option.CVServiceTypeVEDEM))
	if err != nil {
		return nil, fmt.Errorf("init XT driver failed: %w", err)
	}

	// Remember the driver; without this the cache lookup above can never hit.
	ums.driverCache.Store(cacheKey, driverExt)
	return driverExt, nil
}
// generateMCPOptions derives mcp.NewTool property options from the fields of
// a struct value (or a pointer to one).
//
// For each field the following struct tags are consulted:
//   - json:    the property name (text before the first comma); fields with
//     no json tag or with "-" are skipped, and an empty name falls back to
//     the Go field name
//   - binding: the property is marked required when the tag contains
//     "required"
//   - desc:    the property description
//
// Numeric kinds (signed, unsigned and floating point) become number
// properties, strings become string properties and bools become boolean
// properties. Fields of any other kind are skipped with a warning.
//
// A nil value or a value that is not a struct (after following pointers)
// yields no options instead of panicking.
func generateMCPOptions(t interface{}) (options []mcp.ToolOption) {
	tType := reflect.TypeOf(t)
	// Accept *T as well as T; reflect.TypeOf(nil) is nil and is handled below.
	for tType != nil && tType.Kind() == reflect.Ptr {
		tType = tType.Elem()
	}
	if tType == nil || tType.Kind() != reflect.Struct {
		log.Warn().Str("type", fmt.Sprintf("%T", t)).Msg("generateMCPOptions expects a struct")
		return nil
	}

	for i := 0; i < tType.NumField(); i++ {
		field := tType.Field(i)
		jsonTag := field.Tag.Get("json")
		if jsonTag == "" || jsonTag == "-" {
			continue
		}
		name := strings.Split(jsonTag, ",")[0]
		if name == "" {
			// A tag such as `json:",omitempty"` keeps the Go field name.
			name = field.Name
		}

		// Required (when requested) is applied before the description.
		var propOpts []mcp.PropertyOption
		if strings.Contains(field.Tag.Get("binding"), "required") {
			propOpts = append(propOpts, mcp.Required())
		}
		propOpts = append(propOpts, mcp.Description(field.Tag.Get("desc")))

		switch field.Type.Kind() {
		case reflect.Float64, reflect.Float32,
			reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			options = append(options, mcp.WithNumber(name, propOpts...))
		case reflect.String:
			options = append(options, mcp.WithString(name, propOpts...))
		case reflect.Bool:
			options = append(options, mcp.WithBoolean(name, propOpts...))
		default:
			log.Warn().Str("field_type", field.Type.String()).Msg("Unsupported field type")
		}
	}
	return options
}
// mapToStruct copies the entries of m into the value out points to by
// encoding m as JSON and decoding that JSON into out. Field matching and
// type conversion therefore follow encoding/json rules. The first error from
// either step is returned as-is.
func mapToStruct(m map[string]interface{}, out interface{}) error {
	var (
		payload []byte
		err     error
	)
	if payload, err = json.Marshal(m); err != nil {
		return err
	}
	err = json.Unmarshal(payload, out)
	return err
}
// commonToolOptions holds the tool options shared by the device tools: the
// two required string arguments that select the target device.
var commonToolOptions = []mcp.ToolOption{
	// Which kind of device to drive.
	mcp.WithString(
		"platform",
		mcp.Required(),
		mcp.Description("Device platform: android/ios/browser"),
	),
	// Which device of that kind to drive.
	mcp.WithString(
		"serial",
		mcp.Required(),
		mcp.Description("Device serial/udid/browser id"),
	),
}

View File

@@ -4,12 +4,6 @@ import (
"github.com/httprunner/httprunner/v5/uixt/option"
)
// TapRequest describes a tap at a single screen position.
//
// The json tag gives each field's wire name, binding:"required" marks the
// mandatory fields, and desc carries the human-readable description.
type TapRequest struct {
// X and Y locate the tap; per the desc tags, values in 0.0~1.0 are relative, larger values are absolute pixels.
X float64 `json:"x" binding:"required" desc:"X coordinate (0.0~1.0 for percent, or absolute pixel value)"`
Y float64 `json:"y" binding:"required" desc:"Y coordinate (0.0~1.0 for percent, or absolute pixel value)"`
// Duration is optional and, per its desc tag, expressed in seconds.
Duration float64 `json:"duration" desc:"Tap duration in seconds (optional)"`
}
type uploadRequest struct {
X float64 `json:"x"`
Y float64 `json:"y"`
@@ -17,15 +11,6 @@ type uploadRequest struct {
FileFormat string `json:"file_format"`
}
// DragRequest describes a drag/swipe gesture from one screen position to another.
//
// The json tag gives each field's wire name, binding:"required" marks the
// mandatory fields, and desc carries the human-readable description.
type DragRequest struct {
// FromX/FromY are the start point and ToX/ToY the end point; the desc tags describe them as percentages (0.0 to 1.0).
FromX float64 `json:"from_x" binding:"required" desc:"Starting X-coordinate (percentage, 0.0 to 1.0)"`
FromY float64 `json:"from_y" binding:"required" desc:"Starting Y-coordinate (percentage, 0.0 to 1.0)"`
ToX float64 `json:"to_x" binding:"required" desc:"Ending X-coordinate (percentage, 0.0 to 1.0)"`
ToY float64 `json:"to_y" binding:"required" desc:"Ending Y-coordinate (percentage, 0.0 to 1.0)"`
// Duration and PressDuration are optional and, per their desc tags, expressed in milliseconds.
Duration float64 `json:"duration" desc:"Swipe duration in milliseconds (optional)"`
PressDuration float64 `json:"press_duration" desc:"Press duration in milliseconds (optional)"`
}
type InputRequest struct {
Text string `json:"text" binding:"required"`
Frequency int `json:"frequency"` // only iOS

View File

@@ -4,10 +4,11 @@ import (
"github.com/gin-gonic/gin"
"github.com/httprunner/httprunner/v5/uixt"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
)
func (r *Router) tapHandler(c *gin.Context) {
var tapReq TapRequest
var tapReq types.TapRequest
if err := c.ShouldBindJSON(&tapReq); err != nil {
RenderErrorValidateRequest(c, err)
return
@@ -30,7 +31,7 @@ func (r *Router) tapHandler(c *gin.Context) {
}
func (r *Router) rightClickHandler(c *gin.Context) {
var rightClickReq TapRequest
var rightClickReq types.TapRequest
if err := c.ShouldBindJSON(&rightClickReq); err != nil {
RenderErrorValidateRequest(c, err)
return
@@ -117,7 +118,7 @@ func (r *Router) scrollHandler(c *gin.Context) {
}
func (r *Router) doubleTapHandler(c *gin.Context) {
var tapReq TapRequest
var tapReq types.TapRequest
if err := c.ShouldBindJSON(&tapReq); err != nil {
RenderErrorValidateRequest(c, err)
return
@@ -137,7 +138,7 @@ func (r *Router) doubleTapHandler(c *gin.Context) {
}
func (r *Router) dragHandler(c *gin.Context) {
var dragReq DragRequest
var dragReq types.DragRequest
if err := c.ShouldBindJSON(&dragReq); err != nil {
RenderErrorValidateRequest(c, err)
return

View File

@@ -8,6 +8,7 @@ import (
"net/http/httptest"
"testing"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/stretchr/testify/assert"
)
@@ -17,14 +18,14 @@ func TestTapHandler(t *testing.T) {
tests := []struct {
name string
path string
tapReq TapRequest
tapReq types.TapRequest
wantStatus int
wantResp HttpResponse
}{
{
name: "tap abs xy",
path: fmt.Sprintf("/api/v1/android/%s/ui/tap", "4622ca24"),
tapReq: TapRequest{
tapReq: types.TapRequest{
X: 500,
Y: 800,
Duration: 0,
@@ -39,7 +40,7 @@ func TestTapHandler(t *testing.T) {
{
name: "tap relative xy",
path: fmt.Sprintf("/api/v1/android/%s/ui/tap", "4622ca24"),
tapReq: TapRequest{
tapReq: types.TapRequest{
X: 0.5,
Y: 0.6,
Duration: 0,