From 9bf31f643d43b81e8bd677b623fef8b06e08e7ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=99=E6=B3=93=E9=93=AE?=
Date: Tue, 12 Aug 2025 16:45:53 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=B8=85=E7=A9=BA/?=
 =?UTF-8?q?=E5=AF=BC=E5=85=A5=E5=9B=BE=E7=89=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 uixt/driver_session.go   |   6 +-
 uixt/image_utils.go      |  88 +++++++++++++++++
 uixt/mcp_server.go       |   4 +
 uixt/mcp_tools_device.go | 200 +++++++++++++++++++++++++++++++++++++++
 uixt/option/action.go    |   4 +
 5 files changed, 300 insertions(+), 2 deletions(-)
 create mode 100644 uixt/image_utils.go

diff --git a/uixt/driver_session.go b/uixt/driver_session.go
index 35f5cc57..5a837bb1 100644
--- a/uixt/driver_session.go
+++ b/uixt/driver_session.go
@@ -265,8 +265,10 @@ func (s *DriverSession) Request(method string, urlStr string, rawBody []byte, op
 		logger = log.Debug().Bool("success", true)
 	}
 
-	logger = logger.Str("logid", logid).Str("request_method", method).Str("request_url", rawURL).
-		Str("request_body", string(rawBody))
+	logger = logger.Str("logid", logid).Str("request_method", method).Str("request_url", rawURL)
+	if len(rawBody) < 1024 {
+		logger = logger.Str("request_body", string(rawBody))
+	}
 	if !driverResult.RequestTime.IsZero() {
 		logger = logger.Int64("request_time", driverResult.RequestTime.UnixMilli())
 	}
diff --git a/uixt/image_utils.go b/uixt/image_utils.go
new file mode 100644
index 000000000..338f895a
--- /dev/null
+++ b/uixt/image_utils.go
@@ -0,0 +1,88 @@
+package uixt
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/rs/zerolog/log"
+)
+
+// DetectAndRenameImageFile sniffs the leading bytes of the file at filePath to
+// determine its image type and makes sure the file name ends with the matching
+// extension (.jpg, .png, etc.).
+//
+// It returns filePath unchanged when the name already carries that extension
+// (compared case-insensitively, ".jpeg" counts as ".jpg"); otherwise the
+// extension is appended, the file is renamed and the new path is returned.
+// When the content is not recognized as an image, filePath is returned
+// together with a non-nil error and the file is left untouched.
+func DetectAndRenameImageFile(filePath string) (string, error) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return "", fmt.Errorf("failed to open file for type detection: %w", err)
+	}
+
+	// http.DetectContentType looks at no more than the first 512 bytes.
+	buffer := make([]byte, 512)
+	n, err := io.ReadFull(file, buffer)
+	// Close right away instead of deferring: the file may be renamed below,
+	// and renaming a file that is still open fails on Windows.
+	_ = file.Close()
+	// EOF / ErrUnexpectedEOF only mean the file is shorter than the buffer.
+	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+		return "", fmt.Errorf("failed to read file for type detection: %w", err)
+	}
+
+	// Sniff only the bytes actually read so that the zero padding of a short
+	// file does not take part in the detection.
+	contentType := http.DetectContentType(buffer[:n])
+	log.Info().Str("filePath", filePath).Str("contentType", contentType).Msg("Detected content type")
+
+	// Determine file extension based on content type
+	var extension string
+	switch {
+	case strings.Contains(contentType, "image/jpeg"):
+		extension = ".jpg"
+	case strings.Contains(contentType, "image/png"):
+		extension = ".png"
+	case strings.Contains(contentType, "image/gif"):
+		extension = ".gif"
+	case strings.Contains(contentType, "image/webp"):
+		extension = ".webp"
+	case strings.Contains(contentType, "image/bmp"):
+		extension = ".bmp"
+	case strings.Contains(contentType, "image/tiff"):
+		extension = ".tiff"
+	case strings.Contains(contentType, "image/svg+xml"):
+		extension = ".svg"
+	case strings.Contains(contentType, "image/"):
+		// Default to jpg if we can't determine the type but it's still an image
+		extension = ".jpg"
+	default:
+		return filePath, fmt.Errorf("not a recognized image type: %s", contentType)
+	}
+
+	// If the file already has the correct extension, just return it.
+	// Appending it again would produce names such as "photo.jpg.jpg".
+	currentExt := strings.ToLower(filepath.Ext(filePath))
+	if currentExt == extension || (extension == ".jpg" && currentExt == ".jpeg") {
+		return filePath, nil
+	}
+
+	// Create new file path with extension
+	dir := filepath.Dir(filePath)
+	base := filepath.Base(filePath)
+	newFilePath := filepath.Join(dir, base+extension)
+
+	// Rename the file
+	if err := os.Rename(filePath, newFilePath); err != nil {
+		return "", fmt.Errorf("failed to rename file: %w", err)
+	}
+
+	log.Info().Str("oldPath", filePath).Str("newPath", newFilePath).Msg("Renamed image file with proper extension")
+	return newFilePath, nil
+}
diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go
index 92f0b1be..bc4be2bc 100644
--- a/uixt/mcp_server.go
+++ b/uixt/mcp_server.go
@@ -133,6 +133,10 @@ func (s *MCPServer4XTDriver) registerTools() {
 	s.registerTool(&ToolGetScreenSize{})
 	s.registerTool(&ToolGetSource{})
 
+	// Image Tools
+	s.registerTool(&ToolPushImage{})
+	s.registerTool(&ToolClearImage{})
+
 	// Utility Tools
 	s.registerTool(&ToolSleep{})
 	s.registerTool(&ToolSleepMS{})
diff --git a/uixt/mcp_tools_device.go b/uixt/mcp_tools_device.go
index e09dd906..43fd5fe1 100644
--- a/uixt/mcp_tools_device.go
+++ b/uixt/mcp_tools_device.go
@@ -3,6 +3,7 @@ package uixt
 import (
 	"context"
 	"fmt"
+	"os"
 
 	"github.com/danielpaulus/go-ios/ios"
 	"github.com/mark3labs/mcp-go/mcp"
@@ -216,3 +217,202 @@ func (t *ToolScreenRecord) Implement() server.ToolHandlerFunc {
 func (t *ToolScreenRecord) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
 	return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
 }
+
+// ToolPushImage implements the push_image tool call.
+type ToolPushImage struct {
+	// Return data fields - these define the structure of data returned by this tool
+	ImagePath string `json:"imagePath" desc:"Path of the image that was pushed"`
+	ImageUrl  string `json:"imageUrl,omitempty" desc:"URL of the image that was downloaded and pushed (if applicable)"`
+	Cleared   bool   `json:"cleared,omitempty" desc:"Whether images were cleared before pushing (if applicable)"`
+}
+
+func (t *ToolPushImage) Name() option.ActionName {
+	return option.ACTION_PushImage
+}
+
+func (t *ToolPushImage) Description() string {
+	return "Push an image to the device's gallery. For Android, the image will be pushed to the DCIM/Camera directory. For iOS, the image will be added to the device's photo album."
+}
+
+func (t *ToolPushImage) Options() []mcp.ToolOption {
+	return []mcp.ToolOption{
+		mcp.WithString("platform", mcp.Enum("android", "ios"), mcp.Description("The platform type of device to push image to")),
+		mcp.WithString("serial", mcp.Description("The device serial number or UDID")),
+		mcp.WithString("imagePath", mcp.Description("Path to the local image file to push to the device")),
+		mcp.WithString("imageUrl", mcp.Description("URL of the image to download and push to the device")),
+		mcp.WithBoolean("cleanup", mcp.Description("Whether to delete the downloaded file after pushing it to the device")),
+		mcp.WithBoolean("clearBefore", mcp.Description("Whether to clear images before pushing (if applicable)")),
+	}
+}
+
+func (t *ToolPushImage) Implement() server.ToolHandlerFunc {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		driverExt, err := setupXTDriver(ctx, request.GetArguments())
+		if err != nil {
+			return nil, err
+		}
+
+		// Get image path or URL; when both are given the URL takes precedence
+		imagePath, hasPath := request.GetArguments()["imagePath"].(string)
+		imageUrl, hasUrl := request.GetArguments()["imageUrl"].(string)
+		cleanup, _ := request.GetArguments()["cleanup"].(bool)
+		clearBefore, _ := request.GetArguments()["clearBefore"].(bool)
+
+		// Check if we have either path or URL
+		if (!hasPath || imagePath == "") && (!hasUrl || imageUrl == "") {
+			return nil, fmt.Errorf("either imagePath or imageUrl is required")
+		}
+
+		// If we have a URL, download it
+		downloadedFile := false
+		if hasUrl && imageUrl != "" {
+			log.Info().Str("imageUrl", imageUrl).Msg("Downloading image from URL")
+			downloadedPath, err := DownloadFileByUrl(imageUrl)
+			if err != nil {
+				return nil, fmt.Errorf("failed to download image from URL: %w", err)
+			}
+
+			// Detect image type and rename with proper extension
+			renamedPath, err := DetectAndRenameImageFile(downloadedPath)
+			if err != nil {
+				log.Warn().Err(err).Str("path", downloadedPath).Msg("Failed to detect image type or rename file, using original file")
+				imagePath = downloadedPath
+			} else {
+				imagePath = renamedPath
+			}
+			downloadedFile = true
+		}
+
+		// Clear images before pushing if requested
+		cleared := false
+		if clearBefore {
+			log.Info().Msg("Clearing images before pushing new image")
+			err := driverExt.IDriver.ClearImages()
+			if err != nil {
+				log.Warn().Err(err).Msg("Failed to clear images before pushing, continuing anyway")
+			} else {
+				cleared = true
+			}
+		}
+
+		// Push the image to the device
+		err = driverExt.IDriver.PushImage(imagePath)
+		if err != nil {
+			// If we downloaded the file and failed to push it, clean up
+			if downloadedFile && cleanup {
+				_ = os.Remove(imagePath)
+			}
+			return nil, err
+		}
+
+		// Clean up downloaded file if requested
+		if downloadedFile && cleanup {
+			log.Info().Str("imagePath", imagePath).Msg("Cleaning up downloaded image")
+			_ = os.Remove(imagePath)
+		}
+
+		message := "Successfully pushed image to device"
+		returnData := ToolPushImage{
+			ImagePath: imagePath,
+			Cleared:   cleared,
+		}
+
+		// Include URL in response if it was used
+		if hasUrl && imageUrl != "" {
+			returnData.ImageUrl = imageUrl
+			message = fmt.Sprintf("Successfully downloaded and pushed image from %s to device", imageUrl)
+		}
+
+		// Add cleared info to message if applicable
+		if cleared {
+			message += " (images cleared before pushing)"
+		}
+
+		return NewMCPSuccessResponse(message, &returnData), nil
+	}
+}
+
+func (t *ToolPushImage) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
+	arguments := map[string]any{}
+
+	// Handle string param as imageUrl
+	if imageUrl, ok := action.Params.(string); ok && imageUrl != "" {
+		arguments["imageUrl"] = imageUrl
+	}
+
+	// Handle map params with imageUrl or imagePath
+	if params, ok := action.Params.(map[string]any); ok {
+		if imageUrl, ok := params["imageUrl"].(string); ok && imageUrl != "" {
+			arguments["imageUrl"] = imageUrl
+		}
+		if imagePath, ok := params["imagePath"].(string); ok && imagePath != "" {
+			arguments["imagePath"] = imagePath
+		}
+		if cleanup, ok := params["cleanup"].(bool); ok {
+			arguments["cleanup"] = cleanup
+		}
+		if clearBefore, ok := params["clearBefore"].(bool); ok {
+			arguments["clearBefore"] = clearBefore
+		}
+	}
+
+	// Handle custom options
+	if imageUrl, ok := action.ActionOptions.Custom["imageUrl"].(string); ok && imageUrl != "" {
+		arguments["imageUrl"] = imageUrl
+	}
+	if imagePath, ok := action.ActionOptions.Custom["imagePath"].(string); ok && imagePath != "" {
+		arguments["imagePath"] = imagePath
+	}
+	if cleanup, ok := action.ActionOptions.Custom["cleanup"].(bool); ok {
+		arguments["cleanup"] = cleanup
+	}
+	if clearBefore, ok := action.ActionOptions.Custom["clearBefore"].(bool); ok {
+		arguments["clearBefore"] = clearBefore
+	}
+
+	return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
+}
+
+// ToolClearImage implements the clear_image tool call.
+type ToolClearImage struct {
+	// Return data fields - these define the structure of data returned by this tool
+	Success bool `json:"success" desc:"Whether the operation was successful"`
+}
+
+func (t *ToolClearImage) Name() option.ActionName {
+	return option.ACTION_ClearImage
+}
+
+func (t *ToolClearImage) Description() string {
+	return "Clear images from the device's gallery. For Android, this will remove all images from the DCIM/Camera directory. For iOS, this will clear the images added through the push_image tool."
+}
+
+func (t *ToolClearImage) Options() []mcp.ToolOption {
+	return []mcp.ToolOption{
+		mcp.WithString("platform", mcp.Enum("android", "ios"), mcp.Description("The platform type of device to clear images from")),
+		mcp.WithString("serial", mcp.Description("The device serial number or UDID")),
+	}
+}
+
+func (t *ToolClearImage) Implement() server.ToolHandlerFunc {
+	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		driverExt, err := setupXTDriver(ctx, request.GetArguments())
+		if err != nil {
+			return nil, err
+		}
+
+		err = driverExt.IDriver.ClearImages()
+		if err != nil {
+			return nil, err
+		}
+
+		message := "Successfully cleared images from device"
+		returnData := ToolClearImage{Success: true}
+
+		return NewMCPSuccessResponse(message, &returnData), nil
+	}
+}
+
+func (t *ToolClearImage) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
+	return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
+}
diff --git a/uixt/option/action.go b/uixt/option/action.go
index 7dfd872c..e13aa9f0 100644
--- a/uixt/option/action.go
+++ b/uixt/option/action.go
@@ -98,6 +98,10 @@ const (
 	ACTION_ListAvailableDevices ActionName = "list_available_devices"
 	ACTION_SelectDevice ActionName = "select_device"
 
+	// image actions
+	ACTION_PushImage  ActionName = "push_image"
+	ACTION_ClearImage ActionName = "clear_image"
+
 	// custom actions
 	ACTION_SwipeToTapApp ActionName = "swipe_to_tap_app" // swipe left & right to find app and tap
 	ACTION_SwipeToTapText ActionName = "swipe_to_tap_text" // swipe up & down to find text and tap