feat: 支持清空/导入图片

This commit is contained in:
余泓铮
2025-08-12 16:45:53 +08:00
parent 07bfabd5b6
commit 9bf31f643d
5 changed files with 297 additions and 2 deletions

View File

@@ -265,8 +265,10 @@ func (s *DriverSession) Request(method string, urlStr string, rawBody []byte, op
logger = log.Debug().Bool("success", true) logger = log.Debug().Bool("success", true)
} }
logger = logger.Str("logid", logid).Str("request_method", method).Str("request_url", rawURL). logger = logger.Str("logid", logid).Str("request_method", method).Str("request_url", rawURL)
Str("request_body", string(rawBody)) if len(rawBody) < 1024 {
logger = logger.Str("request_body", string(rawBody))
}
if !driverResult.RequestTime.IsZero() { if !driverResult.RequestTime.IsZero() {
logger = logger.Int64("request_time", driverResult.RequestTime.UnixMilli()) logger = logger.Int64("request_time", driverResult.RequestTime.UnixMilli())
} }

85
uixt/image_utils.go Normal file
View File

@@ -0,0 +1,85 @@
package uixt
import (
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/rs/zerolog/log"
)
// DetectAndRenameImageFile sniffs the leading bytes of the file at filePath to
// determine its image type and makes sure the file name ends with the matching
// extension (.jpg, .png, ...), renaming the file on disk when necessary.
//
// Return values:
//   - the resulting path and a nil error on success. When the file name already
//     ends with the right extension (compared case-insensitively, with .jpeg
//     and .tif accepted as aliases of .jpg and .tiff) the file is left
//     untouched and filePath is returned as is;
//   - filePath and a non-nil error when the content is not recognized as an
//     image, so callers may still fall back to the original file;
//   - an empty path and a non-nil error when the file cannot be opened, read
//     or renamed.
func DetectAndRenameImageFile(filePath string) (string, error) {
	contentType, err := sniffFileContentType(filePath)
	if err != nil {
		return "", err
	}
	log.Info().Str("filePath", filePath).Str("contentType", contentType).Msg("Detected content type")

	// Map the sniffed MIME type to a file extension. At the time of writing
	// http.DetectContentType does not report image/tiff or image/svg+xml; those
	// cases are harmless and kept in case the sniffer learns them.
	var extension string
	switch {
	case strings.Contains(contentType, "image/jpeg"):
		extension = ".jpg"
	case strings.Contains(contentType, "image/png"):
		extension = ".png"
	case strings.Contains(contentType, "image/gif"):
		extension = ".gif"
	case strings.Contains(contentType, "image/webp"):
		extension = ".webp"
	case strings.Contains(contentType, "image/bmp"):
		extension = ".bmp"
	case strings.Contains(contentType, "image/tiff"):
		extension = ".tiff"
	case strings.Contains(contentType, "image/svg+xml"):
		extension = ".svg"
	case strings.Contains(contentType, "image/"):
		// Some other image type: default to .jpg.
		extension = ".jpg"
	default:
		return filePath, fmt.Errorf("not a recognized image type: %s", contentType)
	}

	// Nothing to do when the name already carries the right extension. The
	// previous check compared filePath against filePath+extension, which can
	// never match, so "a.jpg" used to be renamed to "a.jpg.jpg".
	if hasImageExtension(filePath, extension) {
		return filePath, nil
	}

	newFilePath := filepath.Join(filepath.Dir(filePath), filepath.Base(filePath)+extension)
	if err := os.Rename(filePath, newFilePath); err != nil {
		return "", fmt.Errorf("failed to rename file: %w", err)
	}
	log.Info().Str("oldPath", filePath).Str("newPath", newFilePath).Msg("Renamed image file with proper extension")
	return newFilePath, nil
}

// sniffFileContentType returns the MIME type http.DetectContentType derives
// from the first bytes (at most 512) of the file at filePath. The file is
// closed before returning so that the caller can rename it afterwards, which
// is not possible on Windows while a handle is still open.
func sniffFileContentType(filePath string) (string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return "", fmt.Errorf("failed to open file for type detection: %w", err)
	}
	defer file.Close()

	buffer := make([]byte, 512)
	n, err := io.ReadFull(file, buffer)
	// Files shorter than the sniff window (or empty) are not an error.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", fmt.Errorf("failed to read file for type detection: %w", err)
	}
	// Only sniff the bytes actually read, not the zero padding of the buffer.
	return http.DetectContentType(buffer[:n]), nil
}

// hasImageExtension reports whether path already ends with extension, ignoring
// case and accepting the common aliases .jpeg (for .jpg) and .tif (for .tiff).
func hasImageExtension(path, extension string) bool {
	current := strings.ToLower(filepath.Ext(path))
	switch extension {
	case ".jpg":
		return current == ".jpg" || current == ".jpeg"
	case ".tiff":
		return current == ".tiff" || current == ".tif"
	default:
		return current == extension
	}
}

View File

@@ -133,6 +133,10 @@ func (s *MCPServer4XTDriver) registerTools() {
s.registerTool(&ToolGetScreenSize{}) s.registerTool(&ToolGetScreenSize{})
s.registerTool(&ToolGetSource{}) s.registerTool(&ToolGetSource{})
// Image Tools
s.registerTool(&ToolPushImage{})
s.registerTool(&ToolClearImage{})
// Utility Tools // Utility Tools
s.registerTool(&ToolSleep{}) s.registerTool(&ToolSleep{})
s.registerTool(&ToolSleepMS{}) s.registerTool(&ToolSleepMS{})

View File

@@ -3,6 +3,7 @@ package uixt
import ( import (
"context" "context"
"fmt" "fmt"
"os"
"github.com/danielpaulus/go-ios/ios" "github.com/danielpaulus/go-ios/ios"
"github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/mcp"
@@ -216,3 +217,202 @@ func (t *ToolScreenRecord) Implement() server.ToolHandlerFunc {
func (t *ToolScreenRecord) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { func (t *ToolScreenRecord) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
} }
// ToolPushImage implements the push_image tool call.
// Its fields describe the data returned to the caller: the handler built by
// Implement fills them in and hands the value to NewMCPSuccessResponse.
type ToolPushImage struct {
// Return data fields - these define the structure of data returned by this tool.
// ImagePath is the local path of the file that was passed to the driver.
ImagePath string `json:"imagePath" desc:"Path of the image that was pushed"`
// ImageUrl is only set when the image was downloaded from a URL.
ImageUrl string `json:"imageUrl,omitempty" desc:"URL of the image that was downloaded and pushed (if applicable)"`
// Cleared is true only when clearing was requested and succeeded.
Cleared bool `json:"cleared,omitempty" desc:"Whether images were cleared before pushing (if applicable)"`
}
// Name returns option.ACTION_PushImage, the action name that identifies the
// push_image tool.
func (t *ToolPushImage) Name() option.ActionName {
return option.ACTION_PushImage
}
// Description returns the human-readable summary of the push_image tool,
// including the per-platform destination of the pushed image.
func (t *ToolPushImage) Description() string {
	const description = "Push an image to the device's gallery. " +
		"For Android, the image will be pushed to the DCIM/Camera directory. " +
		"For iOS, the image will be added to the device's photo album."
	return description
}
// Options declares the arguments accepted by the push_image tool: the target
// device (platform, serial), the image source (imagePath or imageUrl) and the
// two behaviour switches (cleanup, clearBefore).
func (t *ToolPushImage) Options() []mcp.ToolOption {
	var (
		platform = mcp.WithString("platform", mcp.Enum("android", "ios"), mcp.Description("The platform type of device to push image to"))
		serial   = mcp.WithString("serial", mcp.Description("The device serial number or UDID"))

		imagePath = mcp.WithString("imagePath", mcp.Description("Path to the local image file to push to the device"))
		imageUrl  = mcp.WithString("imageUrl", mcp.Description("URL of the image to download and push to the device"))

		cleanup     = mcp.WithBoolean("cleanup", mcp.Description("Whether to delete the downloaded file after pushing it to the device"))
		clearBefore = mcp.WithBoolean("clearBefore", mcp.Description("Whether to clear images before pushing (if applicable)"))
	)
	return []mcp.ToolOption{platform, serial, imagePath, imageUrl, cleanup, clearBefore}
}
// Implement returns the MCP handler for the push_image tool.
//
// The handler resolves the target driver from the request arguments and then:
//  1. requires a non-empty "imagePath" or "imageUrl" argument; when a URL is
//     given the image is downloaded and the download takes precedence over
//     "imagePath";
//  2. tries to give the downloaded file a proper image extension (best
//     effort: on failure the file is pushed under its original name);
//  3. clears existing images first when "clearBefore" is set (best effort:
//     a failure is logged and the push continues);
//  4. pushes the image through the driver;
//  5. removes the downloaded file when "cleanup" is set, whether or not the
//     push succeeded.
//
// On success the response carries a ToolPushImage value describing what was
// done. Setup, download and push failures are returned as errors.
func (t *ToolPushImage) Implement() server.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		args := request.GetArguments()
		driverExt, err := setupXTDriver(ctx, args)
		if err != nil {
			return nil, err
		}

		// A missing or wrongly typed argument yields the zero value, which is
		// treated the same as "not provided".
		imagePath, _ := args["imagePath"].(string)
		imageUrl, _ := args["imageUrl"].(string)
		cleanup, _ := args["cleanup"].(bool)
		clearBefore, _ := args["clearBefore"].(bool)

		if imagePath == "" && imageUrl == "" {
			return nil, fmt.Errorf("either imagePath or imageUrl is required")
		}

		// If we have a URL, download it; the downloaded file replaces imagePath.
		downloadedFile := imageUrl != ""
		if downloadedFile {
			log.Info().Str("imageUrl", imageUrl).Msg("Downloading image from URL")
			downloadedPath, err := DownloadFileByUrl(imageUrl)
			if err != nil {
				return nil, fmt.Errorf("failed to download image from URL: %w", err)
			}
			imagePath = downloadedPath

			// Detect image type and rename with proper extension.
			renamedPath, err := DetectAndRenameImageFile(downloadedPath)
			if err != nil {
				log.Warn().Err(err).Str("path", downloadedPath).Msg("Failed to detect image type or rename file, using original file")
			} else {
				imagePath = renamedPath
			}
		}

		// removeDownloaded deletes the temporary download when the caller asked
		// for cleanup. It never touches a caller-supplied imagePath.
		removeDownloaded := func() {
			if !downloadedFile || !cleanup {
				return
			}
			log.Info().Str("imagePath", imagePath).Msg("Cleaning up downloaded image")
			if rmErr := os.Remove(imagePath); rmErr != nil {
				log.Warn().Err(rmErr).Str("imagePath", imagePath).Msg("Failed to remove downloaded image")
			}
		}

		// Clear images before pushing if requested.
		cleared := false
		if clearBefore {
			log.Info().Msg("Clearing images before pushing new image")
			if err := driverExt.IDriver.ClearImages(); err != nil {
				log.Warn().Err(err).Msg("Failed to clear images before pushing, continuing anyway")
			} else {
				cleared = true
			}
		}

		// Push the image to the device.
		if err := driverExt.IDriver.PushImage(imagePath); err != nil {
			removeDownloaded()
			return nil, err
		}
		removeDownloaded()

		message := "Successfully pushed image to device"
		returnData := ToolPushImage{
			ImagePath: imagePath,
			Cleared:   cleared,
		}

		// Include URL in response if it was used.
		if downloadedFile {
			returnData.ImageUrl = imageUrl
			message = fmt.Sprintf("Successfully downloaded and pushed image from %s to device", imageUrl)
		}

		// Add cleared info to message if applicable.
		if cleared {
			message = fmt.Sprintf("%s (images cleared before pushing)", message)
		}

		return NewMCPSuccessResponse(message, &returnData), nil
	}
}
// ConvertActionToCallToolRequest translates a mobile action into a push_image
// tool request.
//
// A non-empty string in action.Params is taken as the image URL. When
// action.Params is a map, its "imageUrl"/"imagePath" (non-empty strings) and
// "cleanup"/"clearBefore" (booleans) entries are copied. The same four keys
// are then read from action.ActionOptions.Custom and override earlier values.
func (t *ToolPushImage) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	stringKeys := [...]string{"imageUrl", "imagePath"}
	boolKeys := [...]string{"cleanup", "clearBefore"}
	arguments := make(map[string]any)

	switch params := action.Params.(type) {
	case string:
		if params != "" {
			arguments["imageUrl"] = params
		}
	case map[string]any:
		for _, key := range stringKeys {
			if value, ok := params[key].(string); ok && value != "" {
				arguments[key] = value
			}
		}
		for _, key := range boolKeys {
			if value, ok := params[key].(bool); ok {
				arguments[key] = value
			}
		}
	}

	// Custom options are applied last so they win over Params.
	for _, key := range stringKeys {
		if value, ok := action.ActionOptions.Custom[key].(string); ok && value != "" {
			arguments[key] = value
		}
	}
	for _, key := range boolKeys {
		if value, ok := action.ActionOptions.Custom[key].(bool); ok {
			arguments[key] = value
		}
	}

	return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
// ToolClearImage implements the clear_image tool call.
// Its single field is the payload returned to the caller by the handler that
// Implement builds.
type ToolClearImage struct {
// Return data fields - these define the structure of data returned by this tool.
// Success is set to true by the handler once the driver has cleared the images.
Success bool `json:"success" desc:"Whether the operation was successful"`
}
// Name returns option.ACTION_ClearImage, the action name that identifies the
// clear_image tool.
func (t *ToolClearImage) Name() option.ActionName {
return option.ACTION_ClearImage
}
// Description returns the human-readable summary of the clear_image tool,
// including what is removed on each platform.
func (t *ToolClearImage) Description() string {
	const description = "Clear images from the device's gallery. " +
		"For Android, this will remove all images from the DCIM/Camera directory. " +
		"For iOS, this will clear the images added through the push_image tool."
	return description
}
// Options declares the arguments accepted by the clear_image tool; both
// identify the target device (platform and serial).
func (t *ToolClearImage) Options() []mcp.ToolOption {
	platform := mcp.WithString("platform", mcp.Enum("android", "ios"), mcp.Description("The platform type of device to clear images from"))
	serial := mcp.WithString("serial", mcp.Description("The device serial number or UDID"))
	return []mcp.ToolOption{platform, serial}
}
// Implement returns the MCP handler for the clear_image tool. The handler
// resolves the driver from the request arguments, asks it to clear the images
// and reports success; a setup or clearing failure is returned as the error.
func (t *ToolClearImage) Implement() server.ToolHandlerFunc {
	handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		xtDriver, setupErr := setupXTDriver(ctx, request.GetArguments())
		if setupErr != nil {
			return nil, setupErr
		}

		if clearErr := xtDriver.IDriver.ClearImages(); clearErr != nil {
			return nil, clearErr
		}

		result := ToolClearImage{Success: true}
		return NewMCPSuccessResponse("Successfully cleared images from device", &result), nil
	}
	return handler
}
// ConvertActionToCallToolRequest translates a mobile action into a clear_image
// tool request. The tool takes no action-specific arguments, so the argument
// map is always empty.
func (t *ToolClearImage) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	noArguments := map[string]any{}
	toolRequest := BuildMCPCallToolRequest(t.Name(), noArguments, action)
	return toolRequest, nil
}

View File

@@ -98,6 +98,10 @@ const (
ACTION_ListAvailableDevices ActionName = "list_available_devices" ACTION_ListAvailableDevices ActionName = "list_available_devices"
ACTION_SelectDevice ActionName = "select_device" ACTION_SelectDevice ActionName = "select_device"
// image actions
ACTION_PushImage ActionName = "push_image"
ACTION_ClearImage ActionName = "clear_image"
// custom actions // custom actions
ACTION_SwipeToTapApp ActionName = "swipe_to_tap_app" // swipe left & right to find app and tap ACTION_SwipeToTapApp ActionName = "swipe_to_tap_app" // swipe left & right to find app and tap
ACTION_SwipeToTapText ActionName = "swipe_to_tap_text" // swipe up & down to find text and tap ACTION_SwipeToTapText ActionName = "swipe_to_tap_text" // swipe up & down to find text and tap