From d58bbaeb5fb658093580a9271ba633accfbf5296 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Wed, 21 May 2025 17:57:48 +0800 Subject: [PATCH] fix: uixt tool take_screenshot --- internal/version/VERSION | 2 +- mcphost/chat.go | 2 +- mcphost/host.go | 27 --------------------------- mcphost/mcp_server.go | 22 ++++++++++------------ 4 files changed, 12 insertions(+), 41 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 9e2d5b89..619898c8 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2505211747 +v5.0.0-beta-2505211805 diff --git a/mcphost/chat.go b/mcphost/chat.go index bb872b4b..8319f7ce 100644 --- a/mcphost/chat.go +++ b/mcphost/chat.go @@ -112,7 +112,7 @@ func (c *Chat) runPrompt(ctx context.Context, prompt string) error { // Create user message planningOpts := &ai.PlanningOptions{ - UserInstruction: "chat with MCP tools", + UserInstruction: prompt, Message: &schema.Message{ Role: schema.User, Content: prompt, diff --git a/mcphost/host.go b/mcphost/host.go index f8375151..2df803f9 100644 --- a/mcphost/host.go +++ b/mcphost/host.go @@ -394,30 +394,3 @@ func handleToolError(result *mcp.CallToolResult) error { } return fmt.Errorf("tool error: unknown error") } - -// ScreenshotBase64 get screenshot base64 for the given platform and serial -func (h *MCPHost) ScreenshotBase64(ctx context.Context, platform, serial string) (string, error) { - driver, err := h.GetOrCreateDriver(platform, serial) - if err != nil { - return "", err - } - return uixt.GetScreenShotBufferBase64(driver) -} - -// GetOrCreateDriver get or create a driver for the given platform and serial -func (h *MCPHost) GetOrCreateDriver(platform, serial string) (*uixt.XTDriver, error) { - h.mu.Lock() - defer h.mu.Unlock() - cacheKey := fmt.Sprintf("%s_%s", platform, serial) - if driver, ok := h.drivers[cacheKey]; ok { - return driver, nil - } - - driverExt, err := initDriverExt(platform, serial) - if err != nil { - return nil, err - } - // store driver in cache - h.drivers[cacheKey] = driverExt - return driverExt, nil -} diff --git a/mcphost/mcp_server.go b/mcphost/mcp_server.go index 85515c30..917e1c07 100644 --- a/mcphost/mcp_server.go +++ b/mcphost/mcp_server.go @@ -2,7 +2,6 @@ package mcphost import ( "context" - "encoding/base64" "encoding/json" "fmt" "reflect" @@ -178,9 +177,11 @@ func (ums *MCPServer4XTDriver) addTools() { log.Info().Str("name", swipeTool.Name).Msg("Register tool") // ScreenShot Tool - screenShotTool := mcp.NewTool("screenshot", - mcp.WithDescription("Takes a screenshot of the device screen and returns it as a base64 encoded string."), + takeScreenShotParams := append( + []mcp.ToolOption{mcp.WithDescription("Take a screenshot of the mobile device. Use this to understand what's on screen. Do not cache this result.")}, + commonToolOptions..., ) + screenShotTool := mcp.NewTool("take_screenshot", takeScreenShotParams...) ums.mcpServer.AddTool(screenShotTool, ums.handleScreenShot) ums.tools = append(ums.tools, screenShotTool) ums.handlerMap[screenShotTool.Name] = ums.handleScreenShot @@ -407,23 +408,20 @@ func (ums *MCPServer4XTDriver) handleDrag(ctx context.Context, request mcp.CallT // handleScreenShot handles the screenshot tool call. func (ums *MCPServer4XTDriver) handleScreenShot(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - log.Info().Msg("Executing ScreenShot") + log.Info().Msg("take screenshot") driverExt, err := ums.setupXTDriver(ctx, request.Params.Arguments) if err != nil { return nil, err } - buffer, err := driverExt.ScreenShot() + + bufferBase64, err := uixt.GetScreenShotBufferBase64(driverExt.IDriver) if err != nil { log.Error().Err(err).Msg("ScreenShot failed") return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", err)), nil } - if buffer == nil || buffer.Len() == 0 { - log.Error().Msg("Screenshot buffer is nil or empty") - return mcp.NewToolResultError("Screenshot returned empty buffer"), nil - } - encodedString := base64.StdEncoding.EncodeToString(buffer.Bytes()) - log.Info().Int("image_size_bytes", len(buffer.Bytes())).Int("base64_len", len(encodedString)).Msg("Screenshot successful") - return mcp.NewToolResultText(encodedString), nil + log.Debug().Int("imageBytes", len(bufferBase64)).Msg("take screenshot success") + + return mcp.NewToolResultImage("screenshot", bufferBase64, "image/jpeg"), nil } // setupXTDriver initializes an XTDriver based on the platform and serial.