diff --git a/internal/version/VERSION b/internal/version/VERSION index ffe8319b..54d05ab6 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506251157 +v5.0.0-beta-2506251207 diff --git a/uixt/browser_driver.go b/uixt/browser_driver.go index 5d35a756..71325483 100644 --- a/uixt/browser_driver.go +++ b/uixt/browser_driver.go @@ -37,6 +37,7 @@ type CreateBrowserResponse struct { type BrowserDriver struct { urlPrefix *url.URL Session *DriverSession + Device *BrowserDevice } type BrowserInfo struct { @@ -96,6 +97,7 @@ func CreateBrowser(timeout int, width, height int) (browserInfo *BrowserInfo, er func NewBrowserDriver(device *BrowserDevice) (driver *BrowserDriver, err error) { log.Info().Msg("init NewBrowserDriver driver") driver = new(BrowserDriver) + driver.Device = device driver.urlPrefix = &url.URL{} driver.urlPrefix.Host = BROWSER_LOCAL_ADDRESS driver.urlPrefix.Scheme = "http" @@ -597,7 +599,7 @@ func (wd *BrowserDriver) Clear(packageName string) error { } func (wd *BrowserDriver) GetDevice() IDevice { - return nil + return wd.Device } func (wd *BrowserDriver) ForegroundInfo() (app types.AppInfo, err error) { diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go index 06dfb9c3..098fdb44 100644 --- a/uixt/mcp_server.go +++ b/uixt/mcp_server.go @@ -117,6 +117,7 @@ func (s *MCPServer4XTDriver) registerTools() { s.registerTool(&ToolClosePopups{}) // PC/Web Tools + s.registerTool(&ToolWebLoginNoneUI{}) s.registerTool(&ToolSecondaryClick{}) s.registerTool(&ToolHoverBySelector{}) s.registerTool(&ToolTapBySelector{}) diff --git a/uixt/mcp_tools_ai.go b/uixt/mcp_tools_ai.go index e29e3cc6..c932f43f 100644 --- a/uixt/mcp_tools_ai.go +++ b/uixt/mcp_tools_ai.go @@ -44,7 +44,7 @@ func (t *ToolStartToGoal) Implement() server.ToolHandlerFunc { // Start to goal logic _, err = driverExt.StartToGoal(ctx, unifiedReq.Prompt) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Failed to achieve goal: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Failed to achieve goal: %s", err.Error())), err } message := fmt.Sprintf("Successfully achieved goal: %s", unifiedReq.Prompt) @@ -104,7 +104,7 @@ func (t *ToolAIAction) Implement() server.ToolHandlerFunc { // AI action logic _, err = driverExt.AIAction(ctx, unifiedReq.Prompt) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("AI action failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("AI action failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully performed AI action with prompt: %s", unifiedReq.Prompt) @@ -168,7 +168,7 @@ func (t *ToolAIQuery) Implement() server.ToolHandlerFunc { // AI query logic with options queryResult, err := driverExt.AIQuery(unifiedReq.Prompt, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("AI query failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("AI query failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully queried information with prompt: %s", unifiedReq.Prompt) diff --git a/uixt/mcp_tools_app.go b/uixt/mcp_tools_app.go index 417d5fac..3c9e8c77 100644 --- a/uixt/mcp_tools_app.go +++ b/uixt/mcp_tools_app.go @@ -94,7 +94,7 @@ func (t *ToolLaunchApp) Implement() server.ToolHandlerFunc { // Launch app action logic err = driverExt.AppLaunch(unifiedReq.PackageName) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Launch app failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Launch app failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully launched app: %s", unifiedReq.PackageName) @@ -153,7 +153,7 @@ func (t *ToolTerminateApp) Implement() server.ToolHandlerFunc { // Terminate app action logic success, err := driverExt.AppTerminate(unifiedReq.PackageName) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", err.Error())), err } if !success { log.Warn().Str("packageName", unifiedReq.PackageName).Msg("app was not running") @@ -213,7 +213,7 @@ func (t *ToolAppInstall) Implement() server.ToolHandlerFunc { // App install action logic err = driverExt.GetDevice().Install(unifiedReq.AppUrl) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("App install failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("App install failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully installed app from: %s", unifiedReq.AppUrl) @@ -267,7 +267,7 @@ func (t *ToolAppUninstall) Implement() server.ToolHandlerFunc { // App uninstall action logic err = driverExt.GetDevice().Uninstall(unifiedReq.PackageName) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("App uninstall failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("App uninstall failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully uninstalled app: %s", unifiedReq.PackageName) @@ -321,7 +321,7 @@ func (t *ToolAppClear) Implement() server.ToolHandlerFunc { // App clear action logic err = driverExt.AppClear(unifiedReq.PackageName) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("App clear failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("App clear failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully cleared app: %s", unifiedReq.PackageName) @@ -371,7 +371,7 @@ func (t *ToolGetForegroundApp) Implement() server.ToolHandlerFunc { // Get foreground app info appInfo, err := driverExt.ForegroundInfo() if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Get foreground app failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Get foreground app failed: %s", err.Error())), err } message := fmt.Sprintf("Current foreground app: %s (%s)", appInfo.AppName, appInfo.PackageName) diff --git a/uixt/mcp_tools_button.go b/uixt/mcp_tools_button.go index f49ae309..a4a3de78 100644 --- a/uixt/mcp_tools_button.go +++ b/uixt/mcp_tools_button.go @@ -44,7 +44,7 @@ func (t *ToolPressButton) Implement() server.ToolHandlerFunc { // Press button action logic err = driverExt.PressButton(types.DeviceButton(unifiedReq.Button)) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Press button failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Press button failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully pressed button: %s", unifiedReq.Button) @@ -91,7 +91,7 @@ func (t *ToolHome) Implement() server.ToolHandlerFunc { // Home action logic err = driverExt.Home() if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Home button press failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Home button press failed: %s", err.Error())), err } message := "Successfully pressed home button" @@ -132,7 +132,7 @@ func (t *ToolBack) Implement() server.ToolHandlerFunc { // Back action logic err = driverExt.Back() if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Back button press failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Back button press failed: %s", err.Error())), err } message := "Successfully pressed back button" diff --git a/uixt/mcp_tools_input.go b/uixt/mcp_tools_input.go index 4286e211..a0be103e 100644 --- a/uixt/mcp_tools_input.go +++ b/uixt/mcp_tools_input.go @@ -47,7 +47,7 @@ func (t *ToolInput) Implement() server.ToolHandlerFunc { // Input action logic err = driverExt.Input(unifiedReq.Text) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Input failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Input failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully input text: %s", unifiedReq.Text) @@ -99,7 +99,7 @@ func (t *ToolSetIme) Implement() server.ToolHandlerFunc { // Set IME action logic err = driverExt.SetIme(unifiedReq.Ime) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Set IME failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Set IME failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully set IME to: %s", unifiedReq.Ime) diff --git a/uixt/mcp_tools_screen.go b/uixt/mcp_tools_screen.go index eaf38261..99d94565 100644 --- a/uixt/mcp_tools_screen.go +++ b/uixt/mcp_tools_screen.go @@ -130,7 +130,7 @@ func (t *ToolGetSource) Implement() server.ToolHandlerFunc { // Get source action logic sourceData, err := driverExt.Source(option.WithProcessName(unifiedReq.PackageName)) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Get source failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Get source failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully retrieved source for package: %s", unifiedReq.PackageName) diff --git a/uixt/mcp_tools_swipe.go b/uixt/mcp_tools_swipe.go index 8c6108ba..1dfa6dd7 100644 --- a/uixt/mcp_tools_swipe.go +++ b/uixt/mcp_tools_swipe.go @@ -155,7 +155,7 @@ func (t *ToolSwipeDirection) Implement() server.ToolHandlerFunc { } if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Swipe failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Swipe failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully swiped %s", swipeDirection) @@ -255,7 +255,7 @@ func (t *ToolSwipeCoordinate) Implement() server.ToolHandlerFunc { swipeAction := prepareSwipeAction(driverExt, params, opts...) err = swipeAction(driverExt) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Advanced swipe failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Advanced swipe failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully performed advanced swipe from (%.2f, %.2f) to (%.2f, %.2f)", @@ -345,7 +345,7 @@ func (t *ToolSwipeToTapApp) Implement() server.ToolHandlerFunc { // Swipe to tap app action logic err = driverExt.SwipeToTapApp(unifiedReq.AppName, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap app failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap app failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully found and tapped app: %s", unifiedReq.AppName) @@ -422,7 +422,7 @@ func (t *ToolSwipeToTapText) Implement() server.ToolHandlerFunc { // Swipe to tap text action logic err = driverExt.SwipeToTapTexts([]string{unifiedReq.Text}, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap text failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap text failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully found and tapped text: %s", unifiedReq.Text) @@ -500,7 +500,7 @@ func (t *ToolSwipeToTapTexts) Implement() server.ToolHandlerFunc { // Swipe to tap texts action logic err = driverExt.SwipeToTapTexts(unifiedReq.Texts, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap texts failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Swipe to tap texts failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully found and tapped one of texts: %v", unifiedReq.Texts) @@ -586,7 +586,7 @@ func (t *ToolDrag) Implement() server.ToolHandlerFunc { // Drag action logic err = driverExt.Swipe(unifiedReq.FromX, unifiedReq.FromY, unifiedReq.ToX, unifiedReq.ToY, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Drag failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Drag failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully dragged from (%.2f, %.2f) to (%.2f, %.2f)", diff --git a/uixt/mcp_tools_touch.go b/uixt/mcp_tools_touch.go index 2ff3376e..19e42261 100644 --- a/uixt/mcp_tools_touch.go +++ b/uixt/mcp_tools_touch.go @@ -58,7 +58,7 @@ func (t *ToolTapXY) Implement() server.ToolHandlerFunc { // Tap action logic err = driverExt.TapXY(unifiedReq.X, unifiedReq.Y, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y) @@ -144,7 +144,7 @@ func (t *ToolTapAbsXY) Implement() server.ToolHandlerFunc { // Tap absolute XY action logic err = driverExt.TapAbsXY(unifiedReq.X, unifiedReq.Y, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap absolute XY failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Tap absolute XY failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully tapped at absolute coordinates (%.0f, %.0f)", unifiedReq.X, unifiedReq.Y) @@ -224,7 +224,7 @@ func (t *ToolTapByOCR) Implement() server.ToolHandlerFunc { // Tap by OCR action logic err = driverExt.TapByOCR(unifiedReq.Text, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap by OCR failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Tap by OCR failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully tapped on OCR text: %s", unifiedReq.Text) @@ -290,7 +290,7 @@ func (t *ToolTapByCV) Implement() server.ToolHandlerFunc { // We'll add a basic implementation that triggers CV recognition err = driverExt.TapByCV(opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap by CV failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Tap by CV failed: %s", err.Error())), err } message := "Successfully tapped by computer vision" @@ -352,7 +352,7 @@ func (t *ToolDoubleTapXY) Implement() server.ToolHandlerFunc { // Double tap XY action logic err = driverExt.DoubleTap(unifiedReq.X, unifiedReq.Y) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Double tap failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Double tap failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully double tapped at (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y) diff --git a/uixt/mcp_tools_utility.go b/uixt/mcp_tools_utility.go index 32f1c4d3..4aa2a267 100644 --- a/uixt/mcp_tools_utility.go +++ b/uixt/mcp_tools_utility.go @@ -236,7 +236,7 @@ func (t *ToolClosePopups) Implement() server.ToolHandlerFunc { // Close popups action logic err = driverExt.ClosePopupsHandler() if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Close popups failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Close popups failed: %s", err.Error())), err } message := "Successfully closed popups" diff --git a/uixt/mcp_tools_web.go b/uixt/mcp_tools_web.go index dcab952a..a21a7489 100644 --- a/uixt/mcp_tools_web.go +++ b/uixt/mcp_tools_web.go @@ -52,7 +52,7 @@ func (t *ToolWebLoginNoneUI) Implement() server.ToolHandlerFunc { _, err = driver.LoginNoneUI(unifiedReq.PackageName, unifiedReq.PhoneNumber, unifiedReq.Captcha, unifiedReq.Password) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Web login failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Web login failed: %s", err.Error())), err } message := "Successfully performed web login without UI" @@ -63,7 +63,15 @@ func (t *ToolWebLoginNoneUI) Implement() server.ToolHandlerFunc { } func (t *ToolWebLoginNoneUI) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { - return BuildMCPCallToolRequest(t.Name(), map[string]any{}), nil + arguments := map[string]any{} + if textsSlice, ok := action.Params.([]interface{}); ok { + arguments["packageName"] = textsSlice[0].(string) + arguments["phoneNumber"] = textsSlice[1].(string) + arguments["captcha"] = textsSlice[2].(string) + arguments["password"] = textsSlice[3].(string) + } + + return BuildMCPCallToolRequest(t.Name(), arguments), nil } // ToolSecondaryClick implements the secondary_click tool call. @@ -106,7 +114,7 @@ func (t *ToolSecondaryClick) Implement() server.ToolHandlerFunc { // Secondary click action logic err = driverExt.SecondaryClick(unifiedReq.X, unifiedReq.Y) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Secondary click failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Secondary click failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully performed secondary click at (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y) @@ -125,6 +133,8 @@ func (t *ToolSecondaryClick) ConvertActionToCallToolRequest(action option.Mobile "x": params[0], "y": params[1], } + // Extract options to arguments + extractActionOptionsToArguments(action.GetOptions(), arguments) return BuildMCPCallToolRequest(t.Name(), arguments), nil } return mcp.CallToolRequest{}, fmt.Errorf("invalid secondary click params: %v", action.Params) @@ -160,11 +170,13 @@ func (t *ToolHoverBySelector) Implement() server.ToolHandlerFunc { if err != nil { return nil, err } + // Get options directly since ActionOptions is now ActionOptions + opts := unifiedReq.Options() // Hover by selector action logic - err = driverExt.HoverBySelector(unifiedReq.Selector) + err = driverExt.HoverBySelector(unifiedReq.Selector, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Hover by selector failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Hover by selector failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully hovered over element with selector: %s", unifiedReq.Selector) @@ -179,6 +191,8 @@ func (t *ToolHoverBySelector) ConvertActionToCallToolRequest(action option.Mobil arguments := map[string]any{ "selector": selector, } + // Extract options to arguments + extractActionOptionsToArguments(action.GetOptions(), arguments) return BuildMCPCallToolRequest(t.Name(), arguments), nil } return mcp.CallToolRequest{}, fmt.Errorf("invalid hover by selector params: %v", action.Params) @@ -214,11 +228,13 @@ func (t *ToolTapBySelector) Implement() server.ToolHandlerFunc { if err != nil { return nil, err } + // Get options directly since ActionOptions is now ActionOptions + opts := unifiedReq.Options() // Tap by selector action logic - err = driverExt.TapBySelector(unifiedReq.Selector) + err = driverExt.TapBySelector(unifiedReq.Selector, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap by selector failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Tap by selector failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully tapped element with selector: %s", unifiedReq.Selector) @@ -233,6 +249,8 @@ func (t *ToolTapBySelector) ConvertActionToCallToolRequest(action option.MobileA arguments := map[string]any{ "selector": selector, } + // Extract options to arguments + extractActionOptionsToArguments(action.GetOptions(), arguments) return BuildMCPCallToolRequest(t.Name(), arguments), nil } return mcp.CallToolRequest{}, fmt.Errorf("invalid tap by selector params: %v", action.Params) @@ -268,11 +286,13 @@ func (t *ToolSecondaryClickBySelector) Implement() server.ToolHandlerFunc { if err != nil { return nil, err } + // Get options directly since ActionOptions is now ActionOptions + opts := unifiedReq.Options() // Secondary click by selector action logic - err = driverExt.SecondaryClickBySelector(unifiedReq.Selector) + err = driverExt.SecondaryClickBySelector(unifiedReq.Selector, opts...) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Secondary click by selector failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Secondary click by selector failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully performed secondary click on element with selector: %s", unifiedReq.Selector) @@ -287,6 +307,8 @@ func (t *ToolSecondaryClickBySelector) ConvertActionToCallToolRequest(action opt arguments := map[string]any{ "selector": selector, } + // Extract options to arguments + extractActionOptionsToArguments(action.GetOptions(), arguments) return BuildMCPCallToolRequest(t.Name(), arguments), nil } return mcp.CallToolRequest{}, fmt.Errorf("invalid secondary click by selector params: %v", action.Params) @@ -336,7 +358,7 @@ func (t *ToolWebCloseTab) Implement() server.ToolHandlerFunc { err = browserDriver.CloseTab(unifiedReq.TabIndex) if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Close tab failed: %s", err.Error())), nil + return NewMCPErrorResponse(fmt.Sprintf("Close tab failed: %s", err.Error())), err } message := fmt.Sprintf("Successfully closed tab at index: %d", unifiedReq.TabIndex) @@ -361,5 +383,7 @@ func (t *ToolWebCloseTab) ConvertActionToCallToolRequest(action option.MobileAct arguments := map[string]any{ "tabIndex": tabIndex, } + // Extract options to arguments + extractActionOptionsToArguments(action.GetOptions(), arguments) return BuildMCPCallToolRequest(t.Name(), arguments), nil }