diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index 2d6451b7..f6500b65 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -102,6 +102,10 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult logger := log.Debug().Str("imagePath", imagePath) // perform CV processing if any CV-related option is enabled if needsCVProcessing(screenshotOptions) { + if err = dExt.initCVService(); err != nil { + return nil, err + } + imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...) if err != nil { log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed") diff --git a/uixt/ios_device.go b/uixt/ios_device.go index 17caddf2..658892c1 100644 --- a/uixt/ios_device.go +++ b/uixt/ios_device.go @@ -222,6 +222,11 @@ func (dev *IOSDevice) NewDriver() (driver IDriver, err error) { if err != nil { return nil, errors.Wrap(err, "failed to init WDA driver") } + wdaStatus, err := wdaDriver.Status() + if err != nil { + return nil, err + } + log.Info().Interface("status", wdaStatus).Msg("check WDA status") if dev.Options.ResetHomeOnStartup { log.Info().Msg("go back to home screen") diff --git a/uixt/ios_driver_wda.go b/uixt/ios_driver_wda.go index 3e881324..0a03e7d1 100644 --- a/uixt/ios_driver_wda.go +++ b/uixt/ios_driver_wda.go @@ -41,14 +41,6 @@ func NewWDADriver(device *IOSDevice) (*WDADriver, error) { return nil, err } - // check WDA status - wdaStatus, err := driver.Status() - if err != nil { - return nil, err - } - log.Info().Interface("status", wdaStatus). - Msg("check WDA status") - // register driver session reset handler driver.Session.RegisterResetHandler(driver.Setup) @@ -147,20 +139,11 @@ func (wd *WDADriver) Setup() error { // Store base URL for building full URLs baseURL := fmt.Sprintf("http://localhost:%d", localPort) wd.Session.SetBaseURL(baseURL) - - if err = wd.initMjpegClient(); err != nil { - return err - } - // create new session if err := wd.InitSession(nil); err != nil { return errors.Wrap(code.DeviceHTTPDriverError, err.Error()) } - // init WDA scale - if wd.scale, err = wd.Scale(); err != nil { - return err - } return nil } @@ -326,15 +309,16 @@ func (wd *WDADriver) ScreenShot(opts ...option.ActionOption) (raw *bytes.Buffer, return raw, nil } -func (wd *WDADriver) toScale(x float64) float64 { +func (wd *WDADriver) toScale(x float64) (float64, error) { if wd.scale == 0 { // not setup yet - if err := wd.Setup(); err != nil { - log.Error().Err(err).Msg("init scale failed") - os.Exit(code.GetErrorCode(err)) + var err error + if wd.scale, err = wd.Scale(); err != nil || wd.scale == 0 { + log.Error().Err(err).Msg("get screen scale failed") + return 0, err } } - return x / wd.scale + return x / wd.scale, nil } func (wd *WDADriver) ActiveAppInfo() (info types.AppInfo, err error) { @@ -553,11 +537,16 @@ func (wd *WDADriver) TapAbsXY(x, y float64, opts ...option.ActionOption) error { log.Info().Float64("x", x).Float64("y", y).Msg("WDADriver.TapAbsXY") // [[FBRoute POST:@"/wda/tap/:uuid"] respondWithTarget:self action:@selector(handleTap:)] - x = wd.toScale(x) - y = wd.toScale(y) + var err error + if x, err = wd.toScale(x); err != nil { + return err + } + if y, err = wd.toScale(y); err != nil { + return err + } actionOptions := option.NewActionOptions(opts...) - x, y, err := preHandler_TapAbsXY(wd, actionOptions, x, y) + x, y, err = preHandler_TapAbsXY(wd, actionOptions, x, y) if err != nil { return err } @@ -577,11 +566,16 @@ func (wd *WDADriver) DoubleTap(x, y float64, opts ...option.ActionOption) error log.Info().Float64("x", x).Float64("y", y).Msg("WDADriver.DoubleTap") // [[FBRoute POST:@"/wda/doubleTap"] respondWithTarget:self action:@selector(handleDoubleTapCoordinate:)] - x = wd.toScale(x) - y = wd.toScale(y) + var err error + if x, err = wd.toScale(x); err != nil { + return err + } + if y, err = wd.toScale(y); err != nil { + return err + } actionOptions := option.NewActionOptions(opts...) - x, y, err := preHandler_DoubleTap(wd, actionOptions, x, y) + x, y, err = preHandler_DoubleTap(wd, actionOptions, x, y) if err != nil { return err } @@ -611,13 +605,22 @@ func (wd *WDADriver) Drag(fromX, fromY, toX, toY float64, opts ...option.ActionO Float64("toX", toX).Float64("toY", toY).Msg("WDADriver.Drag") // [[FBRoute POST:@"/wda/dragfromtoforduration"] respondWithTarget:self action:@selector(handleDragCoordinate:)] - fromX = wd.toScale(fromX) - fromY = wd.toScale(fromY) - toX = wd.toScale(toX) - toY = wd.toScale(toY) + var err error + if fromX, err = wd.toScale(fromX); err != nil { + return err + } + if fromY, err = wd.toScale(fromY); err != nil { + return err + } + if toX, err = wd.toScale(toX); err != nil { + return err + } + if toY, err = wd.toScale(toY); err != nil { + return err + } actionOptions := option.NewActionOptions(opts...) - fromX, fromY, toX, toY, err := preHandler_Drag(wd, actionOptions, fromX, fromY, toX, toY) + fromX, fromY, toX, toY, err = preHandler_Drag(wd, actionOptions, fromX, fromY, toX, toY) if err != nil { return err } @@ -851,6 +854,10 @@ func (wd *WDADriver) triggerWDALog(data map[string]interface{}) (rawResp []byte, func (wd *WDADriver) ScreenRecord(opts ...option.ActionOption) (videoPath string, err error) { log.Info().Msg("WDADriver.ScreenRecord") + err = wd.initMjpegClient() + if err != nil { + return "", err + } timestamp := time.Now().Format("20060102_150405") + fmt.Sprintf("_%03d", time.Now().UnixNano()/1e6%1000) fileName := filepath.Join(config.GetConfig().ScreenShotsPath(), fmt.Sprintf("%s.mp4", timestamp)) diff --git a/uixt/sdk.go b/uixt/sdk.go index e394a3b2..d315bbca 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -5,38 +5,28 @@ import ( "fmt" "strings" - "github.com/httprunner/httprunner/v5/uixt/ai" - "github.com/httprunner/httprunner/v5/uixt/option" "github.com/mark3labs/mcp-go/client" "github.com/mark3labs/mcp-go/mcp" "github.com/pkg/errors" "github.com/rs/zerolog/log" + + "github.com/httprunner/httprunner/v5/uixt/ai" + "github.com/httprunner/httprunner/v5/uixt/option" ) func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, error) { + services := option.NewAIServiceOptions(opts...) driverExt := &XTDriver{ IDriver: driver, client: &MCPClient4XTDriver{ Server: NewMCPServer(), }, + services: services, loadedMCPClients: make(map[string]client.MCPClient), } - services := option.NewAIServiceOptions(opts...) - var err error - // default to vedem CV service - if services.CVService == "" { - log.Warn().Msg("no CV service config provided, use default vedem") - services.CVService = option.CVServiceTypeVEDEM - } - driverExt.CVService, err = ai.NewCVService(services.CVService) - if err != nil { - log.Error().Err(err).Msg("init vedem image service failed") - return nil, err - } - // Handle LLM service initialization if services.LLMConfig != nil { // Use advanced LLM configuration if provided @@ -72,10 +62,30 @@ type XTDriver struct { CVService ai.ICVService // OCR/CV LLMService ai.ILLMService // LLM + services *option.AIServiceOptions // AI services options client *MCPClient4XTDriver // MCP Client for built-in uixt server loadedMCPClients map[string]client.MCPClient // External MCP clients } +func (dExt *XTDriver) initCVService() error { + if dExt.CVService != nil { + return nil + } + cvServiceType := dExt.services.CVService + if cvServiceType == "" { + log.Warn().Msg("no CV service config provided, use default vedem") + cvServiceType = option.CVServiceTypeVEDEM + } + cvService, err := ai.NewCVService(cvServiceType) + if err != nil { + log.Error().Err(err).Str("type", string(cvServiceType)). + Msg("init cv service failed") + return errors.Wrap(err, "init cv service failed") + } + dExt.CVService = cvService + return nil +} + // MCPClient4XTDriver is a mock MCP client that only implements the methods used by the host type MCPClient4XTDriver struct { client.MCPClient @@ -200,7 +210,8 @@ func (dExt *XTDriver) GetMCPClient(serverName string) (client.MCPClient, bool) { // CallMCPTool calls the specified MCP tool func (dExt *XTDriver) CallMCPTool(ctx context.Context, - serverName, toolName string, arguments map[string]any) (result *mcp.CallToolResult, err error) { + serverName, toolName string, arguments map[string]any, +) (result *mcp.CallToolResult, err error) { // Get MCP client mcpClient, exists := dExt.GetMCPClient(serverName)