From 9890588ca7f953265ce31bfccd25930b11556208 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Sun, 20 Jul 2025 23:14:08 +0800 Subject: [PATCH 1/9] fix: support combined LLMService with wings service --- internal/version/VERSION | 2 +- uixt/ai/ai.go | 86 ++++++++++------ uixt/driver_ext_ai.go | 202 ++++++++++++------------------------- uixt/driver_ext_ai_test.go | 54 +++++----- uixt/option/ai.go | 1 + uixt/sdk.go | 21 ++-- 6 files changed, 152 insertions(+), 214 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 8281c03e..2472fe80 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-250717 +v5.0.0-250720 diff --git a/uixt/ai/ai.go b/uixt/ai/ai.go index 75bd8845..4ff4db8f 100644 --- a/uixt/ai/ai.go +++ b/uixt/ai/ai.go @@ -4,6 +4,7 @@ import ( "context" "github.com/cloudwego/eino/schema" + "github.com/httprunner/httprunner/v5/uixt/option" ) @@ -24,43 +25,66 @@ func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) { // NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) { - // Get model configs for each component - plannerModelConfig, err := GetModelConfig(config.PlannerModel) - if err != nil { - return nil, err + combinedLLMService := &combinedLLMService{} + + // Planner + if config.PlannerModel == option.WINGS_SERVICE { + planner, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.planner = planner + } else { + plannerModelConfig, err := GetModelConfig(config.PlannerModel) + if err != nil { + return nil, err + } + planner, err := NewPlanner(context.Background(), plannerModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.planner = planner } - asserterModelConfig, err := GetModelConfig(config.AsserterModel) - if err != nil { - return nil, err + // Asserter + if config.AsserterModel == 
option.WINGS_SERVICE { + asserter, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.asserter = asserter + } else { + asserterModelConfig, err := GetModelConfig(config.AsserterModel) + if err != nil { + return nil, err + } + asserter, err := NewAsserter(context.Background(), asserterModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.asserter = asserter } - querierModelConfig, err := GetModelConfig(config.QuerierModel) - if err != nil { - return nil, err + // Querier + if config.QuerierModel == option.WINGS_SERVICE { + querier, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.querier = querier + } else { + querierModelConfig, err := GetModelConfig(config.QuerierModel) + if err != nil { + return nil, err + } + querier, err := NewQuerier(context.Background(), querierModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.querier = querier } - // Create components with their respective model configs - planner, err := NewPlanner(context.Background(), plannerModelConfig) - if err != nil { - return nil, err - } - - asserter, err := NewAsserter(context.Background(), asserterModelConfig) - if err != nil { - return nil, err - } - - querier, err := NewQuerier(context.Background(), querierModelConfig) - if err != nil { - return nil, err - } - - return &combinedLLMService{ - planner: planner, - asserter: asserter, - querier: querier, - }, nil + return combinedLLMService, nil } // combinedLLMService 实现了 ILLMService 接口,组合了规划、断言和查询功能 diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 5243afc8..9fa43ffb 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -3,7 +3,6 @@ package uixt import ( "context" "encoding/json" - "fmt" "time" "github.com/cloudwego/eino/schema" @@ -16,7 +15,7 @@ import ( "github.com/httprunner/httprunner/v5/uixt/types" ) -// StartToGoal (original implementation - preserved) +// StartToGoal runs AI actions until task is 
finished or time limit is reached func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) { options := option.NewActionOptions(opts...) logger := log.Info().Str("prompt", prompt) @@ -195,7 +194,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op } } -// AIAction with WingsService priority support +// AIAction performs AI-driven action and returns detailed execution result func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) { log.Info().Str("prompt", prompt).Msg("performing AI action") @@ -208,93 +207,24 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio return nil, err } - // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { - log.Info().Msg("using Wings service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...) - } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...) 
-} - -// executeAIAction executes AIAction using any AI service (generic implementation) -func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { - // Add device context for Wings service if needed - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } - - // Step 1: Plan next action and measure time + // Step 2: Plan next action and measure time modelCallStartTime := time.Now() - - var planningResult *ai.PlanningResult - var err error - - if serviceType == "llm" { - // For LLM service, use PlanNextAction which includes additional processing - planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...) - if planErr != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: planErr.Error(), - }, errors.Wrap(planErr, "get next action failed") - } - planningResult = &planningExecutionResult.PlanningResult - } else { - // For Wings service, call Plan directly - planningOpts := &ai.PlanningOptions{ - UserInstruction: prompt, - Message: &schema.Message{ - Role: schema.User, - MultiContent: []schema.ChatMessagePart{ - { - Type: schema.ChatMessagePartTypeImageURL, - ImageURL: &schema.ChatMessageImageURL{ - URL: screenResult.Base64, - }, - }, - }, - }, - Size: screenResult.Resolution, - } - - planningResult, err = service.Plan(ctx, planningOpts) - if err != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: err.Error(), - }, 
errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType)) - } - } - + planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...) modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - aiExecutionResult := &AIExecutionResult{ Type: "action", ModelCallElapsed: modelCallElapsed, ScreenshotElapsed: screenResult.Elapsed, ImagePath: screenResult.ImagePath, Resolution: &screenResult.Resolution, - PlanningResult: planningResult, + PlanningResult: &planningResult.PlanningResult, + } + if err != nil { + aiExecutionResult.Error = err.Error() + return aiExecutionResult, errors.Wrap(err, "get next action failed") } - // Step 2: Execute tool calls + // Step 3: Execute tool calls for _, toolCall := range planningResult.ToolCalls { err = dExt.invokeToolCall(ctx, toolCall, opts...) if err != nil { @@ -309,9 +239,13 @@ func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screen return aiExecutionResult, nil } -// AIAssert with WingsService priority support +// AIAssert performs AI-driven assertion and returns detailed execution result func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) { - log.Info().Str("assertion", assertion).Msg("performing AI assertion") + if dExt.LLMService == nil { + return nil, errors.New("LLM service is not initialized") + } + + ctx := dExt.addDeviceContextForWings(context.Background()) // Step 1: Take screenshot and convert to base64 screenResult, err := dExt.GetScreenResult( @@ -322,29 +256,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* return nil, err } - // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { - log.Info().Msg("using Wings service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...) 
- } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...) -} - -// executeAIAssert executes AIAssert using any AI service (generic implementation) -func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { - // Step 1: Prepare context and options - ctx := context.Background() - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } - assertResult := &AIExecutionResult{ Type: "assert", ScreenshotElapsed: screenResult.Elapsed, @@ -352,61 +263,47 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu Resolution: &screenResult.Resolution, } - // Step 2: Call service and measure time + // Step 2: Call model and measure time modelCallStartTime := time.Now() assertOpts := &ai.AssertOptions{ Assertion: assertion, Screenshot: screenResult.Base64, Size: screenResult.Resolution, } - - result, err := service.Assert(ctx, assertOpts) + result, err := dExt.LLMService.Assert(ctx, assertOpts) assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds() assertResult.AssertionResult = result if err != nil { assertResult.Error = err.Error() - return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType)) + return assertResult, errors.Wrap(err, "AI assertion failed") } + // For assertion failure, we should still return success but mark the assertion as failed + // This ensures that the AIResult (including screenshot and thought) is properly saved and displayed if !result.Pass { - assertResult.Error = result.Thought + assertResult.Error = result.Thought // Store the failure reason for reporting } return assertResult, nil } -// 
addDeviceContextForWings adds device information to context for Wings service -func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { - device := dExt.GetDevice() - if device == nil { - return ctx - } +// Context key types to avoid collisions +type contextKey string - // Add device ID to context - ctx = context.WithValue(ctx, "device_id", device.UUID()) +const ( + deviceIDKey contextKey = "device_id" + platformTypeKey contextKey = "platform_type" +) - // Add platform type to context - platformType := "android" // default - switch device.(type) { - case *AndroidDevice: - platformType = "android" - case *IOSDevice: - platformType = "ios" - case *HarmonyDevice: - platformType = "harmony" - } - ctx = context.WithValue(ctx, "platform_type", platformType) - - return ctx -} - -// PlanNextAction (original implementation - preserved) +// PlanNextAction performs planning and returns unified planning information func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) { if dExt.LLMService == nil { return nil, errors.New("LLM service is not initialized") } + ctx = dExt.addDeviceContextForWings(ctx) + // Parse action options to get ResetHistory setting options := option.NewActionOptions(opts...) resetHistory := options.ResetHistory @@ -476,7 +373,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. 
return planningResult, nil } -// isTaskFinished (original implementation - preserved) +// isTaskFinished checks if the task is completed based on the planning result func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool { // Check if there are no tool calls (no actions to execute) if len(planningResult.ToolCalls) == 0 { @@ -495,7 +392,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo return false } -// invokeToolCall (original implementation - preserved) +// invokeToolCall invokes the tool call func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error { // Parse arguments arguments := make(map[string]interface{}) @@ -522,7 +419,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa return nil } -// PlanningExecutionResult (original implementation - preserved) +// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results type PlanningExecutionResult struct { ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName) // Planning process information @@ -539,7 +436,7 @@ type PlanningExecutionResult struct { SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning } -// AIExecutionResult (original implementation - preserved) +// AIExecutionResult represents a unified result structure for all AI operations type AIExecutionResult struct { Type string `json:"type"` // operation type: "query", "action", "assert" ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds @@ -556,7 +453,7 @@ type AIExecutionResult struct { Error string `json:"error,omitempty"` // error message if operation failed } -// SubActionResult (original implementation - preserved) +// SubActionResult represents a sub-action within a start_to_goal 
action type SubActionResult struct { ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input") Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action @@ -571,7 +468,7 @@ type SessionData struct { ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results } -// AIQuery (original implementation - preserved) +// AIQuery performs AI-driven query and returns detailed execution result func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) { if dExt.LLMService == nil { return nil, errors.New("LLM service is not initialized") @@ -616,3 +513,28 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec } return aiResult, nil } + +// addDeviceContextForWings adds device information to context for Wings service +func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { + device := dExt.GetDevice() + if device == nil { + return ctx + } + + // Add device ID to context + ctx = context.WithValue(ctx, deviceIDKey, device.UUID()) + + // Add platform type to context + platformType := "android" // default + switch device.(type) { + case *AndroidDevice: + platformType = "android" + case *IOSDevice: + platformType = "ios" + case *HarmonyDevice: + platformType = "harmony" + } + ctx = context.WithValue(ctx, platformTypeKey, platformType) + + return ctx +} diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index 83904cb4..01738846 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -23,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) { assert.Nil(t, err) } -//func TestDriverExt_StartToGoal(t *testing.T) { -// driver := setupDriverExt(t) -// -// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明: -// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。 -// 2. 
连接规则: -// - 两个相同的图案可以通过不超过三条直线连接。 -// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。 -// - 连接线的转折次数不能超过两次。 -// 3. 游戏界面: -// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。 -// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。 -// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。 -// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。 -// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。 -// -// 注意事项: -// 1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败 -// 2、不要连续 2 次点击同一个图案 -// 3、不要犯重复的错误 -// ` -// -// userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作" -// -// //_, err := driver.StartToGoal(context.Background(), userInstruction) -// //assert.Nil(t, err) -//} +func TestDriverExt_StartToGoal(t *testing.T) { + driver := setupDriverExt(t) + + userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明: + 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。 + 2. 连接规则: + - 两个相同的图案可以通过不超过三条直线连接。 + - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。 + - 连接线的转折次数不能超过两次。 + 3. 游戏界面: + - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。 + - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。 + 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。 + 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。 + 6. 
关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。 + + 注意事项: + 1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败 + 2、不要连续 2 次点击同一个图案 + 3、不要犯重复的错误 + ` + + userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作" + + _, err := driver.StartToGoal(context.Background(), userInstruction) + assert.Nil(t, err) +} func TestDriverExt_PlanNextAction(t *testing.T) { driver := setupDriverExt(t) diff --git a/uixt/option/ai.go b/uixt/option/ai.go index e62879ea..2bc790c2 100644 --- a/uixt/option/ai.go +++ b/uixt/option/ai.go @@ -58,6 +58,7 @@ const ( DOUBAO_SEED_1_6_250615 LLMServiceType = "doubao-seed-1.6-250615" OPENAI_GPT_4O LLMServiceType = "openai/gpt-4o" DEEPSEEK_R1_250528 LLMServiceType = "deepseek-r1-250528" + WINGS_SERVICE LLMServiceType = "wings-service" ) func WithLLMService(modelType LLMServiceType) AIServiceOption { diff --git a/uixt/sdk.go b/uixt/sdk.go index 1bf4204a..8175fdfc 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -27,10 +27,6 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err var err error - // Initialize Wings service (always available) - driverExt.WingsService = ai.NewWingsService() - log.Info().Msg("Wings service initialized") - // Handle LLM service initialization if services.LLMConfig != nil { // Use advanced LLM configuration if provided @@ -53,15 +49,11 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err } // Register uixt MCP tools to LLM service if it exists - mcpTools := driverExt.client.Server.ListTools() - einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") - if err = driverExt.WingsService.RegisterTools(einoTools); err != nil { - log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)") - } - if driverExt.LLMService != nil { - if err = driverExt.LLMService.RegisterTools(einoTools); err != nil { - log.Warn().Err(err).Msg("failed to register uixt tools to LLM service") + mcpTools := driverExt.client.Server.ListTools() + einoTools := 
ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") + if err := driverExt.LLMService.RegisterTools(einoTools); err != nil { + log.Warn().Err(err).Msg("failed to register uixt tools") } } @@ -71,9 +63,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err // XTDriver = IDriver + AI type XTDriver struct { IDriver - CVService ai.ICVService // OCR/CV - LLMService ai.ILLMService // LLM (fallback service) - WingsService ai.ILLMService // Wings API service (priority service) + CVService ai.ICVService // OCR/CV + LLMService ai.ILLMService // LLM services *option.AIServiceOptions // AI services options client *MCPClient4XTDriver // MCP Client for built-in uixt server From 8b83f3ac78d2f47007e010453ec062de6af3e7ea Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Sun, 20 Jul 2025 23:47:41 +0800 Subject: [PATCH 2/9] fix: remove internal url from code --- uixt/ai/wings_service.go | 45 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/uixt/ai/wings_service.go b/uixt/ai/wings_service.go index 5dea7484..adfd18ea 100644 --- a/uixt/ai/wings_service.go +++ b/uixt/ai/wings_service.go @@ -16,41 +16,40 @@ import ( "github.com/pkg/errors" "github.com/rs/zerolog/log" + "github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/internal/builtin" ) // WingsService implements ILLMService interface using external Wings API type WingsService struct { - apiURL string - bizId string - isExternal bool - accessKey string - secretKey string + apiURL string + bizId string + accessKey string + secretKey string } // NewWingsService creates a new Wings service instance -func NewWingsService() ILLMService { +func NewWingsService() (ILLMService, error) { // Check for environment variables for external API access - accessKey := "" - secretKey := "" - isExternal := false - apiURL := "https://vedem-algorithm.bytedance.net/algorithm/StepActionDecision" + apiURL := os.Getenv("VEDEM_WINGS_API_URL") + 
accessKey := os.Getenv("VEDEM_WINGS_AK") + secretKey := os.Getenv("VEDEM_WINGS_SK") + bizID := os.Getenv("VEDEM_WINGS_BIZ_ID") - // If environment variables are set, use external API with authentication - if ak, sk := os.Getenv("VEDEM_WINGS_AK"), os.Getenv("VEDEM_WINGS_SK"); ak != "" && sk != "" { - accessKey = ak - secretKey = sk - isExternal = true - apiURL = "https://vedem-algorithm.zijieapi.com/algorithm/StepActionDecision" + // check required env + if apiURL == "" { + return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_API_URL") + } + if bizID == "" { + return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_BIZ_ID") } return &WingsService{ - apiURL: apiURL, - bizId: "489fdae44de048e0922a32834ea668af", - isExternal: isExternal, - accessKey: accessKey, - secretKey: secretKey, - } + apiURL: apiURL, + bizId: bizID, + accessKey: accessKey, + secretKey: secretKey, + }, nil } // Plan implements the ILLMService.Plan method using Wings API @@ -409,7 +408,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ httpReq.Header.Set("Accept", "application/json") // Add authentication headers if using external API - if w.isExternal { + if w.accessKey != "" && w.secretKey != "" { signToken := "UNSIGNED-PAYLOAD" token := builtin.Sign("auth-v2", w.accessKey, w.secretKey, []byte(signToken)) From 318c94286005bd821ace957a789d5675a7f95ff7 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 14:52:53 +0800 Subject: [PATCH 3/9] change: addDeviceContextForWings --- internal/version/VERSION | 2 +- uixt/driver_ext_ai.go | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 2472fe80..d486bb38 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-250720 +v5.0.0-250721 diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 9fa43ffb..1b384f6f 100644 --- 
a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -288,14 +288,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* return assertResult, nil } -// Context key types to avoid collisions -type contextKey string - -const ( - deviceIDKey contextKey = "device_id" - platformTypeKey contextKey = "platform_type" -) - // PlanNextAction performs planning and returns unified planning information func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) { if dExt.LLMService == nil { @@ -514,6 +506,14 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec return aiResult, nil } +// Context key types to avoid collisions +type contextKey string + +const ( + deviceIDKey contextKey = "device_id" + platformTypeKey contextKey = "platform_type" +) + // addDeviceContextForWings adds device information to context for Wings service func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { device := dExt.GetDevice() @@ -533,6 +533,10 @@ func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Cont platformType = "ios" case *HarmonyDevice: platformType = "harmony" + case *BrowserDevice: + platformType = "browser" + default: + platformType = "unknown" } ctx = context.WithValue(ctx, platformTypeKey, platformType) From 4890c098faad889743d6ef95f720e8694c05d5f4 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 15:10:09 +0800 Subject: [PATCH 4/9] change: remove addDeviceContextForWings --- uixt/ai/wings_service.go | 4 ++-- uixt/driver_ext_ai.go | 43 +--------------------------------------- uixt/sdk.go | 18 +++++++++++------ 3 files changed, 15 insertions(+), 50 deletions(-) diff --git a/uixt/ai/wings_service.go b/uixt/ai/wings_service.go index 8e134714..35fa77db 100644 --- a/uixt/ai/wings_service.go +++ b/uixt/ai/wings_service.go @@ -345,8 +345,8 @@ func (w *WingsService) 
extractScreenshotFromMessage(message *schema.Message) (st } // getDeviceInfoFromContext gets device info from context with fallback -func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo { - // Fallback to default device info +func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot string) WingsDeviceInfo { + // use default device info return WingsDeviceInfo{ DeviceID: "default-device", NowImage: screenshot, diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 1b384f6f..fa376cba 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -245,8 +245,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* return nil, errors.New("LLM service is not initialized") } - ctx := dExt.addDeviceContextForWings(context.Background()) - // Step 1: Take screenshot and convert to base64 screenResult, err := dExt.GetScreenResult( option.WithScreenShotFileName("ai_assert"), @@ -270,7 +268,7 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* Screenshot: screenResult.Base64, Size: screenResult.Resolution, } - result, err := dExt.LLMService.Assert(ctx, assertOpts) + result, err := dExt.LLMService.Assert(context.Background(), assertOpts) assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds() assertResult.AssertionResult = result @@ -294,8 +292,6 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. return nil, errors.New("LLM service is not initialized") } - ctx = dExt.addDeviceContextForWings(ctx) - // Parse action options to get ResetHistory setting options := option.NewActionOptions(opts...) 
resetHistory := options.ResetHistory @@ -505,40 +501,3 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec } return aiResult, nil } - -// Context key types to avoid collisions -type contextKey string - -const ( - deviceIDKey contextKey = "device_id" - platformTypeKey contextKey = "platform_type" -) - -// addDeviceContextForWings adds device information to context for Wings service -func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { - device := dExt.GetDevice() - if device == nil { - return ctx - } - - // Add device ID to context - ctx = context.WithValue(ctx, deviceIDKey, device.UUID()) - - // Add platform type to context - platformType := "android" // default - switch device.(type) { - case *AndroidDevice: - platformType = "android" - case *IOSDevice: - platformType = "ios" - case *HarmonyDevice: - platformType = "harmony" - case *BrowserDevice: - platformType = "browser" - default: - platformType = "unknown" - } - ctx = context.WithValue(ctx, platformTypeKey, platformType) - - return ctx -} diff --git a/uixt/sdk.go b/uixt/sdk.go index 8175fdfc..cd00f036 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -29,23 +29,29 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err // Handle LLM service initialization if services.LLMConfig != nil { - // Use advanced LLM configuration if provided + // Use advanced LLM service configuration if provided driverExt.LLMService, err = ai.NewLLMServiceWithOptionConfig(services.LLMConfig) if err != nil { - log.Warn().Err(err).Msg("init llm service with config failed, Wings service will be used") + log.Warn().Err(err).Msg("init llm service with config failed") } else { log.Info().Msg("LLM service initialized with advanced config") } } else if services.LLMService != "" { - // Fallback to simple LLM service if no config provided + // Use simple LLM service configuration if provided driverExt.LLMService, err = ai.NewLLMService(services.LLMService) 
if err != nil { - log.Warn().Err(err).Msg("init llm service failed, Wings service will be used") + log.Warn().Err(err).Msg("init llm service failed") } else { - log.Info().Msg("LLM service initialized") + log.Info().Msg("LLM service initialized with simple config") } } else { - log.Info().Msg("no LLM service config provided, using Wings service only") + // Use Wings service as fallback + driverExt.LLMService, err = ai.NewWingsService() + if err != nil { + log.Warn().Err(err).Msg("init Wings service failed") + } else { + log.Info().Msg("Wings service initialized") + } } // Register uixt MCP tools to LLM service if it exists From 88bda7c21560242f837e086edafe8cfd4aea81c8 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 16:28:38 +0800 Subject: [PATCH 5/9] fix: avoid recreate driver in tests --- uixt/android_test.go | 54 +++++++++++++++++++++++++++++--------------- uixt/harmony_test.go | 23 ++++++++++++++----- uixt/ios_test.go | 31 ++++++++++++++++++------- 3 files changed, 76 insertions(+), 32 deletions(-) diff --git a/uixt/android_test.go b/uixt/android_test.go index af76109d..794b9160 100644 --- a/uixt/android_test.go +++ b/uixt/android_test.go @@ -16,29 +16,47 @@ import ( ) func setupADBDriverExt(t *testing.T) *XTDriver { - device, err := NewAndroidDevice() - require.Nil(t, err) - device.Options.UIA2 = false - device.Options.LogOn = false - driver, err := device.NewDriver() - require.Nil(t, err) - driverExt, err := NewXTDriver(driver, - option.WithCVService(option.CVServiceTypeVEDEM), - // option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328), - ) + config := DriverCacheConfig{ + Platform: "android", + Serial: "", // Let it auto-detect the device serial + AIOptions: []option.AIServiceOption{ + option.WithCVService(option.CVServiceTypeVEDEM), + option.WithLLMConfig( + option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328). + WithPlannerModel(option.WINGS_SERVICE). 
+ WithAsserterModel(option.WINGS_SERVICE), + ), + }, + } + + driverExt, err := GetOrCreateXTDriver(config) require.Nil(t, err) return driverExt } func setupUIA2DriverExt(t *testing.T) *XTDriver { - device, err := NewAndroidDevice() - require.Nil(t, err) - device.Options.UIA2 = true // use uiautomator2 driver - device.Options.LogOn = false - driver, err := device.NewDriver() - require.Nil(t, err) - driverExt, err := NewXTDriver(driver, - option.WithCVService(option.CVServiceTypeVEDEM)) + // Use cache mechanism with UIA2 enabled + deviceOpts := option.NewDeviceOptions( + option.WithPlatform("android"), + option.WithDeviceUIA2(true), + option.WithDeviceLogOn(false), + ) + + config := DriverCacheConfig{ + Platform: "android", + Serial: "", // Let it auto-detect the device serial + DeviceOpts: deviceOpts, + AIOptions: []option.AIServiceOption{ + option.WithCVService(option.CVServiceTypeVEDEM), + option.WithLLMConfig( + option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328). + WithPlannerModel(option.WINGS_SERVICE). 
+ WithAsserterModel(option.WINGS_SERVICE), + ), + }, + } + + driverExt, err := GetOrCreateXTDriver(config) require.Nil(t, err) return driverExt } diff --git a/uixt/harmony_test.go b/uixt/harmony_test.go index dff5f75b..71dfb440 100644 --- a/uixt/harmony_test.go +++ b/uixt/harmony_test.go @@ -5,17 +5,28 @@ package uixt import ( "testing" - "github.com/httprunner/httprunner/v5/uixt/option" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/httprunner/httprunner/v5/uixt/option" ) func setupHDCDriverExt(t *testing.T) *XTDriver { - device, err := NewHarmonyDevice() - require.Nil(t, err) - hdcDriver, err := NewHDCDriver(device) - require.Nil(t, err) - driverExt, err := NewXTDriver(hdcDriver, option.WithCVService(option.CVServiceTypeVEDEM)) + // Use cache mechanism for Harmony HDC driver + config := DriverCacheConfig{ + Platform: "harmony", + Serial: "", // Let it auto-detect the device serial + AIOptions: []option.AIServiceOption{ + option.WithCVService(option.CVServiceTypeVEDEM), + option.WithLLMConfig( + option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328). + WithPlannerModel(option.WINGS_SERVICE). 
+ WithAsserterModel(option.WINGS_SERVICE), + ), + }, + } + + driverExt, err := GetOrCreateXTDriver(config) require.Nil(t, err) return driverExt } diff --git a/uixt/ios_test.go b/uixt/ios_test.go index 55283f3f..0a0bbde0 100644 --- a/uixt/ios_test.go +++ b/uixt/ios_test.go @@ -16,14 +16,29 @@ import ( ) func setupWDADriverExt(t *testing.T) *XTDriver { - device, err := NewIOSDevice( - option.WithWDAPort(8700), - option.WithWDAMjpegPort(8800), - option.WithWDALogOn(true)) - require.Nil(t, err) - driver, err := device.NewDriver() - require.Nil(t, err) - driverExt, err := NewXTDriver(driver, option.WithCVService(option.CVServiceTypeVEDEM)) + // Use cache mechanism with unified DeviceOptions for iOS WDA driver + deviceOpts := option.NewDeviceOptions( + option.WithPlatform("ios"), + option.WithDeviceWDAPort(8700), + option.WithDeviceWDAMjpegPort(8800), + option.WithDeviceLogOn(true), + ) + + config := DriverCacheConfig{ + Platform: "ios", + Serial: "", // Let it auto-detect the device serial + DeviceOpts: deviceOpts, + AIOptions: []option.AIServiceOption{ + option.WithCVService(option.CVServiceTypeVEDEM), + option.WithLLMConfig( + option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328). + WithPlannerModel(option.WINGS_SERVICE). 
+ WithAsserterModel(option.WINGS_SERVICE), + ), + }, + } + + driverExt, err := GetOrCreateXTDriver(config) require.Nil(t, err) return driverExt } From 739fb42d278ecda9303d604f453076253a041c2a Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 16:53:38 +0800 Subject: [PATCH 6/9] fix: unittest --- uixt/driver_ext_ai.go | 4 +++- uixt/driver_ext_ai_test.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index fa376cba..185f41fc 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -217,7 +217,9 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio ScreenshotElapsed: screenResult.Elapsed, ImagePath: screenResult.ImagePath, Resolution: &screenResult.Resolution, - PlanningResult: &planningResult.PlanningResult, + } + if planningResult != nil { + aiExecutionResult.PlanningResult = &planningResult.PlanningResult } if err != nil { aiExecutionResult.Error = err.Error() diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index 868ef5af..6ca5d2e3 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -314,7 +314,7 @@ func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) { if aiResult.PlanningResult != nil { t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName) - assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api") + assert.Equal(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should use wings-api") } } } From 9e59b7ff9b7f6da7c87d970235af22ea67167c7f Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 18:01:37 +0800 Subject: [PATCH 7/9] refactor: replace copy with set alias --- uixt/mcp_server.go | 20 ++++-- uixt/mcp_server_test.go | 134 ---------------------------------------- uixt/mcp_tools_app.go | 125 ------------------------------------- uixt/mcp_tools_input.go | 59 ------------------ uixt/mcp_tools_touch.go | 73 
---------------------- uixt/option/action.go | 8 --- uixt/sdk.go | 4 +- 7 files changed, 18 insertions(+), 405 deletions(-) diff --git a/uixt/mcp_server.go b/uixt/mcp_server.go index bedaf885..d8f2d4d4 100644 --- a/uixt/mcp_server.go +++ b/uixt/mcp_server.go @@ -61,9 +61,25 @@ func (s *MCPServer4XTDriver) GetToolByAction(actionMethod option.ActionName) Act if s.actionToolMap == nil { return nil } + actionMethod = getActionNameByAlias(actionMethod) return s.actionToolMap[actionMethod] } +func getActionNameByAlias(actionMethod option.ActionName) option.ActionName { + switch strings.ToLower(string(actionMethod)) { + case "terminal_app": + return option.ACTION_AppTerminate + case "open_app": + return option.ACTION_AppLaunch + case "text": + return option.ACTION_Input + case "tap": + return option.ACTION_TapXY + default: + return actionMethod + } +} + // registerTools registers all MCP tools. func (s *MCPServer4XTDriver) registerTools() { // Device Tool @@ -71,7 +87,6 @@ func (s *MCPServer4XTDriver) registerTools() { s.registerTool(&ToolSelectDevice{}) // SelectDevice // Touch Tools - s.registerTool(&ToolTap{}) // tap s.registerTool(&ToolTapXY{}) // tap xy s.registerTool(&ToolTapAbsXY{}) // tap abs xy s.registerTool(&ToolTapByOCR{}) // tap by OCR @@ -89,7 +104,6 @@ func (s *MCPServer4XTDriver) registerTools() { // Input Tools s.registerTool(&ToolInput{}) - s.registerTool(&ToolText{}) s.registerTool(&ToolBackspace{}) s.registerTool(&ToolSetIme{}) @@ -101,9 +115,7 @@ func (s *MCPServer4XTDriver) registerTools() { // App Tools s.registerTool(&ToolListPackages{}) // ListPackages s.registerTool(&ToolLaunchApp{}) // LaunchApp - s.registerTool(&ToolOpenApp{}) // OpenApp s.registerTool(&ToolTerminateApp{}) // TerminateApp - s.registerTool(&ToolTerminateAppNew{}) // TerminateApp (new) s.registerTool(&ToolColdLaunch{}) // ColdLaunch s.registerTool(&ToolAppInstall{}) // AppInstall s.registerTool(&ToolAppUninstall{}) // AppUninstall diff --git a/uixt/mcp_server_test.go 
b/uixt/mcp_server_test.go index 5e85b400..900adc12 100644 --- a/uixt/mcp_server_test.go +++ b/uixt/mcp_server_test.go @@ -79,7 +79,6 @@ func TestToolInterfaces(t *testing.T) { tools := []ActionTool{ &ToolListAvailableDevices{}, &ToolSelectDevice{}, - &ToolTap{}, &ToolTapXY{}, &ToolTapAbsXY{}, &ToolTapByOCR{}, @@ -93,7 +92,6 @@ func TestToolInterfaces(t *testing.T) { &ToolSwipeToTapTexts{}, &ToolDrag{}, &ToolInput{}, - &ToolText{}, &ToolBackspace{}, &ToolScreenShot{}, &ToolGetScreenSize{}, @@ -102,9 +100,7 @@ func TestToolInterfaces(t *testing.T) { &ToolBack{}, &ToolListPackages{}, &ToolLaunchApp{}, - &ToolOpenApp{}, &ToolTerminateApp{}, - &ToolTerminateAppNew{}, &ToolColdLaunch{}, &ToolAppInstall{}, &ToolAppUninstall{}, @@ -246,45 +242,6 @@ func TestToolSelectDevice(t *testing.T) { assert.Equal(t, string(option.ACTION_SelectDevice), request.Params.Name) } -// TestToolTap tests the ToolTap implementation -func TestToolTap(t *testing.T) { - tool := &ToolTap{} - - // Test Name - assert.Equal(t, option.ACTION_Tap, tool.Name()) - - // Test Description - assert.NotEmpty(t, tool.Description()) - - // Test Options - options := tool.Options() - assert.NotNil(t, options) - - // Test ConvertActionToCallToolRequest with valid params - action := option.MobileAction{ - Method: option.ACTION_Tap, - Params: []float64{0.5, 0.6}, - ActionOptions: option.ActionOptions{ - Duration: 1.5, - }, - } - request, err := tool.ConvertActionToCallToolRequest(action) - assert.NoError(t, err) - assert.Equal(t, string(option.ACTION_Tap), request.Params.Name) - args := request.GetArguments() - assert.Equal(t, 0.5, args["x"]) - assert.Equal(t, 0.6, args["y"]) - assert.Equal(t, 1.5, args["duration"]) - - // Test ConvertActionToCallToolRequest with invalid params - invalidAction := option.MobileAction{ - Method: option.ACTION_Tap, - Params: "invalid", - } - _, err = tool.ConvertActionToCallToolRequest(invalidAction) - assert.Error(t, err) -} - // TestToolTapXY tests the ToolTapXY implementation func 
TestToolTapXY(t *testing.T) { tool := &ToolTapXY{} @@ -827,31 +784,6 @@ func TestToolInput(t *testing.T) { assert.Equal(t, "Hello World", request.GetArguments()["text"]) } -// TestToolText tests the ToolText implementation -func TestToolText(t *testing.T) { - tool := &ToolText{} - - // Test Name - assert.Equal(t, option.ACTION_Text, tool.Name()) - - // Test Description - assert.NotEmpty(t, tool.Description()) - - // Test Options - options := tool.Options() - assert.NotNil(t, options) - - // Test ConvertActionToCallToolRequest with valid params - action := option.MobileAction{ - Method: option.ACTION_Text, - Params: "Hello World", - } - request, err := tool.ConvertActionToCallToolRequest(action) - assert.NoError(t, err) - assert.Equal(t, string(option.ACTION_Text), request.Params.Name) - assert.Equal(t, "Hello World", request.GetArguments()["text"]) -} - // TestToolBackspace tests the ToolBackspace implementation func TestToolBackspace(t *testing.T) { tool := &ToolBackspace{} @@ -1086,39 +1018,6 @@ func TestToolLaunchApp(t *testing.T) { assert.Error(t, err) } -// TestToolOpenApp tests the ToolOpenApp implementation -func TestToolOpenApp(t *testing.T) { - tool := &ToolOpenApp{} - - // Test Name - assert.Equal(t, option.ACTION_OpenApp, tool.Name()) - - // Test Description - assert.NotEmpty(t, tool.Description()) - - // Test Options - options := tool.Options() - assert.NotNil(t, options) - - // Test ConvertActionToCallToolRequest with valid params - action := option.MobileAction{ - Method: option.ACTION_OpenApp, - Params: "com.example.app", - } - request, err := tool.ConvertActionToCallToolRequest(action) - assert.NoError(t, err) - assert.Equal(t, string(option.ACTION_OpenApp), request.Params.Name) - assert.Equal(t, "com.example.app", request.GetArguments()["packageName"]) - - // Test ConvertActionToCallToolRequest with invalid params - invalidAction := option.MobileAction{ - Method: option.ACTION_OpenApp, - Params: 123, // should be string - } - _, err = 
tool.ConvertActionToCallToolRequest(invalidAction) - assert.Error(t, err) -} - // TestToolTerminateApp tests the ToolTerminateApp implementation func TestToolTerminateApp(t *testing.T) { tool := &ToolTerminateApp{} @@ -1152,39 +1051,6 @@ func TestToolTerminateApp(t *testing.T) { assert.Error(t, err) } -// TestToolTerminateAppNew tests the ToolTerminateAppNew implementation -func TestToolTerminateAppNew(t *testing.T) { - tool := &ToolTerminateAppNew{} - - // Test Name - assert.Equal(t, option.ACTION_TerminateApp, tool.Name()) - - // Test Description - assert.NotEmpty(t, tool.Description()) - - // Test Options - options := tool.Options() - assert.NotNil(t, options) - - // Test ConvertActionToCallToolRequest with valid params - action := option.MobileAction{ - Method: option.ACTION_TerminateApp, - Params: "com.example.app", - } - request, err := tool.ConvertActionToCallToolRequest(action) - assert.NoError(t, err) - assert.Equal(t, string(option.ACTION_TerminateApp), request.Params.Name) - assert.Equal(t, "com.example.app", request.GetArguments()["packageName"]) - - // Test ConvertActionToCallToolRequest with invalid params - invalidAction := option.MobileAction{ - Method: option.ACTION_TerminateApp, - Params: []int{1, 2, 3}, // should be string - } - _, err = tool.ConvertActionToCallToolRequest(invalidAction) - assert.Error(t, err) -} - // TestToolColdLaunch tests the ToolColdLaunch implementation func TestToolColdLaunch(t *testing.T) { tool := &ToolColdLaunch{} diff --git a/uixt/mcp_tools_app.go b/uixt/mcp_tools_app.go index 1a269492..cf8a4621 100644 --- a/uixt/mcp_tools_app.go +++ b/uixt/mcp_tools_app.go @@ -395,131 +395,6 @@ func (t *ToolGetForegroundApp) ConvertActionToCallToolRequest(action option.Mobi return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil } -// ToolOpenApp implements the open_app tool call. 
-type ToolOpenApp struct { - // Return data fields - these define the structure of data returned by this tool - PackageName string `json:"packageName" desc:"Package name of the opened app"` -} - -func (t *ToolOpenApp) Name() option.ActionName { - return option.ACTION_OpenApp -} - -func (t *ToolOpenApp) Description() string { - return "Open an app on mobile device using its package name and wait for the app to load" -} - -func (t *ToolOpenApp) Options() []mcp.ToolOption { - unifiedReq := &option.ActionOptions{} - return unifiedReq.GetMCPOptions(option.ACTION_OpenApp) -} - -func (t *ToolOpenApp) Implement() server.ToolHandlerFunc { - return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - arguments := request.GetArguments() - driverExt, err := setupXTDriver(ctx, arguments) - if err != nil { - return nil, fmt.Errorf("setup driver failed: %w", err) - } - - unifiedReq, err := parseActionOptions(arguments) - if err != nil { - return nil, err - } - - if unifiedReq.PackageName == "" { - return nil, fmt.Errorf("package_name is required") - } - - // Open app action logic - err = driverExt.AppLaunch(unifiedReq.PackageName) - if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Open app failed: %s", err.Error())), err - } - - message := fmt.Sprintf("Successfully opened app: %s", unifiedReq.PackageName) - returnData := ToolOpenApp{PackageName: unifiedReq.PackageName} - - return NewMCPSuccessResponse(message, &returnData), nil - } -} - -func (t *ToolOpenApp) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { - if packageName, ok := action.Params.(string); ok { - arguments := map[string]any{ - "packageName": packageName, - } - return BuildMCPCallToolRequest(t.Name(), arguments, action), nil - } - return mcp.CallToolRequest{}, fmt.Errorf("invalid open app params: %v", action.Params) -} - -// ToolTerminateAppNew implements the terminal_app tool call. 
-type ToolTerminateAppNew struct { - // Return data fields - these define the structure of data returned by this tool - PackageName string `json:"packageName" desc:"Package name of the terminated app"` - WasRunning bool `json:"wasRunning" desc:"Whether the app was actually running before termination"` -} - -func (t *ToolTerminateAppNew) Name() option.ActionName { - return option.ACTION_TerminateApp -} - -func (t *ToolTerminateAppNew) Description() string { - return "Terminate a running app on mobile device using its package name" -} - -func (t *ToolTerminateAppNew) Options() []mcp.ToolOption { - unifiedReq := &option.ActionOptions{} - return unifiedReq.GetMCPOptions(option.ACTION_TerminateApp) -} - -func (t *ToolTerminateAppNew) Implement() server.ToolHandlerFunc { - return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - arguments := request.GetArguments() - driverExt, err := setupXTDriver(ctx, arguments) - if err != nil { - return nil, fmt.Errorf("setup driver failed: %w", err) - } - - unifiedReq, err := parseActionOptions(arguments) - if err != nil { - return nil, err - } - - if unifiedReq.PackageName == "" { - return nil, fmt.Errorf("package_name is required") - } - - // Terminate app action logic - success, err := driverExt.AppTerminate(unifiedReq.PackageName) - if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", err.Error())), err - } - if !success { - log.Warn().Str("packageName", unifiedReq.PackageName).Msg("app was not running") - } - - message := fmt.Sprintf("Successfully terminated app: %s", unifiedReq.PackageName) - returnData := ToolTerminateAppNew{ - PackageName: unifiedReq.PackageName, - WasRunning: success, - } - - return NewMCPSuccessResponse(message, &returnData), nil - } -} - -func (t *ToolTerminateAppNew) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { - if packageName, ok := action.Params.(string); ok { - arguments := 
map[string]any{ - "packageName": packageName, - } - return BuildMCPCallToolRequest(t.Name(), arguments, action), nil - } - return mcp.CallToolRequest{}, fmt.Errorf("invalid terminate app params: %v", action.Params) -} - // ToolColdLaunch implements the cold_launch tool call. type ToolColdLaunch struct { // Return data fields - these define the structure of data returned by this tool diff --git a/uixt/mcp_tools_input.go b/uixt/mcp_tools_input.go index cda75467..8f4b306e 100644 --- a/uixt/mcp_tools_input.go +++ b/uixt/mcp_tools_input.go @@ -124,65 +124,6 @@ func (t *ToolSetIme) ConvertActionToCallToolRequest(action option.MobileAction) return mcp.CallToolRequest{}, fmt.Errorf("invalid set ime params: %v", action.Params) } -// ToolText implements the text tool call. -type ToolText struct { - // Return data fields - these define the structure of data returned by this tool - Text string `json:"text" desc:"Text that was input"` -} - -func (t *ToolText) Name() option.ActionName { - return option.ACTION_Text -} - -func (t *ToolText) Description() string { - return "Input text into the currently focused element or input field" -} - -func (t *ToolText) Options() []mcp.ToolOption { - unifiedReq := &option.ActionOptions{} - return unifiedReq.GetMCPOptions(option.ACTION_Text) -} - -func (t *ToolText) Implement() server.ToolHandlerFunc { - return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - arguments := request.GetArguments() - driverExt, err := setupXTDriver(ctx, arguments) - if err != nil { - return nil, fmt.Errorf("setup driver failed: %w", err) - } - - unifiedReq, err := parseActionOptions(arguments) - if err != nil { - return nil, err - } - - if unifiedReq.Text == "" { - return nil, fmt.Errorf("text is required") - } - - opts := unifiedReq.Options() - - // Text input action logic - err = driverExt.Input(unifiedReq.Text, opts...) 
- if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Text input failed: %s", err.Error())), err - } - - message := fmt.Sprintf("Successfully input text: %s", unifiedReq.Text) - returnData := ToolText{Text: unifiedReq.Text} - - return NewMCPSuccessResponse(message, &returnData), nil - } -} - -func (t *ToolText) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { - text := fmt.Sprintf("%v", action.Params) - arguments := map[string]any{ - "text": text, - } - return BuildMCPCallToolRequest(t.Name(), arguments, action), nil -} - // ToolBackspace implements the backspace tool call. type ToolBackspace struct { // Return data fields - these define the structure of data returned by this tool diff --git a/uixt/mcp_tools_touch.go b/uixt/mcp_tools_touch.go index e43678ba..f78d7ef1 100644 --- a/uixt/mcp_tools_touch.go +++ b/uixt/mcp_tools_touch.go @@ -84,79 +84,6 @@ func (t *ToolTapXY) ConvertActionToCallToolRequest(action option.MobileAction) ( return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params) } -// ToolTap implements the tap tool call. 
-type ToolTap struct { - // Return data fields - these define the structure of data returned by this tool - X float64 `json:"x" desc:"X coordinate where tap was performed"` - Y float64 `json:"y" desc:"Y coordinate where tap was performed"` -} - -func (t *ToolTap) Name() option.ActionName { - return option.ACTION_Tap -} - -func (t *ToolTap) Description() string { - return "Tap on the screen at given relative coordinates (0.0-1.0 range)" -} - -func (t *ToolTap) Options() []mcp.ToolOption { - unifiedReq := &option.ActionOptions{} - return unifiedReq.GetMCPOptions(option.ACTION_Tap) -} - -func (t *ToolTap) Implement() server.ToolHandlerFunc { - return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - arguments := request.GetArguments() - driverExt, err := setupXTDriver(ctx, arguments) - if err != nil { - return nil, fmt.Errorf("setup driver failed: %w", err) - } - - unifiedReq, err := parseActionOptions(arguments) - if err != nil { - return nil, err - } - - // Build all options from request arguments - opts := unifiedReq.Options() - - // Validate required parameters - if unifiedReq.X == 0 || unifiedReq.Y == 0 { - return nil, fmt.Errorf("x and y coordinates are required") - } - - // Tap action logic - err = driverExt.TapXY(unifiedReq.X, unifiedReq.Y, opts...) 
- if err != nil { - return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), err - } - - message := fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y) - returnData := ToolTap{ - X: unifiedReq.X, - Y: unifiedReq.Y, - } - - return NewMCPSuccessResponse(message, &returnData), nil - } -} - -func (t *ToolTap) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) { - if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 { - x, y := params[0], params[1] - arguments := map[string]any{ - "x": x, - "y": y, - } - // Add duration if available from action options - if duration := action.ActionOptions.Duration; duration > 0 { - arguments["duration"] = duration - } - return BuildMCPCallToolRequest(t.Name(), arguments, action), nil - } - return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params) -} - // ToolTapAbsXY implements the tap_abs_xy tool call. 
type ToolTapAbsXY struct { // Return data fields - these define the structure of data returned by this tool diff --git a/uixt/option/action.go b/uixt/option/action.go index 6521ba3c..edbffc31 100644 --- a/uixt/option/action.go +++ b/uixt/option/action.go @@ -43,9 +43,7 @@ const ( ACTION_AppClear ActionName = "app_clear" ACTION_AppStart ActionName = "app_start" ACTION_AppLaunch ActionName = "app_launch" // 启动 app 并堵塞等待 app 首屏加载完成 - ACTION_OpenApp ActionName = "open_app" // 启动 app 并堵塞等待 app 首屏加载完成 ACTION_AppTerminate ActionName = "app_terminate" - ACTION_TerminateApp ActionName = "terminal_app" ACTION_ColdLaunch ActionName = "cold_launch" ACTION_AppStop ActionName = "app_stop" ACTION_ScreenShot ActionName = "screenshot" @@ -61,7 +59,6 @@ const ( // UI handling ACTION_Home ActionName = "home" - ACTION_Tap ActionName = "tap" // generic tap action ACTION_TapXY ActionName = "tap_xy" ACTION_TapAbsXY ActionName = "tap_abs_xy" ACTION_TapByOCR ActionName = "tap_ocr" @@ -73,7 +70,6 @@ const ( ACTION_SwipeCoordinate ActionName = "swipe_coordinate" // swipe by coordinates (fromX, fromY, toX, toY) ACTION_Drag ActionName = "drag" ACTION_Input ActionName = "input" - ACTION_Text ActionName = "text" ACTION_PressButton ActionName = "press_button" ACTION_Back ActionName = "back" ACTION_KeyCode ActionName = "keycode" @@ -605,7 +601,6 @@ func WithOutputSchema(schema interface{}) ActionOption { func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption { // Define field mappings for different action types fieldMappings := map[ActionName][]string{ - ACTION_Tap: {"platform", "serial", "x", "y", "duration"}, ACTION_TapXY: {"platform", "serial", "x", "y", "duration"}, ACTION_TapAbsXY: {"platform", "serial", "x", "y", "duration"}, ACTION_TapByOCR: {"platform", "serial", "text", "ignoreNotFoundError", "maxRetryTimes", "index", "regex", "tapRandomRect"}, @@ -616,12 +611,9 @@ func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption { ACTION_Swipe: 
{"platform", "serial", "direction", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"}, ACTION_Drag: {"platform", "serial", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"}, ACTION_Input: {"platform", "serial", "text", "frequency"}, - ACTION_Text: {"platform", "serial", "text", "frequency"}, ACTION_Backspace: {"platform", "serial", "count"}, ACTION_AppLaunch: {"platform", "serial", "packageName"}, - ACTION_OpenApp: {"platform", "serial", "packageName"}, ACTION_AppTerminate: {"platform", "serial", "packageName"}, - ACTION_TerminateApp: {"platform", "serial", "packageName"}, ACTION_ColdLaunch: {"platform", "serial", "packageName"}, ACTION_AppInstall: {"platform", "serial", "appUrl", "packageName"}, ACTION_AppUninstall: {"platform", "serial", "packageName"}, diff --git a/uixt/sdk.go b/uixt/sdk.go index cd00f036..dc16a67a 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -58,8 +58,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err if driverExt.LLMService != nil { mcpTools := driverExt.client.Server.ListTools() einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") - if err := driverExt.LLMService.RegisterTools(einoTools); err != nil { - log.Warn().Err(err).Msg("failed to register uixt tools") + if err = driverExt.LLMService.RegisterTools(einoTools); err != nil { + log.Warn().Err(err).Msg("failed to register uixt tools to LLM service") } } From fbca5907c9d1bf748db0fadd0f43833bdbe6fbcb Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 20:24:51 +0800 Subject: [PATCH 8/9] fix: compress with compression rate of 95 --- uixt/driver_ext_screenshot.go | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index b5bb07f9..105c4344 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -336,32 +336,18 @@ func compressImageBufferWithOptions(raw *bytes.Buffer, 
enableResize bool, maxWid newHeight = originalHeight } - // Determine JPEG quality based on image size for optimal compression - jpegQuality := 60 // Default quality for better compression - if newWidth*newHeight > 500000 { // For very large images, use lower quality - jpegQuality = 50 - } else if newWidth*newHeight < 100000 { // For small images, use higher quality - jpegQuality = 70 - } - + jpegQuality := 95 var buf bytes.Buffer - switch strings.ToLower(format) { - case "jpeg", "jpg": - // Use adaptive JPEG compression quality - err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) - case "png": - // Convert PNG to JPEG for better compression - err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) - case "gif": - // Keep GIF format but with reduced colors for better compression - err = gif.Encode(&buf, resizedImg, &gif.Options{NumColors: 64}) + switch format { + case "jpeg", "jpg", "png": + // encode as JPEG with quality 95 (high quality, light compression) + jpegOptions := &jpeg.Options{Quality: jpegQuality} + err = jpeg.Encode(&buf, resizedImg, jpegOptions) + if err != nil { + return nil, err + } default: - // Default to JPEG for unknown formats - err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) - } - - if err != nil { - return nil, err + return nil, fmt.Errorf("unsupported image format: %s", format) } compressedSize := buf.Len() From 9c4a945db6b02f302f91c37e1b11635ecd872755 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 21 Jul 2025 22:00:48 +0800 Subject: [PATCH 9/9] fix: keep compatibility for config ignore_popup --- runner.go | 18 ++++++++++++++++++ step_ui.go | 7 ++++--- uixt/option/android.go | 1 + uixt/option/browser.go | 9 +++++---- uixt/option/harmony.go | 5 +++-- uixt/option/ios.go | 1 + 6 files changed, 32 insertions(+), 9 deletions(-) diff --git a/runner.go b/runner.go index a93c96f1..809c274e 100644 --- a/runner.go +++ b/runner.go @@ -1077,6 +1077,24 @@ func (r *SessionRunner) GetTransactions() 
map[string]map[TransactionType]time.Ti return r.transactions } +// keep for compatibility +func (r *SessionRunner) ignorePopup(osType string) bool { + config := r.caseRunner.TestCase.Config.Get() + if osType == string(StepTypeAndroid) && len(config.Android) > 0 { + return config.Android[0].IgnorePopup + } + if osType == string(StepTypeIOS) && len(config.IOS) > 0 { + return config.IOS[0].IgnorePopup + } + if osType == string(StepTypeHarmony) && len(config.Harmony) > 0 { + return config.Harmony[0].IgnorePopup + } + if osType == string(stepTypeBrowser) && len(config.Browser) > 0 { + return config.Browser[0].IgnorePopup + } + return false +} + // saveJSONCase saves the original JSON case content to the results directory func saveJSONCase(casePath string) error { // Read the original JSON case content diff --git a/step_ui.go b/step_ui.go index e103c255..9e2d7682 100644 --- a/step_ui.go +++ b/step_ui.go @@ -798,10 +798,11 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err config = s.caseRunner.Config.Get() } // automatic handling of pop-up windows on each step finished, default to disabled - // priority: step ignore_popup > config auto_popup_handler > step auto_popup_handler + // priority: config ignore_popup > step ignore_popup > config auto_popup_handler > step auto_popup_handler shouldHandlePopup := false - - if stepIgnorePopup { + if s.ignorePopup(mobileStep.OSType) { + shouldHandlePopup = false + } else if stepIgnorePopup { // step level config, keep for compatibility shouldHandlePopup = false } else if config != nil && config.AutoPopupHandler { diff --git a/uixt/option/android.go b/uixt/option/android.go index 42ed31d3..bfa991a6 100644 --- a/uixt/option/android.go +++ b/uixt/option/android.go @@ -5,6 +5,7 @@ import "github.com/httprunner/httprunner/v5/pkg/gadb" type AndroidDeviceOptions struct { SerialNumber string `json:"serial,omitempty" yaml:"serial,omitempty"` LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` + 
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility // adb AdbServerHost string `json:"adb_server_host,omitempty" yaml:"adb_server_host,omitempty"` diff --git a/uixt/option/browser.go b/uixt/option/browser.go index 81dc17ef..a4405436 100644 --- a/uixt/option/browser.go +++ b/uixt/option/browser.go @@ -9,10 +9,11 @@ func NewBrowserDeviceOptions(opts ...BrowserDeviceOption) *BrowserDeviceOptions } type BrowserDeviceOptions struct { - BrowserID string `json:"browser_id,omitempty" yaml:"browser_id,omitempty"` - LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` - Width int `json:"width,omitempty" yaml:"width,omitempty"` - Height int `json:"height,omitempty" yaml:"height,omitempty"` + BrowserID string `json:"browser_id,omitempty" yaml:"browser_id,omitempty"` + LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` + IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility + Width int `json:"width,omitempty" yaml:"width,omitempty"` + Height int `json:"height,omitempty" yaml:"height,omitempty"` } func (dev *BrowserDeviceOptions) Options() (deviceOptions []BrowserDeviceOption) { diff --git a/uixt/option/harmony.go b/uixt/option/harmony.go index f7f052e5..961223ee 100644 --- a/uixt/option/harmony.go +++ b/uixt/option/harmony.go @@ -8,8 +8,9 @@ const ( ) type HarmonyDeviceOptions struct { - ConnectKey string `json:"connect_key,omitempty" yaml:"connect_key,omitempty"` - LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` + ConnectKey string `json:"connect_key,omitempty" yaml:"connect_key,omitempty"` + LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` + IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility } func (dev *HarmonyDeviceOptions) Options() (deviceOptions []HarmonyDeviceOption) { diff --git a/uixt/option/ios.go b/uixt/option/ios.go index a19c716d..9cb3cf17 100644 --- 
a/uixt/option/ios.go +++ b/uixt/option/ios.go @@ -6,6 +6,7 @@ type IOSDeviceOptions struct { WDAPort int `json:"port,omitempty" yaml:"port,omitempty"` // WDA remote port WDAMjpegPort int `json:"mjpeg_port,omitempty" yaml:"mjpeg_port,omitempty"` // WDA remote MJPEG port LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"` + IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility // switch to iOS springboard before init WDA session ResetHomeOnStartup bool `json:"reset_home_on_startup,omitempty" yaml:"reset_home_on_startup,omitempty"`