From 9890588ca7f953265ce31bfccd25930b11556208 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Sun, 20 Jul 2025 23:14:08 +0800 Subject: [PATCH] fix: support combined LLMService with wings service --- internal/version/VERSION | 2 +- uixt/ai/ai.go | 86 ++++++++++------ uixt/driver_ext_ai.go | 202 ++++++++++++------------------------- uixt/driver_ext_ai_test.go | 54 +++++----- uixt/option/ai.go | 1 + uixt/sdk.go | 21 ++-- 6 files changed, 152 insertions(+), 214 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 8281c03e..2472fe80 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-250717 +v5.0.0-250720 diff --git a/uixt/ai/ai.go b/uixt/ai/ai.go index 75bd8845..4ff4db8f 100644 --- a/uixt/ai/ai.go +++ b/uixt/ai/ai.go @@ -4,6 +4,7 @@ import ( "context" "github.com/cloudwego/eino/schema" + "github.com/httprunner/httprunner/v5/uixt/option" ) @@ -24,43 +25,66 @@ func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) { // NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) { - // Get model configs for each component - plannerModelConfig, err := GetModelConfig(config.PlannerModel) - if err != nil { - return nil, err + combinedLLMService := &combinedLLMService{} + + // Planner + if config.PlannerModel == option.WINGS_SERVICE { + planner, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.planner = planner + } else { + plannerModelConfig, err := GetModelConfig(config.PlannerModel) + if err != nil { + return nil, err + } + planner, err := NewPlanner(context.Background(), plannerModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.planner = planner } - asserterModelConfig, err := GetModelConfig(config.AsserterModel) - if err != nil { - return nil, err + // Asserter + if config.AsserterModel == option.WINGS_SERVICE { + asserter, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.asserter = asserter + } else { + asserterModelConfig, err := GetModelConfig(config.AsserterModel) + if err != nil { + return nil, err + } + asserter, err := NewAsserter(context.Background(), asserterModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.asserter = asserter } - querierModelConfig, err := GetModelConfig(config.QuerierModel) - if err != nil { - return nil, err + // Querier + if config.QuerierModel == option.WINGS_SERVICE { + querier, err := NewWingsService() + if err != nil { + return nil, err + } + combinedLLMService.querier = querier + } else { + querierModelConfig, err := GetModelConfig(config.QuerierModel) + if err != nil { + return nil, err + } + querier, err := NewQuerier(context.Background(), querierModelConfig) + if err != nil { + return nil, err + } + combinedLLMService.querier = querier } - // Create components with their respective model configs - planner, err := NewPlanner(context.Background(), plannerModelConfig) - if err != nil { - return nil, err - } - - asserter, err := NewAsserter(context.Background(), asserterModelConfig) - if err != nil { - return nil, err - } - - querier, err := NewQuerier(context.Background(), querierModelConfig) - if err != nil { - return nil, err - } - - return &combinedLLMService{ - planner: planner, - asserter: asserter, - querier: querier, - }, nil + return combinedLLMService, nil } // combinedLLMService 实现了 ILLMService 接口,组合了规划、断言和查询功能 diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 5243afc8..9fa43ffb 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -3,7 +3,6 @@ package uixt import ( "context" "encoding/json" - "fmt" "time" "github.com/cloudwego/eino/schema" @@ -16,7 +15,7 @@ import ( "github.com/httprunner/httprunner/v5/uixt/types" ) -// StartToGoal (original implementation - preserved) +// StartToGoal runs AI actions until task is finished or time limit is reached func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) { options := option.NewActionOptions(opts...) logger := log.Info().Str("prompt", prompt) @@ -195,7 +194,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op } } -// AIAction with WingsService priority support +// AIAction performs AI-driven action and returns detailed execution result func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) { log.Info().Str("prompt", prompt).Msg("performing AI action") @@ -208,93 +207,24 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio return nil, err } - // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { - log.Info().Msg("using Wings service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...) - } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...) -} - -// executeAIAction executes AIAction using any AI service (generic implementation) -func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { - // Add device context for Wings service if needed - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } - - // Step 1: Plan next action and measure time + // Step 2: Plan next action and measure time modelCallStartTime := time.Now() - - var planningResult *ai.PlanningResult - var err error - - if serviceType == "llm" { - // For LLM service, use PlanNextAction which includes additional processing - planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...) - if planErr != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: planErr.Error(), - }, errors.Wrap(planErr, "get next action failed") - } - planningResult = &planningExecutionResult.PlanningResult - } else { - // For Wings service, call Plan directly - planningOpts := &ai.PlanningOptions{ - UserInstruction: prompt, - Message: &schema.Message{ - Role: schema.User, - MultiContent: []schema.ChatMessagePart{ - { - Type: schema.ChatMessagePartTypeImageURL, - ImageURL: &schema.ChatMessageImageURL{ - URL: screenResult.Base64, - }, - }, - }, - }, - Size: screenResult.Resolution, - } - - planningResult, err = service.Plan(ctx, planningOpts) - if err != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: err.Error(), - }, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType)) - } - } - + planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...) modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - aiExecutionResult := &AIExecutionResult{ Type: "action", ModelCallElapsed: modelCallElapsed, ScreenshotElapsed: screenResult.Elapsed, ImagePath: screenResult.ImagePath, Resolution: &screenResult.Resolution, - PlanningResult: planningResult, + PlanningResult: &planningResult.PlanningResult, + } + if err != nil { + aiExecutionResult.Error = err.Error() + return aiExecutionResult, errors.Wrap(err, "get next action failed") } - // Step 2: Execute tool calls + // Step 3: Execute tool calls for _, toolCall := range planningResult.ToolCalls { err = dExt.invokeToolCall(ctx, toolCall, opts...) if err != nil { @@ -309,9 +239,13 @@ func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screen return aiExecutionResult, nil } -// AIAssert with WingsService priority support +// AIAssert performs AI-driven assertion and returns detailed execution result func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) { - log.Info().Str("assertion", assertion).Msg("performing AI assertion") + if dExt.LLMService == nil { + return nil, errors.New("LLM service is not initialized") + } + + ctx := dExt.addDeviceContextForWings(context.Background()) // Step 1: Take screenshot and convert to base64 screenResult, err := dExt.GetScreenResult( @@ -322,29 +256,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* return nil, err } - // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { - log.Info().Msg("using Wings service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...) - } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...) -} - -// executeAIAssert executes AIAssert using any AI service (generic implementation) -func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { - // Step 1: Prepare context and options - ctx := context.Background() - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } - assertResult := &AIExecutionResult{ Type: "assert", ScreenshotElapsed: screenResult.Elapsed, @@ -352,61 +263,47 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu Resolution: &screenResult.Resolution, } - // Step 2: Call service and measure time + // Step 2: Call model and measure time modelCallStartTime := time.Now() assertOpts := &ai.AssertOptions{ Assertion: assertion, Screenshot: screenResult.Base64, Size: screenResult.Resolution, } - - result, err := service.Assert(ctx, assertOpts) + result, err := dExt.LLMService.Assert(ctx, assertOpts) assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds() assertResult.AssertionResult = result if err != nil { assertResult.Error = err.Error() - return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType)) + return assertResult, errors.Wrap(err, "AI assertion failed") } + // For assertion failure, we should still return success but mark the assertion as failed + // This ensures that the AIResult (including screenshot and thought) is properly saved and displayed if !result.Pass { - assertResult.Error = result.Thought + assertResult.Error = result.Thought // Store the failure reason for reporting } return assertResult, nil } -// addDeviceContextForWings adds device information to context for Wings service -func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { - device := dExt.GetDevice() - if device == nil { - return ctx - } +// Context key types to avoid collisions +type contextKey string - // Add device ID to context - ctx = context.WithValue(ctx, "device_id", device.UUID()) +const ( + deviceIDKey contextKey = "device_id" + platformTypeKey contextKey = "platform_type" +) - // Add platform type to context - platformType := "android" // default - switch device.(type) { - case *AndroidDevice: - platformType = "android" - case *IOSDevice: - platformType = "ios" - case *HarmonyDevice: - platformType = "harmony" - } - ctx = context.WithValue(ctx, "platform_type", platformType) - - return ctx -} - -// PlanNextAction (original implementation - preserved) +// PlanNextAction performs planning and returns unified planning information func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) { if dExt.LLMService == nil { return nil, errors.New("LLM service is not initialized") } + ctx = dExt.addDeviceContextForWings(ctx) + // Parse action options to get ResetHistory setting options := option.NewActionOptions(opts...) resetHistory := options.ResetHistory @@ -476,7 +373,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. return planningResult, nil } -// isTaskFinished (original implementation - preserved) +// isTaskFinished checks if the task is completed based on the planning result func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool { // Check if there are no tool calls (no actions to execute) if len(planningResult.ToolCalls) == 0 { @@ -495,7 +392,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo return false } -// invokeToolCall (original implementation - preserved) +// invokeToolCall invokes the tool call func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error { // Parse arguments arguments := make(map[string]interface{}) @@ -522,7 +419,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa return nil } -// PlanningExecutionResult (original implementation - preserved) +// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results type PlanningExecutionResult struct { ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName) // Planning process information @@ -539,7 +436,7 @@ type PlanningExecutionResult struct { SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning } -// AIExecutionResult (original implementation - preserved) +// AIExecutionResult represents a unified result structure for all AI operations type AIExecutionResult struct { Type string `json:"type"` // operation type: "query", "action", "assert" ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds @@ -556,7 +453,7 @@ type AIExecutionResult struct { Error string `json:"error,omitempty"` // error message if operation failed } -// SubActionResult (original implementation - preserved) +// SubActionResult represents a sub-action within a start_to_goal action type SubActionResult struct { ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input") Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action @@ -571,7 +468,7 @@ type SessionData struct { ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results } -// AIQuery (original implementation - preserved) +// AIQuery performs AI-driven query and returns detailed execution result func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) { if dExt.LLMService == nil { return nil, errors.New("LLM service is not initialized") @@ -616,3 +513,28 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec } return aiResult, nil } + +// addDeviceContextForWings adds device information to context for Wings service +func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { + device := dExt.GetDevice() + if device == nil { + return ctx + } + + // Add device ID to context + ctx = context.WithValue(ctx, deviceIDKey, device.UUID()) + + // Add platform type to context + platformType := "android" // default + switch device.(type) { + case *AndroidDevice: + platformType = "android" + case *IOSDevice: + platformType = "ios" + case *HarmonyDevice: + platformType = "harmony" + } + ctx = context.WithValue(ctx, platformTypeKey, platformType) + + return ctx +} diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index 83904cb4..01738846 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -23,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) { assert.Nil(t, err) } -//func TestDriverExt_StartToGoal(t *testing.T) { -// driver := setupDriverExt(t) -// -// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明: -// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。 -// 2. 连接规则: -// - 两个相同的图案可以通过不超过三条直线连接。 -// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。 -// - 连接线的转折次数不能超过两次。 -// 3. 游戏界面: -// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。 -// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。 -// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。 -// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。 -// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。 -// -// 注意事项: -// 1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败 -// 2、不要连续 2 次点击同一个图案 -// 3、不要犯重复的错误 -// ` -// -// userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作" -// -// //_, err := driver.StartToGoal(context.Background(), userInstruction) -// //assert.Nil(t, err) -//} +func TestDriverExt_StartToGoal(t *testing.T) { + driver := setupDriverExt(t) + + userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明: + 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。 + 2. 连接规则: + - 两个相同的图案可以通过不超过三条直线连接。 + - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。 + - 连接线的转折次数不能超过两次。 + 3. 游戏界面: + - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。 + - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。 + 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。 + 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。 + 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。 + + 注意事项: + 1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败 + 2、不要连续 2 次点击同一个图案 + 3、不要犯重复的错误 + ` + + userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作" + + _, err := driver.StartToGoal(context.Background(), userInstruction) + assert.Nil(t, err) +} func TestDriverExt_PlanNextAction(t *testing.T) { driver := setupDriverExt(t) diff --git a/uixt/option/ai.go b/uixt/option/ai.go index e62879ea..2bc790c2 100644 --- a/uixt/option/ai.go +++ b/uixt/option/ai.go @@ -58,6 +58,7 @@ const ( DOUBAO_SEED_1_6_250615 LLMServiceType = "doubao-seed-1.6-250615" OPENAI_GPT_4O LLMServiceType = "openai/gpt-4o" DEEPSEEK_R1_250528 LLMServiceType = "deepseek-r1-250528" + WINGS_SERVICE LLMServiceType = "wings-service" ) func WithLLMService(modelType LLMServiceType) AIServiceOption { diff --git a/uixt/sdk.go b/uixt/sdk.go index 1bf4204a..8175fdfc 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -27,10 +27,6 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err var err error - // Initialize Wings service (always available) - driverExt.WingsService = ai.NewWingsService() - log.Info().Msg("Wings service initialized") - // Handle LLM service initialization if services.LLMConfig != nil { // Use advanced LLM configuration if provided @@ -53,15 +49,11 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err } // Register uixt MCP tools to LLM service if it exists - mcpTools := driverExt.client.Server.ListTools() - einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") - if err = driverExt.WingsService.RegisterTools(einoTools); err != nil { - log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)") - } - if driverExt.LLMService != nil { - if err = driverExt.LLMService.RegisterTools(einoTools); err != nil { - log.Warn().Err(err).Msg("failed to register uixt tools to LLM service") + mcpTools := driverExt.client.Server.ListTools() + einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") + if err := driverExt.LLMService.RegisterTools(einoTools); err != nil { + log.Warn().Err(err).Msg("failed to register uixt tools") } } @@ -71,9 +63,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err // XTDriver = IDriver + AI type XTDriver struct { IDriver - CVService ai.ICVService // OCR/CV - LLMService ai.ILLMService // LLM (fallback service) - WingsService ai.ILLMService // Wings API service (priority service) + CVService ai.ICVService // OCR/CV + LLMService ai.ILLMService // LLM services *option.AIServiceOptions // AI services options client *MCPClient4XTDriver // MCP Client for built-in uixt server