From 6f3a8b1cbd80e5c7e5d688bc6ef634fa47c654e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=99=E6=B3=93=E9=93=AE?= Date: Mon, 21 Jul 2025 14:21:18 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E8=87=AA?= =?UTF-8?q?=E4=B8=BB=E9=85=8D=E7=BD=AELLM=20Service?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- uixt/ai/wings_service.go | 16 ----- uixt/android_test.go | 2 +- uixt/driver_ext_ai.go | 128 ++++++++++--------------------------- uixt/driver_ext_ai_test.go | 11 +--- uixt/sdk.go | 23 +++---- 5 files changed, 43 insertions(+), 137 deletions(-) diff --git a/uixt/ai/wings_service.go b/uixt/ai/wings_service.go index 5dea7484..65d6dfd0 100644 --- a/uixt/ai/wings_service.go +++ b/uixt/ai/wings_service.go @@ -347,22 +347,6 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st // getDeviceInfoFromContext gets device info from context with fallback func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo { - // Try to get device info from context - if deviceID, ok := ctx.Value("device_id").(string); ok { - platformType := "android" - if platform, ok := ctx.Value("platform_type").(string); ok { - platformType = platform - } - - return WingsDeviceInfo{ - DeviceID: deviceID, - NowImage: screenshot, - PreImage: screenshot, - NowLayoutJSON: "", - OperationSystem: platformType, - } - } - // Fallback to default device info return WingsDeviceInfo{ DeviceID: "default-device", diff --git a/uixt/android_test.go b/uixt/android_test.go index 0d5d418d..9cd4c5e7 100644 --- a/uixt/android_test.go +++ b/uixt/android_test.go @@ -24,7 +24,7 @@ func setupADBDriverExt(t *testing.T) *XTDriver { require.Nil(t, err) driverExt, err := NewXTDriver(driver, option.WithCVService(option.CVServiceTypeVEDEM), - // option.WithLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428), + option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328), ) require.Nil(t, err) return driverExt diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 5243afc8..daf15cc3 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -209,80 +209,51 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio } // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { + if dExt.LLMService != nil { log.Info().Msg("using Wings service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...) + return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "wings", opts...) + } else { + return nil, errors.New("no LLM service is initialized") } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI action") - return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...) } // executeAIAction executes AIAction using any AI service (generic implementation) func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { - // Add device context for Wings service if needed - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } - // Step 1: Plan next action and measure time modelCallStartTime := time.Now() var planningResult *ai.PlanningResult var err error - if serviceType == "llm" { - // For LLM service, use PlanNextAction which includes additional processing - planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...) - if planErr != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: planErr.Error(), - }, errors.Wrap(planErr, "get next action failed") - } - planningResult = &planningExecutionResult.PlanningResult - } else { - // For Wings service, call Plan directly - planningOpts := &ai.PlanningOptions{ - UserInstruction: prompt, - Message: &schema.Message{ - Role: schema.User, - MultiContent: []schema.ChatMessagePart{ - { - Type: schema.ChatMessagePartTypeImageURL, - ImageURL: &schema.ChatMessageImageURL{ - URL: screenResult.Base64, - }, + // For Wings service, call Plan directly + planningOpts := &ai.PlanningOptions{ + UserInstruction: prompt, + Message: &schema.Message{ + Role: schema.User, + MultiContent: []schema.ChatMessagePart{ + { + Type: schema.ChatMessagePartTypeImageURL, + ImageURL: &schema.ChatMessageImageURL{ + URL: screenResult.Base64, }, }, }, - Size: screenResult.Resolution, - } - - planningResult, err = service.Plan(ctx, planningOpts) - if err != nil { - modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() - return &AIExecutionResult{ - Type: "action", - ModelCallElapsed: modelCallElapsed, - ScreenshotElapsed: screenResult.Elapsed, - ImagePath: screenResult.ImagePath, - Resolution: &screenResult.Resolution, - Error: err.Error(), - }, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType)) - } + }, + Size: screenResult.Resolution, } + planningResult, err = service.Plan(ctx, planningOpts) + if err != nil { + modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() + return &AIExecutionResult{ + Type: "action", + ModelCallElapsed: modelCallElapsed, + ScreenshotElapsed: screenResult.Elapsed, + ImagePath: screenResult.ImagePath, + Resolution: &screenResult.Resolution, + Error: err.Error(), + }, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType)) + } modelCallElapsed := time.Since(modelCallStartTime).Milliseconds() aiExecutionResult := &AIExecutionResult{ @@ -322,28 +293,18 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* return nil, err } - // Step 2: Check if WingsService is available and prioritize it - if dExt.WingsService != nil { + if dExt.LLMService != nil { log.Info().Msg("using Wings service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...) + return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "wings", opts...) + } else { + return nil, errors.New("no LLM service is initialized") } - - // Step 3: Fallback to LLM service - if dExt.LLMService == nil { - return nil, errors.New("neither Wings service nor LLM service is initialized") - } - - log.Info().Msg("using LLM service for AI assertion") - return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...) } // executeAIAssert executes AIAssert using any AI service (generic implementation) func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) { // Step 1: Prepare context and options ctx := context.Background() - if serviceType == "wings" { - ctx = dExt.addDeviceContextForWings(ctx) - } assertResult := &AIExecutionResult{ Type: "assert", @@ -376,31 +337,6 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu return assertResult, nil } -// addDeviceContextForWings adds device information to context for Wings service -func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context { - device := dExt.GetDevice() - if device == nil { - return ctx - } - - // Add device ID to context - ctx = context.WithValue(ctx, "device_id", device.UUID()) - - // Add platform type to context - platformType := "android" // default - switch device.(type) { - case *AndroidDevice: - platformType = "android" - case *IOSDevice: - platformType = "ios" - case *HarmonyDevice: - platformType = "harmony" - } - ctx = context.WithValue(ctx, "platform_type", platformType) - - return ctx -} - // PlanNextAction (original implementation - preserved) func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) { if dExt.LLMService == nil { diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index 83904cb4..b069b6ef 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -295,32 +295,25 @@ func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) { prompt := "点击搜索按钮" // Test both methods with the same prompt - wingsResult, wingsErr := driver.AIAction(context.Background(), prompt) aiResult, aiErr := driver.AIAction(context.Background(), prompt) // Both should execute without critical errors (may have different implementations) - t.Logf("AIAction error: %v", wingsErr) t.Logf("AIAction error: %v", aiErr) // If both succeed, compare results - if wingsResult != nil && aiResult != nil { - assert.Equal(t, "action", wingsResult.Type, "AIAction result type should be 'action'") + if aiResult != nil { assert.Equal(t, "action", aiResult.Type, "AIAction result type should be 'action'") // Both should have timing information - assert.Greater(t, wingsResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time") assert.Greater(t, aiResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time") // Both should have screenshot information - assert.NotEmpty(t, wingsResult.ImagePath, "AIAction should have image path") assert.NotEmpty(t, aiResult.ImagePath, "AIAction should have image path") // Compare model names - if wingsResult.PlanningResult != nil && aiResult.PlanningResult != nil { - t.Logf("AIAction model: %s", wingsResult.PlanningResult.ModelName) + if aiResult.PlanningResult != nil { t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName) - assert.Equal(t, "wings-api", wingsResult.PlanningResult.ModelName, "AIAction should use wings-api") assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api") } } diff --git a/uixt/sdk.go b/uixt/sdk.go index 1bf4204a..2a90b3ea 100644 --- a/uixt/sdk.go +++ b/uixt/sdk.go @@ -27,10 +27,6 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err var err error - // Initialize Wings service (always available) - driverExt.WingsService = ai.NewWingsService() - log.Info().Msg("Wings service initialized") - // Handle LLM service initialization if services.LLMConfig != nil { // Use advanced LLM configuration if provided @@ -49,17 +45,15 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err log.Info().Msg("LLM service initialized") } } else { - log.Info().Msg("no LLM service config provided, using Wings service only") - } - - // Register uixt MCP tools to LLM service if it exists - mcpTools := driverExt.client.Server.ListTools() - einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") - if err = driverExt.WingsService.RegisterTools(einoTools); err != nil { - log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)") + driverExt.LLMService = ai.NewWingsService() + log.Info().Msg("Wings service initialized") } if driverExt.LLMService != nil { + // Register uixt MCP tools to LLM service if it exists + mcpTools := driverExt.client.Server.ListTools() + einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt") + if err = driverExt.LLMService.RegisterTools(einoTools); err != nil { log.Warn().Err(err).Msg("failed to register uixt tools to LLM service") } @@ -71,9 +65,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err // XTDriver = IDriver + AI type XTDriver struct { IDriver - CVService ai.ICVService // OCR/CV - LLMService ai.ILLMService // LLM (fallback service) - WingsService ai.ILLMService // Wings API service (priority service) + CVService ai.ICVService // OCR/CV + LLMService ai.ILLMService // LLM (fallback service) services *option.AIServiceOptions // AI services options client *MCPClient4XTDriver // MCP Client for built-in uixt server From 097594abb3084fb39de56a3670c0e35b19461c66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=99=E6=B3=93=E9=93=AE?= Date: Mon, 21 Jul 2025 14:29:50 +0800 Subject: [PATCH 2/2] =?UTF-8?q?del:=20=E5=88=A0=E9=99=A4=E9=94=99=E8=AF=AF?= =?UTF-8?q?test=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/uitest/android_touch_simulator_test.go | 14 -------------- uixt/android_test.go | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/examples/uitest/android_touch_simulator_test.go b/examples/uitest/android_touch_simulator_test.go index ddc690cf..fd489fbb 100644 --- a/examples/uitest/android_touch_simulator_test.go +++ b/examples/uitest/android_touch_simulator_test.go @@ -29,10 +29,6 @@ func ParseTouchEvents(data string) ([]types.TouchEvent, error) { event := types.TouchEvent{} var err error - // Parse each field - if event.Timestamp, err = strconv.ParseInt(parts[0], 10, 64); err != nil { - return nil, fmt.Errorf("invalid timestamp: %v", err) - } if event.X, err = strconv.ParseFloat(parts[1], 64); err != nil { return nil, fmt.Errorf("invalid x coordinate: %v", err) } @@ -155,9 +151,6 @@ func TestTouchEventParsing(t *testing.T) { } event := events[0] - if event.Timestamp != 1752646457403 { - t.Errorf("Expected timestamp 1752646457403, got %d", event.Timestamp) - } if event.X != 456.78418 { t.Errorf("Expected X 456.78418, got %f", event.X) } @@ -229,12 +222,5 @@ func TestTouchEventSequenceValidation(t *testing.T) { } } - // Validate timestamps are in increasing order - for i := 1; i < len(events); i++ { - if events[i].Timestamp <= events[i-1].Timestamp { - t.Errorf("Event %d: timestamp should be greater than previous event", i) - } - } - t.Logf("Touch sequence validation passed: %d events with correct action sequence", len(events)) } diff --git a/uixt/android_test.go b/uixt/android_test.go index 9cd4c5e7..af76109d 100644 --- a/uixt/android_test.go +++ b/uixt/android_test.go @@ -24,7 +24,7 @@ func setupADBDriverExt(t *testing.T) *XTDriver { require.Nil(t, err) driverExt, err := NewXTDriver(driver, option.WithCVService(option.CVServiceTypeVEDEM), - option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328), + // option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328), ) require.Nil(t, err) return driverExt