feat: 支持自主配置LLM Service

This commit is contained in:
余泓铮
2025-07-21 14:21:18 +08:00
parent 984edf3f3c
commit 6f3a8b1cbd
5 changed files with 43 additions and 137 deletions

View File

@@ -347,22 +347,6 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st
// getDeviceInfoFromContext gets device info from context with fallback
func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo {
// Try to get device info from context
if deviceID, ok := ctx.Value("device_id").(string); ok {
platformType := "android"
if platform, ok := ctx.Value("platform_type").(string); ok {
platformType = platform
}
return WingsDeviceInfo{
DeviceID: deviceID,
NowImage: screenshot,
PreImage: screenshot,
NowLayoutJSON: "",
OperationSystem: platformType,
}
}
// Fallback to default device info
return WingsDeviceInfo{
DeviceID: "default-device",

View File

@@ -24,7 +24,7 @@ func setupADBDriverExt(t *testing.T) *XTDriver {
require.Nil(t, err)
driverExt, err := NewXTDriver(driver,
option.WithCVService(option.CVServiceTypeVEDEM),
// option.WithLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428),
option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328),
)
require.Nil(t, err)
return driverExt

View File

@@ -209,80 +209,51 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
if dExt.LLMService != nil {
log.Info().Msg("using Wings service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...)
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "wings", opts...)
} else {
return nil, errors.New("no LLM service is initialized")
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAction executes AIAction using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Add device context for Wings service if needed
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
// Step 1: Plan next action and measure time
modelCallStartTime := time.Now()
var planningResult *ai.PlanningResult
var err error
if serviceType == "llm" {
// For LLM service, use PlanNextAction which includes additional processing
planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...)
if planErr != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: planErr.Error(),
}, errors.Wrap(planErr, "get next action failed")
}
planningResult = &planningExecutionResult.PlanningResult
} else {
// For Wings service, call Plan directly
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
Role: schema.User,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenResult.Base64,
},
// For Wings service, call Plan directly
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
Role: schema.User,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenResult.Base64,
},
},
},
Size: screenResult.Resolution,
}
planningResult, err = service.Plan(ctx, planningOpts)
if err != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: err.Error(),
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
}
},
Size: screenResult.Resolution,
}
planningResult, err = service.Plan(ctx, planningOpts)
if err != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: err.Error(),
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
}
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
aiExecutionResult := &AIExecutionResult{
@@ -322,28 +293,18 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
return nil, err
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
if dExt.LLMService != nil {
log.Info().Msg("using Wings service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...)
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "wings", opts...)
} else {
return nil, errors.New("no LLM service is initialized")
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAssert executes AIAssert using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Step 1: Prepare context and options
ctx := context.Background()
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
assertResult := &AIExecutionResult{
Type: "assert",
@@ -376,31 +337,6 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu
return assertResult, nil
}
// addDeviceContextForWings derives a context that carries the current device's
// identity for the Wings service.
//
// When GetDevice returns nil the incoming ctx is handed back untouched.
// Otherwise two values are attached under plain string keys:
//   - "device_id":     the result of the device's UUID method
//   - "platform_type": "ios" for *IOSDevice, "harmony" for *HarmonyDevice,
//     and "android" for *AndroidDevice or any other device type
//
// The keys are deliberately left as untyped strings: readers fetch them with
// ctx.Value("device_id") / ctx.Value("platform_type"), and a context lookup
// only matches when the key type is identical.
func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context {
	dev := dExt.GetDevice()
	if dev == nil {
		return ctx
	}

	// Attach the device identifier first, then layer the platform on top.
	withID := context.WithValue(ctx, "device_id", dev.UUID())

	// Map the concrete device type to its platform name; android is the
	// default for *AndroidDevice and for any unrecognised device type.
	var platform string
	switch dev.(type) {
	case *IOSDevice:
		platform = "ios"
	case *HarmonyDevice:
		platform = "harmony"
	default:
		platform = "android"
	}

	return context.WithValue(withID, "platform_type", platform)
}
// PlanNextAction (original implementation - preserved)
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
if dExt.LLMService == nil {

View File

@@ -295,32 +295,25 @@ func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) {
prompt := "点击搜索按钮"
// Test both methods with the same prompt
wingsResult, wingsErr := driver.AIAction(context.Background(), prompt)
aiResult, aiErr := driver.AIAction(context.Background(), prompt)
// Both should execute without critical errors (may have different implementations)
t.Logf("AIAction error: %v", wingsErr)
t.Logf("AIAction error: %v", aiErr)
// If both succeed, compare results
if wingsResult != nil && aiResult != nil {
assert.Equal(t, "action", wingsResult.Type, "AIAction result type should be 'action'")
if aiResult != nil {
assert.Equal(t, "action", aiResult.Type, "AIAction result type should be 'action'")
// Both should have timing information
assert.Greater(t, wingsResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time")
assert.Greater(t, aiResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time")
// Both should have screenshot information
assert.NotEmpty(t, wingsResult.ImagePath, "AIAction should have image path")
assert.NotEmpty(t, aiResult.ImagePath, "AIAction should have image path")
// Compare model names
if wingsResult.PlanningResult != nil && aiResult.PlanningResult != nil {
t.Logf("AIAction model: %s", wingsResult.PlanningResult.ModelName)
if aiResult.PlanningResult != nil {
t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName)
assert.Equal(t, "wings-api", wingsResult.PlanningResult.ModelName, "AIAction should use wings-api")
assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api")
}
}

View File

@@ -27,10 +27,6 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
var err error
// Initialize Wings service (always available)
driverExt.WingsService = ai.NewWingsService()
log.Info().Msg("Wings service initialized")
// Handle LLM service initialization
if services.LLMConfig != nil {
// Use advanced LLM configuration if provided
@@ -49,17 +45,15 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
log.Info().Msg("LLM service initialized")
}
} else {
log.Info().Msg("no LLM service config provided, using Wings service only")
}
// Register uixt MCP tools to LLM service if it exists
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err = driverExt.WingsService.RegisterTools(einoTools); err != nil {
log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)")
driverExt.LLMService = ai.NewWingsService()
log.Info().Msg("Wings service initialized")
}
if driverExt.LLMService != nil {
// Register uixt MCP tools to LLM service if it exists
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err = driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools to LLM service")
}
@@ -71,9 +65,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
// XTDriver = IDriver + AI
type XTDriver struct {
IDriver
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM (fallback service)
WingsService ai.ILLMService // Wings API service (priority service)
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM (fallback service)
services *option.AIServiceOptions // AI services options
client *MCPClient4XTDriver // MCP Client for built-in uixt server