fix: support combined LLMService with wings service

This commit is contained in:
lilong.129
2025-07-20 23:14:08 +08:00
parent 70d117ea00
commit 9890588ca7
6 changed files with 152 additions and 214 deletions

View File

@@ -4,6 +4,7 @@ import (
"context"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/uixt/option"
)
@@ -24,43 +25,66 @@ func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) {
// NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component
func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) {
// Get model configs for each component
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
if err != nil {
return nil, err
combinedLLMService := &combinedLLMService{}
// Planner
if config.PlannerModel == option.WINGS_SERVICE {
planner, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.planner = planner
} else {
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
if err != nil {
return nil, err
}
planner, err := NewPlanner(context.Background(), plannerModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.planner = planner
}
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
if err != nil {
return nil, err
// Asserter
if config.AsserterModel == option.WINGS_SERVICE {
asserter, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.asserter = asserter
} else {
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
if err != nil {
return nil, err
}
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.asserter = asserter
}
querierModelConfig, err := GetModelConfig(config.QuerierModel)
if err != nil {
return nil, err
// Querier
if config.QuerierModel == option.WINGS_SERVICE {
querier, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.querier = querier
} else {
querierModelConfig, err := GetModelConfig(config.QuerierModel)
if err != nil {
return nil, err
}
querier, err := NewQuerier(context.Background(), querierModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.querier = querier
}
// Create components with their respective model configs
planner, err := NewPlanner(context.Background(), plannerModelConfig)
if err != nil {
return nil, err
}
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
if err != nil {
return nil, err
}
querier, err := NewQuerier(context.Background(), querierModelConfig)
if err != nil {
return nil, err
}
return &combinedLLMService{
planner: planner,
asserter: asserter,
querier: querier,
}, nil
return combinedLLMService, nil
}
// combinedLLMService implements the ILLMService interface by combining planning, assertion and query capabilities

View File

@@ -3,7 +3,6 @@ package uixt
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/cloudwego/eino/schema"
@@ -16,7 +15,7 @@ import (
"github.com/httprunner/httprunner/v5/uixt/types"
)
// StartToGoal (original implementation - preserved)
// StartToGoal runs AI actions until task is finished or time limit is reached
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
options := option.NewActionOptions(opts...)
logger := log.Info().Str("prompt", prompt)
@@ -195,7 +194,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}
}
// AIAction with WingsService priority support
// AIAction performs AI-driven action and returns detailed execution result
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("prompt", prompt).Msg("performing AI action")
@@ -208,93 +207,24 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
return nil, err
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
log.Info().Msg("using Wings service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...)
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAction executes AIAction using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Add device context for Wings service if needed
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
// Step 1: Plan next action and measure time
// Step 2: Plan next action and measure time
modelCallStartTime := time.Now()
var planningResult *ai.PlanningResult
var err error
if serviceType == "llm" {
// For LLM service, use PlanNextAction which includes additional processing
planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...)
if planErr != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: planErr.Error(),
}, errors.Wrap(planErr, "get next action failed")
}
planningResult = &planningExecutionResult.PlanningResult
} else {
// For Wings service, call Plan directly
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
Role: schema.User,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenResult.Base64,
},
},
},
},
Size: screenResult.Resolution,
}
planningResult, err = service.Plan(ctx, planningOpts)
if err != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: err.Error(),
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
}
}
planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...)
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
aiExecutionResult := &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
PlanningResult: planningResult,
PlanningResult: &planningResult.PlanningResult,
}
if err != nil {
aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "get next action failed")
}
// Step 2: Execute tool calls
// Step 3: Execute tool calls
for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil {
@@ -309,9 +239,13 @@ func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screen
return aiExecutionResult, nil
}
// AIAssert with WingsService priority support
// AIAssert performs AI-driven assertion and returns detailed execution result
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("assertion", assertion).Msg("performing AI assertion")
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
}
ctx := dExt.addDeviceContextForWings(context.Background())
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
@@ -322,29 +256,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
return nil, err
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
log.Info().Msg("using Wings service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...)
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAssert executes AIAssert using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Step 1: Prepare context and options
ctx := context.Background()
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
assertResult := &AIExecutionResult{
Type: "assert",
ScreenshotElapsed: screenResult.Elapsed,
@@ -352,61 +263,47 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu
Resolution: &screenResult.Resolution,
}
// Step 2: Call service and measure time
// Step 2: Call model and measure time
modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{
Assertion: assertion,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
}
result, err := service.Assert(ctx, assertOpts)
result, err := dExt.LLMService.Assert(ctx, assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
assertResult.AssertionResult = result
if err != nil {
assertResult.Error = err.Error()
return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType))
return assertResult, errors.Wrap(err, "AI assertion failed")
}
// For assertion failure, we should still return success but mark the assertion as failed
// This ensures that the AIResult (including screenshot and thought) is properly saved and displayed
if !result.Pass {
assertResult.Error = result.Thought
assertResult.Error = result.Thought // Store the failure reason for reporting
}
return assertResult, nil
}
// addDeviceContextForWings adds device information to context for Wings service
func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context {
device := dExt.GetDevice()
if device == nil {
return ctx
}
// Context key types to avoid collisions
type contextKey string
// Add device ID to context
ctx = context.WithValue(ctx, "device_id", device.UUID())
const (
deviceIDKey contextKey = "device_id"
platformTypeKey contextKey = "platform_type"
)
// Add platform type to context
platformType := "android" // default
switch device.(type) {
case *AndroidDevice:
platformType = "android"
case *IOSDevice:
platformType = "ios"
case *HarmonyDevice:
platformType = "harmony"
}
ctx = context.WithValue(ctx, "platform_type", platformType)
return ctx
}
// PlanNextAction (original implementation - preserved)
// PlanNextAction performs planning and returns unified planning information
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
}
ctx = dExt.addDeviceContextForWings(ctx)
// Parse action options to get ResetHistory setting
options := option.NewActionOptions(opts...)
resetHistory := options.ResetHistory
@@ -476,7 +373,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
return planningResult, nil
}
// isTaskFinished (original implementation - preserved)
// isTaskFinished checks if the task is completed based on the planning result
func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
// Check if there are no tool calls (no actions to execute)
if len(planningResult.ToolCalls) == 0 {
@@ -495,7 +392,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
return false
}
// invokeToolCall (original implementation - preserved)
// invokeToolCall invokes the tool call
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments
arguments := make(map[string]interface{})
@@ -522,7 +419,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return nil
}
// PlanningExecutionResult (original implementation - preserved)
// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results
type PlanningExecutionResult struct {
ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName)
// Planning process information
@@ -539,7 +436,7 @@ type PlanningExecutionResult struct {
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
}
// AIExecutionResult (original implementation - preserved)
// AIExecutionResult represents a unified result structure for all AI operations
type AIExecutionResult struct {
Type string `json:"type"` // operation type: "query", "action", "assert"
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
@@ -556,7 +453,7 @@ type AIExecutionResult struct {
Error string `json:"error,omitempty"` // error message if operation failed
}
// SubActionResult (original implementation - preserved)
// SubActionResult represents a sub-action within a start_to_goal action
type SubActionResult struct {
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
@@ -571,7 +468,7 @@ type SessionData struct {
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
}
// AIQuery (original implementation - preserved)
// AIQuery performs AI-driven query and returns detailed execution result
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
@@ -616,3 +513,28 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
}
return aiResult, nil
}
// addDeviceContextForWings returns a context carrying the current device's
// UUID (under deviceIDKey) and its platform name (under platformTypeKey) for
// use by the Wings service. When the driver has no device, ctx is returned
// unchanged.
func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context {
	dev := dExt.GetDevice()
	if dev == nil {
		return ctx
	}

	// Any device type that is not explicitly recognised is reported as android.
	var platform string
	switch dev.(type) {
	case *IOSDevice:
		platform = "ios"
	case *HarmonyDevice:
		platform = "harmony"
	default:
		platform = "android"
	}

	withDeviceID := context.WithValue(ctx, deviceIDKey, dev.UUID())
	return context.WithValue(withDeviceID, platformTypeKey, platform)
}

View File

@@ -23,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) {
assert.Nil(t, err)
}
//func TestDriverExt_StartToGoal(t *testing.T) {
// driver := setupDriverExt(t)
//
// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
// 2. 连接规则:
// - 两个相同的图案可以通过不超过三条直线连接。
// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
// - 连接线的转折次数不能超过两次。
// 3. 游戏界面:
// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
//
// 注意事项:
// 1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
// 2、不要连续 2 次点击同一个图案
// 3、不要犯重复的错误
// `
//
// userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
//
// //_, err := driver.StartToGoal(context.Background(), userInstruction)
// //assert.Nil(t, err)
//}
// TestDriverExt_StartToGoal calls StartToGoal with a long Chinese-language
// instruction describing the rules of a tile-matching ("Lianliankan") game and
// asserts that no error is returned.
func TestDriverExt_StartToGoal(t *testing.T) {
// setupDriverExt is a test helper defined elsewhere in this package.
driver := setupDriverExt(t)
// The prompt is a raw string literal; its text is sent to the model verbatim.
userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
2. 连接规则:
- 两个相同的图案可以通过不超过三条直线连接。
- 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
- 连接线的转折次数不能超过两次。
3. 游戏界面:
- 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
- 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
注意事项:
1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
2、不要连续 2 次点击同一个图案
3、不要犯重复的错误
`
// Appended instruction: follow the rules above strictly and perform tap actions only.
userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
// Only the returned error is checked; the planning results are discarded.
_, err := driver.StartToGoal(context.Background(), userInstruction)
assert.Nil(t, err)
}
func TestDriverExt_PlanNextAction(t *testing.T) {
driver := setupDriverExt(t)

View File

@@ -58,6 +58,7 @@ const (
DOUBAO_SEED_1_6_250615 LLMServiceType = "doubao-seed-1.6-250615"
OPENAI_GPT_4O LLMServiceType = "openai/gpt-4o"
DEEPSEEK_R1_250528 LLMServiceType = "deepseek-r1-250528"
WINGS_SERVICE LLMServiceType = "wings-service"
)
func WithLLMService(modelType LLMServiceType) AIServiceOption {

View File

@@ -27,10 +27,6 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
var err error
// Initialize Wings service (always available)
driverExt.WingsService = ai.NewWingsService()
log.Info().Msg("Wings service initialized")
// Handle LLM service initialization
if services.LLMConfig != nil {
// Use advanced LLM configuration if provided
@@ -53,15 +49,11 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
}
// Register uixt MCP tools to LLM service if it exists
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err = driverExt.WingsService.RegisterTools(einoTools); err != nil {
log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)")
}
if driverExt.LLMService != nil {
if err = driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools to LLM service")
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err := driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools")
}
}
@@ -71,9 +63,8 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
// XTDriver = IDriver + AI
type XTDriver struct {
IDriver
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM (fallback service)
WingsService ai.ILLMService // Wings API service (priority service)
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM
services *option.AIServiceOptions // AI services options
client *MCPClient4XTDriver // MCP Client for built-in uixt server