Merge branch 'master' into session_refactor

This commit is contained in:
余泓铮
2025-07-22 17:58:24 +08:00
23 changed files with 286 additions and 641 deletions

View File

@@ -1 +1 @@
v5.0.0-250717
v5.0.0-250721

View File

@@ -1077,6 +1077,24 @@ func (r *SessionRunner) GetTransactions() map[string]map[TransactionType]time.Ti
return r.transactions
}
// ignorePopup reports whether the testcase-level config asks to ignore popups
// for the given osType. It inspects only the first config entry of the
// matching platform (Android, iOS, Harmony or browser) and returns that
// entry's IgnorePopup flag. It returns false when the config is nil, when the
// matching platform has no entries, or when osType matches no known step type.
//
// Kept for compatibility.
func (r *SessionRunner) ignorePopup(osType string) bool {
	config := r.caseRunner.TestCase.Config.Get()
	if config == nil {
		// NOTE(review): the caller in runStepMobileUI guards the same accessor
		// with `config != nil`, so a nil result is treated as "not ignored"
		// here instead of panicking on the field access below.
		return false
	}

	switch {
	case osType == string(StepTypeAndroid) && len(config.Android) > 0:
		return config.Android[0].IgnorePopup
	case osType == string(StepTypeIOS) && len(config.IOS) > 0:
		return config.IOS[0].IgnorePopup
	case osType == string(StepTypeHarmony) && len(config.Harmony) > 0:
		return config.Harmony[0].IgnorePopup
	case osType == string(stepTypeBrowser) && len(config.Browser) > 0:
		return config.Browser[0].IgnorePopup
	}
	return false
}
// saveJSONCase saves the original JSON case content to the results directory
func saveJSONCase(casePath string) error {
// Read the original JSON case content

View File

@@ -798,10 +798,11 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
config = s.caseRunner.Config.Get()
}
// automatic handling of pop-up windows on each step finished, default to disabled
// priority: step ignore_popup > config auto_popup_handler > step auto_popup_handler
// priority: config ignore_popup > step ignore_popup > config auto_popup_handler > step auto_popup_handler
shouldHandlePopup := false
if stepIgnorePopup {
if s.ignorePopup(mobileStep.OSType) {
shouldHandlePopup = false
} else if stepIgnorePopup {
// step level config, keep for compatibility
shouldHandlePopup = false
} else if config != nil && config.AutoPopupHandler {

View File

@@ -4,6 +4,7 @@ import (
"context"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/uixt/option"
)
@@ -24,43 +25,66 @@ func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) {
// NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component
func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) {
// Get model configs for each component
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
if err != nil {
return nil, err
combinedLLMService := &combinedLLMService{}
// Planner
if config.PlannerModel == option.WINGS_SERVICE {
planner, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.planner = planner
} else {
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
if err != nil {
return nil, err
}
planner, err := NewPlanner(context.Background(), plannerModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.planner = planner
}
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
if err != nil {
return nil, err
// Asserter
if config.AsserterModel == option.WINGS_SERVICE {
asserter, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.asserter = asserter
} else {
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
if err != nil {
return nil, err
}
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.asserter = asserter
}
querierModelConfig, err := GetModelConfig(config.QuerierModel)
if err != nil {
return nil, err
// Querier
if config.QuerierModel == option.WINGS_SERVICE {
querier, err := NewWingsService()
if err != nil {
return nil, err
}
combinedLLMService.querier = querier
} else {
querierModelConfig, err := GetModelConfig(config.QuerierModel)
if err != nil {
return nil, err
}
querier, err := NewQuerier(context.Background(), querierModelConfig)
if err != nil {
return nil, err
}
combinedLLMService.querier = querier
}
// Create components with their respective model configs
planner, err := NewPlanner(context.Background(), plannerModelConfig)
if err != nil {
return nil, err
}
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
if err != nil {
return nil, err
}
querier, err := NewQuerier(context.Background(), querierModelConfig)
if err != nil {
return nil, err
}
return &combinedLLMService{
planner: planner,
asserter: asserter,
querier: querier,
}, nil
return combinedLLMService, nil
}
// combinedLLMService 实现了 ILLMService 接口,组合了规划、断言和查询功能

View File

@@ -16,41 +16,40 @@ import (
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/builtin"
)
// WingsService implements ILLMService interface using external Wings API
type WingsService struct {
apiURL string
bizId string
isExternal bool
accessKey string
secretKey string
apiURL string
bizId string
accessKey string
secretKey string
}
// NewWingsService creates a new Wings service instance
func NewWingsService() ILLMService {
func NewWingsService() (ILLMService, error) {
// Check for environment variables for external API access
accessKey := ""
secretKey := ""
isExternal := false
apiURL := "https://vedem-algorithm.bytedance.net/algorithm/StepActionDecision"
apiURL := os.Getenv("VEDEM_WINGS_API_URL")
accessKey := os.Getenv("VEDEM_WINGS_AK")
secretKey := os.Getenv("VEDEM_WINGS_SK")
bizID := os.Getenv("VEDEM_WINGS_BIZ_ID")
// If environment variables are set, use external API with authentication
if ak, sk := os.Getenv("VEDEM_WINGS_AK"), os.Getenv("VEDEM_WINGS_SK"); ak != "" && sk != "" {
accessKey = ak
secretKey = sk
isExternal = true
apiURL = "https://vedem-algorithm.zijieapi.com/algorithm/StepActionDecision"
// check required env
if apiURL == "" {
return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_API_URL")
}
if bizID == "" {
return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_BIZ_ID")
}
return &WingsService{
apiURL: apiURL,
bizId: "489fdae44de048e0922a32834ea668af",
isExternal: isExternal,
accessKey: accessKey,
secretKey: secretKey,
}
apiURL: apiURL,
bizId: bizID,
accessKey: accessKey,
secretKey: secretKey,
}, nil
}
// Plan implements the ILLMService.Plan method using Wings API
@@ -346,8 +345,8 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st
}
// getDeviceInfoFromContext gets device info from context with fallback
func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo {
// Fallback to default device info
func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot string) WingsDeviceInfo {
// use default device info
return WingsDeviceInfo{
DeviceID: "default-device",
NowImage: screenshot,
@@ -393,7 +392,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ
httpReq.Header.Set("Accept", "application/json")
// Add authentication headers if using external API
if w.isExternal {
if w.accessKey != "" && w.secretKey != "" {
signToken := "UNSIGNED-PAYLOAD"
token := builtin.Sign("auth-v2", w.accessKey, w.secretKey, []byte(signToken))

View File

@@ -16,29 +16,47 @@ import (
)
func setupADBDriverExt(t *testing.T) *XTDriver {
device, err := NewAndroidDevice()
require.Nil(t, err)
device.Options.UIA2 = false
device.Options.LogOn = false
driver, err := device.NewDriver()
require.Nil(t, err)
driverExt, err := NewXTDriver(driver,
option.WithCVService(option.CVServiceTypeVEDEM),
// option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328),
)
config := DriverCacheConfig{
Platform: "android",
Serial: "", // Let it auto-detect the device serial
AIOptions: []option.AIServiceOption{
option.WithCVService(option.CVServiceTypeVEDEM),
option.WithLLMConfig(
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
WithPlannerModel(option.WINGS_SERVICE).
WithAsserterModel(option.WINGS_SERVICE),
),
},
}
driverExt, err := GetOrCreateXTDriver(config)
require.Nil(t, err)
return driverExt
}
func setupUIA2DriverExt(t *testing.T) *XTDriver {
device, err := NewAndroidDevice()
require.Nil(t, err)
device.Options.UIA2 = true // use uiautomator2 driver
device.Options.LogOn = false
driver, err := device.NewDriver()
require.Nil(t, err)
driverExt, err := NewXTDriver(driver,
option.WithCVService(option.CVServiceTypeVEDEM))
// Use cache mechanism with UIA2 enabled
deviceOpts := option.NewDeviceOptions(
option.WithPlatform("android"),
option.WithDeviceUIA2(true),
option.WithDeviceLogOn(false),
)
config := DriverCacheConfig{
Platform: "android",
Serial: "", // Let it auto-detect the device serial
DeviceOpts: deviceOpts,
AIOptions: []option.AIServiceOption{
option.WithCVService(option.CVServiceTypeVEDEM),
option.WithLLMConfig(
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
WithPlannerModel(option.WINGS_SERVICE).
WithAsserterModel(option.WINGS_SERVICE),
),
},
}
driverExt, err := GetOrCreateXTDriver(config)
require.Nil(t, err)
return driverExt
}

View File

@@ -3,7 +3,6 @@ package uixt
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/cloudwego/eino/schema"
@@ -16,7 +15,7 @@ import (
"github.com/httprunner/httprunner/v5/uixt/types"
)
// StartToGoal (original implementation - preserved)
// StartToGoal runs AI actions until task is finished or time limit is reached
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
options := option.NewActionOptions(opts...)
logger := log.Info().Str("prompt", prompt)
@@ -195,7 +194,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}
}
// AIAction with WingsService priority support
// AIAction performs AI-driven action and returns detailed execution result
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("prompt", prompt).Msg("performing AI action")
@@ -208,64 +207,26 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
return nil, err
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.LLMService != nil {
log.Info().Msg("using Wings service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "wings", opts...)
} else {
return nil, errors.New("no LLM service is initialized")
}
}
// executeAIAction executes AIAction using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Step 1: Plan next action and measure time
// Step 2: Plan next action and measure time
modelCallStartTime := time.Now()
var planningResult *ai.PlanningResult
var err error
// For Wings service, call Plan directly
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
Role: schema.User,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenResult.Base64,
},
},
},
},
Size: screenResult.Resolution,
}
planningResult, err = service.Plan(ctx, planningOpts)
if err != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: err.Error(),
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
}
planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...)
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
aiExecutionResult := &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
PlanningResult: planningResult,
}
if planningResult != nil {
aiExecutionResult.PlanningResult = &planningResult.PlanningResult
}
if err != nil {
aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "get next action failed")
}
// Step 2: Execute tool calls
// Step 3: Execute tool calls
for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil {
@@ -280,9 +241,11 @@ func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screen
return aiExecutionResult, nil
}
// AIAssert with WingsService priority support
// AIAssert performs AI-driven assertion and returns detailed execution result
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("assertion", assertion).Msg("performing AI assertion")
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
}
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
@@ -293,19 +256,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
return nil, err
}
if dExt.LLMService != nil {
log.Info().Msg("using Wings service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "wings", opts...)
} else {
return nil, errors.New("no LLM service is initialized")
}
}
// executeAIAssert executes AIAssert using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Step 1: Prepare context and options
ctx := context.Background()
assertResult := &AIExecutionResult{
Type: "assert",
ScreenshotElapsed: screenResult.Elapsed,
@@ -313,31 +263,32 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu
Resolution: &screenResult.Resolution,
}
// Step 2: Call service and measure time
// Step 2: Call model and measure time
modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{
Assertion: assertion,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
}
result, err := service.Assert(ctx, assertOpts)
result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
assertResult.AssertionResult = result
if err != nil {
assertResult.Error = err.Error()
return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType))
return assertResult, errors.Wrap(err, "AI assertion failed")
}
// For assertion failure, we should still return success but mark the assertion as failed
// This ensures that the AIResult (including screenshot and thought) is properly saved and displayed
if !result.Pass {
assertResult.Error = result.Thought
assertResult.Error = result.Thought // Store the failure reason for reporting
}
return assertResult, nil
}
// PlanNextAction (original implementation - preserved)
// PlanNextAction performs planning and returns unified planning information
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
@@ -412,7 +363,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
return planningResult, nil
}
// isTaskFinished (original implementation - preserved)
// isTaskFinished checks if the task is completed based on the planning result
func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
// Check if there are no tool calls (no actions to execute)
if len(planningResult.ToolCalls) == 0 {
@@ -431,7 +382,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
return false
}
// invokeToolCall (original implementation - preserved)
// invokeToolCall invokes the tool call
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments
arguments := make(map[string]interface{})
@@ -458,7 +409,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return nil
}
// PlanningExecutionResult (original implementation - preserved)
// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results
type PlanningExecutionResult struct {
ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName)
// Planning process information
@@ -475,7 +426,7 @@ type PlanningExecutionResult struct {
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
}
// AIExecutionResult (original implementation - preserved)
// AIExecutionResult represents a unified result structure for all AI operations
type AIExecutionResult struct {
Type string `json:"type"` // operation type: "query", "action", "assert"
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
@@ -492,7 +443,7 @@ type AIExecutionResult struct {
Error string `json:"error,omitempty"` // error message if operation failed
}
// SubActionResult (original implementation - preserved)
// SubActionResult represents a sub-action within a start_to_goal action
type SubActionResult struct {
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
@@ -507,7 +458,7 @@ type SessionData struct {
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
}
// AIQuery (original implementation - preserved)
// AIQuery performs AI-driven query and returns detailed execution result
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")

View File

@@ -23,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) {
assert.Nil(t, err)
}
//func TestDriverExt_StartToGoal(t *testing.T) {
// driver := setupDriverExt(t)
//
// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
// 2. 连接规则:
// - 两个相同的图案可以通过不超过三条直线连接。
// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
// - 连接线的转折次数不能超过两次。
// 3. 游戏界面:
// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
//
// 注意事项:
// 1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
// 2、不要连续 2 次点击同一个图案
// 3、不要犯重复的错误
// `
//
// userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
//
// //_, err := driver.StartToGoal(context.Background(), userInstruction)
// //assert.Nil(t, err)
//}
// TestDriverExt_StartToGoal calls StartToGoal with a long natural-language
// instruction (the rules of a tile-matching "Lianliankan" game, written in
// Chinese) and asserts that the call returns no error.
// NOTE(review): presumably requires a connected device and a configured LLM
// service via setupDriverExt — confirm before running in CI.
func TestDriverExt_StartToGoal(t *testing.T) {
driver := setupDriverExt(t)
// The prompt text below is runtime data sent to the model; keep it verbatim.
userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
2. 连接规则:
- 两个相同的图案可以通过不超过三条直线连接。
- 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
- 连接线的转折次数不能超过两次。
3. 游戏界面:
- 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
- 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
注意事项:
1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
2、不要连续 2 次点击同一个图案
3、不要犯重复的错误
`
// Append a final directive to the prompt (follow the rules, tap actions only).
userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
// Only the error is checked; the returned planning results are discarded.
_, err := driver.StartToGoal(context.Background(), userInstruction)
assert.Nil(t, err)
}
func TestDriverExt_PlanNextAction(t *testing.T) {
driver := setupDriverExt(t)
@@ -314,7 +314,7 @@ func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) {
if aiResult.PlanningResult != nil {
t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName)
assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api")
assert.Equal(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should use wings-api")
}
}
}

View File

@@ -337,32 +337,18 @@ func compressImageBufferWithOptions(raw *bytes.Buffer, enableResize bool, maxWid
newHeight = originalHeight
}
// Determine JPEG quality based on image size for optimal compression
jpegQuality := 60 // Default quality for better compression
if newWidth*newHeight > 500000 { // For very large images, use lower quality
jpegQuality = 50
} else if newWidth*newHeight < 100000 { // For small images, use higher quality
jpegQuality = 70
}
jpegQuality := 95
var buf bytes.Buffer
switch strings.ToLower(format) {
case "jpeg", "jpg":
// Use adaptive JPEG compression quality
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
case "png":
// Convert PNG to JPEG for better compression
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
case "gif":
// Keep GIF format but with reduced colors for better compression
err = gif.Encode(&buf, resizedImg, &gif.Options{NumColors: 64})
switch format {
case "jpeg", "jpg", "png":
// compress with compression rate of 95
jpegOptions := &jpeg.Options{Quality: jpegQuality}
err = jpeg.Encode(&buf, resizedImg, jpegOptions)
if err != nil {
return nil, err
}
default:
// Default to JPEG for unknown formats
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
}
if err != nil {
return nil, err
return nil, fmt.Errorf("unsupported image format: %s", format)
}
compressedSize := buf.Len()

View File

@@ -5,17 +5,28 @@ package uixt
import (
"testing"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/httprunner/httprunner/v5/uixt/option"
)
func setupHDCDriverExt(t *testing.T) *XTDriver {
device, err := NewHarmonyDevice()
require.Nil(t, err)
hdcDriver, err := NewHDCDriver(device)
require.Nil(t, err)
driverExt, err := NewXTDriver(hdcDriver, option.WithCVService(option.CVServiceTypeVEDEM))
// Use cache mechanism for Harmony HDC driver
config := DriverCacheConfig{
Platform: "harmony",
Serial: "", // Let it auto-detect the device serial
AIOptions: []option.AIServiceOption{
option.WithCVService(option.CVServiceTypeVEDEM),
option.WithLLMConfig(
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
WithPlannerModel(option.WINGS_SERVICE).
WithAsserterModel(option.WINGS_SERVICE),
),
},
}
driverExt, err := GetOrCreateXTDriver(config)
require.Nil(t, err)
return driverExt
}

View File

@@ -16,14 +16,29 @@ import (
)
func setupWDADriverExt(t *testing.T) *XTDriver {
device, err := NewIOSDevice(
option.WithWDAPort(8700),
option.WithWDAMjpegPort(8800),
option.WithWDALogOn(true))
require.Nil(t, err)
driver, err := device.NewDriver()
require.Nil(t, err)
driverExt, err := NewXTDriver(driver, option.WithCVService(option.CVServiceTypeVEDEM))
// Use cache mechanism with unified DeviceOptions for iOS WDA driver
deviceOpts := option.NewDeviceOptions(
option.WithPlatform("ios"),
option.WithDeviceWDAPort(8700),
option.WithDeviceWDAMjpegPort(8800),
option.WithDeviceLogOn(true),
)
config := DriverCacheConfig{
Platform: "ios",
Serial: "", // Let it auto-detect the device serial
DeviceOpts: deviceOpts,
AIOptions: []option.AIServiceOption{
option.WithCVService(option.CVServiceTypeVEDEM),
option.WithLLMConfig(
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
WithPlannerModel(option.WINGS_SERVICE).
WithAsserterModel(option.WINGS_SERVICE),
),
},
}
driverExt, err := GetOrCreateXTDriver(config)
require.Nil(t, err)
return driverExt
}

View File

@@ -61,9 +61,25 @@ func (s *MCPServer4XTDriver) GetToolByAction(actionMethod option.ActionName) Act
if s.actionToolMap == nil {
return nil
}
actionMethod = getActionNameByAlias(actionMethod)
return s.actionToolMap[actionMethod]
}
// getActionNameByAlias maps an alternative action name onto the action
// constant it stands for. Matching is case-insensitive. A name that is not a
// known alias is returned exactly as it was passed in.
func getActionNameByAlias(actionMethod option.ActionName) option.ActionName {
	switch alias := strings.ToLower(string(actionMethod)); alias {
	case "terminal_app":
		return option.ACTION_AppTerminate
	case "open_app":
		return option.ACTION_AppLaunch
	case "text":
		return option.ACTION_Input
	case "tap":
		return option.ACTION_TapXY
	}
	// Not an alias: hand the caller's value back untouched (original casing).
	return actionMethod
}
// registerTools registers all MCP tools.
func (s *MCPServer4XTDriver) registerTools() {
// Device Tool
@@ -71,7 +87,6 @@ func (s *MCPServer4XTDriver) registerTools() {
s.registerTool(&ToolSelectDevice{}) // SelectDevice
// Touch Tools
s.registerTool(&ToolTap{}) // tap
s.registerTool(&ToolTapXY{}) // tap xy
s.registerTool(&ToolTapAbsXY{}) // tap abs xy
s.registerTool(&ToolTapByOCR{}) // tap by OCR
@@ -89,7 +104,6 @@ func (s *MCPServer4XTDriver) registerTools() {
// Input Tools
s.registerTool(&ToolInput{})
s.registerTool(&ToolText{})
s.registerTool(&ToolBackspace{})
s.registerTool(&ToolSetIme{})
@@ -101,9 +115,7 @@ func (s *MCPServer4XTDriver) registerTools() {
// App Tools
s.registerTool(&ToolListPackages{}) // ListPackages
s.registerTool(&ToolLaunchApp{}) // LaunchApp
s.registerTool(&ToolOpenApp{}) // OpenApp
s.registerTool(&ToolTerminateApp{}) // TerminateApp
s.registerTool(&ToolTerminateAppNew{}) // TerminateApp (new)
s.registerTool(&ToolColdLaunch{}) // ColdLaunch
s.registerTool(&ToolAppInstall{}) // AppInstall
s.registerTool(&ToolAppUninstall{}) // AppUninstall

View File

@@ -79,7 +79,6 @@ func TestToolInterfaces(t *testing.T) {
tools := []ActionTool{
&ToolListAvailableDevices{},
&ToolSelectDevice{},
&ToolTap{},
&ToolTapXY{},
&ToolTapAbsXY{},
&ToolTapByOCR{},
@@ -93,7 +92,6 @@ func TestToolInterfaces(t *testing.T) {
&ToolSwipeToTapTexts{},
&ToolDrag{},
&ToolInput{},
&ToolText{},
&ToolBackspace{},
&ToolScreenShot{},
&ToolGetScreenSize{},
@@ -102,9 +100,7 @@ func TestToolInterfaces(t *testing.T) {
&ToolBack{},
&ToolListPackages{},
&ToolLaunchApp{},
&ToolOpenApp{},
&ToolTerminateApp{},
&ToolTerminateAppNew{},
&ToolColdLaunch{},
&ToolAppInstall{},
&ToolAppUninstall{},
@@ -246,45 +242,6 @@ func TestToolSelectDevice(t *testing.T) {
assert.Equal(t, string(option.ACTION_SelectDevice), request.Params.Name)
}
// TestToolTap checks ToolTap's metadata (name, description, options) and its
// conversion of a MobileAction into an MCP call-tool request, for both a
// valid coordinate pair and an invalid parameter type.
func TestToolTap(t *testing.T) {
	tapTool := &ToolTap{}

	// Metadata.
	assert.Equal(t, option.ACTION_Tap, tapTool.Name())
	assert.NotEmpty(t, tapTool.Description())
	toolOptions := tapTool.Options()
	assert.NotNil(t, toolOptions)

	// Valid params: an (x, y) pair plus a duration option.
	req, err := tapTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method:        option.ACTION_Tap,
		Params:        []float64{0.5, 0.6},
		ActionOptions: option.ActionOptions{Duration: 1.5},
	})
	assert.NoError(t, err)
	assert.Equal(t, string(option.ACTION_Tap), req.Params.Name)

	arguments := req.GetArguments()
	assert.Equal(t, 0.5, arguments["x"])
	assert.Equal(t, 0.6, arguments["y"])
	assert.Equal(t, 1.5, arguments["duration"])

	// Invalid params: a string where coordinates are expected must fail.
	_, err = tapTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_Tap,
		Params: "invalid",
	})
	assert.Error(t, err)
}
// TestToolTapXY tests the ToolTapXY implementation
func TestToolTapXY(t *testing.T) {
tool := &ToolTapXY{}
@@ -827,31 +784,6 @@ func TestToolInput(t *testing.T) {
assert.Equal(t, "Hello World", request.GetArguments()["text"])
}
// TestToolText checks ToolText's metadata (name, description, options) and
// that a string-param MobileAction is converted into a request carrying the
// text under the "text" argument.
func TestToolText(t *testing.T) {
	textTool := &ToolText{}

	// Metadata.
	assert.Equal(t, option.ACTION_Text, textTool.Name())
	assert.NotEmpty(t, textTool.Description())
	toolOptions := textTool.Options()
	assert.NotNil(t, toolOptions)

	// Valid params: the action's string payload becomes the "text" argument.
	req, err := textTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_Text,
		Params: "Hello World",
	})
	assert.NoError(t, err)
	assert.Equal(t, string(option.ACTION_Text), req.Params.Name)
	assert.Equal(t, "Hello World", req.GetArguments()["text"])
}
// TestToolBackspace tests the ToolBackspace implementation
func TestToolBackspace(t *testing.T) {
tool := &ToolBackspace{}
@@ -1086,39 +1018,6 @@ func TestToolLaunchApp(t *testing.T) {
assert.Error(t, err)
}
// TestToolOpenApp checks ToolOpenApp's metadata (name, description, options)
// and its MobileAction conversion: a string package name is accepted and
// placed under "packageName", while a non-string param is rejected.
func TestToolOpenApp(t *testing.T) {
	openTool := &ToolOpenApp{}

	// Metadata.
	assert.Equal(t, option.ACTION_OpenApp, openTool.Name())
	assert.NotEmpty(t, openTool.Description())
	toolOptions := openTool.Options()
	assert.NotNil(t, toolOptions)

	// Valid params: a package name string.
	req, err := openTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_OpenApp,
		Params: "com.example.app",
	})
	assert.NoError(t, err)
	assert.Equal(t, string(option.ACTION_OpenApp), req.Params.Name)
	assert.Equal(t, "com.example.app", req.GetArguments()["packageName"])

	// Invalid params: an int where a string is required must fail.
	_, err = openTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_OpenApp,
		Params: 123, // should be string
	})
	assert.Error(t, err)
}
// TestToolTerminateApp tests the ToolTerminateApp implementation
func TestToolTerminateApp(t *testing.T) {
tool := &ToolTerminateApp{}
@@ -1152,39 +1051,6 @@ func TestToolTerminateApp(t *testing.T) {
assert.Error(t, err)
}
// TestToolTerminateAppNew checks ToolTerminateAppNew's metadata (name,
// description, options) and its MobileAction conversion: a string package
// name is accepted and placed under "packageName", while a non-string param
// is rejected.
func TestToolTerminateAppNew(t *testing.T) {
	terminateTool := &ToolTerminateAppNew{}

	// Metadata.
	assert.Equal(t, option.ACTION_TerminateApp, terminateTool.Name())
	assert.NotEmpty(t, terminateTool.Description())
	toolOptions := terminateTool.Options()
	assert.NotNil(t, toolOptions)

	// Valid params: a package name string.
	req, err := terminateTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_TerminateApp,
		Params: "com.example.app",
	})
	assert.NoError(t, err)
	assert.Equal(t, string(option.ACTION_TerminateApp), req.Params.Name)
	assert.Equal(t, "com.example.app", req.GetArguments()["packageName"])

	// Invalid params: an int slice where a string is required must fail.
	_, err = terminateTool.ConvertActionToCallToolRequest(option.MobileAction{
		Method: option.ACTION_TerminateApp,
		Params: []int{1, 2, 3}, // should be string
	})
	assert.Error(t, err)
}
// TestToolColdLaunch tests the ToolColdLaunch implementation
func TestToolColdLaunch(t *testing.T) {
tool := &ToolColdLaunch{}

View File

@@ -395,131 +395,6 @@ func (t *ToolGetForegroundApp) ConvertActionToCallToolRequest(action option.Mobi
return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
}
// ToolOpenApp implements the open_app tool call.
//
// The struct's fields define the shape of the data returned by the tool.
type ToolOpenApp struct {
	PackageName string `json:"packageName" desc:"Package name of the opened app"`
}

// Name returns the action name this tool is registered under.
func (t *ToolOpenApp) Name() option.ActionName {
	return option.ACTION_OpenApp
}

// Description returns the human-readable summary of the tool.
func (t *ToolOpenApp) Description() string {
	return "Open an app on mobile device using its package name and wait for the app to load"
}

// Options returns the MCP tool options derived from ActionOptions for the
// open_app action.
func (t *ToolOpenApp) Options() []mcp.ToolOption {
	return (&option.ActionOptions{}).GetMCPOptions(option.ACTION_OpenApp)
}

// Implement returns the handler that serves an open_app request: it sets up
// the driver from the request arguments, parses the action options, requires
// a non-empty package name, and launches the app via AppLaunch.
func (t *ToolOpenApp) Implement() server.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		args := request.GetArguments()

		driver, err := setupXTDriver(ctx, args)
		if err != nil {
			return nil, fmt.Errorf("setup driver failed: %w", err)
		}

		opts, err := parseActionOptions(args)
		if err != nil {
			return nil, err
		}

		pkg := opts.PackageName
		if pkg == "" {
			return nil, fmt.Errorf("package_name is required")
		}

		// A launch failure is reported both as an MCP error response and as
		// the handler's error value.
		if launchErr := driver.AppLaunch(pkg); launchErr != nil {
			return NewMCPErrorResponse(fmt.Sprintf("Open app failed: %s", launchErr.Error())), launchErr
		}

		return NewMCPSuccessResponse(
			fmt.Sprintf("Successfully opened app: %s", pkg),
			&ToolOpenApp{PackageName: pkg},
		), nil
	}
}

// ConvertActionToCallToolRequest builds the MCP request for a MobileAction
// whose Params is the package name string; any other Params type is an error.
func (t *ToolOpenApp) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	packageName, isString := action.Params.(string)
	if !isString {
		return mcp.CallToolRequest{}, fmt.Errorf("invalid open app params: %v", action.Params)
	}
	return BuildMCPCallToolRequest(t.Name(), map[string]any{"packageName": packageName}, action), nil
}
// ToolTerminateAppNew implements the terminal_app tool call.
// The struct fields describe the data handed back to the caller on success.
type ToolTerminateAppNew struct {
	// PackageName is the package name that was passed to AppTerminate.
	PackageName string `json:"packageName" desc:"Package name of the terminated app"`
	// WasRunning carries the boolean result reported by AppTerminate.
	WasRunning bool `json:"wasRunning" desc:"Whether the app was actually running before termination"`
}

// Name reports the action name this tool is bound to.
func (t *ToolTerminateAppNew) Name() option.ActionName {
	return option.ACTION_TerminateApp
}

// Description returns the human-readable summary of the tool.
func (t *ToolTerminateAppNew) Description() string {
	return "Terminate a running app on mobile device using its package name"
}

// Options returns the MCP tool options generated for ACTION_TerminateApp.
func (t *ToolTerminateAppNew) Options() []mcp.ToolOption {
	return new(option.ActionOptions).GetMCPOptions(option.ACTION_TerminateApp)
}

// Implement returns the handler for the tool call. The handler sets up the
// driver from the request arguments, parses the action options, requires a
// non-empty package name and then terminates the app.
//
// Driver setup, option parsing and validation failures are returned as plain
// errors with a nil result; a termination failure is returned together with an
// MCP error response. When AppTerminate reports false without an error, a
// warning is logged and the call still succeeds with WasRunning set to false.
func (t *ToolTerminateAppNew) Implement() server.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		args := request.GetArguments()

		xtDriver, setupErr := setupXTDriver(ctx, args)
		if setupErr != nil {
			return nil, fmt.Errorf("setup driver failed: %w", setupErr)
		}

		req, parseErr := parseActionOptions(args)
		if parseErr != nil {
			return nil, parseErr
		}

		pkg := req.PackageName
		if pkg == "" {
			return nil, fmt.Errorf("package_name is required")
		}

		wasRunning, termErr := xtDriver.AppTerminate(pkg)
		if termErr != nil {
			return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", termErr.Error())), termErr
		}
		if !wasRunning {
			log.Warn().Str("packageName", pkg).Msg("app was not running")
		}

		result := ToolTerminateAppNew{PackageName: pkg, WasRunning: wasRunning}
		return NewMCPSuccessResponse(fmt.Sprintf("Successfully terminated app: %s", pkg), &result), nil
	}
}

// ConvertActionToCallToolRequest turns a MobileAction whose Params is a string
// package name into a call-tool request. Any other Params type is an error.
func (t *ToolTerminateAppNew) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	pkg, isString := action.Params.(string)
	if !isString {
		return mcp.CallToolRequest{}, fmt.Errorf("invalid terminate app params: %v", action.Params)
	}
	return BuildMCPCallToolRequest(t.Name(), map[string]any{"packageName": pkg}, action), nil
}
// ToolColdLaunch implements the cold_launch tool call.
type ToolColdLaunch struct {
// Return data fields - these define the structure of data returned by this tool

View File

@@ -124,65 +124,6 @@ func (t *ToolSetIme) ConvertActionToCallToolRequest(action option.MobileAction)
return mcp.CallToolRequest{}, fmt.Errorf("invalid set ime params: %v", action.Params)
}
// ToolText implements the text tool call.
// The struct fields describe the data handed back to the caller on success.
type ToolText struct {
	// Text is the text that was passed to Input.
	Text string `json:"text" desc:"Text that was input"`
}

// Name reports the action name this tool is bound to.
func (t *ToolText) Name() option.ActionName {
	return option.ACTION_Text
}

// Description returns the human-readable summary of the tool.
func (t *ToolText) Description() string {
	return "Input text into the currently focused element or input field"
}

// Options returns the MCP tool options generated for ACTION_Text.
func (t *ToolText) Options() []mcp.ToolOption {
	unifiedReq := &option.ActionOptions{}
	return unifiedReq.GetMCPOptions(option.ACTION_Text)
}

// Implement returns the handler for the tool call. The handler sets up the
// driver from the request arguments, parses the action options, requires a
// non-empty text and then forwards it to the driver's Input together with the
// options derived from the request.
//
// Driver setup, option parsing and validation failures are returned as plain
// errors with a nil result; an input failure is returned together with an MCP
// error response.
func (t *ToolText) Implement() server.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		arguments := request.GetArguments()
		driverExt, err := setupXTDriver(ctx, arguments)
		if err != nil {
			return nil, fmt.Errorf("setup driver failed: %w", err)
		}
		unifiedReq, err := parseActionOptions(arguments)
		if err != nil {
			return nil, err
		}
		if unifiedReq.Text == "" {
			return nil, fmt.Errorf("text is required")
		}
		opts := unifiedReq.Options()

		// Text input action logic
		if err = driverExt.Input(unifiedReq.Text, opts...); err != nil {
			return NewMCPErrorResponse(fmt.Sprintf("Text input failed: %s", err.Error())), err
		}

		message := fmt.Sprintf("Successfully input text: %s", unifiedReq.Text)
		returnData := ToolText{Text: unifiedReq.Text}
		return NewMCPSuccessResponse(message, &returnData), nil
	}
}

// ConvertActionToCallToolRequest turns a MobileAction into a call-tool request
// whose "text" argument is the %v rendering of action.Params, so non-string
// params such as numbers are accepted.
//
// Nil Params is rejected: formatting nil with %v would produce the literal
// string "<nil>", which is non-empty and would therefore be typed into the
// device instead of being reported as a missing parameter.
func (t *ToolText) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	if action.Params == nil {
		return mcp.CallToolRequest{}, fmt.Errorf("invalid text params: %v", action.Params)
	}
	arguments := map[string]any{
		"text": fmt.Sprintf("%v", action.Params),
	}
	return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
// ToolBackspace implements the backspace tool call.
type ToolBackspace struct {
// Return data fields - these define the structure of data returned by this tool

View File

@@ -84,79 +84,6 @@ func (t *ToolTapXY) ConvertActionToCallToolRequest(action option.MobileAction) (
return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
}
// ToolTap implements the tap tool call.
// The struct fields describe the data handed back to the caller on success.
type ToolTap struct {
	// X and Y are the coordinates that were passed to TapXY.
	X float64 `json:"x" desc:"X coordinate where tap was performed"`
	Y float64 `json:"y" desc:"Y coordinate where tap was performed"`
}

// Name reports the action name this tool is bound to.
func (t *ToolTap) Name() option.ActionName {
	return option.ACTION_Tap
}

// Description returns the human-readable summary of the tool.
func (t *ToolTap) Description() string {
	return "Tap on the screen at given relative coordinates (0.0-1.0 range)"
}

// Options returns the MCP tool options generated for ACTION_Tap.
func (t *ToolTap) Options() []mcp.ToolOption {
	unifiedReq := &option.ActionOptions{}
	return unifiedReq.GetMCPOptions(option.ACTION_Tap)
}

// Implement returns the handler for the tool call. The handler sets up the
// driver from the request arguments, parses the action options, checks that
// both coordinates were supplied and then forwards them to the driver's TapXY
// together with the options derived from the request.
//
// Driver setup, option parsing and validation failures are returned as plain
// errors with a nil result; a tap failure is returned together with an MCP
// error response.
func (t *ToolTap) Implement() server.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		arguments := request.GetArguments()
		driverExt, err := setupXTDriver(ctx, arguments)
		if err != nil {
			return nil, fmt.Errorf("setup driver failed: %w", err)
		}
		unifiedReq, err := parseActionOptions(arguments)
		if err != nil {
			return nil, err
		}

		// Build all options from request arguments
		opts := unifiedReq.Options()

		// Validate required parameters. A parsed value of 0 is ambiguous: it is
		// both the zero value of an absent field and a coordinate inside the
		// advertised 0.0-1.0 range. Only treat it as missing when the request
		// did not carry the argument at all (or carried null); any non-zero
		// parsed value is accepted exactly as before.
		coordMissing := func(key string, parsed float64) bool {
			if parsed != 0 {
				return false
			}
			raw, present := arguments[key]
			return !present || raw == nil
		}
		if coordMissing("x", unifiedReq.X) || coordMissing("y", unifiedReq.Y) {
			return nil, fmt.Errorf("x and y coordinates are required")
		}

		// Tap action logic
		if err = driverExt.TapXY(unifiedReq.X, unifiedReq.Y, opts...); err != nil {
			return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), err
		}

		message := fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y)
		returnData := ToolTap{
			X: unifiedReq.X,
			Y: unifiedReq.Y,
		}
		return NewMCPSuccessResponse(message, &returnData), nil
	}
}

// ConvertActionToCallToolRequest turns a MobileAction whose Params converts to
// exactly two float64 values (x, y) into a call-tool request. A positive
// Duration from the action options is forwarded as the "duration" argument.
// Params that do not convert, or that do not hold exactly two values, are an
// error.
func (t *ToolTap) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
	params, err := builtin.ConvertToFloat64Slice(action.Params)
	if err != nil || len(params) != 2 {
		return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
	}

	arguments := map[string]any{
		"x": params[0],
		"y": params[1],
	}
	// Add duration if available from action options
	if duration := action.ActionOptions.Duration; duration > 0 {
		arguments["duration"] = duration
	}
	return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
// ToolTapAbsXY implements the tap_abs_xy tool call.
type ToolTapAbsXY struct {
// Return data fields - these define the structure of data returned by this tool

View File

@@ -43,9 +43,7 @@ const (
ACTION_AppClear ActionName = "app_clear"
ACTION_AppStart ActionName = "app_start"
ACTION_AppLaunch ActionName = "app_launch" // 启动 app 并堵塞等待 app 首屏加载完成
ACTION_OpenApp ActionName = "open_app" // 启动 app 并堵塞等待 app 首屏加载完成
ACTION_AppTerminate ActionName = "app_terminate"
ACTION_TerminateApp ActionName = "terminal_app"
ACTION_ColdLaunch ActionName = "cold_launch"
ACTION_AppStop ActionName = "app_stop"
ACTION_ScreenShot ActionName = "screenshot"
@@ -61,7 +59,6 @@ const (
// UI handling
ACTION_Home ActionName = "home"
ACTION_Tap ActionName = "tap" // generic tap action
ACTION_TapXY ActionName = "tap_xy"
ACTION_TapAbsXY ActionName = "tap_abs_xy"
ACTION_TapByOCR ActionName = "tap_ocr"
@@ -73,7 +70,6 @@ const (
ACTION_SwipeCoordinate ActionName = "swipe_coordinate" // swipe by coordinates (fromX, fromY, toX, toY)
ACTION_Drag ActionName = "drag"
ACTION_Input ActionName = "input"
ACTION_Text ActionName = "text"
ACTION_PressButton ActionName = "press_button"
ACTION_Back ActionName = "back"
ACTION_KeyCode ActionName = "keycode"
@@ -605,7 +601,6 @@ func WithOutputSchema(schema interface{}) ActionOption {
func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
// Define field mappings for different action types
fieldMappings := map[ActionName][]string{
ACTION_Tap: {"platform", "serial", "x", "y", "duration"},
ACTION_TapXY: {"platform", "serial", "x", "y", "duration"},
ACTION_TapAbsXY: {"platform", "serial", "x", "y", "duration"},
ACTION_TapByOCR: {"platform", "serial", "text", "ignoreNotFoundError", "maxRetryTimes", "index", "regex", "tapRandomRect"},
@@ -616,12 +611,9 @@ func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
ACTION_Swipe: {"platform", "serial", "direction", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
ACTION_Drag: {"platform", "serial", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
ACTION_Input: {"platform", "serial", "text", "frequency"},
ACTION_Text: {"platform", "serial", "text", "frequency"},
ACTION_Backspace: {"platform", "serial", "count"},
ACTION_AppLaunch: {"platform", "serial", "packageName"},
ACTION_OpenApp: {"platform", "serial", "packageName"},
ACTION_AppTerminate: {"platform", "serial", "packageName"},
ACTION_TerminateApp: {"platform", "serial", "packageName"},
ACTION_ColdLaunch: {"platform", "serial", "packageName"},
ACTION_AppInstall: {"platform", "serial", "appUrl", "packageName"},
ACTION_AppUninstall: {"platform", "serial", "packageName"},

View File

@@ -58,6 +58,7 @@ const (
DOUBAO_SEED_1_6_250615 LLMServiceType = "doubao-seed-1.6-250615"
OPENAI_GPT_4O LLMServiceType = "openai/gpt-4o"
DEEPSEEK_R1_250528 LLMServiceType = "deepseek-r1-250528"
WINGS_SERVICE LLMServiceType = "wings-service"
)
func WithLLMService(modelType LLMServiceType) AIServiceOption {

View File

@@ -5,6 +5,7 @@ import "github.com/httprunner/httprunner/v5/pkg/gadb"
type AndroidDeviceOptions struct {
SerialNumber string `json:"serial,omitempty" yaml:"serial,omitempty"`
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
// adb
AdbServerHost string `json:"adb_server_host,omitempty" yaml:"adb_server_host,omitempty"`

View File

@@ -9,10 +9,11 @@ func NewBrowserDeviceOptions(opts ...BrowserDeviceOption) *BrowserDeviceOptions
}
type BrowserDeviceOptions struct {
BrowserID string `json:"browser_id,omitempty" yaml:"browser_id,omitempty"`
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
Width int `json:"width,omitempty" yaml:"width,omitempty"`
Height int `json:"height,omitempty" yaml:"height,omitempty"`
BrowserID string `json:"browser_id,omitempty" yaml:"browser_id,omitempty"`
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
Width int `json:"width,omitempty" yaml:"width,omitempty"`
Height int `json:"height,omitempty" yaml:"height,omitempty"`
}
func (dev *BrowserDeviceOptions) Options() (deviceOptions []BrowserDeviceOption) {

View File

@@ -8,8 +8,9 @@ const (
)
type HarmonyDeviceOptions struct {
ConnectKey string `json:"connect_key,omitempty" yaml:"connect_key,omitempty"`
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
ConnectKey string `json:"connect_key,omitempty" yaml:"connect_key,omitempty"`
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
}
func (dev *HarmonyDeviceOptions) Options() (deviceOptions []HarmonyDeviceOption) {

View File

@@ -6,6 +6,7 @@ type IOSDeviceOptions struct {
WDAPort int `json:"port,omitempty" yaml:"port,omitempty"` // WDA remote port
WDAMjpegPort int `json:"mjpeg_port,omitempty" yaml:"mjpeg_port,omitempty"` // WDA remote MJPEG port
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
// switch to iOS springboard before init WDA session
ResetHomeOnStartup bool `json:"reset_home_on_startup,omitempty" yaml:"reset_home_on_startup,omitempty"`

View File

@@ -29,31 +29,35 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
// Handle LLM service initialization
if services.LLMConfig != nil {
// Use advanced LLM configuration if provided
// Use advanced LLM service configuration if provided
driverExt.LLMService, err = ai.NewLLMServiceWithOptionConfig(services.LLMConfig)
if err != nil {
log.Warn().Err(err).Msg("init llm service with config failed, Wings service will be used")
log.Warn().Err(err).Msg("init llm service with config failed")
} else {
log.Info().Msg("LLM service initialized with advanced config")
}
} else if services.LLMService != "" {
// Fallback to simple LLM service if no config provided
// Use simple LLM service configuration if provided
driverExt.LLMService, err = ai.NewLLMService(services.LLMService)
if err != nil {
log.Warn().Err(err).Msg("init llm service failed, Wings service will be used")
log.Warn().Err(err).Msg("init llm service failed")
} else {
log.Info().Msg("LLM service initialized")
log.Info().Msg("LLM service initialized with simple config")
}
} else {
driverExt.LLMService = ai.NewWingsService()
log.Info().Msg("Wings service initialized")
// Use Wings service as fallback
driverExt.LLMService, err = ai.NewWingsService()
if err != nil {
log.Warn().Err(err).Msg("init Wings service failed")
} else {
log.Info().Msg("Wings service initialized")
}
}
// Register uixt MCP tools to LLM service if it exists
if driverExt.LLMService != nil {
// Register uixt MCP tools to LLM service if it exists
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err = driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools to LLM service")
}
@@ -66,7 +70,7 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
type XTDriver struct {
IDriver
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM (fallback service)
LLMService ai.ILLMService // LLM
services *option.AIServiceOptions // AI services options
client *MCPClient4XTDriver // MCP Client for built-in uixt server