mirror of
https://github.com/httprunner/httprunner.git
synced 2026-06-26 01:51:29 +08:00
Merge branch 'master' into session_refactor
This commit is contained in:
@@ -1 +1 @@
|
||||
v5.0.0-250717
|
||||
v5.0.0-250721
|
||||
|
||||
18
runner.go
18
runner.go
@@ -1077,6 +1077,24 @@ func (r *SessionRunner) GetTransactions() map[string]map[TransactionType]time.Ti
|
||||
return r.transactions
|
||||
}
|
||||
|
||||
// keep for compatibility
|
||||
func (r *SessionRunner) ignorePopup(osType string) bool {
|
||||
config := r.caseRunner.TestCase.Config.Get()
|
||||
if osType == string(StepTypeAndroid) && len(config.Android) > 0 {
|
||||
return config.Android[0].IgnorePopup
|
||||
}
|
||||
if osType == string(StepTypeIOS) && len(config.IOS) > 0 {
|
||||
return config.IOS[0].IgnorePopup
|
||||
}
|
||||
if osType == string(StepTypeHarmony) && len(config.Harmony) > 0 {
|
||||
return config.Harmony[0].IgnorePopup
|
||||
}
|
||||
if osType == string(stepTypeBrowser) && len(config.Browser) > 0 {
|
||||
return config.Browser[0].IgnorePopup
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// saveJSONCase saves the original JSON case content to the results directory
|
||||
func saveJSONCase(casePath string) error {
|
||||
// Read the original JSON case content
|
||||
|
||||
@@ -798,10 +798,11 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
|
||||
config = s.caseRunner.Config.Get()
|
||||
}
|
||||
// automatically handle pop-up windows after each step finishes; disabled by default
|
||||
// priority: step ignore_popup > config auto_popup_handler > step auto_popup_handler
|
||||
// priority: config ignore_popup > step ignore_popup > config auto_popup_handler > step auto_popup_handler
|
||||
shouldHandlePopup := false
|
||||
|
||||
if stepIgnorePopup {
|
||||
if s.ignorePopup(mobileStep.OSType) {
|
||||
shouldHandlePopup = false
|
||||
} else if stepIgnorePopup {
|
||||
// step level config, keep for compatibility
|
||||
shouldHandlePopup = false
|
||||
} else if config != nil && config.AutoPopupHandler {
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
)
|
||||
|
||||
@@ -24,43 +25,66 @@ func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) {
|
||||
|
||||
// NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component
|
||||
func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) {
|
||||
// Get model configs for each component
|
||||
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
combinedLLMService := &combinedLLMService{}
|
||||
|
||||
// Planner
|
||||
if config.PlannerModel == option.WINGS_SERVICE {
|
||||
planner, err := NewWingsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.planner = planner
|
||||
} else {
|
||||
plannerModelConfig, err := GetModelConfig(config.PlannerModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
planner, err := NewPlanner(context.Background(), plannerModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.planner = planner
|
||||
}
|
||||
|
||||
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// Asserter
|
||||
if config.AsserterModel == option.WINGS_SERVICE {
|
||||
asserter, err := NewWingsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.asserter = asserter
|
||||
} else {
|
||||
asserterModelConfig, err := GetModelConfig(config.AsserterModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.asserter = asserter
|
||||
}
|
||||
|
||||
querierModelConfig, err := GetModelConfig(config.QuerierModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// Querier
|
||||
if config.QuerierModel == option.WINGS_SERVICE {
|
||||
querier, err := NewWingsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.querier = querier
|
||||
} else {
|
||||
querierModelConfig, err := GetModelConfig(config.QuerierModel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
querier, err := NewQuerier(context.Background(), querierModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combinedLLMService.querier = querier
|
||||
}
|
||||
|
||||
// Create components with their respective model configs
|
||||
planner, err := NewPlanner(context.Background(), plannerModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
asserter, err := NewAsserter(context.Background(), asserterModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
querier, err := NewQuerier(context.Background(), querierModelConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &combinedLLMService{
|
||||
planner: planner,
|
||||
asserter: asserter,
|
||||
querier: querier,
|
||||
}, nil
|
||||
return combinedLLMService, nil
|
||||
}
|
||||
|
||||
// combinedLLMService implements the ILLMService interface, combining planning, assertion and query capabilities
|
||||
|
||||
@@ -16,41 +16,40 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/code"
|
||||
"github.com/httprunner/httprunner/v5/internal/builtin"
|
||||
)
|
||||
|
||||
// WingsService implements ILLMService interface using external Wings API
|
||||
type WingsService struct {
|
||||
apiURL string
|
||||
bizId string
|
||||
isExternal bool
|
||||
accessKey string
|
||||
secretKey string
|
||||
apiURL string
|
||||
bizId string
|
||||
accessKey string
|
||||
secretKey string
|
||||
}
|
||||
|
||||
// NewWingsService creates a new Wings service instance
|
||||
func NewWingsService() ILLMService {
|
||||
func NewWingsService() (ILLMService, error) {
|
||||
// Check for environment variables for external API access
|
||||
accessKey := ""
|
||||
secretKey := ""
|
||||
isExternal := false
|
||||
apiURL := "https://vedem-algorithm.bytedance.net/algorithm/StepActionDecision"
|
||||
apiURL := os.Getenv("VEDEM_WINGS_API_URL")
|
||||
accessKey := os.Getenv("VEDEM_WINGS_AK")
|
||||
secretKey := os.Getenv("VEDEM_WINGS_SK")
|
||||
bizID := os.Getenv("VEDEM_WINGS_BIZ_ID")
|
||||
|
||||
// If environment variables are set, use external API with authentication
|
||||
if ak, sk := os.Getenv("VEDEM_WINGS_AK"), os.Getenv("VEDEM_WINGS_SK"); ak != "" && sk != "" {
|
||||
accessKey = ak
|
||||
secretKey = sk
|
||||
isExternal = true
|
||||
apiURL = "https://vedem-algorithm.zijieapi.com/algorithm/StepActionDecision"
|
||||
// check required env
|
||||
if apiURL == "" {
|
||||
return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_API_URL")
|
||||
}
|
||||
if bizID == "" {
|
||||
return nil, errors.Wrap(code.LLMEnvMissedError, "missed env VEDEM_WINGS_BIZ_ID")
|
||||
}
|
||||
|
||||
return &WingsService{
|
||||
apiURL: apiURL,
|
||||
bizId: "489fdae44de048e0922a32834ea668af",
|
||||
isExternal: isExternal,
|
||||
accessKey: accessKey,
|
||||
secretKey: secretKey,
|
||||
}
|
||||
apiURL: apiURL,
|
||||
bizId: bizID,
|
||||
accessKey: accessKey,
|
||||
secretKey: secretKey,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Plan implements the ILLMService.Plan method using Wings API
|
||||
@@ -346,8 +345,8 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st
|
||||
}
|
||||
|
||||
// getDeviceInfoFromContext gets device info from context with fallback
|
||||
func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo {
|
||||
// Fallback to default device info
|
||||
func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot string) WingsDeviceInfo {
|
||||
// use default device info
|
||||
return WingsDeviceInfo{
|
||||
DeviceID: "default-device",
|
||||
NowImage: screenshot,
|
||||
@@ -393,7 +392,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ
|
||||
httpReq.Header.Set("Accept", "application/json")
|
||||
|
||||
// Add authentication headers if using external API
|
||||
if w.isExternal {
|
||||
if w.accessKey != "" && w.secretKey != "" {
|
||||
signToken := "UNSIGNED-PAYLOAD"
|
||||
token := builtin.Sign("auth-v2", w.accessKey, w.secretKey, []byte(signToken))
|
||||
|
||||
|
||||
@@ -16,29 +16,47 @@ import (
|
||||
)
|
||||
|
||||
func setupADBDriverExt(t *testing.T) *XTDriver {
|
||||
device, err := NewAndroidDevice()
|
||||
require.Nil(t, err)
|
||||
device.Options.UIA2 = false
|
||||
device.Options.LogOn = false
|
||||
driver, err := device.NewDriver()
|
||||
require.Nil(t, err)
|
||||
driverExt, err := NewXTDriver(driver,
|
||||
option.WithCVService(option.CVServiceTypeVEDEM),
|
||||
// option.WithLLMService(option.DOUBAO_1_5_UI_TARS_250328),
|
||||
)
|
||||
config := DriverCacheConfig{
|
||||
Platform: "android",
|
||||
Serial: "", // Let it auto-detect the device serial
|
||||
AIOptions: []option.AIServiceOption{
|
||||
option.WithCVService(option.CVServiceTypeVEDEM),
|
||||
option.WithLLMConfig(
|
||||
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
|
||||
WithPlannerModel(option.WINGS_SERVICE).
|
||||
WithAsserterModel(option.WINGS_SERVICE),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
driverExt, err := GetOrCreateXTDriver(config)
|
||||
require.Nil(t, err)
|
||||
return driverExt
|
||||
}
|
||||
|
||||
func setupUIA2DriverExt(t *testing.T) *XTDriver {
|
||||
device, err := NewAndroidDevice()
|
||||
require.Nil(t, err)
|
||||
device.Options.UIA2 = true // use uiautomator2 driver
|
||||
device.Options.LogOn = false
|
||||
driver, err := device.NewDriver()
|
||||
require.Nil(t, err)
|
||||
driverExt, err := NewXTDriver(driver,
|
||||
option.WithCVService(option.CVServiceTypeVEDEM))
|
||||
// Use cache mechanism with UIA2 enabled
|
||||
deviceOpts := option.NewDeviceOptions(
|
||||
option.WithPlatform("android"),
|
||||
option.WithDeviceUIA2(true),
|
||||
option.WithDeviceLogOn(false),
|
||||
)
|
||||
|
||||
config := DriverCacheConfig{
|
||||
Platform: "android",
|
||||
Serial: "", // Let it auto-detect the device serial
|
||||
DeviceOpts: deviceOpts,
|
||||
AIOptions: []option.AIServiceOption{
|
||||
option.WithCVService(option.CVServiceTypeVEDEM),
|
||||
option.WithLLMConfig(
|
||||
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
|
||||
WithPlannerModel(option.WINGS_SERVICE).
|
||||
WithAsserterModel(option.WINGS_SERVICE),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
driverExt, err := GetOrCreateXTDriver(config)
|
||||
require.Nil(t, err)
|
||||
return driverExt
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package uixt
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
@@ -16,7 +15,7 @@ import (
|
||||
"github.com/httprunner/httprunner/v5/uixt/types"
|
||||
)
|
||||
|
||||
// StartToGoal (original implementation - preserved)
|
||||
// StartToGoal runs AI actions until task is finished or time limit is reached
|
||||
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
|
||||
options := option.NewActionOptions(opts...)
|
||||
logger := log.Info().Str("prompt", prompt)
|
||||
@@ -195,7 +194,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
|
||||
}
|
||||
}
|
||||
|
||||
// AIAction with WingsService priority support
|
||||
// AIAction performs AI-driven action and returns detailed execution result
|
||||
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
log.Info().Str("prompt", prompt).Msg("performing AI action")
|
||||
|
||||
@@ -208,64 +207,26 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Step 2: Check if WingsService is available and prioritize it
|
||||
if dExt.LLMService != nil {
|
||||
log.Info().Msg("using Wings service for AI action")
|
||||
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "wings", opts...)
|
||||
} else {
|
||||
return nil, errors.New("no LLM service is initialized")
|
||||
}
|
||||
}
|
||||
|
||||
// executeAIAction executes AIAction using any AI service (generic implementation)
|
||||
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
// Step 1: Plan next action and measure time
|
||||
// Step 2: Plan next action and measure time
|
||||
modelCallStartTime := time.Now()
|
||||
|
||||
var planningResult *ai.PlanningResult
|
||||
var err error
|
||||
|
||||
// For Wings service, call Plan directly
|
||||
planningOpts := &ai.PlanningOptions{
|
||||
UserInstruction: prompt,
|
||||
Message: &schema.Message{
|
||||
Role: schema.User,
|
||||
MultiContent: []schema.ChatMessagePart{
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeImageURL,
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: screenResult.Base64,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Size: screenResult.Resolution,
|
||||
}
|
||||
|
||||
planningResult, err = service.Plan(ctx, planningOpts)
|
||||
if err != nil {
|
||||
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
||||
return &AIExecutionResult{
|
||||
Type: "action",
|
||||
ModelCallElapsed: modelCallElapsed,
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
ImagePath: screenResult.ImagePath,
|
||||
Resolution: &screenResult.Resolution,
|
||||
Error: err.Error(),
|
||||
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
|
||||
}
|
||||
planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...)
|
||||
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
||||
|
||||
aiExecutionResult := &AIExecutionResult{
|
||||
Type: "action",
|
||||
ModelCallElapsed: modelCallElapsed,
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
ImagePath: screenResult.ImagePath,
|
||||
Resolution: &screenResult.Resolution,
|
||||
PlanningResult: planningResult,
|
||||
}
|
||||
if planningResult != nil {
|
||||
aiExecutionResult.PlanningResult = &planningResult.PlanningResult
|
||||
}
|
||||
if err != nil {
|
||||
aiExecutionResult.Error = err.Error()
|
||||
return aiExecutionResult, errors.Wrap(err, "get next action failed")
|
||||
}
|
||||
|
||||
// Step 2: Execute tool calls
|
||||
// Step 3: Execute tool calls
|
||||
for _, toolCall := range planningResult.ToolCalls {
|
||||
err = dExt.invokeToolCall(ctx, toolCall, opts...)
|
||||
if err != nil {
|
||||
@@ -280,9 +241,11 @@ func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screen
|
||||
return aiExecutionResult, nil
|
||||
}
|
||||
|
||||
// AIAssert with WingsService priority support
|
||||
// AIAssert performs AI-driven assertion and returns detailed execution result
|
||||
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
log.Info().Str("assertion", assertion).Msg("performing AI assertion")
|
||||
if dExt.LLMService == nil {
|
||||
return nil, errors.New("LLM service is not initialized")
|
||||
}
|
||||
|
||||
// Step 1: Take screenshot and convert to base64
|
||||
screenResult, err := dExt.GetScreenResult(
|
||||
@@ -293,19 +256,6 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if dExt.LLMService != nil {
|
||||
log.Info().Msg("using Wings service for AI assertion")
|
||||
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "wings", opts...)
|
||||
} else {
|
||||
return nil, errors.New("no LLM service is initialized")
|
||||
}
|
||||
}
|
||||
|
||||
// executeAIAssert executes AIAssert using any AI service (generic implementation)
|
||||
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
// Step 1: Prepare context and options
|
||||
ctx := context.Background()
|
||||
|
||||
assertResult := &AIExecutionResult{
|
||||
Type: "assert",
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
@@ -313,31 +263,32 @@ func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResu
|
||||
Resolution: &screenResult.Resolution,
|
||||
}
|
||||
|
||||
// Step 2: Call service and measure time
|
||||
// Step 2: Call model and measure time
|
||||
modelCallStartTime := time.Now()
|
||||
assertOpts := &ai.AssertOptions{
|
||||
Assertion: assertion,
|
||||
Screenshot: screenResult.Base64,
|
||||
Size: screenResult.Resolution,
|
||||
}
|
||||
|
||||
result, err := service.Assert(ctx, assertOpts)
|
||||
result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
|
||||
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
|
||||
assertResult.AssertionResult = result
|
||||
|
||||
if err != nil {
|
||||
assertResult.Error = err.Error()
|
||||
return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType))
|
||||
return assertResult, errors.Wrap(err, "AI assertion failed")
|
||||
}
|
||||
|
||||
// For assertion failure, we should still return success but mark the assertion as failed
|
||||
// This ensures that the AIResult (including screenshot and thought) is properly saved and displayed
|
||||
if !result.Pass {
|
||||
assertResult.Error = result.Thought
|
||||
assertResult.Error = result.Thought // Store the failure reason for reporting
|
||||
}
|
||||
|
||||
return assertResult, nil
|
||||
}
|
||||
|
||||
// PlanNextAction (original implementation - preserved)
|
||||
// PlanNextAction performs planning and returns unified planning information
|
||||
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
|
||||
if dExt.LLMService == nil {
|
||||
return nil, errors.New("LLM service is not initialized")
|
||||
@@ -412,7 +363,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
|
||||
return planningResult, nil
|
||||
}
|
||||
|
||||
// isTaskFinished (original implementation - preserved)
|
||||
// isTaskFinished checks if the task is completed based on the planning result
|
||||
func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
|
||||
// Check if there are no tool calls (no actions to execute)
|
||||
if len(planningResult.ToolCalls) == 0 {
|
||||
@@ -431,7 +382,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
|
||||
return false
|
||||
}
|
||||
|
||||
// invokeToolCall (original implementation - preserved)
|
||||
// invokeToolCall invokes the tool call
|
||||
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
|
||||
// Parse arguments
|
||||
arguments := make(map[string]interface{})
|
||||
@@ -458,7 +409,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
|
||||
return nil
|
||||
}
|
||||
|
||||
// PlanningExecutionResult (original implementation - preserved)
|
||||
// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results
|
||||
type PlanningExecutionResult struct {
|
||||
ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName)
|
||||
// Planning process information
|
||||
@@ -475,7 +426,7 @@ type PlanningExecutionResult struct {
|
||||
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
|
||||
}
|
||||
|
||||
// AIExecutionResult (original implementation - preserved)
|
||||
// AIExecutionResult represents a unified result structure for all AI operations
|
||||
type AIExecutionResult struct {
|
||||
Type string `json:"type"` // operation type: "query", "action", "assert"
|
||||
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
|
||||
@@ -492,7 +443,7 @@ type AIExecutionResult struct {
|
||||
Error string `json:"error,omitempty"` // error message if operation failed
|
||||
}
|
||||
|
||||
// SubActionResult (original implementation - preserved)
|
||||
// SubActionResult represents a sub-action within a start_to_goal action
|
||||
type SubActionResult struct {
|
||||
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
|
||||
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
|
||||
@@ -507,7 +458,7 @@ type SessionData struct {
|
||||
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
|
||||
}
|
||||
|
||||
// AIQuery (original implementation - preserved)
|
||||
// AIQuery performs AI-driven query and returns detailed execution result
|
||||
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
if dExt.LLMService == nil {
|
||||
return nil, errors.New("LLM service is not initialized")
|
||||
|
||||
@@ -23,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) {
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
|
||||
//func TestDriverExt_StartToGoal(t *testing.T) {
|
||||
// driver := setupDriverExt(t)
|
||||
//
|
||||
// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
|
||||
// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
|
||||
// 2. 连接规则:
|
||||
// - 两个相同的图案可以通过不超过三条直线连接。
|
||||
// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
|
||||
// - 连接线的转折次数不能超过两次。
|
||||
// 3. 游戏界面:
|
||||
// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
|
||||
// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
|
||||
// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
|
||||
// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
|
||||
// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
|
||||
//
|
||||
// 注意事项:
|
||||
// 1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败
|
||||
// 2、不要连续 2 次点击同一个图案
|
||||
// 3、不要犯重复的错误
|
||||
// `
|
||||
//
|
||||
// userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作"
|
||||
//
|
||||
// //_, err := driver.StartToGoal(context.Background(), userInstruction)
|
||||
// //assert.Nil(t, err)
|
||||
//}
|
||||
func TestDriverExt_StartToGoal(t *testing.T) {
|
||||
driver := setupDriverExt(t)
|
||||
|
||||
userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
|
||||
1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
|
||||
2. 连接规则:
|
||||
- 两个相同的图案可以通过不超过三条直线连接。
|
||||
- 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
|
||||
- 连接线的转折次数不能超过两次。
|
||||
3. 游戏界面:
|
||||
- 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
|
||||
- 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
|
||||
4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
|
||||
5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
|
||||
6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
|
||||
|
||||
注意事项:
|
||||
1、当连接错误时,顶部的红心会减少一个,需及时调整策略,避免红心变为0个后游戏失败
|
||||
2、不要连续 2 次点击同一个图案
|
||||
3、不要犯重复的错误
|
||||
`
|
||||
|
||||
userInstruction += "\n\n请严格按照以上游戏规则,开始游戏;注意,请只做点击操作"
|
||||
|
||||
_, err := driver.StartToGoal(context.Background(), userInstruction)
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
|
||||
func TestDriverExt_PlanNextAction(t *testing.T) {
|
||||
driver := setupDriverExt(t)
|
||||
@@ -314,7 +314,7 @@ func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) {
|
||||
if aiResult.PlanningResult != nil {
|
||||
t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName)
|
||||
|
||||
assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api")
|
||||
assert.Equal(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should use wings-api")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -337,32 +337,18 @@ func compressImageBufferWithOptions(raw *bytes.Buffer, enableResize bool, maxWid
|
||||
newHeight = originalHeight
|
||||
}
|
||||
|
||||
// Determine JPEG quality based on image size for optimal compression
|
||||
jpegQuality := 60 // Default quality for better compression
|
||||
if newWidth*newHeight > 500000 { // For very large images, use lower quality
|
||||
jpegQuality = 50
|
||||
} else if newWidth*newHeight < 100000 { // For small images, use higher quality
|
||||
jpegQuality = 70
|
||||
}
|
||||
|
||||
jpegQuality := 95
|
||||
var buf bytes.Buffer
|
||||
switch strings.ToLower(format) {
|
||||
case "jpeg", "jpg":
|
||||
// Use adaptive JPEG compression quality
|
||||
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
|
||||
case "png":
|
||||
// Convert PNG to JPEG for better compression
|
||||
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
|
||||
case "gif":
|
||||
// Keep GIF format but with reduced colors for better compression
|
||||
err = gif.Encode(&buf, resizedImg, &gif.Options{NumColors: 64})
|
||||
switch format {
|
||||
case "jpeg", "jpg", "png":
|
||||
// compress with compression rate of 95
|
||||
jpegOptions := &jpeg.Options{Quality: jpegQuality}
|
||||
err = jpeg.Encode(&buf, resizedImg, jpegOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
// Default to JPEG for unknown formats
|
||||
err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality})
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("unsupported image format: %s", format)
|
||||
}
|
||||
|
||||
compressedSize := buf.Len()
|
||||
|
||||
@@ -5,17 +5,28 @@ package uixt
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
)
|
||||
|
||||
func setupHDCDriverExt(t *testing.T) *XTDriver {
|
||||
device, err := NewHarmonyDevice()
|
||||
require.Nil(t, err)
|
||||
hdcDriver, err := NewHDCDriver(device)
|
||||
require.Nil(t, err)
|
||||
driverExt, err := NewXTDriver(hdcDriver, option.WithCVService(option.CVServiceTypeVEDEM))
|
||||
// Use cache mechanism for Harmony HDC driver
|
||||
config := DriverCacheConfig{
|
||||
Platform: "harmony",
|
||||
Serial: "", // Let it auto-detect the device serial
|
||||
AIOptions: []option.AIServiceOption{
|
||||
option.WithCVService(option.CVServiceTypeVEDEM),
|
||||
option.WithLLMConfig(
|
||||
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
|
||||
WithPlannerModel(option.WINGS_SERVICE).
|
||||
WithAsserterModel(option.WINGS_SERVICE),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
driverExt, err := GetOrCreateXTDriver(config)
|
||||
require.Nil(t, err)
|
||||
return driverExt
|
||||
}
|
||||
|
||||
@@ -16,14 +16,29 @@ import (
|
||||
)
|
||||
|
||||
func setupWDADriverExt(t *testing.T) *XTDriver {
|
||||
device, err := NewIOSDevice(
|
||||
option.WithWDAPort(8700),
|
||||
option.WithWDAMjpegPort(8800),
|
||||
option.WithWDALogOn(true))
|
||||
require.Nil(t, err)
|
||||
driver, err := device.NewDriver()
|
||||
require.Nil(t, err)
|
||||
driverExt, err := NewXTDriver(driver, option.WithCVService(option.CVServiceTypeVEDEM))
|
||||
// Use cache mechanism with unified DeviceOptions for iOS WDA driver
|
||||
deviceOpts := option.NewDeviceOptions(
|
||||
option.WithPlatform("ios"),
|
||||
option.WithDeviceWDAPort(8700),
|
||||
option.WithDeviceWDAMjpegPort(8800),
|
||||
option.WithDeviceLogOn(true),
|
||||
)
|
||||
|
||||
config := DriverCacheConfig{
|
||||
Platform: "ios",
|
||||
Serial: "", // Let it auto-detect the device serial
|
||||
DeviceOpts: deviceOpts,
|
||||
AIOptions: []option.AIServiceOption{
|
||||
option.WithCVService(option.CVServiceTypeVEDEM),
|
||||
option.WithLLMConfig(
|
||||
option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328).
|
||||
WithPlannerModel(option.WINGS_SERVICE).
|
||||
WithAsserterModel(option.WINGS_SERVICE),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
driverExt, err := GetOrCreateXTDriver(config)
|
||||
require.Nil(t, err)
|
||||
return driverExt
|
||||
}
|
||||
|
||||
@@ -61,9 +61,25 @@ func (s *MCPServer4XTDriver) GetToolByAction(actionMethod option.ActionName) Act
|
||||
if s.actionToolMap == nil {
|
||||
return nil
|
||||
}
|
||||
actionMethod = getActionNameByAlias(actionMethod)
|
||||
return s.actionToolMap[actionMethod]
|
||||
}
|
||||
|
||||
func getActionNameByAlias(actionMethod option.ActionName) option.ActionName {
|
||||
switch strings.ToLower(string(actionMethod)) {
|
||||
case "terminal_app":
|
||||
return option.ACTION_AppTerminate
|
||||
case "open_app":
|
||||
return option.ACTION_AppLaunch
|
||||
case "text":
|
||||
return option.ACTION_Input
|
||||
case "tap":
|
||||
return option.ACTION_TapXY
|
||||
default:
|
||||
return actionMethod
|
||||
}
|
||||
}
|
||||
|
||||
// registerTools registers all MCP tools.
|
||||
func (s *MCPServer4XTDriver) registerTools() {
|
||||
// Device Tool
|
||||
@@ -71,7 +87,6 @@ func (s *MCPServer4XTDriver) registerTools() {
|
||||
s.registerTool(&ToolSelectDevice{}) // SelectDevice
|
||||
|
||||
// Touch Tools
|
||||
s.registerTool(&ToolTap{}) // tap
|
||||
s.registerTool(&ToolTapXY{}) // tap xy
|
||||
s.registerTool(&ToolTapAbsXY{}) // tap abs xy
|
||||
s.registerTool(&ToolTapByOCR{}) // tap by OCR
|
||||
@@ -89,7 +104,6 @@ func (s *MCPServer4XTDriver) registerTools() {
|
||||
|
||||
// Input Tools
|
||||
s.registerTool(&ToolInput{})
|
||||
s.registerTool(&ToolText{})
|
||||
s.registerTool(&ToolBackspace{})
|
||||
s.registerTool(&ToolSetIme{})
|
||||
|
||||
@@ -101,9 +115,7 @@ func (s *MCPServer4XTDriver) registerTools() {
|
||||
// App Tools
|
||||
s.registerTool(&ToolListPackages{}) // ListPackages
|
||||
s.registerTool(&ToolLaunchApp{}) // LaunchApp
|
||||
s.registerTool(&ToolOpenApp{}) // OpenApp
|
||||
s.registerTool(&ToolTerminateApp{}) // TerminateApp
|
||||
s.registerTool(&ToolTerminateAppNew{}) // TerminateApp (new)
|
||||
s.registerTool(&ToolColdLaunch{}) // ColdLaunch
|
||||
s.registerTool(&ToolAppInstall{}) // AppInstall
|
||||
s.registerTool(&ToolAppUninstall{}) // AppUninstall
|
||||
|
||||
@@ -79,7 +79,6 @@ func TestToolInterfaces(t *testing.T) {
|
||||
tools := []ActionTool{
|
||||
&ToolListAvailableDevices{},
|
||||
&ToolSelectDevice{},
|
||||
&ToolTap{},
|
||||
&ToolTapXY{},
|
||||
&ToolTapAbsXY{},
|
||||
&ToolTapByOCR{},
|
||||
@@ -93,7 +92,6 @@ func TestToolInterfaces(t *testing.T) {
|
||||
&ToolSwipeToTapTexts{},
|
||||
&ToolDrag{},
|
||||
&ToolInput{},
|
||||
&ToolText{},
|
||||
&ToolBackspace{},
|
||||
&ToolScreenShot{},
|
||||
&ToolGetScreenSize{},
|
||||
@@ -102,9 +100,7 @@ func TestToolInterfaces(t *testing.T) {
|
||||
&ToolBack{},
|
||||
&ToolListPackages{},
|
||||
&ToolLaunchApp{},
|
||||
&ToolOpenApp{},
|
||||
&ToolTerminateApp{},
|
||||
&ToolTerminateAppNew{},
|
||||
&ToolColdLaunch{},
|
||||
&ToolAppInstall{},
|
||||
&ToolAppUninstall{},
|
||||
@@ -246,45 +242,6 @@ func TestToolSelectDevice(t *testing.T) {
|
||||
assert.Equal(t, string(option.ACTION_SelectDevice), request.Params.Name)
|
||||
}
|
||||
|
||||
// TestToolTap tests the ToolTap implementation
|
||||
func TestToolTap(t *testing.T) {
|
||||
tool := &ToolTap{}
|
||||
|
||||
// Test Name
|
||||
assert.Equal(t, option.ACTION_Tap, tool.Name())
|
||||
|
||||
// Test Description
|
||||
assert.NotEmpty(t, tool.Description())
|
||||
|
||||
// Test Options
|
||||
options := tool.Options()
|
||||
assert.NotNil(t, options)
|
||||
|
||||
// Test ConvertActionToCallToolRequest with valid params
|
||||
action := option.MobileAction{
|
||||
Method: option.ACTION_Tap,
|
||||
Params: []float64{0.5, 0.6},
|
||||
ActionOptions: option.ActionOptions{
|
||||
Duration: 1.5,
|
||||
},
|
||||
}
|
||||
request, err := tool.ConvertActionToCallToolRequest(action)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, string(option.ACTION_Tap), request.Params.Name)
|
||||
args := request.GetArguments()
|
||||
assert.Equal(t, 0.5, args["x"])
|
||||
assert.Equal(t, 0.6, args["y"])
|
||||
assert.Equal(t, 1.5, args["duration"])
|
||||
|
||||
// Test ConvertActionToCallToolRequest with invalid params
|
||||
invalidAction := option.MobileAction{
|
||||
Method: option.ACTION_Tap,
|
||||
Params: "invalid",
|
||||
}
|
||||
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
// TestToolTapXY tests the ToolTapXY implementation
|
||||
func TestToolTapXY(t *testing.T) {
|
||||
tool := &ToolTapXY{}
|
||||
@@ -827,31 +784,6 @@ func TestToolInput(t *testing.T) {
|
||||
assert.Equal(t, "Hello World", request.GetArguments()["text"])
|
||||
}
|
||||
|
||||
// TestToolText tests the ToolText implementation
|
||||
func TestToolText(t *testing.T) {
|
||||
tool := &ToolText{}
|
||||
|
||||
// Test Name
|
||||
assert.Equal(t, option.ACTION_Text, tool.Name())
|
||||
|
||||
// Test Description
|
||||
assert.NotEmpty(t, tool.Description())
|
||||
|
||||
// Test Options
|
||||
options := tool.Options()
|
||||
assert.NotNil(t, options)
|
||||
|
||||
// Test ConvertActionToCallToolRequest with valid params
|
||||
action := option.MobileAction{
|
||||
Method: option.ACTION_Text,
|
||||
Params: "Hello World",
|
||||
}
|
||||
request, err := tool.ConvertActionToCallToolRequest(action)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, string(option.ACTION_Text), request.Params.Name)
|
||||
assert.Equal(t, "Hello World", request.GetArguments()["text"])
|
||||
}
|
||||
|
||||
// TestToolBackspace tests the ToolBackspace implementation
|
||||
func TestToolBackspace(t *testing.T) {
|
||||
tool := &ToolBackspace{}
|
||||
@@ -1086,39 +1018,6 @@ func TestToolLaunchApp(t *testing.T) {
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
// TestToolOpenApp tests the ToolOpenApp implementation
|
||||
func TestToolOpenApp(t *testing.T) {
|
||||
tool := &ToolOpenApp{}
|
||||
|
||||
// Test Name
|
||||
assert.Equal(t, option.ACTION_OpenApp, tool.Name())
|
||||
|
||||
// Test Description
|
||||
assert.NotEmpty(t, tool.Description())
|
||||
|
||||
// Test Options
|
||||
options := tool.Options()
|
||||
assert.NotNil(t, options)
|
||||
|
||||
// Test ConvertActionToCallToolRequest with valid params
|
||||
action := option.MobileAction{
|
||||
Method: option.ACTION_OpenApp,
|
||||
Params: "com.example.app",
|
||||
}
|
||||
request, err := tool.ConvertActionToCallToolRequest(action)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, string(option.ACTION_OpenApp), request.Params.Name)
|
||||
assert.Equal(t, "com.example.app", request.GetArguments()["packageName"])
|
||||
|
||||
// Test ConvertActionToCallToolRequest with invalid params
|
||||
invalidAction := option.MobileAction{
|
||||
Method: option.ACTION_OpenApp,
|
||||
Params: 123, // should be string
|
||||
}
|
||||
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
// TestToolTerminateApp tests the ToolTerminateApp implementation
|
||||
func TestToolTerminateApp(t *testing.T) {
|
||||
tool := &ToolTerminateApp{}
|
||||
@@ -1152,39 +1051,6 @@ func TestToolTerminateApp(t *testing.T) {
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
// TestToolTerminateAppNew tests the ToolTerminateAppNew implementation
|
||||
func TestToolTerminateAppNew(t *testing.T) {
|
||||
tool := &ToolTerminateAppNew{}
|
||||
|
||||
// Test Name
|
||||
assert.Equal(t, option.ACTION_TerminateApp, tool.Name())
|
||||
|
||||
// Test Description
|
||||
assert.NotEmpty(t, tool.Description())
|
||||
|
||||
// Test Options
|
||||
options := tool.Options()
|
||||
assert.NotNil(t, options)
|
||||
|
||||
// Test ConvertActionToCallToolRequest with valid params
|
||||
action := option.MobileAction{
|
||||
Method: option.ACTION_TerminateApp,
|
||||
Params: "com.example.app",
|
||||
}
|
||||
request, err := tool.ConvertActionToCallToolRequest(action)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, string(option.ACTION_TerminateApp), request.Params.Name)
|
||||
assert.Equal(t, "com.example.app", request.GetArguments()["packageName"])
|
||||
|
||||
// Test ConvertActionToCallToolRequest with invalid params
|
||||
invalidAction := option.MobileAction{
|
||||
Method: option.ACTION_TerminateApp,
|
||||
Params: []int{1, 2, 3}, // should be string
|
||||
}
|
||||
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
// TestToolColdLaunch tests the ToolColdLaunch implementation
|
||||
func TestToolColdLaunch(t *testing.T) {
|
||||
tool := &ToolColdLaunch{}
|
||||
|
||||
@@ -395,131 +395,6 @@ func (t *ToolGetForegroundApp) ConvertActionToCallToolRequest(action option.Mobi
|
||||
return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
|
||||
}
|
||||
|
||||
// ToolOpenApp is the MCP tool for the open_app action.
// Its fields describe the data the tool returns to the caller.
type ToolOpenApp struct {
	// PackageName is the package name of the app that was opened.
	PackageName string `json:"packageName" desc:"Package name of the opened app"`
}
|
||||
|
||||
func (t *ToolOpenApp) Name() option.ActionName {
|
||||
return option.ACTION_OpenApp
|
||||
}
|
||||
|
||||
func (t *ToolOpenApp) Description() string {
|
||||
return "Open an app on mobile device using its package name and wait for the app to load"
|
||||
}
|
||||
|
||||
func (t *ToolOpenApp) Options() []mcp.ToolOption {
|
||||
unifiedReq := &option.ActionOptions{}
|
||||
return unifiedReq.GetMCPOptions(option.ACTION_OpenApp)
|
||||
}
|
||||
|
||||
func (t *ToolOpenApp) Implement() server.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.GetArguments()
|
||||
driverExt, err := setupXTDriver(ctx, arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setup driver failed: %w", err)
|
||||
}
|
||||
|
||||
unifiedReq, err := parseActionOptions(arguments)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if unifiedReq.PackageName == "" {
|
||||
return nil, fmt.Errorf("package_name is required")
|
||||
}
|
||||
|
||||
// Open app action logic
|
||||
err = driverExt.AppLaunch(unifiedReq.PackageName)
|
||||
if err != nil {
|
||||
return NewMCPErrorResponse(fmt.Sprintf("Open app failed: %s", err.Error())), err
|
||||
}
|
||||
|
||||
message := fmt.Sprintf("Successfully opened app: %s", unifiedReq.PackageName)
|
||||
returnData := ToolOpenApp{PackageName: unifiedReq.PackageName}
|
||||
|
||||
return NewMCPSuccessResponse(message, &returnData), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *ToolOpenApp) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
|
||||
if packageName, ok := action.Params.(string); ok {
|
||||
arguments := map[string]any{
|
||||
"packageName": packageName,
|
||||
}
|
||||
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
|
||||
}
|
||||
return mcp.CallToolRequest{}, fmt.Errorf("invalid open app params: %v", action.Params)
|
||||
}
|
||||
|
||||
// ToolTerminateAppNew is the MCP tool for the terminal_app action.
// Its fields describe the data the tool returns to the caller.
type ToolTerminateAppNew struct {
	// PackageName is the package name of the app that was terminated.
	PackageName string `json:"packageName" desc:"Package name of the terminated app"`
	// WasRunning reports whether the app was running before termination.
	WasRunning bool `json:"wasRunning" desc:"Whether the app was actually running before termination"`
}
|
||||
|
||||
func (t *ToolTerminateAppNew) Name() option.ActionName {
|
||||
return option.ACTION_TerminateApp
|
||||
}
|
||||
|
||||
func (t *ToolTerminateAppNew) Description() string {
|
||||
return "Terminate a running app on mobile device using its package name"
|
||||
}
|
||||
|
||||
func (t *ToolTerminateAppNew) Options() []mcp.ToolOption {
|
||||
unifiedReq := &option.ActionOptions{}
|
||||
return unifiedReq.GetMCPOptions(option.ACTION_TerminateApp)
|
||||
}
|
||||
|
||||
func (t *ToolTerminateAppNew) Implement() server.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.GetArguments()
|
||||
driverExt, err := setupXTDriver(ctx, arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setup driver failed: %w", err)
|
||||
}
|
||||
|
||||
unifiedReq, err := parseActionOptions(arguments)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if unifiedReq.PackageName == "" {
|
||||
return nil, fmt.Errorf("package_name is required")
|
||||
}
|
||||
|
||||
// Terminate app action logic
|
||||
success, err := driverExt.AppTerminate(unifiedReq.PackageName)
|
||||
if err != nil {
|
||||
return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", err.Error())), err
|
||||
}
|
||||
if !success {
|
||||
log.Warn().Str("packageName", unifiedReq.PackageName).Msg("app was not running")
|
||||
}
|
||||
|
||||
message := fmt.Sprintf("Successfully terminated app: %s", unifiedReq.PackageName)
|
||||
returnData := ToolTerminateAppNew{
|
||||
PackageName: unifiedReq.PackageName,
|
||||
WasRunning: success,
|
||||
}
|
||||
|
||||
return NewMCPSuccessResponse(message, &returnData), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *ToolTerminateAppNew) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
|
||||
if packageName, ok := action.Params.(string); ok {
|
||||
arguments := map[string]any{
|
||||
"packageName": packageName,
|
||||
}
|
||||
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
|
||||
}
|
||||
return mcp.CallToolRequest{}, fmt.Errorf("invalid terminate app params: %v", action.Params)
|
||||
}
|
||||
|
||||
// ToolColdLaunch implements the cold_launch tool call.
|
||||
type ToolColdLaunch struct {
|
||||
// Return data fields - these define the structure of data returned by this tool
|
||||
|
||||
@@ -124,65 +124,6 @@ func (t *ToolSetIme) ConvertActionToCallToolRequest(action option.MobileAction)
|
||||
return mcp.CallToolRequest{}, fmt.Errorf("invalid set ime params: %v", action.Params)
|
||||
}
|
||||
|
||||
// ToolText is the MCP tool for the text action.
// Its fields describe the data the tool returns to the caller.
type ToolText struct {
	// Text is the text that was input.
	Text string `json:"text" desc:"Text that was input"`
}
|
||||
|
||||
func (t *ToolText) Name() option.ActionName {
|
||||
return option.ACTION_Text
|
||||
}
|
||||
|
||||
func (t *ToolText) Description() string {
|
||||
return "Input text into the currently focused element or input field"
|
||||
}
|
||||
|
||||
func (t *ToolText) Options() []mcp.ToolOption {
|
||||
unifiedReq := &option.ActionOptions{}
|
||||
return unifiedReq.GetMCPOptions(option.ACTION_Text)
|
||||
}
|
||||
|
||||
func (t *ToolText) Implement() server.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.GetArguments()
|
||||
driverExt, err := setupXTDriver(ctx, arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setup driver failed: %w", err)
|
||||
}
|
||||
|
||||
unifiedReq, err := parseActionOptions(arguments)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if unifiedReq.Text == "" {
|
||||
return nil, fmt.Errorf("text is required")
|
||||
}
|
||||
|
||||
opts := unifiedReq.Options()
|
||||
|
||||
// Text input action logic
|
||||
err = driverExt.Input(unifiedReq.Text, opts...)
|
||||
if err != nil {
|
||||
return NewMCPErrorResponse(fmt.Sprintf("Text input failed: %s", err.Error())), err
|
||||
}
|
||||
|
||||
message := fmt.Sprintf("Successfully input text: %s", unifiedReq.Text)
|
||||
returnData := ToolText{Text: unifiedReq.Text}
|
||||
|
||||
return NewMCPSuccessResponse(message, &returnData), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *ToolText) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
|
||||
text := fmt.Sprintf("%v", action.Params)
|
||||
arguments := map[string]any{
|
||||
"text": text,
|
||||
}
|
||||
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
|
||||
}
|
||||
|
||||
// ToolBackspace implements the backspace tool call.
|
||||
type ToolBackspace struct {
|
||||
// Return data fields - these define the structure of data returned by this tool
|
||||
|
||||
@@ -84,79 +84,6 @@ func (t *ToolTapXY) ConvertActionToCallToolRequest(action option.MobileAction) (
|
||||
return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
|
||||
}
|
||||
|
||||
// ToolTap is the MCP tool for the tap action.
// Its fields describe the data the tool returns to the caller.
type ToolTap struct {
	// X is the X coordinate where the tap was performed.
	X float64 `json:"x" desc:"X coordinate where tap was performed"`
	// Y is the Y coordinate where the tap was performed.
	Y float64 `json:"y" desc:"Y coordinate where tap was performed"`
}
|
||||
|
||||
func (t *ToolTap) Name() option.ActionName {
|
||||
return option.ACTION_Tap
|
||||
}
|
||||
|
||||
func (t *ToolTap) Description() string {
|
||||
return "Tap on the screen at given relative coordinates (0.0-1.0 range)"
|
||||
}
|
||||
|
||||
func (t *ToolTap) Options() []mcp.ToolOption {
|
||||
unifiedReq := &option.ActionOptions{}
|
||||
return unifiedReq.GetMCPOptions(option.ACTION_Tap)
|
||||
}
|
||||
|
||||
func (t *ToolTap) Implement() server.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.GetArguments()
|
||||
driverExt, err := setupXTDriver(ctx, arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setup driver failed: %w", err)
|
||||
}
|
||||
|
||||
unifiedReq, err := parseActionOptions(arguments)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build all options from request arguments
|
||||
opts := unifiedReq.Options()
|
||||
|
||||
// Validate required parameters
|
||||
if unifiedReq.X == 0 || unifiedReq.Y == 0 {
|
||||
return nil, fmt.Errorf("x and y coordinates are required")
|
||||
}
|
||||
|
||||
// Tap action logic
|
||||
err = driverExt.TapXY(unifiedReq.X, unifiedReq.Y, opts...)
|
||||
if err != nil {
|
||||
return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), err
|
||||
}
|
||||
|
||||
message := fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y)
|
||||
returnData := ToolTap{
|
||||
X: unifiedReq.X,
|
||||
Y: unifiedReq.Y,
|
||||
}
|
||||
|
||||
return NewMCPSuccessResponse(message, &returnData), nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *ToolTap) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
|
||||
if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 {
|
||||
x, y := params[0], params[1]
|
||||
arguments := map[string]any{
|
||||
"x": x,
|
||||
"y": y,
|
||||
}
|
||||
// Add duration if available from action options
|
||||
if duration := action.ActionOptions.Duration; duration > 0 {
|
||||
arguments["duration"] = duration
|
||||
}
|
||||
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
|
||||
}
|
||||
return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
|
||||
}
|
||||
|
||||
// ToolTapAbsXY implements the tap_abs_xy tool call.
|
||||
type ToolTapAbsXY struct {
|
||||
// Return data fields - these define the structure of data returned by this tool
|
||||
|
||||
@@ -43,9 +43,7 @@ const (
|
||||
ACTION_AppClear ActionName = "app_clear"
|
||||
ACTION_AppStart ActionName = "app_start"
|
||||
ACTION_AppLaunch ActionName = "app_launch" // 启动 app 并堵塞等待 app 首屏加载完成
|
||||
ACTION_OpenApp ActionName = "open_app" // 启动 app 并堵塞等待 app 首屏加载完成
|
||||
ACTION_AppTerminate ActionName = "app_terminate"
|
||||
ACTION_TerminateApp ActionName = "terminal_app"
|
||||
ACTION_ColdLaunch ActionName = "cold_launch"
|
||||
ACTION_AppStop ActionName = "app_stop"
|
||||
ACTION_ScreenShot ActionName = "screenshot"
|
||||
@@ -61,7 +59,6 @@ const (
|
||||
|
||||
// UI handling
|
||||
ACTION_Home ActionName = "home"
|
||||
ACTION_Tap ActionName = "tap" // generic tap action
|
||||
ACTION_TapXY ActionName = "tap_xy"
|
||||
ACTION_TapAbsXY ActionName = "tap_abs_xy"
|
||||
ACTION_TapByOCR ActionName = "tap_ocr"
|
||||
@@ -73,7 +70,6 @@ const (
|
||||
ACTION_SwipeCoordinate ActionName = "swipe_coordinate" // swipe by coordinates (fromX, fromY, toX, toY)
|
||||
ACTION_Drag ActionName = "drag"
|
||||
ACTION_Input ActionName = "input"
|
||||
ACTION_Text ActionName = "text"
|
||||
ACTION_PressButton ActionName = "press_button"
|
||||
ACTION_Back ActionName = "back"
|
||||
ACTION_KeyCode ActionName = "keycode"
|
||||
@@ -605,7 +601,6 @@ func WithOutputSchema(schema interface{}) ActionOption {
|
||||
func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
|
||||
// Define field mappings for different action types
|
||||
fieldMappings := map[ActionName][]string{
|
||||
ACTION_Tap: {"platform", "serial", "x", "y", "duration"},
|
||||
ACTION_TapXY: {"platform", "serial", "x", "y", "duration"},
|
||||
ACTION_TapAbsXY: {"platform", "serial", "x", "y", "duration"},
|
||||
ACTION_TapByOCR: {"platform", "serial", "text", "ignoreNotFoundError", "maxRetryTimes", "index", "regex", "tapRandomRect"},
|
||||
@@ -616,12 +611,9 @@ func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
|
||||
ACTION_Swipe: {"platform", "serial", "direction", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
|
||||
ACTION_Drag: {"platform", "serial", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
|
||||
ACTION_Input: {"platform", "serial", "text", "frequency"},
|
||||
ACTION_Text: {"platform", "serial", "text", "frequency"},
|
||||
ACTION_Backspace: {"platform", "serial", "count"},
|
||||
ACTION_AppLaunch: {"platform", "serial", "packageName"},
|
||||
ACTION_OpenApp: {"platform", "serial", "packageName"},
|
||||
ACTION_AppTerminate: {"platform", "serial", "packageName"},
|
||||
ACTION_TerminateApp: {"platform", "serial", "packageName"},
|
||||
ACTION_ColdLaunch: {"platform", "serial", "packageName"},
|
||||
ACTION_AppInstall: {"platform", "serial", "appUrl", "packageName"},
|
||||
ACTION_AppUninstall: {"platform", "serial", "packageName"},
|
||||
|
||||
@@ -58,6 +58,7 @@ const (
|
||||
DOUBAO_SEED_1_6_250615 LLMServiceType = "doubao-seed-1.6-250615"
|
||||
OPENAI_GPT_4O LLMServiceType = "openai/gpt-4o"
|
||||
DEEPSEEK_R1_250528 LLMServiceType = "deepseek-r1-250528"
|
||||
WINGS_SERVICE LLMServiceType = "wings-service"
|
||||
)
|
||||
|
||||
func WithLLMService(modelType LLMServiceType) AIServiceOption {
|
||||
|
||||
@@ -5,6 +5,7 @@ import "github.com/httprunner/httprunner/v5/pkg/gadb"
|
||||
type AndroidDeviceOptions struct {
|
||||
SerialNumber string `json:"serial,omitempty" yaml:"serial,omitempty"`
|
||||
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
|
||||
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
|
||||
|
||||
// adb
|
||||
AdbServerHost string `json:"adb_server_host,omitempty" yaml:"adb_server_host,omitempty"`
|
||||
|
||||
@@ -9,10 +9,11 @@ func NewBrowserDeviceOptions(opts ...BrowserDeviceOption) *BrowserDeviceOptions
|
||||
}
|
||||
|
||||
// BrowserDeviceOptions holds the settings for a browser device.
// Every field is optional in both JSON and YAML (omitempty).
//
// Each field is declared exactly once: the earlier text listed BrowserID,
// LogOn, Width and Height twice, which is not a valid struct declaration.
type BrowserDeviceOptions struct {
	BrowserID   string `json:"browser_id,omitempty" yaml:"browser_id,omitempty"`
	LogOn       bool   `json:"log_on,omitempty" yaml:"log_on,omitempty"`
	IgnorePopup bool   `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
	Width       int    `json:"width,omitempty" yaml:"width,omitempty"`
	Height      int    `json:"height,omitempty" yaml:"height,omitempty"`
}
|
||||
|
||||
func (dev *BrowserDeviceOptions) Options() (deviceOptions []BrowserDeviceOption) {
|
||||
|
||||
@@ -8,8 +8,9 @@ const (
|
||||
)
|
||||
|
||||
// HarmonyDeviceOptions holds the settings for a Harmony device.
// Every field is optional in both JSON and YAML (omitempty).
//
// Each field is declared exactly once: the earlier text listed ConnectKey and
// LogOn twice, which is not a valid struct declaration.
type HarmonyDeviceOptions struct {
	ConnectKey  string `json:"connect_key,omitempty" yaml:"connect_key,omitempty"`
	LogOn       bool   `json:"log_on,omitempty" yaml:"log_on,omitempty"`
	IgnorePopup bool   `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
}
|
||||
|
||||
func (dev *HarmonyDeviceOptions) Options() (deviceOptions []HarmonyDeviceOption) {
|
||||
|
||||
@@ -6,6 +6,7 @@ type IOSDeviceOptions struct {
|
||||
WDAPort int `json:"port,omitempty" yaml:"port,omitempty"` // WDA remote port
|
||||
WDAMjpegPort int `json:"mjpeg_port,omitempty" yaml:"mjpeg_port,omitempty"` // WDA remote MJPEG port
|
||||
LogOn bool `json:"log_on,omitempty" yaml:"log_on,omitempty"`
|
||||
IgnorePopup bool `json:"ignore_popup,omitempty" yaml:"ignore_popup,omitempty"` // keep for compatibility
|
||||
|
||||
// switch to iOS springboard before init WDA session
|
||||
ResetHomeOnStartup bool `json:"reset_home_on_startup,omitempty" yaml:"reset_home_on_startup,omitempty"`
|
||||
|
||||
24
uixt/sdk.go
24
uixt/sdk.go
@@ -29,31 +29,35 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
|
||||
|
||||
// Handle LLM service initialization
|
||||
if services.LLMConfig != nil {
|
||||
// Use advanced LLM configuration if provided
|
||||
// Use advanced LLM service configuration if provided
|
||||
driverExt.LLMService, err = ai.NewLLMServiceWithOptionConfig(services.LLMConfig)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("init llm service with config failed, Wings service will be used")
|
||||
log.Warn().Err(err).Msg("init llm service with config failed")
|
||||
} else {
|
||||
log.Info().Msg("LLM service initialized with advanced config")
|
||||
}
|
||||
} else if services.LLMService != "" {
|
||||
// Fallback to simple LLM service if no config provided
|
||||
// Use simple LLM service configuration if provided
|
||||
driverExt.LLMService, err = ai.NewLLMService(services.LLMService)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("init llm service failed, Wings service will be used")
|
||||
log.Warn().Err(err).Msg("init llm service failed")
|
||||
} else {
|
||||
log.Info().Msg("LLM service initialized")
|
||||
log.Info().Msg("LLM service initialized with simple config")
|
||||
}
|
||||
} else {
|
||||
driverExt.LLMService = ai.NewWingsService()
|
||||
log.Info().Msg("Wings service initialized")
|
||||
// Use Wings service as fallback
|
||||
driverExt.LLMService, err = ai.NewWingsService()
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("init Wings service failed")
|
||||
} else {
|
||||
log.Info().Msg("Wings service initialized")
|
||||
}
|
||||
}
|
||||
|
||||
// Register uixt MCP tools to LLM service if it exists
|
||||
if driverExt.LLMService != nil {
|
||||
// Register uixt MCP tools to LLM service if it exists
|
||||
mcpTools := driverExt.client.Server.ListTools()
|
||||
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
|
||||
|
||||
if err = driverExt.LLMService.RegisterTools(einoTools); err != nil {
|
||||
log.Warn().Err(err).Msg("failed to register uixt tools to LLM service")
|
||||
}
|
||||
@@ -66,7 +70,7 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
|
||||
type XTDriver struct {
|
||||
IDriver
|
||||
CVService ai.ICVService // OCR/CV
|
||||
LLMService ai.ILLMService // LLM (fallback service)
|
||||
LLMService ai.ILLMService // LLM
|
||||
|
||||
services *option.AIServiceOptions // AI services options
|
||||
client *MCPClient4XTDriver // MCP Client for built-in uixt server
|
||||
|
||||
Reference in New Issue
Block a user