Merge branch 'session_refactor' into 'master'

Session refactor

See merge request iesqa/httprunner!122
This commit is contained in:
余泓铮
2025-07-14 07:24:46 +00:00
14 changed files with 1644 additions and 110 deletions

View File

@@ -49,7 +49,8 @@ func NewVEDEMImageService() (*vedemCVService, error) {
type vedemCVService struct{}
func (s *vedemCVService) ReadFromPath(imagePath string, opts ...option.ActionOption) (
imageResult *CVResult, err error) {
imageResult *CVResult, err error,
) {
imageBuf, err := os.ReadFile(imagePath)
if err != nil {
err = errors.Wrap(code.CVPrepareRequestError,
@@ -61,7 +62,8 @@ func (s *vedemCVService) ReadFromPath(imagePath string, opts ...option.ActionOpt
}
func (s *vedemCVService) ReadFromBuffer(imageBuf *bytes.Buffer, opts ...option.ActionOption) (
imageResult *CVResult, err error) {
imageResult *CVResult, err error,
) {
actionOptions := option.NewActionOptions(opts...)
log.Debug().Interface("options", actionOptions).Msg("vedem.ReadFromBuffer")
screenshotActions := actionOptions.List()
@@ -77,7 +79,7 @@ func (s *vedemCVService) ReadFromBuffer(imageBuf *bytes.Buffer, opts ...option.A
if err != nil {
logger = log.Error().Err(err)
} else {
logger = log.Debug()
logger = log.Info()
if imageResult.URL != "" {
logger = logger.Str("url", imageResult.URL)
}

542
uixt/ai/wings_service.go Normal file
View File

@@ -0,0 +1,542 @@
package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
)
// WingsService implements ILLMService interface using external Wings API
type WingsService struct {
apiURL string
bizId string
}
// NewWingsService creates a new Wings service instance
func NewWingsService() ILLMService {
return &WingsService{
apiURL: "https://vedem-algorithm.bytedance.net/algorithm/StepActionDecision",
bizId: "489fdae44de048e0922a32834ea668af",
}
}
// Plan implements the ILLMService.Plan method using Wings API
func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) {
// Validate input parameters
if err := validatePlanningInput(opts); err != nil {
return nil, errors.Wrap(err, "validate planning parameters failed")
}
// Extract screenshot from message
screenshot, err := w.extractScreenshotFromMessage(opts.Message)
if err != nil {
return nil, errors.Wrap(err, "extract screenshot failed")
}
// Get device info from context (if available)
deviceInfo := w.getDeviceInfoFromContext(ctx, screenshot)
// Prepare Wings API request
apiRequest := WingsActionRequest{
Historys: []interface{}{}, // empty as specified
DeviceInfos: []WingsDeviceInfo{
deviceInfo,
},
StepText: opts.UserInstruction,
BizId: w.bizId,
TextCase: "整体描述:\\n前置条件\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项\\n",
StepType: "automation",
DeviceID: deviceInfo.DeviceID,
Base: WingsBase{
LogID: generateWingsUUID(),
},
}
// Call Wings API
startTime := time.Now()
response, err := w.callWingsAPI(ctx, apiRequest)
elapsed := time.Since(startTime).Milliseconds()
if err != nil {
return &PlanningResult{
Thought: "Wings API call failed",
Error: err.Error(),
ModelName: "wings-api",
}, errors.Wrap(err, "Wings API call failed")
}
// Check API response status
if response.BaseResp.StatusCode != 0 {
err = fmt.Errorf("API returned error: %s", response.BaseResp.StatusMessage)
return &PlanningResult{
Thought: response.ThoughtChain.Thought,
Error: err.Error(),
ModelName: "wings-api",
}, err
}
// Convert Wings API response to tool calls
toolCalls, err := w.convertWingsResponseToToolCalls(response.ActionParams)
if err != nil {
return &PlanningResult{
Thought: response.ThoughtChain.Thought,
Error: err.Error(),
ModelName: "wings-api",
}, errors.Wrap(err, "convert Wings response to tool calls failed")
}
log.Info().
Str("thought", response.ThoughtChain.Thought).
Int("tool_calls_count", len(toolCalls)).
Int64("elapsed_ms", elapsed).
Msg("Wings API planning completed")
return &PlanningResult{
ToolCalls: toolCalls,
Thought: response.ThoughtChain.Thought,
Content: response.ThoughtChain.Summary,
ModelName: "wings-api",
}, nil
}
// Assert implements the ILLMService.Assert method using Wings API
func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error) {
// Validate input parameters
if err := validateAssertionInput(opts); err != nil {
return nil, errors.Wrap(err, "validate assertion parameters failed")
}
// Clean screenshot data URL prefix
cleanScreenshot := w.cleanScreenshotDataURL(opts.Screenshot)
// Get device info from context (if available)
deviceInfo := w.getDeviceInfoFromScreenshot(ctx, cleanScreenshot)
// Prepare Wings API request for assertion
apiRequest := WingsActionRequest{
Historys: []interface{}{}, // empty as specified
DeviceInfos: []WingsDeviceInfo{
deviceInfo,
},
StepText: opts.Assertion,
BizId: w.bizId,
TextCase: "整体描述:\\n前置条件\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项\\n",
StepType: "assert", // Different from automation
DeviceID: deviceInfo.DeviceID,
Base: WingsBase{
LogID: generateWingsUUID(),
},
}
// Call Wings API
startTime := time.Now()
response, err := w.callWingsAPI(ctx, apiRequest)
elapsed := time.Since(startTime).Milliseconds()
if err != nil {
return &AssertionResult{
Pass: false,
Thought: "Wings API call failed",
ModelName: "wings-api",
}, errors.Wrap(err, "Wings API call failed")
}
// Check API response status
if response.BaseResp.StatusCode != 0 {
err = fmt.Errorf("API returned error: %s", response.BaseResp.StatusMessage)
return &AssertionResult{
Pass: false,
Thought: response.ThoughtChain.Thought,
ModelName: "wings-api",
}, err
}
// Parse assertion result from action_params
passed, assertionThought, err := w.parseAssertionResult(response.ActionParams, response.ThoughtChain)
if err != nil {
return &AssertionResult{
Pass: false,
Thought: response.ThoughtChain.Thought,
ModelName: "wings-api",
}, errors.Wrap(err, "parse assertion result failed")
}
log.Info().
Bool("passed", passed).
Str("thought", assertionThought).
Int64("elapsed_ms", elapsed).
Msg("Wings API assertion completed")
result := &AssertionResult{
Pass: passed,
Thought: assertionThought,
ModelName: "wings-api",
}
// Return error if assertion failed (consistent with original behavior)
if !passed {
return result, errors.New(assertionThought)
}
return result, nil
}
// Query implements the ILLMService.Query method (not supported)
func (w *WingsService) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
return nil, errors.New("Query operation is not supported by Wings service")
}
// RegisterTools implements the ILLMService.RegisterTools method (no-op for Wings)
func (w *WingsService) RegisterTools(tools []*schema.ToolInfo) error {
// Wings service doesn't need tool registration as it determines actions via API
log.Debug().Int("tools_count", len(tools)).Msg("Wings service ignoring tool registration")
return nil
}
// Wings API data structures
type WingsActionRequest struct {
Historys []interface{} `json:"historys"`
DeviceInfos []WingsDeviceInfo `json:"device_infos"`
StepText string `json:"step_text"`
BizId string `json:"biz_id"`
TextCase string `json:"text_case"`
StepType string `json:"step_type"`
DeviceID string `json:"device_id"`
Base WingsBase `json:"Base"`
}
type WingsDeviceInfo struct {
DeviceID string `json:"device_id"`
NowImage string `json:"now_image"`
PreImage string `json:"pre_image"`
NowImageUrl string `json:"now_image_url"`
PreImageUrl string `json:"pre_image_url"`
NowLayoutJSON string `json:"now_layout_json"`
OperationSystem string `json:"operation_system"`
}
type WingsBase struct {
LogID string `json:"LogID"`
}
type WingsActionResponse struct {
StepType string `json:"step_type"`
ActionParams string `json:"action_params"`
ThoughtChain WingsThoughtChain `json:"thought_chain"`
BaseResp WingsBaseResp `json:"BaseResp"`
}
type WingsThoughtChain struct {
Observation string `json:"observation"`
Thought string `json:"thought"`
Summary string `json:"summary"`
}
type WingsBaseResp struct {
StatusCode int `json:"StatusCode"`
StatusMessage string `json:"StatusMessage"`
Extra WingsExtra `json:"Extra"`
}
type WingsExtra struct {
CostTime string `json:"cost_time"`
LogID string `json:"_log_id"`
}
// Action parameter structures
type WingsActionParams struct {
Type string `json:"Type"`
Params interface{} `json:"Params"`
Bounds [][]float64 `json:"Bounds"`
UiDict interface{} `json:"UiDict"`
UiIndex string `json:"UiIndex"`
}
type WingsTapParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
}
type WingsDoubleTapParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
}
type WingsLongPressParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
Duration float64 `json:"duration"`
}
type WingsSwipeParams struct {
FromX float64 `json:"from_x"`
FromY float64 `json:"from_y"`
ToX float64 `json:"to_x"`
ToY float64 `json:"to_y"`
Duration float64 `json:"duration"`
}
type WingsTextParams struct {
Text string `json:"text"`
}
// Helper methods
// generateWingsUUID generates a random UUID for LogID
func generateWingsUUID() string {
return uuid.New().String()
}
// extractScreenshotFromMessage extracts base64 screenshot from message
func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (string, error) {
if message == nil || len(message.MultiContent) == 0 {
return "", errors.New("no message content found")
}
for _, content := range message.MultiContent {
if content.Type == schema.ChatMessagePartTypeImageURL && content.ImageURL != nil {
// Extract base64 data from data URL
screenshot := content.ImageURL.URL
if strings.HasPrefix(screenshot, "data:image/") {
// Remove data URL prefix
parts := strings.Split(screenshot, ",")
if len(parts) == 2 {
return parts[1], nil
}
}
return screenshot, nil
}
}
return "", errors.New("no image found in message")
}
// getDeviceInfoFromContext gets device info from context with fallback
func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo {
// Try to get device info from context
if deviceID, ok := ctx.Value("device_id").(string); ok {
platformType := "android"
if platform, ok := ctx.Value("platform_type").(string); ok {
platformType = platform
}
return WingsDeviceInfo{
DeviceID: deviceID,
NowImage: screenshot,
PreImage: screenshot,
NowLayoutJSON: "",
OperationSystem: platformType,
}
}
// Fallback to default device info
return WingsDeviceInfo{
DeviceID: "default-device",
NowImage: screenshot,
PreImage: screenshot,
NowLayoutJSON: "",
OperationSystem: "android",
}
}
// getDeviceInfoFromScreenshot gets device info from screenshot (for Assert)
func (w *WingsService) getDeviceInfoFromScreenshot(ctx context.Context, screenshot string) WingsDeviceInfo {
return w.getDeviceInfoFromContext(ctx, screenshot)
}
// cleanScreenshotDataURL removes data URL prefix from screenshot string
func (w *WingsService) cleanScreenshotDataURL(screenshot string) string {
if strings.HasPrefix(screenshot, "data:image/") {
// Remove data URL prefix like "data:image/jpeg;base64,"
parts := strings.Split(screenshot, ",")
if len(parts) == 2 {
return parts[1]
}
}
return screenshot
}
// callWingsAPI calls the external Wings API
func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequest) (*WingsActionResponse, error) {
// Marshal request to JSON
requestBody, err := json.Marshal(request)
if err != nil {
return nil, errors.Wrap(err, "marshal request failed")
}
// Create HTTP request
httpReq, err := http.NewRequestWithContext(ctx, "POST", w.apiURL, bytes.NewBuffer(requestBody))
if err != nil {
return nil, errors.Wrap(err, "create HTTP request failed")
}
// Set headers
httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Accept", "application/json")
// Execute HTTP request
client := &http.Client{
Timeout: 30 * time.Second,
}
resp, err := client.Do(httpReq)
if err != nil {
return nil, errors.Wrap(err, "HTTP request failed")
}
defer resp.Body.Close()
// Read response body
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, errors.Wrap(err, "read response body failed")
}
// Check HTTP status
if resp.StatusCode != 200 {
return nil, fmt.Errorf("HTTP request failed with status %d: %s", resp.StatusCode, string(responseBody))
}
// Parse response
var apiResponse WingsActionResponse
if err := json.Unmarshal(responseBody, &apiResponse); err != nil {
return nil, errors.Wrap(err, "unmarshal response failed")
}
return &apiResponse, nil
}
// convertWingsResponseToToolCalls converts Wings API response to tool calls using generic approach
func (w *WingsService) convertWingsResponseToToolCalls(actionParamsStr string) ([]schema.ToolCall, error) {
if actionParamsStr == "" {
return []schema.ToolCall{}, nil
}
var actionParams WingsActionParams
if err := json.Unmarshal([]byte(actionParamsStr), &actionParams); err != nil {
return nil, fmt.Errorf("parse action params failed: %w", err)
}
// Use Wings API Type as tool name directly
toolName := actionParams.Type
params := actionParams.Params
// Create tool call using generic method
toolCall, err := w.createToolCall(toolName, params)
if err != nil {
return nil, fmt.Errorf("create tool call for %s failed: %w", toolName, err)
}
return []schema.ToolCall{toolCall}, nil
}
// createToolCall creates a generic tool call with given name and arguments
func (w *WingsService) createToolCall(toolName string, params interface{}) (schema.ToolCall, error) {
// Convert params to arguments map
arguments := make(map[string]interface{})
if params != nil {
// Try to convert params to map[string]interface{}
switch p := params.(type) {
case map[string]interface{}:
arguments = p
case string:
// If params is a string, try to unmarshal it as JSON
if err := json.Unmarshal([]byte(p), &arguments); err != nil {
// If not JSON, treat as simple text parameter
arguments["text"] = p
}
default:
// For other types, try to marshal and unmarshal
paramsBytes, err := json.Marshal(params)
if err != nil {
return schema.ToolCall{}, fmt.Errorf("marshal params failed: %w", err)
}
if err := json.Unmarshal(paramsBytes, &arguments); err != nil {
// If unmarshal fails, create a generic params field
arguments["params"] = params
}
}
}
// Convert arguments to JSON string
argumentsJSON, err := json.Marshal(arguments)
if err != nil {
return schema.ToolCall{}, fmt.Errorf("marshal arguments failed: %w", err)
}
// Generate unique tool call ID
toolCallID := fmt.Sprintf("call_%s", uuid.New().String()[:8])
return schema.ToolCall{
ID: toolCallID,
Function: schema.FunctionCall{
Name: toolName,
Arguments: string(argumentsJSON),
},
}, nil
}
// parseAssertionResult parses the assertion result from action_params
func (w *WingsService) parseAssertionResult(actionParamsStr string, thoughtChain WingsThoughtChain) (bool, string, error) {
// Parse action parameters JSON
var actionParams map[string]interface{}
if err := json.Unmarshal([]byte(actionParamsStr), &actionParams); err != nil {
return false, "", errors.Wrap(err, "parse action params failed")
}
// Extract action_type from the parsed JSON
actionType, exists := actionParams["action_type"]
if !exists {
// If no action_type field, try to parse nested structure
if totalRes, ok := actionParams["total_res"].([]interface{}); ok && len(totalRes) > 0 {
if firstRes, ok := totalRes[0].(map[string]interface{}); ok {
if actionParamsNested, ok := firstRes["action_params"].(map[string]interface{}); ok {
if nestedActionType, ok := actionParamsNested["action_type"]; ok {
actionType = nestedActionType
}
}
}
}
}
// Default to failed if no action_type found
if actionType == nil {
return false, thoughtChain.Summary, nil
}
// Convert action_type to string and check result
actionTypeStr, ok := actionType.(string)
if !ok {
return false, thoughtChain.Summary, nil
}
// Determine assertion result based on action_type
passed := strings.ToLower(actionTypeStr) == "passed"
// Use thoughtChain.Summary as the assertion thought
assertionThought := thoughtChain.Summary
if assertionThought == "" {
assertionThought = thoughtChain.Thought
}
if assertionThought == "" {
assertionThought = thoughtChain.Observation
}
log.Info().
Str("action_type", actionTypeStr).
Bool("passed", passed).
Str("thought", assertionThought).
Msg("parsed Wings assertion result")
return passed, assertionThought, nil
}

View File

@@ -24,7 +24,7 @@ func setupADBDriverExt(t *testing.T) *XTDriver {
require.Nil(t, err)
driverExt, err := NewXTDriver(driver,
option.WithCVService(option.CVServiceTypeVEDEM),
option.WithLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428),
// option.WithLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428),
)
require.Nil(t, err)
return driverExt

View File

@@ -2,6 +2,8 @@ package uixt
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/cloudwego/eino/schema"
@@ -9,12 +11,12 @@ import (
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
)
// StartToGoal (original implementation - preserved)
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
options := option.NewActionOptions(opts...)
logger := log.Info().Str("prompt", prompt)
@@ -193,7 +195,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}
}
// AIAction performs AI-driven action and returns detailed execution result
// AIAction with WingsService priority support
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("prompt", prompt).Msg("performing AI action")
@@ -206,25 +208,93 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
return nil, err
}
// Step 2: Plan next action and measure time
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
log.Info().Msg("using Wings service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...)
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI action")
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAction executes AIAction using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Add device context for Wings service if needed
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
// Step 1: Plan next action and measure time
modelCallStartTime := time.Now()
planningResult, err := dExt.PlanNextAction(ctx, prompt, opts...)
var planningResult *ai.PlanningResult
var err error
if serviceType == "llm" {
// For LLM service, use PlanNextAction which includes additional processing
planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...)
if planErr != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: planErr.Error(),
}, errors.Wrap(planErr, "get next action failed")
}
planningResult = &planningExecutionResult.PlanningResult
} else {
// For Wings service, call Plan directly
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
Role: schema.User,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenResult.Base64,
},
},
},
},
Size: screenResult.Resolution,
}
planningResult, err = service.Plan(ctx, planningOpts)
if err != nil {
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
return &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
Error: err.Error(),
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
}
}
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
aiExecutionResult := &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
PlanningResult: &planningResult.PlanningResult,
PlanningResult: planningResult,
}
if err != nil {
aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "get next action failed")
}
// Step 3: Execute tool calls
// Step 2: Execute tool calls
for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil {
@@ -239,7 +309,99 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
return aiExecutionResult, nil
}
// PlanNextAction performs planning and returns unified planning information
// AIAssert with WingsService priority support
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("assertion", assertion).Msg("performing AI assertion")
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_assert"),
option.WithScreenShotBase64(true),
)
if err != nil {
return nil, err
}
// Step 2: Check if WingsService is available and prioritize it
if dExt.WingsService != nil {
log.Info().Msg("using Wings service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...)
}
// Step 3: Fallback to LLM service
if dExt.LLMService == nil {
return nil, errors.New("neither Wings service nor LLM service is initialized")
}
log.Info().Msg("using LLM service for AI assertion")
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...)
}
// executeAIAssert executes AIAssert using any AI service (generic implementation)
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
// Step 1: Prepare context and options
ctx := context.Background()
if serviceType == "wings" {
ctx = dExt.addDeviceContextForWings(ctx)
}
assertResult := &AIExecutionResult{
Type: "assert",
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
}
// Step 2: Call service and measure time
modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{
Assertion: assertion,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
}
result, err := service.Assert(ctx, assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
assertResult.AssertionResult = result
if err != nil {
assertResult.Error = err.Error()
return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType))
}
if !result.Pass {
assertResult.Error = result.Thought
}
return assertResult, nil
}
// addDeviceContextForWings adds device information to context for Wings service
func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context {
device := dExt.GetDevice()
if device == nil {
return ctx
}
// Add device ID to context
ctx = context.WithValue(ctx, "device_id", device.UUID())
// Add platform type to context
platformType := "android" // default
switch device.(type) {
case *AndroidDevice:
platformType = "android"
case *IOSDevice:
platformType = "ios"
case *HarmonyDevice:
platformType = "harmony"
}
ctx = context.WithValue(ctx, "platform_type", platformType)
return ctx
}
// PlanNextAction (original implementation - preserved)
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
@@ -314,7 +476,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
return planningResult, nil
}
// isTaskFinished checks if the task is completed based on the planning result
// isTaskFinished (original implementation - preserved)
func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
// Check if there are no tool calls (no actions to execute)
if len(planningResult.ToolCalls) == 0 {
@@ -333,7 +495,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
return false
}
// invokeToolCall invokes the tool call
// invokeToolCall (original implementation - preserved)
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments
arguments := make(map[string]interface{})
@@ -360,7 +522,7 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return nil
}
// PlanningExecutionResult represents a unified planning result that contains both planning information and execution results
// PlanningExecutionResult (original implementation - preserved)
type PlanningExecutionResult struct {
ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName)
// Planning process information
@@ -377,7 +539,7 @@ type PlanningExecutionResult struct {
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
}
// AIExecutionResult represents a unified result structure for all AI operations
// AIExecutionResult (original implementation - preserved)
type AIExecutionResult struct {
Type string `json:"type"` // operation type: "query", "action", "assert"
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
@@ -394,7 +556,7 @@ type AIExecutionResult struct {
Error string `json:"error,omitempty"` // error message if operation failed
}
// SubActionResult represents a sub-action within a start_to_goal action
// SubActionResult (original implementation - preserved)
type SubActionResult struct {
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
@@ -409,6 +571,7 @@ type SessionData struct {
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
}
// AIQuery (original implementation - preserved)
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
@@ -453,50 +616,3 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
}
return aiResult, nil
}
// AIAssert performs AI-driven assertion and returns detailed execution result
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
}
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_assert"),
option.WithScreenShotBase64(true),
)
if err != nil {
return nil, err
}
assertResult := &AIExecutionResult{
Type: "assert",
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
}
// Step 2: Call model and measure time
modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{
Assertion: assertion,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
}
result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
assertResult.AssertionResult = result
if err != nil {
assertResult.Error = err.Error()
return assertResult, errors.Wrap(err, "AI assertion failed")
}
// For assertion failure, we should still return success but mark the assertion as failed
// This ensures that the AIResult (including screenshot and thought) is properly saved and displayed
if !result.Pass {
assertResult.Error = result.Thought // Store the failure reason for reporting
}
return assertResult, nil
}

View File

@@ -7,10 +7,11 @@ import (
"testing"
"github.com/cloudwego/eino/schema"
"github.com/stretchr/testify/assert"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/stretchr/testify/assert"
)
func TestDriverExt_TapByLLM(t *testing.T) {
@@ -22,33 +23,33 @@ func TestDriverExt_TapByLLM(t *testing.T) {
assert.Nil(t, err)
}
func TestDriverExt_StartToGoal(t *testing.T) {
driver := setupDriverExt(t)
userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
2. 连接规则:
- 两个相同的图案可以通过不超过三条直线连接。
- 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
- 连接线的转折次数不能超过两次。
3. 游戏界面:
- 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
- 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
注意事项:
1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
2、不要连续 2 次点击同一个图案
3、不要犯重复的错误
`
userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
_, err := driver.StartToGoal(context.Background(), userInstruction)
assert.Nil(t, err)
}
//func TestDriverExt_StartToGoal(t *testing.T) {
// driver := setupDriverExt(t)
//
// userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
// 1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
// 2. 连接规则:
// - 两个相同的图案可以通过不超过三条直线连接。
// - 连接线可以水平或垂直,但不能斜线,也不能跨过其他图案。
// - 连接线的转折次数不能超过两次。
// 3. 游戏界面:
// - 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
// - 图案或图标在未选中状态下背景为白色,选中状态下背景为绿色。
// 4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
// 5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
// 6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。
//
// 注意事项:
// 1、当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
// 2、不要连续 2 次点击同一个图案
// 3、不要犯重复的错误
// `
//
// userInstruction += "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"
//
// //_, err := driver.StartToGoal(context.Background(), userInstruction)
// //assert.Nil(t, err)
//}
func TestDriverExt_PlanNextAction(t *testing.T) {
driver := setupDriverExt(t)
@@ -244,3 +245,241 @@ func TestPlanningOptions_ResetHistory(t *testing.T) {
assert.True(t, opts.ResetHistory)
assert.Equal(t, "test instruction", opts.UserInstruction)
}
// TestDriverExt_AIAction tests the AIAction method integration with real driver
func TestDriverExt_AIAction(t *testing.T) {
driver := setupDriverExt(t)
// Test AIAction with search button click prompt
result, err := driver.AIAction(context.Background(), "冷启动抖音app")
// Verify no error occurred
assert.Nil(t, err, "AIAction should execute without error")
// Verify result is not nil
assert.NotNil(t, result, "AIAction should return a result")
// Verify result has correct type
assert.Equal(t, "action", result.Type, "Result type should be 'action'")
// Verify timing information is captured
assert.Greater(t, result.ModelCallElapsed, int64(0), "Model call should have elapsed time")
assert.Greater(t, result.ScreenshotElapsed, int64(0), "Screenshot should have elapsed time")
// Verify screenshot information is captured
assert.NotEmpty(t, result.ImagePath, "Image path should not be empty")
assert.NotNil(t, result.Resolution, "Resolution should not be nil")
assert.Greater(t, result.Resolution.Width, 0, "Width should be greater than 0")
assert.Greater(t, result.Resolution.Height, 0, "Height should be greater than 0")
// Verify planning result is captured
assert.NotNil(t, result.PlanningResult, "Planning result should not be nil")
assert.Equal(t, "wings-api", result.PlanningResult.ModelName, "Model name should be 'wings-api'")
// Log result for debugging
t.Logf("AIAction executed successfully:")
t.Logf(" Type: %s", result.Type)
t.Logf(" Model Call Elapsed: %d ms", result.ModelCallElapsed)
t.Logf(" Screenshot Elapsed: %d ms", result.ScreenshotElapsed)
t.Logf(" Image Path: %s", result.ImagePath)
t.Logf(" Resolution: %dx%d", result.Resolution.Width, result.Resolution.Height)
if result.Error != "" {
t.Logf(" Error: %s", result.Error)
}
}
// TestDriverExt_AIAction_CompareWithAIAction compares AIAction with AIAction
func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) {
driver := setupDriverExt(t)
prompt := "点击搜索按钮"
// Test both methods with the same prompt
wingsResult, wingsErr := driver.AIAction(context.Background(), prompt)
aiResult, aiErr := driver.AIAction(context.Background(), prompt)
// Both should execute without critical errors (may have different implementations)
t.Logf("AIAction error: %v", wingsErr)
t.Logf("AIAction error: %v", aiErr)
// If both succeed, compare results
if wingsResult != nil && aiResult != nil {
assert.Equal(t, "action", wingsResult.Type, "AIAction result type should be 'action'")
assert.Equal(t, "action", aiResult.Type, "AIAction result type should be 'action'")
// Both should have timing information
assert.Greater(t, wingsResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time")
assert.Greater(t, aiResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time")
// Both should have screenshot information
assert.NotEmpty(t, wingsResult.ImagePath, "AIAction should have image path")
assert.NotEmpty(t, aiResult.ImagePath, "AIAction should have image path")
// Compare model names
if wingsResult.PlanningResult != nil && aiResult.PlanningResult != nil {
t.Logf("AIAction model: %s", wingsResult.PlanningResult.ModelName)
t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName)
assert.Equal(t, "wings-api", wingsResult.PlanningResult.ModelName, "AIAction should use wings-api")
assert.NotEqual(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should not use wings-api")
}
}
}
// TestDriverExt_AIAction_ErrorHandling tests AIAction error handling
func TestDriverExt_AIAction_ErrorHandling(t *testing.T) {
driver := setupDriverExt(t)
// Test with empty prompt
result, err := driver.AIAction(context.Background(), "")
// Should handle empty prompt gracefully
if err != nil {
t.Logf("Empty prompt error (expected): %v", err)
assert.NotNil(t, result, "Result should still be returned even on error")
if result != nil {
assert.NotEmpty(t, result.Error, "Result should contain error message")
}
} else {
t.Logf("Empty prompt handled successfully")
assert.NotNil(t, result, "Result should be returned")
}
// Test with very long prompt
longPrompt := "这是一个非常长的提示词用来测试AIAction是否能够正确处理长文本输入。" +
"我们需要确保API能够处理各种长度的输入包括这种可能超出某些限制的文本。" +
"请在当前界面中寻找任何可能的搜索相关的按钮或输入框,然后进行点击操作。"
result2, err2 := driver.AIAction(context.Background(), longPrompt)
// Should handle long prompt
if err2 != nil {
t.Logf("Long prompt error: %v", err2)
} else {
t.Logf("Long prompt handled successfully")
assert.NotNil(t, result2, "Result should be returned for long prompt")
assert.Equal(t, "action", result2.Type, "Result type should be 'action'")
}
}
// TestDriverExt_AIAssert tests the AIAssert method integration with real driver
func TestDriverExt_AIAssert(t *testing.T) {
driver := setupDriverExt(t)
// Test AIAssert with assertion about search button
result, err := driver.AIAssert("屏幕中存在搜索按钮")
// Verify no error occurred (or error is captured in result)
if err != nil {
t.Logf("AIAssert error: %v", err)
// For assertion failures, error is expected, but result should still be returned
assert.NotNil(t, result, "AIAssert should return a result even on assertion failure")
} else {
assert.NotNil(t, result, "AIAssert should return a result")
}
// Verify result has correct type
assert.Equal(t, "assert", result.Type, "Result type should be 'assert'")
// Verify timing information is captured
assert.Greater(t, result.ModelCallElapsed, int64(0), "Model call should have elapsed time")
assert.Greater(t, result.ScreenshotElapsed, int64(0), "Screenshot should have elapsed time")
// Verify screenshot information is captured
assert.NotEmpty(t, result.ImagePath, "Image path should not be empty")
assert.NotNil(t, result.Resolution, "Resolution should not be nil")
assert.Greater(t, result.Resolution.Width, 0, "Width should be greater than 0")
assert.Greater(t, result.Resolution.Height, 0, "Height should be greater than 0")
// Verify assertion result is captured
assert.NotNil(t, result.AssertionResult, "Assertion result should not be nil")
assert.NotEmpty(t, result.AssertionResult.Thought, "Assertion result thought should not be empty")
// Log result for debugging
t.Logf("AIAssert executed:")
t.Logf(" Type: %s", result.Type)
t.Logf(" Model Call Elapsed: %d ms", result.ModelCallElapsed)
t.Logf(" Screenshot Elapsed: %d ms", result.ScreenshotElapsed)
t.Logf(" Image Path: %s", result.ImagePath)
t.Logf(" Resolution: %dx%d", result.Resolution.Width, result.Resolution.Height)
t.Logf(" Assertion Pass: %t", result.AssertionResult.Pass)
t.Logf(" Assertion Thought: %s", result.AssertionResult.Thought)
if result.Error != "" {
t.Logf(" Error: %s", result.Error)
}
}
// TestDriverExt_AIAssert_CompareWithAIAssert compares AIAssert with AIAssert
func TestDriverExt_AIAssert_CompareWithAIAssert(t *testing.T) {
driver := setupDriverExt(t)
assertion := "屏幕中存在搜索按钮"
// Test both methods with the same assertion
wingsResult, wingsErr := driver.AIAssert(assertion)
aiResult, aiErr := driver.AIAssert(assertion)
// Both should execute (may have different results)
t.Logf("AIAssert error: %v", wingsErr)
t.Logf("AIAssert error: %v", aiErr)
// If both succeed, compare results
if wingsResult != nil && aiResult != nil {
assert.Equal(t, "assert", wingsResult.Type, "AIAssert result type should be 'assert'")
assert.Equal(t, "assert", aiResult.Type, "AIAssert result type should be 'assert'")
// Both should have timing information
assert.Greater(t, wingsResult.ModelCallElapsed, int64(0), "AIAssert should have model call elapsed time")
assert.Greater(t, aiResult.ModelCallElapsed, int64(0), "AIAssert should have model call elapsed time")
// Both should have screenshot information
assert.NotEmpty(t, wingsResult.ImagePath, "AIAssert should have image path")
assert.NotEmpty(t, aiResult.ImagePath, "AIAssert should have image path")
// Both should have assertion results
assert.NotNil(t, wingsResult.AssertionResult, "AIAssert should have assertion result")
assert.NotNil(t, aiResult.AssertionResult, "AIAssert should have assertion result")
// Log comparison
t.Logf("AIAssert Pass: %t, Thought: %s", wingsResult.AssertionResult.Pass, wingsResult.AssertionResult.Thought)
t.Logf("AIAssert Pass: %t, Thought: %s", aiResult.AssertionResult.Pass, aiResult.AssertionResult.Thought)
}
}
// TestDriverExt_AIAssert_ErrorHandling tests AIAssert error handling
func TestDriverExt_AIAssert_ErrorHandling(t *testing.T) {
driver := setupDriverExt(t)
// Test with empty assertion
result, err := driver.AIAssert("")
// Should handle empty assertion gracefully
if err != nil {
t.Logf("Empty assertion error (may be expected): %v", err)
assert.NotNil(t, result, "Result should still be returned even on error")
if result != nil {
assert.NotEmpty(t, result.Error, "Result should contain error message")
}
} else {
t.Logf("Empty assertion handled successfully")
assert.NotNil(t, result, "Result should be returned")
}
// Test with complex assertion
complexAssertion := "断言:当前屏幕显示的是主页面,包含用户头像、搜索框、导航栏等关键元素,并且没有任何错误提示信息"
result2, err2 := driver.AIAssert(complexAssertion)
// Should handle complex assertion
if err2 != nil {
t.Logf("Complex assertion result: %v", err2)
} else {
t.Logf("Complex assertion handled successfully")
assert.NotNil(t, result2, "Result should be returned for complex assertion")
assert.Equal(t, "assert", result2.Type, "Result type should be 'assert'")
if result2.AssertionResult != nil {
t.Logf("Assertion passed: %t", result2.AssertionResult.Pass)
}
}
}

View File

@@ -176,9 +176,6 @@ func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.O
func (dExt *XTDriver) FindScreenText(text string, opts ...option.ActionOption) (textRect ai.OCRText, err error) {
options := option.NewActionOptions(opts...)
if options.ScreenShotFileName == "" {
opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("find_screen_text_%s", text)))
}
// convert relative scope to absolute scope
if options.AbsScope == nil && len(options.Scope) == 4 {

View File

@@ -2,17 +2,20 @@ package uixt
import (
"fmt"
"time"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/rs/zerolog/log"
)
func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error {
actionOptions := option.NewActionOptions(opts...)
log.Info().Str("text", text).Interface("options", actionOptions).Msg("TapByOCR")
if actionOptions.ScreenShotFileName == "" {
opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("tap_by_ocr_%s", text)))
opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("%s_tap_by_ocr_%s", dExt.GetDevice().UUID(), time.Now().Format("20060102150405"))))
}
textRect, err := dExt.FindScreenText(text, opts...)

View File

@@ -71,6 +71,7 @@ func (s *MCPServer4XTDriver) registerTools() {
s.registerTool(&ToolSelectDevice{}) // SelectDevice
// Touch Tools
s.registerTool(&ToolTap{}) // tap
s.registerTool(&ToolTapXY{}) // tap xy
s.registerTool(&ToolTapAbsXY{}) // tap abs xy
s.registerTool(&ToolTapByOCR{}) // tap by OCR
@@ -88,6 +89,8 @@ func (s *MCPServer4XTDriver) registerTools() {
// Input Tools
s.registerTool(&ToolInput{})
s.registerTool(&ToolText{})
s.registerTool(&ToolBackspace{})
s.registerTool(&ToolSetIme{})
// Button Tools
@@ -98,7 +101,10 @@ func (s *MCPServer4XTDriver) registerTools() {
// App Tools
s.registerTool(&ToolListPackages{}) // ListPackages
s.registerTool(&ToolLaunchApp{}) // LaunchApp
s.registerTool(&ToolOpenApp{}) // OpenApp
s.registerTool(&ToolTerminateApp{}) // TerminateApp
s.registerTool(&ToolTerminateAppNew{}) // TerminateApp (new)
s.registerTool(&ToolColdLaunch{}) // ColdLaunch
s.registerTool(&ToolAppInstall{}) // AppInstall
s.registerTool(&ToolAppUninstall{}) // AppUninstall
s.registerTool(&ToolAppClear{}) // AppClear

View File

@@ -79,6 +79,7 @@ func TestToolInterfaces(t *testing.T) {
tools := []ActionTool{
&ToolListAvailableDevices{},
&ToolSelectDevice{},
&ToolTap{},
&ToolTapXY{},
&ToolTapAbsXY{},
&ToolTapByOCR{},
@@ -92,6 +93,8 @@ func TestToolInterfaces(t *testing.T) {
&ToolSwipeToTapTexts{},
&ToolDrag{},
&ToolInput{},
&ToolText{},
&ToolBackspace{},
&ToolScreenShot{},
&ToolGetScreenSize{},
&ToolPressButton{},
@@ -99,7 +102,10 @@ func TestToolInterfaces(t *testing.T) {
&ToolBack{},
&ToolListPackages{},
&ToolLaunchApp{},
&ToolOpenApp{},
&ToolTerminateApp{},
&ToolTerminateAppNew{},
&ToolColdLaunch{},
&ToolAppInstall{},
&ToolAppUninstall{},
&ToolAppClear{},
@@ -240,6 +246,45 @@ func TestToolSelectDevice(t *testing.T) {
assert.Equal(t, string(option.ACTION_SelectDevice), request.Params.Name)
}
// TestToolTap tests the ToolTap implementation
func TestToolTap(t *testing.T) {
tool := &ToolTap{}
// Test Name
assert.Equal(t, option.ACTION_Tap, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid params
action := option.MobileAction{
Method: option.ACTION_Tap,
Params: []float64{0.5, 0.6},
ActionOptions: option.ActionOptions{
Duration: 1.5,
},
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_Tap), request.Params.Name)
args := request.GetArguments()
assert.Equal(t, 0.5, args["x"])
assert.Equal(t, 0.6, args["y"])
assert.Equal(t, 1.5, args["duration"])
// Test ConvertActionToCallToolRequest with invalid params
invalidAction := option.MobileAction{
Method: option.ACTION_Tap,
Params: "invalid",
}
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
assert.Error(t, err)
}
// TestToolTapXY tests the ToolTapXY implementation
func TestToolTapXY(t *testing.T) {
tool := &ToolTapXY{}
@@ -782,6 +827,74 @@ func TestToolInput(t *testing.T) {
assert.Equal(t, "Hello World", request.GetArguments()["text"])
}
// TestToolText tests the ToolText implementation
func TestToolText(t *testing.T) {
tool := &ToolText{}
// Test Name
assert.Equal(t, option.ACTION_Text, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid params
action := option.MobileAction{
Method: option.ACTION_Text,
Params: "Hello World",
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_Text), request.Params.Name)
assert.Equal(t, "Hello World", request.GetArguments()["text"])
}
// TestToolBackspace tests the ToolBackspace implementation
func TestToolBackspace(t *testing.T) {
tool := &ToolBackspace{}
// Test Name
assert.Equal(t, option.ACTION_Backspace, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid int params
action := option.MobileAction{
Method: option.ACTION_Backspace,
Params: 3,
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_Backspace), request.Params.Name)
assert.Equal(t, 3, request.GetArguments()["count"])
// Test ConvertActionToCallToolRequest with float64 params
actionFloat := option.MobileAction{
Method: option.ACTION_Backspace,
Params: 5.0,
}
requestFloat, err := tool.ConvertActionToCallToolRequest(actionFloat)
assert.NoError(t, err)
assert.Equal(t, 5, requestFloat.GetArguments()["count"])
// Test ConvertActionToCallToolRequest with invalid params (should default to 1)
invalidAction := option.MobileAction{
Method: option.ACTION_Backspace,
Params: "invalid",
}
requestDefault, err := tool.ConvertActionToCallToolRequest(invalidAction)
assert.NoError(t, err)
assert.Equal(t, 1, requestDefault.GetArguments()["count"])
}
// TestToolScreenShot tests the ToolScreenShot implementation
func TestToolScreenShot(t *testing.T) {
tool := &ToolScreenShot{}
@@ -973,6 +1086,39 @@ func TestToolLaunchApp(t *testing.T) {
assert.Error(t, err)
}
// TestToolOpenApp tests the ToolOpenApp implementation
func TestToolOpenApp(t *testing.T) {
tool := &ToolOpenApp{}
// Test Name
assert.Equal(t, option.ACTION_OpenApp, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid params
action := option.MobileAction{
Method: option.ACTION_OpenApp,
Params: "com.example.app",
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_OpenApp), request.Params.Name)
assert.Equal(t, "com.example.app", request.GetArguments()["packageName"])
// Test ConvertActionToCallToolRequest with invalid params
invalidAction := option.MobileAction{
Method: option.ACTION_OpenApp,
Params: 123, // should be string
}
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
assert.Error(t, err)
}
// TestToolTerminateApp tests the ToolTerminateApp implementation
func TestToolTerminateApp(t *testing.T) {
tool := &ToolTerminateApp{}
@@ -1006,6 +1152,72 @@ func TestToolTerminateApp(t *testing.T) {
assert.Error(t, err)
}
// TestToolTerminateAppNew tests the ToolTerminateAppNew implementation
func TestToolTerminateAppNew(t *testing.T) {
tool := &ToolTerminateAppNew{}
// Test Name
assert.Equal(t, option.ACTION_TerminateApp, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid params
action := option.MobileAction{
Method: option.ACTION_TerminateApp,
Params: "com.example.app",
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_TerminateApp), request.Params.Name)
assert.Equal(t, "com.example.app", request.GetArguments()["packageName"])
// Test ConvertActionToCallToolRequest with invalid params
invalidAction := option.MobileAction{
Method: option.ACTION_TerminateApp,
Params: []int{1, 2, 3}, // should be string
}
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
assert.Error(t, err)
}
// TestToolColdLaunch tests the ToolColdLaunch implementation
func TestToolColdLaunch(t *testing.T) {
tool := &ToolColdLaunch{}
// Test Name
assert.Equal(t, option.ACTION_ColdLaunch, tool.Name())
// Test Description
assert.NotEmpty(t, tool.Description())
// Test Options
options := tool.Options()
assert.NotNil(t, options)
// Test ConvertActionToCallToolRequest with valid params
action := option.MobileAction{
Method: option.ACTION_ColdLaunch,
Params: "com.example.app",
}
request, err := tool.ConvertActionToCallToolRequest(action)
assert.NoError(t, err)
assert.Equal(t, string(option.ACTION_ColdLaunch), request.Params.Name)
assert.Equal(t, "com.example.app", request.GetArguments()["packageName"])
// Test ConvertActionToCallToolRequest with invalid params
invalidAction := option.MobileAction{
Method: option.ACTION_ColdLaunch,
Params: 123, // should be string
}
_, err = tool.ConvertActionToCallToolRequest(invalidAction)
assert.Error(t, err)
}
// TestToolAppInstall tests the ToolAppInstall implementation
func TestToolAppInstall(t *testing.T) {
tool := &ToolAppInstall{}

View File

@@ -3,6 +3,7 @@ package uixt
import (
"context"
"fmt"
"time"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
@@ -393,3 +394,195 @@ func (t *ToolGetForegroundApp) Implement() server.ToolHandlerFunc {
func (t *ToolGetForegroundApp) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
return BuildMCPCallToolRequest(t.Name(), map[string]any{}, action), nil
}
// ToolOpenApp implements the open_app tool call.
type ToolOpenApp struct {
// Return data fields - these define the structure of data returned by this tool
PackageName string `json:"packageName" desc:"Package name of the opened app"`
}
func (t *ToolOpenApp) Name() option.ActionName {
return option.ACTION_OpenApp
}
func (t *ToolOpenApp) Description() string {
return "Open an app on mobile device using its package name and wait for the app to load"
}
func (t *ToolOpenApp) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_OpenApp)
}
func (t *ToolOpenApp) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
if unifiedReq.PackageName == "" {
return nil, fmt.Errorf("package_name is required")
}
// Open app action logic
err = driverExt.AppLaunch(unifiedReq.PackageName)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Open app failed: %s", err.Error())), err
}
message := fmt.Sprintf("Successfully opened app: %s", unifiedReq.PackageName)
returnData := ToolOpenApp{PackageName: unifiedReq.PackageName}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolOpenApp) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
if packageName, ok := action.Params.(string); ok {
arguments := map[string]any{
"packageName": packageName,
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
return mcp.CallToolRequest{}, fmt.Errorf("invalid open app params: %v", action.Params)
}
// ToolTerminateAppNew implements the terminal_app tool call.
type ToolTerminateAppNew struct {
// Return data fields - these define the structure of data returned by this tool
PackageName string `json:"packageName" desc:"Package name of the terminated app"`
WasRunning bool `json:"wasRunning" desc:"Whether the app was actually running before termination"`
}
func (t *ToolTerminateAppNew) Name() option.ActionName {
return option.ACTION_TerminateApp
}
func (t *ToolTerminateAppNew) Description() string {
return "Terminate a running app on mobile device using its package name"
}
func (t *ToolTerminateAppNew) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_TerminateApp)
}
func (t *ToolTerminateAppNew) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
if unifiedReq.PackageName == "" {
return nil, fmt.Errorf("package_name is required")
}
// Terminate app action logic
success, err := driverExt.AppTerminate(unifiedReq.PackageName)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Terminate app failed: %s", err.Error())), err
}
if !success {
log.Warn().Str("packageName", unifiedReq.PackageName).Msg("app was not running")
}
message := fmt.Sprintf("Successfully terminated app: %s", unifiedReq.PackageName)
returnData := ToolTerminateAppNew{
PackageName: unifiedReq.PackageName,
WasRunning: success,
}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolTerminateAppNew) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
if packageName, ok := action.Params.(string); ok {
arguments := map[string]any{
"packageName": packageName,
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
return mcp.CallToolRequest{}, fmt.Errorf("invalid terminate app params: %v", action.Params)
}
// ToolColdLaunch implements the cold_launch tool call.
type ToolColdLaunch struct {
// Return data fields - these define the structure of data returned by this tool
PackageName string `json:"packageName" desc:"Package name of the cold launched app"`
}
func (t *ToolColdLaunch) Name() option.ActionName {
return option.ACTION_ColdLaunch
}
func (t *ToolColdLaunch) Description() string {
return "Perform a cold launch of an app (terminate first if running, then launch)"
}
func (t *ToolColdLaunch) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_ColdLaunch)
}
func (t *ToolColdLaunch) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
if unifiedReq.PackageName == "" {
return nil, fmt.Errorf("package_name is required")
}
// Cold launch logic: terminate first, then launch
// First try to terminate the app (ignore errors if app is not running)
_, err = driverExt.AppTerminate(unifiedReq.PackageName)
if err != nil {
log.Warn().Str("packageName", unifiedReq.PackageName).Msg("app was not running")
return NewMCPErrorResponse(fmt.Sprintf("Cold launch failed, terminate app failed: %s", err.Error())), err
}
time.Sleep(3 * time.Second)
// Then launch the app
err = driverExt.AppLaunch(unifiedReq.PackageName)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Cold launch failed, launch app failed: %s", err.Error())), err
}
message := fmt.Sprintf("Successfully cold launched app: %s", unifiedReq.PackageName)
returnData := ToolColdLaunch{PackageName: unifiedReq.PackageName}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolColdLaunch) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
if packageName, ok := action.Params.(string); ok {
arguments := map[string]any{
"packageName": packageName,
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
return mcp.CallToolRequest{}, fmt.Errorf("invalid cold launch params: %v", action.Params)
}

View File

@@ -123,3 +123,131 @@ func (t *ToolSetIme) ConvertActionToCallToolRequest(action option.MobileAction)
}
return mcp.CallToolRequest{}, fmt.Errorf("invalid set ime params: %v", action.Params)
}
// ToolText implements the text tool call.
type ToolText struct {
// Return data fields - these define the structure of data returned by this tool
Text string `json:"text" desc:"Text that was input"`
}
func (t *ToolText) Name() option.ActionName {
return option.ACTION_Text
}
func (t *ToolText) Description() string {
return "Input text into the currently focused element or input field"
}
func (t *ToolText) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_Text)
}
func (t *ToolText) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
if unifiedReq.Text == "" {
return nil, fmt.Errorf("text is required")
}
opts := unifiedReq.Options()
// Text input action logic
err = driverExt.Input(unifiedReq.Text, opts...)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Text input failed: %s", err.Error())), err
}
message := fmt.Sprintf("Successfully input text: %s", unifiedReq.Text)
returnData := ToolText{Text: unifiedReq.Text}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolText) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
text := fmt.Sprintf("%v", action.Params)
arguments := map[string]any{
"text": text,
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
// ToolBackspace implements the backspace tool call.
type ToolBackspace struct {
// Return data fields - these define the structure of data returned by this tool
Count int `json:"count" desc:"Number of backspace operations performed"`
}
func (t *ToolBackspace) Name() option.ActionName {
return option.ACTION_Backspace
}
func (t *ToolBackspace) Description() string {
return "Perform backspace operations to delete characters"
}
func (t *ToolBackspace) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_Backspace)
}
func (t *ToolBackspace) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
count := unifiedReq.Count
if count <= 0 {
count = 1 // Default to 1 backspace if not specified or invalid
}
opts := unifiedReq.Options()
// Backspace action logic
err = driverExt.Backspace(count, opts...)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Backspace failed: %s", err.Error())), err
}
message := fmt.Sprintf("Successfully performed %d backspace operations", count)
returnData := ToolBackspace{Count: count}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolBackspace) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
var count int
switch v := action.Params.(type) {
case int:
count = v
case float64:
count = int(v)
default:
count = 1 // Default count
}
arguments := map[string]any{
"count": count,
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}

View File

@@ -84,6 +84,79 @@ func (t *ToolTapXY) ConvertActionToCallToolRequest(action option.MobileAction) (
return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
}
// ToolTap implements the tap tool call.
type ToolTap struct {
// Return data fields - these define the structure of data returned by this tool
X float64 `json:"x" desc:"X coordinate where tap was performed"`
Y float64 `json:"y" desc:"Y coordinate where tap was performed"`
}
func (t *ToolTap) Name() option.ActionName {
return option.ACTION_Tap
}
func (t *ToolTap) Description() string {
return "Tap on the screen at given relative coordinates (0.0-1.0 range)"
}
func (t *ToolTap) Options() []mcp.ToolOption {
unifiedReq := &option.ActionOptions{}
return unifiedReq.GetMCPOptions(option.ACTION_Tap)
}
func (t *ToolTap) Implement() server.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.GetArguments()
driverExt, err := setupXTDriver(ctx, arguments)
if err != nil {
return nil, fmt.Errorf("setup driver failed: %w", err)
}
unifiedReq, err := parseActionOptions(arguments)
if err != nil {
return nil, err
}
// Build all options from request arguments
opts := unifiedReq.Options()
// Validate required parameters
if unifiedReq.X == 0 || unifiedReq.Y == 0 {
return nil, fmt.Errorf("x and y coordinates are required")
}
// Tap action logic
err = driverExt.TapXY(unifiedReq.X, unifiedReq.Y, opts...)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("Tap failed: %s", err.Error())), err
}
message := fmt.Sprintf("Successfully tapped at coordinates (%.2f, %.2f)", unifiedReq.X, unifiedReq.Y)
returnData := ToolTap{
X: unifiedReq.X,
Y: unifiedReq.Y,
}
return NewMCPSuccessResponse(message, &returnData), nil
}
}
func (t *ToolTap) ConvertActionToCallToolRequest(action option.MobileAction) (mcp.CallToolRequest, error) {
if params, err := builtin.ConvertToFloat64Slice(action.Params); err == nil && len(params) == 2 {
x, y := params[0], params[1]
arguments := map[string]any{
"x": x,
"y": y,
}
// Add duration if available from action options
if duration := action.ActionOptions.Duration; duration > 0 {
arguments["duration"] = duration
}
return BuildMCPCallToolRequest(t.Name(), arguments, action), nil
}
return mcp.CallToolRequest{}, fmt.Errorf("invalid tap params: %v", action.Params)
}
// ToolTapAbsXY implements the tap_abs_xy tool call.
type ToolTapAbsXY struct {
// Return data fields - these define the structure of data returned by this tool

View File

@@ -43,7 +43,10 @@ const (
ACTION_AppClear ActionName = "app_clear"
ACTION_AppStart ActionName = "app_start"
ACTION_AppLaunch ActionName = "app_launch" // 启动 app 并堵塞等待 app 首屏加载完成
ACTION_OpenApp ActionName = "open_app" // 启动 app 并堵塞等待 app 首屏加载完成
ACTION_AppTerminate ActionName = "app_terminate"
ACTION_TerminateApp ActionName = "terminal_app"
ACTION_ColdLaunch ActionName = "cold_launch"
ACTION_AppStop ActionName = "app_stop"
ACTION_ScreenShot ActionName = "screenshot"
ACTION_ScreenRecord ActionName = "screenrecord"
@@ -70,6 +73,7 @@ const (
ACTION_SwipeCoordinate ActionName = "swipe_coordinate" // swipe by coordinates (fromX, fromY, toX, toY)
ACTION_Drag ActionName = "drag"
ACTION_Input ActionName = "input"
ACTION_Text ActionName = "text"
ACTION_PressButton ActionName = "press_button"
ACTION_Back ActionName = "back"
ACTION_KeyCode ActionName = "keycode"
@@ -601,6 +605,7 @@ func WithOutputSchema(schema interface{}) ActionOption {
func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
// Define field mappings for different action types
fieldMappings := map[ActionName][]string{
ACTION_Tap: {"platform", "serial", "x", "y", "duration"},
ACTION_TapXY: {"platform", "serial", "x", "y", "duration"},
ACTION_TapAbsXY: {"platform", "serial", "x", "y", "duration"},
ACTION_TapByOCR: {"platform", "serial", "text", "ignoreNotFoundError", "maxRetryTimes", "index", "regex", "tapRandomRect"},
@@ -611,8 +616,13 @@ func (o *ActionOptions) GetMCPOptions(actionType ActionName) []mcp.ToolOption {
ACTION_Swipe: {"platform", "serial", "direction", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
ACTION_Drag: {"platform", "serial", "fromX", "fromY", "toX", "toY", "duration", "pressDuration"},
ACTION_Input: {"platform", "serial", "text", "frequency"},
ACTION_Text: {"platform", "serial", "text", "frequency"},
ACTION_Backspace: {"platform", "serial", "count"},
ACTION_AppLaunch: {"platform", "serial", "packageName"},
ACTION_OpenApp: {"platform", "serial", "packageName"},
ACTION_AppTerminate: {"platform", "serial", "packageName"},
ACTION_TerminateApp: {"platform", "serial", "packageName"},
ACTION_ColdLaunch: {"platform", "serial", "packageName"},
ACTION_AppInstall: {"platform", "serial", "appUrl", "packageName"},
ACTION_AppUninstall: {"platform", "serial", "packageName"},
ACTION_AppClear: {"platform", "serial", "packageName"},

View File

@@ -27,29 +27,41 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
var err error
// Initialize Wings service (always available)
driverExt.WingsService = ai.NewWingsService()
log.Info().Msg("Wings service initialized")
// Handle LLM service initialization
if services.LLMConfig != nil {
// Use advanced LLM configuration if provided
driverExt.LLMService, err = ai.NewLLMServiceWithOptionConfig(services.LLMConfig)
if err != nil {
return nil, errors.Wrap(err, "init llm service with config failed")
log.Warn().Err(err).Msg("init llm service with config failed, Wings service will be used")
} else {
log.Info().Msg("LLM service initialized with advanced config")
}
} else if services.LLMService != "" {
// Fallback to simple LLM service if no config provided
driverExt.LLMService, err = ai.NewLLMService(services.LLMService)
if err != nil {
return nil, errors.Wrap(err, "init llm service failed")
log.Warn().Err(err).Msg("init llm service failed, Wings service will be used")
} else {
log.Info().Msg("LLM service initialized")
}
} else {
log.Warn().Msg("no LLM service config provided")
log.Info().Msg("no LLM service config provided, using Wings service only")
}
// Register uixt MCP tools to LLM service if it exists
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err = driverExt.WingsService.RegisterTools(einoTools); err != nil {
log.Debug().Err(err).Msg("Wings service ignoring tool registration (expected)")
}
if driverExt.LLMService != nil {
mcpTools := driverExt.client.Server.ListTools()
einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
if err := driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools")
if err = driverExt.LLMService.RegisterTools(einoTools); err != nil {
log.Warn().Err(err).Msg("failed to register uixt tools to LLM service")
}
}
@@ -59,8 +71,9 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
// XTDriver = IDriver + AI
type XTDriver struct {
IDriver
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM
CVService ai.ICVService // OCR/CV
LLMService ai.ILLMService // LLM (fallback service)
WingsService ai.ILLMService // Wings API service (priority service)
services *option.AIServiceOptions // AI services options
client *MCPClient4XTDriver // MCP Client for built-in uixt server