diff --git a/pkg/gadb/device.go b/pkg/gadb/device.go index c6857fee..caf7e448 100644 --- a/pkg/gadb/device.go +++ b/pkg/gadb/device.go @@ -664,22 +664,14 @@ func (d *Device) installViaABBExec(apk io.ReadSeeker, args ...string) (raw []byt tp transport filesize int64 ) - timeout := 8 - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Minute) - defer cancel() - filesize, err = apk.Seek(0, io.SeekEnd) if err != nil { return nil, err } - if tp, err = d.createDeviceTransport(4 * time.Minute); err != nil { + if tp, err = d.createDeviceTransport(5 * time.Minute); err != nil { return nil, err } defer func() { _ = tp.Close() }() - go func() { - <-ctx.Done() - _ = tp.Close() - }() cmd := "abb_exec:package\x00install\x00-t" for _, arg := range args { cmd += "\x00" + arg @@ -698,9 +690,6 @@ func (d *Device) installViaABBExec(apk io.ReadSeeker, args ...string) (raw []byt return nil, err } raw, err = tp.ReadBytesAll() - if errors.Is(ctx.Err(), context.DeadlineExceeded) { - return nil, fmt.Errorf("installation timed out after %d minutes", timeout) - } return } diff --git a/uixt/ai/wings_service.go b/uixt/ai/wings_service.go index ab25e576..35fa77db 100644 --- a/uixt/ai/wings_service.go +++ b/uixt/ai/wings_service.go @@ -26,7 +26,6 @@ type WingsService struct { bizId string accessKey string secretKey string - history []History // Conversation history for Wings API } // NewWingsService creates a new Wings service instance @@ -50,7 +49,6 @@ func NewWingsService() (ILLMService, error) { bizId: bizID, accessKey: accessKey, secretKey: secretKey, - history: []History{}, }, nil } @@ -61,11 +59,6 @@ func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*Planni return nil, errors.Wrap(err, "validate planning parameters failed") } - // Reset history if requested - if opts.ResetHistory { - w.resetHistory() - } - // Extract screenshot from message screenshot, err := w.extractScreenshotFromMessage(opts.Message) if err != nil { @@ -77,11 +70,15 @@ func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*Planni // Prepare Wings API request apiRequest := WingsActionRequest{ - Historys: w.history, - DeviceInfo: deviceInfo, - StepText: fmt.Sprintf("%s", opts.UserInstruction), - BizId: w.bizId, - TextCase: fmt.Sprintf("整体描述:\n前置条件:\n操作步骤:\n%s\n停止操作。\n注意事项:\n", opts.UserInstruction), + Historys: []interface{}{}, // empty as specified + DeviceInfos: []WingsDeviceInfo{ + deviceInfo, + }, + StepText: opts.UserInstruction, + BizId: w.bizId, + TextCase: "整体描述:\\n前置条件:\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤:\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言:屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项:\\n", + StepType: "automation", + DeviceID: deviceInfo.DeviceID, Base: WingsBase{ LogID: generateWingsUUID(), }, @@ -101,7 +98,7 @@ func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*Planni } // Check API response status - if response.BaseResp.StatusCode != 0 && response.BaseResp.StatusCode != 200 { + if response.BaseResp.StatusCode != 0 { err = fmt.Errorf("API returned error: %s", response.BaseResp.StatusMessage) return &PlanningResult{ Thought: response.ThoughtChain.Thought, @@ -110,50 +107,26 @@ func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*Planni }, err } - // Update history with response data - newHistoryEntry := History{ - Observation: response.ThoughtChain.Observation, - Thought: response.ThoughtChain.Thought, - Summary: response.ThoughtChain.Summary, - StepText: response.StepText, - StepTextTrans: response.StepTextTrans, - OriStepIndex: response.OriStepIndex, - DeviceID: deviceInfo[0].DeviceID, - AgentType: response.AgentType, - ActionResult: "", // Always empty as requested - DeviceInfos: &deviceInfo, - ActionParams: response.ActionParams, + // Convert Wings API response to tool calls + toolCalls, err := w.convertWingsResponseToToolCalls(response.ActionParams) + if err != nil { + return &PlanningResult{ + Thought: response.ThoughtChain.Thought, + Error: err.Error(), + ModelName: "wings-api", + }, errors.Wrap(err, "convert Wings response to tool calls failed") } - w.history = append(w.history, newHistoryEntry) - var toolCalls []schema.ToolCall - if response.StepType != "FINISH" { - // Convert Wings API response to tool calls - toolCalls, err = w.convertWingsResponseToToolCalls(response.ActionParams) - if err != nil { - return &PlanningResult{ - Thought: response.ThoughtChain.Thought, - Error: err.Error(), - ModelName: "wings-api", - }, errors.Wrap(err, "convert Wings response to tool calls failed") - } - } - - // No need to update ActionResult as per user request - // ActionResult should always be empty log.Info(). Str("thought", response.ThoughtChain.Thought). - Str("action", response.AgentType). - Str("action_params", response.ActionParams). - Str("log_id", fmt.Sprintf("%v", response.BaseResp.Extra)). Int("tool_calls_count", len(toolCalls)). Int64("elapsed_ms", elapsed). Msg("Wings API planning completed") return &PlanningResult{ ToolCalls: toolCalls, - Thought: response.StepTextTrans, - Content: response.StepTextTrans, + Thought: response.ThoughtChain.Thought, + Content: response.ThoughtChain.Summary, ModelName: "wings-api", }, nil } @@ -173,15 +146,20 @@ func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*Assert // Prepare Wings API request for assertion apiRequest := WingsActionRequest{ - Historys: []History{}, - DeviceInfo: deviceInfo, - StepText: fmt.Sprintf("断言:%s", opts.Assertion), - BizId: w.bizId, - TextCase: fmt.Sprintf("整体描述:\n前置条件:\n操作步骤:\n断言: %s\n停止操作。\n注意事项:\n", opts.Assertion), + Historys: []interface{}{}, // empty as specified + DeviceInfos: []WingsDeviceInfo{ + deviceInfo, + }, + StepText: opts.Assertion, + BizId: w.bizId, + TextCase: "整体描述:\\n前置条件:\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤:\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言:屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项:\\n", + StepType: "assert", // Different from automation + DeviceID: deviceInfo.DeviceID, Base: WingsBase{ LogID: generateWingsUUID(), }, } + log.Info().Interface("apiRequest", apiRequest).Msg("Wings API request") // Call Wings API startTime := time.Now() @@ -197,7 +175,7 @@ func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*Assert } // Check API response status - if response.BaseResp.StatusCode != 0 && response.BaseResp.StatusCode != 200 { + if response.BaseResp.StatusCode != 0 { err = fmt.Errorf("API returned error: %s", response.BaseResp.StatusMessage) return &AssertionResult{ Pass: false, @@ -206,22 +184,6 @@ func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*Assert }, err } - // Update history with response data - newHistoryEntry := History{ - Observation: response.ThoughtChain.Observation, - Thought: response.ThoughtChain.Thought, - Summary: response.ThoughtChain.Summary, - StepText: response.StepText, - StepTextTrans: response.StepTextTrans, - OriStepIndex: response.OriStepIndex, - DeviceID: deviceInfo[0].DeviceID, - AgentType: response.AgentType, - ActionResult: "", // Always empty as requested - DeviceInfos: &deviceInfo, - ActionParams: response.ActionParams, - } - w.history = append(w.history, newHistoryEntry) - // Parse assertion result from action_params passed, assertionThought, err := w.parseAssertionResult(response.ActionParams, response.ThoughtChain) if err != nil { @@ -232,9 +194,6 @@ func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*Assert }, errors.Wrap(err, "parse assertion result failed") } - // No need to update ActionResult as per user request - // ActionResult should always be empty - log.Info(). Bool("passed", passed). Str("thought", assertionThought). @@ -269,12 +228,14 @@ func (w *WingsService) RegisterTools(tools []*schema.ToolInfo) error { // Wings API data structures type WingsActionRequest struct { - Historys []History `json:"historys"` - DeviceInfo []WingsDeviceInfo `json:"device_infos"` - StepText string `json:"step_text"` - BizId string `json:"biz_id"` - TextCase string `json:"text_case"` - Base WingsBase `json:"Base"` + Historys []interface{} `json:"historys"` + DeviceInfos []WingsDeviceInfo `json:"device_infos"` + StepText string `json:"step_text"` + BizId string `json:"biz_id"` + TextCase string `json:"text_case"` + StepType string `json:"step_type"` + DeviceID string `json:"device_id"` + Base WingsBase `json:"Base"` } type WingsDeviceInfo struct { @@ -292,14 +253,10 @@ type WingsBase struct { } type WingsActionResponse struct { - AgentType string `json:"agent_type" thrift:"agent_type,1,required"` - StepText string `json:"step_text" thrift:"step_text,2,required"` - StepTextTrans string `json:"step_text_trans" thrift:"step_text_trans,3,required"` - OriStepIndex int `json:"ori_step_index" thrift:"ori_step_index,4,required"` - StepType string `json:"step_type" thrift:"step_type,5,required"` - ActionParams string `json:"action_params" thrift:"action_params,6,required"` - ThoughtChain WingsThoughtChain `json:"thought_chain" thrift:"thought_chain,7,required"` - BaseResp WingsBaseResp `json:"BaseResp" thrift:"BaseResp,255,optional"` + StepType string `json:"step_type"` + ActionParams string `json:"action_params"` + ThoughtChain WingsThoughtChain `json:"thought_chain"` + BaseResp WingsBaseResp `json:"BaseResp"` } type WingsThoughtChain struct { @@ -319,21 +276,6 @@ type WingsExtra struct { LogID string `json:"_log_id"` } -// History structure for request and response -type History struct { - Observation string `json:"observation" thrift:"observation,1,required"` // 思考结果 - Thought string `json:"thought" thrift:"thought,2,required"` // 思考结果 - Summary string `json:"summary" thrift:"summary,3,required"` // 思考结果 - StepText string `json:"step_text" thrift:"step_text,4"` // 操作的指令 - DeviceID string `json:"device_id" thrift:"device_id,5"` // 操作的设备id - AgentType string `json:"agent_type" thrift:"agent_type,7"` // 最终决策的agent类型 - ActionResult string `json:"action_result" thrift:"action_result,8"` // 操作结果, 断言=断言结果, 自动化=自动化操作是否成功, 物料构造=物料构造结果 - DeviceInfos *[]WingsDeviceInfo `json:"device_infos,omitempty" thrift:"device_infos,9"` // 所有设备的信息 - ActionParams string `json:"action_params,omitempty" thrift:"action_params,10"` // 历史操作解析结果(断言,自动化,物料构造) - StepTextTrans string `json:"step_text_trans,omitempty" thrift:"step_text_trans,13"` // 归一化的步骤文本(为后续的实际执行解析文本) - OriStepIndex int `json:"ori_step_index,omitempty" thrift:"ori_step_index,14"` // 原本的执行序列(扩展前、目标导向原始文本步骤) -} - // Action parameter structures type WingsActionParams struct { Type string `json:"Type"` @@ -373,11 +315,6 @@ type WingsTextParams struct { // Helper methods -// resetHistory resets the conversation history -func (w *WingsService) resetHistory() { - w.history = []History{} -} - // generateWingsUUID generates a random UUID for LogID func generateWingsUUID() string { return uuid.New().String() @@ -408,29 +345,19 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st } // getDeviceInfoFromContext gets device info from context with fallback -func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot string) []WingsDeviceInfo { - // TODO: Extract device info from context if available - - // Use last history's NowImage as PreImage if history exists - preImage := screenshot - if len(w.history) > 0 && w.history[len(w.history)-1].DeviceInfos != nil && len(*w.history[len(w.history)-1].DeviceInfos) > 0 { - preImage = (*w.history[len(w.history)-1].DeviceInfos)[0].NowImage - } - - // use default device info with optimized PreImage - return []WingsDeviceInfo{ - { - DeviceID: "default-device", - NowImage: screenshot, - PreImage: preImage, - NowLayoutJSON: "", - OperationSystem: "android", - }, +func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot string) WingsDeviceInfo { + // use default device info + return WingsDeviceInfo{ + DeviceID: "default-device", + NowImage: screenshot, + PreImage: screenshot, + NowLayoutJSON: "", + OperationSystem: "android", } } // getDeviceInfoFromScreenshot gets device info from screenshot (for Assert) -func (w *WingsService) getDeviceInfoFromScreenshot(ctx context.Context, screenshot string) []WingsDeviceInfo { +func (w *WingsService) getDeviceInfoFromScreenshot(ctx context.Context, screenshot string) WingsDeviceInfo { return w.getDeviceInfoFromContext(ctx, screenshot) } @@ -463,8 +390,6 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ // Set headers httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("Accept", "application/json") - httpReq.Header.Add("x-use-ppe", "1") - httpReq.Header.Add("x-tt-env", "ppe_refactor_merge") // Add authentication headers if using external API if w.accessKey != "" && w.secretKey != "" { @@ -478,7 +403,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ // Execute HTTP request client := &http.Client{ - Timeout: 120 * time.Second, + Timeout: 60 * time.Second, } resp, err := client.Do(httpReq) @@ -486,9 +411,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ return nil, errors.Wrap(err, "HTTP request failed") } defer resp.Body.Close() - // resp X-Tt-Logid - logID := resp.Header.Get("X-Tt-Logid") - log.Info().Str("step_text", request.StepText).Str("log_id", logID).Str("biz_id", request.BizId).Str("url", w.apiURL).Msg("call wings api") + // Read response body responseBody, err := io.ReadAll(resp.Body) if err != nil { @@ -511,7 +434,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ // convertWingsResponseToToolCalls converts Wings API response to tool calls using generic approach func (w *WingsService) convertWingsResponseToToolCalls(actionParamsStr string) ([]schema.ToolCall, error) { - if actionParamsStr == "" || actionParamsStr == "FINISH" { + if actionParamsStr == "" { return []schema.ToolCall{}, nil } diff --git a/uixt/android_device.go b/uixt/android_device.go index 0a47eb95..efb243e8 100644 --- a/uixt/android_device.go +++ b/uixt/android_device.go @@ -240,12 +240,12 @@ func (dev *AndroidDevice) installViaInstaller(apkPath string, args ...string) er return err } // 等待安装完成或超时 - timeout := 8 * time.Minute + timeout := 3 * time.Minute select { case err := <-done: return err case <-time.After(timeout): - return fmt.Errorf("install via installer timed out after %v", timeout) + return fmt.Errorf("installation timed out after %v", timeout) } } diff --git a/uixt/android_test.go b/uixt/android_test.go index c7cb883d..b8a085be 100644 --- a/uixt/android_test.go +++ b/uixt/android_test.go @@ -21,6 +21,11 @@ func setupADBDriverExt(t *testing.T) *XTDriver { Serial: "", // Let it auto-detect the device serial AIOptions: []option.AIServiceOption{ option.WithCVService(option.CVServiceTypeVEDEM), + option.WithLLMConfig( + option.NewLLMServiceConfig(option.DOUBAO_1_5_UI_TARS_250328). + WithPlannerModel(option.WINGS_SERVICE). + WithAsserterModel(option.WINGS_SERVICE), + ), }, } diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go index 89fd65c8..6ca5d2e3 100644 --- a/uixt/driver_ext_ai_test.go +++ b/uixt/driver_ext_ai_test.go @@ -292,14 +292,31 @@ func TestDriverExt_AIAction(t *testing.T) { func TestDriverExt_AIAction_CompareWithAIAction(t *testing.T) { driver := setupDriverExt(t) - prompt := "[目标导向]向上滑动屏幕2次" + prompt := "点击搜索按钮" // Test both methods with the same prompt - aiResult, aiErr := driver.StartToGoal(context.Background(), prompt) + aiResult, aiErr := driver.AIAction(context.Background(), prompt) // Both should execute without critical errors (may have different implementations) t.Logf("AIAction error: %v", aiErr) - t.Logf("AIAction result: %v", aiResult) + + // If both succeed, compare results + if aiResult != nil { + assert.Equal(t, "action", aiResult.Type, "AIAction result type should be 'action'") + + // Both should have timing information + assert.Greater(t, aiResult.ModelCallElapsed, int64(0), "AIAction should have model call elapsed time") + + // Both should have screenshot information + assert.NotEmpty(t, aiResult.ImagePath, "AIAction should have image path") + + // Compare model names + if aiResult.PlanningResult != nil { + t.Logf("AIAction model: %s", aiResult.PlanningResult.ModelName) + + assert.Equal(t, "wings-api", aiResult.PlanningResult.ModelName, "AIAction should use wings-api") + } + } } // TestDriverExt_AIAction_ErrorHandling tests AIAction error handling