diff --git a/demo.json b/demo.json deleted file mode 100644 index 501de7ae..00000000 --- a/demo.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "config": { - "name": "demo" - }, - "teststeps": [ - { - "name": "获取剪贴板", - "android": { - "os_type": "android", - "actions": [ - { - "method": "push_albums", - "params": { - "fileUrl": "https://tosv.byted.org/obj/wzh-shoots-ios-pkg/video/recordings/b4b3d1b3902ec0920db2740b87b97a42b9c80f0f_end_1754473925.mp4", - "clearBefore": true - }, - "options": {}, - "identifier": "action_push_image" - } - ] - } - } - ] -} diff --git a/uixt/ai/wings_service.go b/uixt/ai/wings_service.go index 85bdd779..4424f98a 100644 --- a/uixt/ai/wings_service.go +++ b/uixt/ai/wings_service.go @@ -163,11 +163,8 @@ func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*Assert return nil, errors.Wrap(err, "validate assertion parameters failed") } - // Clean screenshot data URL prefix - cleanScreenshot := w.cleanScreenshotDataURL(opts.Screenshot) - // Get device info from context (if available) - deviceInfos := w.getDeviceInfoFromScreenshot(ctx, cleanScreenshot) + deviceInfos := w.getDeviceInfoFromScreenshot(ctx, opts.Screenshot) // Prepare Wings API request for assertion apiRequest := WingsActionRequest{ @@ -387,16 +384,7 @@ func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (st for _, content := range message.MultiContent { if content.Type == schema.ChatMessagePartTypeImageURL && content.ImageURL != nil { - // Extract base64 data from data URL - screenshot := content.ImageURL.URL - if strings.HasPrefix(screenshot, "data:image/") { - // Remove data URL prefix - parts := strings.Split(screenshot, ",") - if len(parts) == 2 { - return parts[1], nil - } - } - return screenshot, nil + return content.ImageURL.URL, nil } } @@ -408,17 +396,17 @@ func (w *WingsService) getDeviceInfoFromContext(_ context.Context, screenshot st // TODO: Extract device info from context if available // Use last history's NowImage as PreImage if history exists - preImage := screenshot + preImageUrl := screenshot if len(w.history) > 0 && w.history[len(w.history)-1].DeviceInfos != nil && len(*w.history[len(w.history)-1].DeviceInfos) > 0 { - preImage = (*w.history[len(w.history)-1].DeviceInfos)[0].NowImage + preImageUrl = (*w.history[len(w.history)-1].DeviceInfos)[0].NowImageUrl } // use default device info with optimized PreImage return []WingsDeviceInfo{ { DeviceID: "default-device", - NowImage: screenshot, - PreImage: preImage, + NowImageUrl: screenshot, + PreImageUrl: preImageUrl, NowLayoutJSON: "", OperationSystem: "android", }, @@ -484,7 +472,7 @@ func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequ defer resp.Body.Close() logID := resp.Header.Get("X-Tt-Logid") - log.Info().Str("step_text", request.StepText).Str("log_id", logID).Str("biz_id", request.BizId).Str("url", w.apiURL).Msg("call wings api") + log.Info().Str("step_text", request.StepText).Str("image_url", request.DeviceInfos[0].NowImageUrl).Str("log_id", logID).Str("biz_id", request.BizId).Str("url", w.apiURL).Msg("call wings api") // Read response body responseBody, err := io.ReadAll(resp.Body) diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 185f41fc..c849e6a6 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -251,6 +251,7 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* screenResult, err := dExt.GetScreenResult( option.WithScreenShotFileName("ai_assert"), option.WithScreenShotBase64(true), + option.WithScreenShotUpload(true), ) if err != nil { return nil, err @@ -267,7 +268,7 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (* modelCallStartTime := time.Now() assertOpts := &ai.AssertOptions{ Assertion: assertion, - Screenshot: screenResult.Base64, + Screenshot: screenResult.UploadedURL, Size: screenResult.Resolution, } result, err := dExt.LLMService.Assert(context.Background(), assertOpts) @@ -302,6 +303,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. screenResult, err := dExt.GetScreenResult( option.WithScreenShotFileName("ai_planning"), option.WithScreenShotBase64(true), + option.WithScreenShotUpload(true), ) if err != nil { return nil, err @@ -321,7 +323,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. { Type: schema.ChatMessagePartTypeImageURL, ImageURL: &schema.ChatMessageImageURL{ - URL: screenResult.Base64, + URL: screenResult.UploadedURL, }, }, }, diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index d84c4e1b..b1662d3b 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -134,6 +134,19 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult } } + if screenshotOptions.ScreenShotWithUpload { + // Upload the screenshot to the server + if screenResult.ImagePath != "" && screenResult.bufSource != nil { + url, err := uploadScreenshot(screenResult.ImagePath, screenResult.bufSource) + if err != nil { + log.Warn().Err(err).Str("imagePath", screenResult.ImagePath).Msg("failed to upload screenshot") + } else if url != "" { + screenResult.UploadedURL = url + log.Info().Str("uploadedUrl", url).Msg("screenshot uploaded successfully") + } + } + } + // save screen result to session session := dExt.GetSession() session.screenResults = append(session.screenResults, screenResult) diff --git a/uixt/driver_utils.go b/uixt/driver_utils.go index d4c80237..f583c84d 100644 --- a/uixt/driver_utils.go +++ b/uixt/driver_utils.go @@ -1,11 +1,14 @@ package uixt import ( + "bytes" "context" "crypto/md5" + "crypto/tls" "fmt" "io" "math/rand/v2" + "mime/multipart" "net/http" "os" "path/filepath" @@ -18,6 +21,7 @@ import ( "github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/internal/config" + "github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/uixt/option" ) @@ -381,3 +385,79 @@ func DownloadFileByUrl(fileUrl string) (filePath string, err error) { log.Info().Str("filePath", filePath).Msg("download file success") return filePath, nil } + +// uploadScreenshot uploads a screenshot to the server and returns the URL +func uploadScreenshot(imagePath string, imageBuffer *bytes.Buffer) (string, error) { + // Create a new buffer for the multipart form + var requestBody bytes.Buffer + writer := multipart.NewWriter(&requestBody) + + // Create a form file field + fileField, err := writer.CreateFormFile("file", filepath.Base(imagePath)) + if err != nil { + return "", errors.Wrap(err, "failed to create form file") + } + + // Copy the image buffer to the form file field + if _, err := io.Copy(fileField, bytes.NewReader(imageBuffer.Bytes())); err != nil { + return "", errors.Wrap(err, "failed to copy image data") + } + + // Close the multipart writer + if err := writer.Close(); err != nil { + return "", errors.Wrap(err, "failed to close multipart writer") + } + + // Create the HTTP request + uploadURL := "https://gtf-eapi-cn.bytedance.com/cn/upload/xxx" + req, err := http.NewRequest("POST", uploadURL, &requestBody) + if err != nil { + return "", errors.Wrap(code.UploadFailed, err.Error()) + } + + // Set headers + req.Header.Set("Content-Type", writer.FormDataContentType()) + req.Header.Set("accessKey", "ies.vedem.video") + req.Header.Set("token", "***REMOVED***") + + // Create HTTP client with HTTP/1.1 support + client := &http.Client{ + Transport: &http.Transport{ + TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), + }, + } + + // Send the request + log.Debug().Str("url", uploadURL).Str("imagePath", imagePath).Msg("uploading screenshot") + resp, err := client.Do(req) + if err != nil { + return "", errors.Wrap(code.UploadFailed, err.Error()) + } + defer resp.Body.Close() + + // Read the response body + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", errors.Wrap(code.UploadFailed, err.Error()) + } + + // Parse the response JSON + var result struct { + StatusCode int `json:"StatusCode"` + Data interface{} `json:"Data"` + URL string `json:"URL"` + } + + if err := json.Unmarshal(respBody, &result); err != nil { + log.Warn().Err(err).Str("response", string(respBody)).Msg("failed to parse upload response") + return "", errors.Wrap(code.UploadFailed, "failed to parse response JSON") + } + + // Check if the upload was successful + if result.StatusCode != 0 { + return "", fmt.Errorf("upload failed with status code: %d", result.StatusCode) + } + + log.Debug().Str("url", result.URL).Msg("screenshot uploaded successfully") + return result.URL, nil +}