package uixt import ( "bytes" "encoding/base64" "fmt" "image" "image/color" "image/draw" "image/gif" "image/jpeg" "image/png" "math" "os" "path/filepath" "strings" "time" "github.com/pkg/errors" "github.com/rs/zerolog/log" "github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/internal/config" "github.com/httprunner/httprunner/v5/uixt/ai" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" ) type ScreenResult struct { bufSource *bytes.Buffer // raw image buffer bytes ImagePath string `json:"image_path"` // image file path Resolution types.Size `json:"resolution"` UploadedURL string `json:"uploaded_url"` // uploaded image url Texts ai.OCRTexts `json:"texts"` // dumped raw OCRTexts Icons ai.UIResultMap `json:"icons"` // CV 识别的图标 Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"] Popup *PopupInfo `json:"popup,omitempty"` } func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts { if x1 > 1 || y1 > 1 || x2 > 1 || y2 > 1 { log.Warn().Msg("x1, y1, x2, y2 should be in percentage, skip filter scope") return s.Texts } return s.Texts.FilterScope(option.AbsScope{ int(float64(s.Resolution.Width) * x1), int(float64(s.Resolution.Height) * y1), int(float64(s.Resolution.Width) * x2), int(float64(s.Resolution.Height) * y2), }) } // GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size // Also saves the screenshot to session for report display func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) { // Create screenshot with session saving, minimal CV processing for AI operations screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName("screenshot_base64"), ) if err != nil { return "", types.Size{}, err } // convert buffer to base64 string screenShotBase64 := "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes()) return screenShotBase64, screenResult.Resolution, nil } // createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) { // get compressed screenshot buffer compressBufSource, err := getScreenShotBuffer(dExt.IDriver) if err != nil { return nil, err } screenshotOptions := option.NewActionOptions(opts...) // save compressed screenshot to file var fileName string optionsList := screenshotOptions.List() if screenshotOptions.ScreenShotFileName != "" { fileName = builtin.GenNameWithTimestamp("%d_" + screenshotOptions.ScreenShotFileName) } else if len(optionsList) != 0 { fileName = builtin.GenNameWithTimestamp("%d_" + strings.Join(optionsList, "_")) } else { fileName = builtin.GenNameWithTimestamp("%d_screenshot") } imagePath := filepath.Join( config.GetConfig().ScreenShotsPath(), fmt.Sprintf("%s.%s", fileName, "jpeg"), ) go func() { err := saveScreenShot(compressBufSource, imagePath) if err != nil { log.Error().Err(err).Msg("save screenshot file failed") } }() windowSize, err := dExt.WindowSize() if err != nil { return nil, errors.Wrap(code.DeviceGetInfoError, err.Error()) } // create basic screen result screenResult = &ScreenResult{ bufSource: compressBufSource, ImagePath: imagePath, Tags: nil, Resolution: windowSize, } logger := log.Debug().Str("imagePath", imagePath) // perform CV processing if any CV-related option is enabled if needsCVProcessing(screenshotOptions) { imageResult, err := dExt.CVService.ReadFromBuffer(compressBufSource, opts...) if err != nil { log.Error().Err(err).Msg("ReadFromBuffer from ImageService failed") return nil, err } if imageResult != nil { screenResult.Texts = imageResult.OCRResult.ToOCRTexts() screenResult.UploadedURL = imageResult.URL screenResult.Icons = imageResult.UIResult if screenshotOptions.ScreenShotWithClosePopups && imageResult.ClosePopupsResult != nil { screenResult.Popup = &PopupInfo{ ClosePopupsResult: imageResult.ClosePopupsResult, PicName: imagePath, PicURL: imageResult.URL, } closeAreas, _ := imageResult.UIResult.FilterUIResults([]string{"close"}) for _, closeArea := range closeAreas { screenResult.Popup.ClosePoints = append(screenResult.Popup.ClosePoints, closeArea.Center()) } } if screenResult.UploadedURL != "" { logger.Str("imageUrl", screenResult.UploadedURL) } } } // save screen result to session session := dExt.GetSession() session.screenResults = append(session.screenResults, screenResult) logger.Msg("log screenshot") return screenResult, nil } // needsCVProcessing determines if CV service processing is required based on screenshot options func needsCVProcessing(options *option.ActionOptions) bool { return options.ScreenShotWithOCR || options.ScreenShotWithUpload || options.ScreenShotWithLiveType || options.ScreenShotWithLivePopularity || len(options.ScreenShotWithUITypes) > 0 || options.ScreenShotWithClosePopups || options.ScreenShotWithOCRCluster != "" } // GetScreenResult takes a screenshot, returns the image recognition result func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) { // Enable OCR processing for GetScreenResult opts = append(opts, option.WithScreenShotOCR(true)) return dExt.createScreenshotWithSession(opts...) } func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) { options := option.NewActionOptions(opts...) if options.ScreenShotFileName == "" { opts = append(opts, option.WithScreenShotFileName("get_screen_texts")) } opts = append(opts, option.WithScreenShotOCR(true), option.WithScreenShotUpload(true)) screenResult, err := dExt.GetScreenResult(opts...) if err != nil { return } return screenResult.Texts, nil } func (dExt *XTDriver) FindScreenText(text string, opts ...option.ActionOption) (textRect ai.OCRText, err error) { options := option.NewActionOptions(opts...) if options.ScreenShotFileName == "" { opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("find_screen_text_%s", text))) } // convert relative scope to absolute scope if options.AbsScope == nil && len(options.Scope) == 4 { windowSize, err := dExt.WindowSize() if err != nil { return ai.OCRText{}, err } absScope := option.AbsScope{ int(options.Scope[0] * float64(windowSize.Width)), int(options.Scope[1] * float64(windowSize.Height)), int(options.Scope[2] * float64(windowSize.Width)), int(options.Scope[3] * float64(windowSize.Height)), } opts = append(opts, option.WithAbsScope( absScope[0], absScope[1], absScope[2], absScope[3])) log.Info().Interface("scope", options.Scope). Interface("absScope", absScope).Msg("convert to abs scope") } ocrTexts, err := dExt.GetScreenTexts(opts...) if err != nil { return } textRect, err = ocrTexts.FindText(text, opts...) if err != nil { log.Warn().Msgf("FindText failed: %s", err.Error()) return } log.Info().Str("text", text). Interface("textRect", textRect).Msgf("FindScreenText success") return textRect, nil } func (dExt *XTDriver) FindUIResult(opts ...option.ActionOption) (uiResult ai.UIResult, err error) { options := option.NewActionOptions(opts...) if options.ScreenShotFileName == "" { opts = append(opts, option.WithScreenShotFileName( fmt.Sprintf("find_ui_result_%s", strings.Join(options.ScreenShotWithUITypes, "_")))) } screenResult, err := dExt.GetScreenResult(opts...) if err != nil { return } uiResults, err := screenResult.Icons.FilterUIResults(options.ScreenShotWithUITypes) if err != nil { return } uiResult, err = uiResults.GetUIResult(opts...) log.Info().Interface("text", options.ScreenShotWithUITypes). Interface("uiResult", uiResult).Msg("FindUIResult success") return } // getScreenShotBuffer takes a screenshot, returns the compressed image buffer func getScreenShotBuffer(driver IDriver) (compressedBufSource *bytes.Buffer, err error) { // take screenshot bufSource, err := driver.ScreenShot() if err != nil { return nil, errors.Wrapf(code.DeviceScreenShotError, "take screenshot failed %v", err) } // compress screenshot compressBufSource, err := compressImageBufferWithOptions(bufSource, false, 800) if err != nil { return nil, errors.Wrapf(code.DeviceScreenShotError, "compress screenshot failed %v", err) } return compressBufSource, nil } // saveScreenShot saves compressed image file with file name func saveScreenShot(raw *bytes.Buffer, screenshotPath string) error { // notice: screenshot data is a stream, so we need to copy it to a new buffer copiedBuffer := &bytes.Buffer{} if _, err := copiedBuffer.Write(raw.Bytes()); err != nil { log.Error().Err(err).Msg("copy screenshot buffer failed") } img, format, err := image.Decode(copiedBuffer) if err != nil { return errors.Wrap(err, "decode screenshot image failed") } file, err := os.Create(screenshotPath) if err != nil { return errors.Wrap(err, "create screenshot image file failed") } defer func() { _ = file.Close() }() // compress image and save to file switch format { case "jpeg": jpegOptions := &jpeg.Options{Quality: 95} err = jpeg.Encode(file, img, jpegOptions) case "png": encoder := png.Encoder{ CompressionLevel: png.BestCompression, } err = encoder.Encode(file, img) case "gif": gifOptions := &gif.Options{ NumColors: 256, } err = gif.Encode(file, img, gifOptions) default: return fmt.Errorf("unsupported image format %s", format) } if err != nil { return errors.Wrap(err, "save image file failed") } var fileSize int64 fileInfo, err := file.Stat() if err == nil { fileSize = fileInfo.Size() } log.Info().Str("path", screenshotPath). Int("rawBytes", raw.Len()).Int64("saveBytes", fileSize). Msg("save screenshot file success") return nil } // compressImageBufferWithOptions compresses image buffer with advanced options func compressImageBufferWithOptions(raw *bytes.Buffer, enableResize bool, maxWidth int) (compressed *bytes.Buffer, err error) { rawSize := raw.Len() // decode image from buffer img, format, err := image.Decode(raw) if err != nil { return nil, err } // Get original image dimensions bounds := img.Bounds() originalWidth := bounds.Dx() originalHeight := bounds.Dy() // Calculate new dimensions for compression if resize is enabled var newWidth, newHeight int var resizedImg image.Image = img if enableResize && originalWidth > maxWidth { ratio := float64(maxWidth) / float64(originalWidth) newWidth = maxWidth newHeight = int(float64(originalHeight) * ratio) resizedImg = resizeImage(img, newWidth, newHeight) } else { newWidth = originalWidth newHeight = originalHeight } // Determine JPEG quality based on image size for optimal compression jpegQuality := 60 // Default quality for better compression if newWidth*newHeight > 500000 { // For very large images, use lower quality jpegQuality = 50 } else if newWidth*newHeight < 100000 { // For small images, use higher quality jpegQuality = 70 } var buf bytes.Buffer switch strings.ToLower(format) { case "jpeg", "jpg": // Use adaptive JPEG compression quality err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) case "png": // Convert PNG to JPEG for better compression err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) case "gif": // Keep GIF format but with reduced colors for better compression err = gif.Encode(&buf, resizedImg, &gif.Options{NumColors: 64}) default: // Default to JPEG for unknown formats err = jpeg.Encode(&buf, resizedImg, &jpeg.Options{Quality: jpegQuality}) } if err != nil { return nil, err } compressedSize := buf.Len() log.Debug(). Int("rawSize", rawSize). Int("originalWidth", originalWidth). Int("originalHeight", originalHeight). Int("newWidth", newWidth). Int("newHeight", newHeight). Int("jpegQuality", jpegQuality). Int("compressedSize", compressedSize). Bool("resized", enableResize && originalWidth > maxWidth). Msg("compress image buffer") // return compressed image buffer return &buf, nil } // resizeImage resizes an image using simple nearest neighbor algorithm func resizeImage(src image.Image, width, height int) image.Image { srcBounds := src.Bounds() srcWidth := srcBounds.Dx() srcHeight := srcBounds.Dy() // Create a new image with the target dimensions dst := image.NewRGBA(image.Rect(0, 0, width, height)) // Simple nearest neighbor resizing for y := 0; y < height; y++ { for x := 0; x < width; x++ { // Map destination coordinates to source coordinates srcX := x * srcWidth / width srcY := y * srcHeight / height // Ensure we don't go out of bounds if srcX >= srcWidth { srcX = srcWidth - 1 } if srcY >= srcHeight { srcY = srcHeight - 1 } // Copy pixel from source to destination dst.Set(x, y, src.At(srcBounds.Min.X+srcX, srcBounds.Min.Y+srcY)) } } return dst } // CompressImageFile compresses an image file and returns the compressed data func CompressImageFile(imagePath string, enableResize bool, maxWidth int) ([]byte, error) { log.Debug().Str("imagePath", imagePath).Bool("enableResize", enableResize). Int("maxWidth", maxWidth).Msg("compress image file") // Read the original image file file, err := os.Open(imagePath) if err != nil { return nil, fmt.Errorf("failed to open image file: %w", err) } defer file.Close() // Read file content into buffer var buf bytes.Buffer _, err = buf.ReadFrom(file) if err != nil { return nil, fmt.Errorf("failed to read image file: %w", err) } // Compress using the buffer compression function compressedBuf, err := compressImageBufferWithOptions(&buf, enableResize, maxWidth) if err != nil { return nil, fmt.Errorf("failed to compress image: %w", err) } return compressedBuf.Bytes(), nil } // MarkUIOperation add operation mark for UI operation func MarkUIOperation(driver IDriver, actionType option.ActionName, actionCoordinates []float64) error { if actionType == "" || len(actionCoordinates) == 0 { return nil } start := time.Now() // get screenshot compressedBufSource, err := driver.ScreenShot() if err != nil { return err } // create screenshot save path timestamp := builtin.GenNameWithTimestamp("%d") imagePath := filepath.Join( config.GetConfig().ScreenShotsPath(), fmt.Sprintf("action_%s_pre_%s.png", timestamp, actionType), ) if actionType == option.ACTION_TapAbsXY || actionType == option.ACTION_DoubleTapXY { if len(actionCoordinates) != 2 { return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates) } x, y := actionCoordinates[0], actionCoordinates[1] point := image.Point{X: int(x), Y: int(y)} err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath) } else if actionType == option.ACTION_SwipeDirection || actionType == option.ACTION_SwipeCoordinate || actionType == option.ACTION_Drag { if len(actionCoordinates) != 4 { return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates) } fromX, fromY := actionCoordinates[0], actionCoordinates[1] toX, toY := actionCoordinates[2], actionCoordinates[3] from := image.Point{X: int(fromX), Y: int(fromY)} to := image.Point{X: int(toX), Y: int(toY)} err = SaveImageWithArrowMarker(compressedBufSource, from, to, imagePath) } if err != nil { log.Error().Err(err). Int64("duration(ms)", time.Since(start).Milliseconds()). Msg("mark UI operation failed") return err } if imagePath != "" { log.Info().Str("operation", string(actionType)). Str("imagePath", imagePath). Int64("duration(ms)", time.Since(start).Milliseconds()). Msg("mark UI operation success") // save screenshot to session session := driver.GetSession() session.screenResults = append(session.screenResults, &ScreenResult{ bufSource: compressedBufSource, ImagePath: imagePath, }) } return nil } // SaveImageWithCircleMarker saves an image with circle marker func SaveImageWithCircleMarker(imgBuf *bytes.Buffer, point image.Point, outputPath string) error { img, _, err := image.Decode(imgBuf) if err != nil { return fmt.Errorf("failed to decode image data: %w", err) } bounds := img.Bounds() rgba := image.NewRGBA(bounds) draw.Draw(rgba, bounds, img, bounds.Min, draw.Src) // draw a red circle at the tap point centerX := point.X centerY := point.Y radius := 20 lineWidth := 5 red := color.RGBA{255, 0, 0, 255} for angle := 0.0; angle < 2*math.Pi; angle += 0.01 { for w := 0; w < lineWidth; w++ { r := float64(radius - w) x := int(float64(centerX) + r*math.Cos(angle)) y := int(float64(centerY) + r*math.Sin(angle)) if x >= 0 && x < bounds.Max.X && y >= 0 && y < bounds.Max.Y { rgba.Set(x, y, red) } } } outFile, err := os.Create(outputPath) if err != nil { return fmt.Errorf("failed to create output file: %w", err) } defer outFile.Close() if err := png.Encode(outFile, rgba); err != nil { return fmt.Errorf("failed to encode and save image: %w", err) } return nil } // SaveImageWithArrowMarker saves an image with an arrow marker func SaveImageWithArrowMarker(imgBuf *bytes.Buffer, from, to image.Point, outputPath string) error { img, _, err := image.Decode(imgBuf) if err != nil { return fmt.Errorf("failed to decode image data: %w", err) } bounds := img.Bounds() rgba := image.NewRGBA(bounds) draw.Draw(rgba, bounds, img, bounds.Min, draw.Src) drawArrow(rgba, from, to, color.RGBA{255, 0, 0, 255}, 5) outFile, err := os.Create(outputPath) if err != nil { return fmt.Errorf("failed to create output file: %w", err) } defer outFile.Close() if err := png.Encode(outFile, rgba); err != nil { return fmt.Errorf("failed to encode and save image: %w", err) } return nil } // drawArrow draws an arrow from 'from' to 'to' on the image func drawArrow(rgba *image.RGBA, from, to image.Point, color color.RGBA, lineWidth int) { bounds := rgba.Bounds() dx, dy := to.X-from.X, to.Y-from.Y steps := int(math.Sqrt(float64(dx*dx + dy*dy))) if steps == 0 { steps = 1 } stepX, stepY := float64(dx)/float64(steps), float64(dy)/float64(steps) // main line for i := 0; i < steps; i++ { x := int(float64(from.X) + stepX*float64(i)) y := int(float64(from.Y) + stepY*float64(i)) for w := 0; w < lineWidth; w++ { offsetX, offsetY := 0, 0 if math.Abs(stepX) > math.Abs(stepY) { offsetY = w - lineWidth/2 } else { offsetX = w - lineWidth/2 } drawX, drawY := x+offsetX, y+offsetY if drawX >= 0 && drawX < bounds.Max.X && drawY >= 0 && drawY < bounds.Max.Y { rgba.Set(drawX, drawY, color) } } } // arrow head arrowLength := float64(steps) * 0.15 if arrowLength < 10 { arrowLength = 10 } else if arrowLength > 30 { arrowLength = 30 } head := calculateArrowHead(float64(from.X), float64(from.Y), float64(to.X), float64(to.Y), arrowLength) if head != nil { for _, point := range head[:2] { drawLineInImage(rgba, to.X, to.Y, int(point.X), int(point.Y), color, lineWidth, bounds) } for _, point := range head[1:] { drawLineInImage(rgba, to.X, to.Y, int(point.X), int(point.Y), color, lineWidth, bounds) } } } // calculateArrowHead calculates the endpoint and arrowhead coordinates func calculateArrowHead(fromX, fromY, toX, toY float64, arrowLength float64) []struct{ X, Y float64 } { // calculate direction vector dx, dy := toX-fromX, toY-fromY // calculate distance length := math.Sqrt(dx*dx + dy*dy) if length < 1e-6 { return nil } // unit vector dx, dy = dx/length, dy/length // calculate orthogonal vector of arrow direction (counterclockwise 90 degrees) orthX, orthY := -dy, dx // calculate two wing points of arrow headWidth := arrowLength * 0.5 backX, backY := toX-dx*arrowLength, toY-dy*arrowLength // two wing points of arrow leftWingX, leftWingY := backX+orthX*headWidth, backY+orthY*headWidth rightWingX, rightWingY := backX-orthX*headWidth, backY-orthY*headWidth return []struct{ X, Y float64 }{ {leftWingX, leftWingY}, {toX, toY}, {rightWingX, rightWingY}, } } // drawLineInImage draws a line on the image func drawLineInImage(img *image.RGBA, x0, y0, x1, y1 int, lineColor color.RGBA, lineWidth int, bounds image.Rectangle) { // use Bresenham algorithm to draw line dx, dy := math.Abs(float64(x1-x0)), math.Abs(float64(y1-y0)) sx, sy := 1, 1 if x0 >= x1 { sx = -1 } if y0 >= y1 { sy = -1 } err := dx - dy for { // draw point (consider line width) for w := 0; w < lineWidth; w++ { offsetX, offsetY := 0, 0 // decide offset direction based on line angle if dx > dy { // more horizontal line offsetY = w - lineWidth/2 } else { // more vertical line offsetX = w - lineWidth/2 } drawX, drawY := x0+offsetX, y0+offsetY if drawX >= 0 && drawX < bounds.Max.X && drawY >= 0 && drawY < bounds.Max.Y { img.Set(drawX, drawY, lineColor) } } // end of line if x0 == x1 && y0 == y1 { break } // calculate next point e2 := 2 * err if e2 > -dy { err = err - dy x0 = x0 + sx } if e2 < dx { err = err + dx y0 = y0 + sy } } }