feat: mark tap/swipe UI operation

This commit is contained in:
lilong.129
2025-05-05 16:31:13 +08:00
parent 6569121d5d
commit cfc71819d2
12 changed files with 438 additions and 88 deletions

31
docs/uixt/ui_mark.md Normal file
View File

@@ -0,0 +1,31 @@
### UI操作标注
针对 UI 操作的位置进行标注,帮助用户直观地了解操作发生的位置。
#### 功能说明
- 点击操作(tap):使用红色圆圈标注点击位置
- 滑动操作(swipe):使用红色箭头标注滑动方向,从起始点指向结束点
#### 使用方法
只需在操作函数中添加 `WithMarkOperationEnabled(true)` 选项即可启用操作标注功能:
```go
// 启用操作标注功能
opts := []option.ActionOption{option.WithMarkOperationEnabled(true)}
// 执行点击操作,会自动用红色圆圈标注点击位置
err := driver.TapXY(0.5, 0.5, opts...)
// 执行滑动操作,会自动用红色箭头标注滑动方向
err = driver.Swipe(0.2, 0.5, 0.8, 0.5, opts...)
// 可以同时使用其他选项
opts = append(opts, option.WithScreenShotFileName("custom_name"))
err = driver.TapXY(0.3, 0.7, opts...)
```
#### 标注结果
标注后的图片会保存在截图目录中,文件名格式为:`{timestamp}_{tap|swipe}_marked.png`

View File

@@ -1 +1 @@
v5.0.0-beta-2504301621
v5.0.0-beta-2505051631

View File

@@ -313,8 +313,15 @@ func (ad *ADBDriver) TapAbsXY(x, y float64, opts ...option.ActionOption) error {
// adb shell input tap x y
xStr := fmt.Sprintf("%.1f", x)
yStr := fmt.Sprintf("%.1f", y)
_, err := ad.runShellCommand(
"input", "tap", xStr, yStr)
_, err := ad.runShellCommand("input", "tap", xStr, yStr)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ad, ACTION_TapAbsXY, []float64{x, y}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark tap operation")
}
}
if err != nil {
return errors.Wrap(err, fmt.Sprintf("tap <%s, %s> failed", xStr, yStr))
}
@@ -394,6 +401,14 @@ func (ad *ADBDriver) Drag(fromX, fromY, toX, toY float64, opts ...option.ActionO
fmt.Sprintf("%.1f", toX), fmt.Sprintf("%.1f", toY),
fmt.Sprintf("%d", int(duration)),
)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ad, ACTION_Drag, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark drag operation")
}
}
if err != nil {
return errors.Wrap(err, "adb drag failed")
}
@@ -417,6 +432,14 @@ func (ad *ADBDriver) Swipe(fromX, fromY, toX, toY float64, opts ...option.Action
fmt.Sprintf("%.1f", fromX), fmt.Sprintf("%.1f", fromY),
fmt.Sprintf("%.1f", toX), fmt.Sprintf("%.1f", toY),
)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ad, ACTION_Swipe, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark swipe operation")
}
}
if err != nil {
return errors.Wrap(err, "adb swipe failed")
}

View File

@@ -324,6 +324,14 @@ func (ud *UIA2Driver) TapAbsXY(x, y float64, opts ...option.ActionOption) error
urlStr := fmt.Sprintf("/session/%s/actions/tap", ud.Session.ID)
_, err := ud.Session.POST(data, urlStr)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ud, ACTION_TapAbsXY, []float64{x, y}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark tap operation")
}
}
return err
}
@@ -374,6 +382,14 @@ func (ud *UIA2Driver) Drag(fromX, fromY, toX, toY float64, opts ...option.Action
// register(postHandler, new Drag("/wd/hub/session/:sessionId/touch/drag"))
urlStr := fmt.Sprintf("/session/%s/touch/drag", ud.Session.ID)
_, err = ud.Session.POST(data, urlStr)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ud, ACTION_Drag, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark drag operation")
}
}
return err
}
@@ -417,6 +433,14 @@ func (ud *UIA2Driver) Swipe(fromX, fromY, toX, toY float64, opts ...option.Actio
urlStr := fmt.Sprintf("/session/%s/actions/swipe", ud.Session.ID)
_, err = ud.Session.POST(data, urlStr)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(ud, ACTION_Swipe, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark swipe operation")
}
}
return err
}

View File

@@ -118,6 +118,14 @@ func (wd *BrowserDriver) Drag(fromX, fromY, toX, toY float64, options ...option.
}
_, err = wd.HttpPOST(data, wd.sessionId, "ui/drag")
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(wd, ACTION_Drag, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark drag operation")
}
}
return
}
@@ -529,6 +537,14 @@ func (wd *BrowserDriver) TapFloat(x, y float64, options ...option.ActionOption)
"duration": duration,
}
_, err := wd.HttpPOST(data, wd.sessionId, "ui/tap")
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(wd, ACTION_TapAbsXY, []float64{x, y}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark tap operation")
}
}
return err
}

View File

@@ -42,6 +42,7 @@ const (
ACTION_TapByCV ActionMethod = "tap_cv"
ACTION_DoubleTapXY ActionMethod = "double_tap_xy"
ACTION_Swipe ActionMethod = "swipe"
ACTION_Drag ActionMethod = "drag"
ACTION_Input ActionMethod = "input"
ACTION_Back ActionMethod = "back"
ACTION_KeyCode ActionMethod = "keycode"

View File

@@ -2,7 +2,6 @@ package uixt
import (
"bytes"
"encoding/base64"
"fmt"
"image"
"image/color"
@@ -10,6 +9,7 @@ import (
"image/gif"
"image/jpeg"
"image/png"
"math"
"os"
"path/filepath"
"strings"
@@ -298,69 +298,243 @@ func compressImageBuffer(raw *bytes.Buffer) (compressed *bytes.Buffer, err error
return &buf, nil
}
// SavePositionImg saves an image with position markers
func SavePositionImg(params struct {
InputImgBase64 string
Rect struct {
X float64
Y float64
// MarkUIOperation add operation mark for UI operation
func MarkUIOperation(driver IDriver, actionType ActionMethod, actionCoordinates []float64) error {
if actionType == "" || len(actionCoordinates) == 0 {
return nil
}
OutputPath string
}) error {
// 解码Base64图像
imgData := params.InputImgBase64
// 如果包含了数据URL前缀去掉它
if strings.HasPrefix(imgData, "data:image/") {
parts := strings.Split(imgData, ",")
if len(parts) > 1 {
imgData = parts[1]
// get screenshot
compressedBufSource, err := driver.ScreenShot()
if err != nil {
return err
}
// create screenshot save path
timestamp := builtin.GenNameWithTimestamp("%d")
var imagePath string
if actionType == ACTION_TapAbsXY {
if len(actionCoordinates) != 2 {
return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates)
}
}
// 解码Base64
unbased, err := base64.StdEncoding.DecodeString(imgData)
if err != nil {
return fmt.Errorf("无法解码Base64图像: %w", err)
}
// 解码图像
reader := bytes.NewReader(unbased)
img, _, err := image.Decode(reader)
if err != nil {
return fmt.Errorf("无法解码图像数据: %w", err)
}
// 创建一个可以在其上绘制的图像
bounds := img.Bounds()
rgba := image.NewRGBA(bounds)
draw.Draw(rgba, bounds, img, bounds.Min, draw.Src)
// 在点击/拖动位置绘制标记
markRadius := 30
x, y := int(params.Rect.X), int(params.Rect.Y)
// 绘制红色圆圈
for i := -markRadius; i <= markRadius; i++ {
for j := -markRadius; j <= markRadius; j++ {
if i*i+j*j <= markRadius*markRadius {
if x+i >= 0 && x+i < bounds.Max.X && y+j >= 0 && y+j < bounds.Max.Y {
rgba.Set(x+i, y+j, color.RGBA{255, 0, 0, 255})
}
}
imagePath = filepath.Join(
config.GetConfig().ScreenShotsPath,
fmt.Sprintf("%s_tap_marked.png", timestamp),
)
x, y := actionCoordinates[0], actionCoordinates[1]
point := image.Point{X: int(x), Y: int(y)}
err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath)
} else if actionType == ACTION_Swipe {
if len(actionCoordinates) != 4 {
return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates)
}
imagePath = filepath.Join(
config.GetConfig().ScreenShotsPath,
fmt.Sprintf("%s_swipe_marked.png", timestamp),
)
fromX, fromY := actionCoordinates[0], actionCoordinates[1]
toX, toY := actionCoordinates[2], actionCoordinates[3]
from := image.Point{X: int(fromX), Y: int(fromY)}
to := image.Point{X: int(toX), Y: int(toY)}
err = SaveImageWithArrowMarker(compressedBufSource, from, to, imagePath)
}
// 保存图像
outFile, err := os.Create(params.OutputPath)
if err != nil {
return fmt.Errorf("无法创建输出文件: %w", err)
log.Error().Err(err).Msg("mark UI operation failed")
return err
}
defer outFile.Close()
// 编码为PNG并保存
if err := png.Encode(outFile, rgba); err != nil {
return fmt.Errorf("无法编码和保存图像: %w", err)
if imagePath != "" {
log.Info().Str("operation", string(actionType)).
Str("imagePath", imagePath).
Msg("mark UI operation success")
}
return nil
}
// SaveImageWithCircleMarker decodes the image held in imgBuf, draws a red ring
// centered on point, and writes the result to outputPath as a PNG.
//
// The ring has an outer radius of 20 pixels and is 5 pixels thick; any part of
// it that falls outside the image is clipped. imgBuf is read by the decoder, so
// callers should not expect to reuse its contents afterwards.
//
// It returns an error when the image cannot be decoded, or when the output
// file cannot be created, encoded, or closed.
func SaveImageWithCircleMarker(imgBuf *bytes.Buffer, point image.Point, outputPath string) error {
	img, _, err := image.Decode(imgBuf)
	if err != nil {
		return fmt.Errorf("failed to decode image data: %w", err)
	}

	// copy onto an RGBA canvas so pixels can be set whatever the source format is
	bounds := img.Bounds()
	rgba := image.NewRGBA(bounds)
	draw.Draw(rgba, bounds, img, bounds.Min, draw.Src)

	const (
		radius    = 20
		lineWidth = 5
	)
	red := color.RGBA{255, 0, 0, 255}

	// sweep the angle and, for each direction, paint one pixel per ring of
	// thickness (radius, radius-1, ... radius-lineWidth+1)
	for angle := 0.0; angle < 2*math.Pi; angle += 0.01 {
		cos, sin := math.Cos(angle), math.Sin(angle)
		for w := 0; w < lineWidth; w++ {
			r := float64(radius - w)
			x := int(float64(point.X) + r*cos)
			y := int(float64(point.Y) + r*sin)
			if image.Pt(x, y).In(bounds) {
				rgba.Set(x, y, red)
			}
		}
	}

	outFile, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}
	if err := png.Encode(outFile, rgba); err != nil {
		// best-effort close; the encode error is the one worth reporting
		_ = outFile.Close()
		return fmt.Errorf("failed to encode and save image: %w", err)
	}
	// Close can surface a write error the OS deferred, so it must be checked
	if err := outFile.Close(); err != nil {
		return fmt.Errorf("failed to close output file: %w", err)
	}
	return nil
}
// SaveImageWithArrowMarker decodes the image held in imgBuf, draws a red arrow
// from 'from' to 'to' with a 5-pixel stroke, and writes the result to
// outputPath as a PNG.
//
// imgBuf is read by the decoder, so callers should not expect to reuse its
// contents afterwards.
//
// It returns an error when the image cannot be decoded, or when the output
// file cannot be created, encoded, or closed.
func SaveImageWithArrowMarker(imgBuf *bytes.Buffer, from, to image.Point, outputPath string) error {
	img, _, err := image.Decode(imgBuf)
	if err != nil {
		return fmt.Errorf("failed to decode image data: %w", err)
	}

	// copy onto an RGBA canvas so pixels can be set whatever the source format is
	bounds := img.Bounds()
	rgba := image.NewRGBA(bounds)
	draw.Draw(rgba, bounds, img, bounds.Min, draw.Src)

	drawArrow(rgba, from, to, color.RGBA{255, 0, 0, 255}, 5)

	outFile, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}
	if err := png.Encode(outFile, rgba); err != nil {
		// best-effort close; the encode error is the one worth reporting
		_ = outFile.Close()
		return fmt.Errorf("failed to encode and save image: %w", err)
	}
	// Close can surface a write error the OS deferred, so it must be checked
	if err := outFile.Close(); err != nil {
		return fmt.Errorf("failed to close output file: %w", err)
	}
	return nil
}
// drawArrow draws a straight arrow on rgba running from 'from' to 'to', with
// the arrowhead placed at 'to'.
//
// The shaft is sampled once per pixel of length and thickened to lineWidth
// pixels along the axis perpendicular to its dominant direction. The arrowhead
// length is 15% of the shaft length, clamped to the range [10, 30] pixels.
// Pixels outside the image are skipped. When from and to coincide, only a
// short dash at that point is drawn because no arrowhead can be computed.
func drawArrow(rgba *image.RGBA, from, to image.Point, lineColor color.RGBA, lineWidth int) {
	bounds := rgba.Bounds()
	dx, dy := to.X-from.X, to.Y-from.Y
	steps := int(math.Sqrt(float64(dx*dx + dy*dy)))
	if steps == 0 {
		steps = 1 // from == to: avoid dividing by zero below
	}
	stepX, stepY := float64(dx)/float64(steps), float64(dy)/float64(steps)

	// main line; the dominant axis is the same for every sample, so decide once
	mostlyHorizontal := math.Abs(stepX) > math.Abs(stepY)
	for i := 0; i < steps; i++ {
		x := int(float64(from.X) + stepX*float64(i))
		y := int(float64(from.Y) + stepY*float64(i))
		for w := 0; w < lineWidth; w++ {
			drawX, drawY := x, y
			if mostlyHorizontal {
				drawY += w - lineWidth/2
			} else {
				drawX += w - lineWidth/2
			}
			if image.Pt(drawX, drawY).In(bounds) {
				rgba.Set(drawX, drawY, lineColor)
			}
		}
	}

	// arrow head
	arrowLength := float64(steps) * 0.15
	if arrowLength < 10 {
		arrowLength = 10
	} else if arrowLength > 30 {
		arrowLength = 30
	}
	head := calculateArrowHead(float64(from.X), float64(from.Y), float64(to.X), float64(to.Y), arrowLength)
	// head is [left wing, tip, right wing]. Stroke from the tip to each entry
	// once; the middle entry is the tip itself, which stamps a lineWidth-wide
	// dot on the arrow point.
	for _, point := range head {
		drawLineInImage(rgba, to.X, to.Y, int(point.X), int(point.Y), lineColor, lineWidth, bounds)
	}
}
// calculateArrowHead returns the three vertices of an arrowhead for an arrow
// pointing from (fromX, fromY) to (toX, toY), in the order left wing, tip,
// right wing.
//
// The wings sit arrowLength behind the tip along the arrow direction and
// arrowLength/2 to either side of the shaft. It returns nil when the two end
// points are closer than 1e-6, since no direction can be derived.
func calculateArrowHead(fromX, fromY, toX, toY float64, arrowLength float64) []struct{ X, Y float64 } {
	type vertex = struct{ X, Y float64 }

	vecX := toX - fromX
	vecY := toY - fromY
	norm := math.Sqrt(vecX*vecX + vecY*vecY)
	if norm < 1e-6 {
		return nil
	}

	// unit vector along the arrow, and the same vector rotated by 90 degrees
	unitX, unitY := vecX/norm, vecY/norm
	perpX, perpY := -unitY, unitX

	// point on the shaft where the wings branch off
	baseX := toX - unitX*arrowLength
	baseY := toY - unitY*arrowLength
	halfSpan := arrowLength * 0.5

	left := vertex{baseX + perpX*halfSpan, baseY + perpY*halfSpan}
	tip := vertex{toX, toY}
	right := vertex{baseX - perpX*halfSpan, baseY - perpY*halfSpan}
	return []vertex{left, tip, right}
}
// drawLineInImage rasterizes the segment (x0, y0)-(x1, y1) onto img in
// lineColor, stepping from pixel to pixel with an error accumulator
// (Bresenham-style, as the original comment describes it).
//
// Each visited pixel is widened to lineWidth pixels: vertically when the
// segment spans more in x than in y, horizontally otherwise. Pixels with a
// negative coordinate or at/after bounds.Max are skipped.
func drawLineInImage(img *image.RGBA, x0, y0, x1, y1 int, lineColor color.RGBA, lineWidth int, bounds image.Rectangle) {
	spanX := math.Abs(float64(x1 - x0))
	spanY := math.Abs(float64(y1 - y0))

	// direction of travel on each axis
	dirX := 1
	if x0 >= x1 {
		dirX = -1
	}
	dirY := 1
	if y0 >= y1 {
		dirY = -1
	}

	widenVertically := spanX > spanY
	half := lineWidth / 2
	balance := spanX - spanY

	for curX, curY := x0, y0; ; {
		// stamp the current pixel, spread across the line width
		for shift := -half; shift < lineWidth-half; shift++ {
			px, py := curX, curY
			if widenVertically {
				py += shift
			} else {
				px += shift
			}
			if px < 0 || px >= bounds.Max.X || py < 0 || py >= bounds.Max.Y {
				continue
			}
			img.Set(px, py, lineColor)
		}

		if curX == x1 && curY == y1 {
			return
		}

		// advance along whichever axes the accumulated error allows
		doubled := 2 * balance
		if doubled > -spanY {
			balance -= spanY
			curX += dirX
		}
		if doubled < spanX {
			balance += spanX
			curY += dirY
		}
	}
}

View File

@@ -3,11 +3,12 @@
package uixt
import (
"bytes"
"image"
"os"
"testing"
"time"
"github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -254,32 +255,50 @@ func TestDriverExt_Action_Offset(t *testing.T) {
assert.Nil(t, err)
}
func TestSavePositionImg(t *testing.T) {
imageBase64, _, err := builtin.LoadImage("ai/testdata/popup_risk_warning.png")
func TestSaveImageWithCircle(t *testing.T) {
imgBytes, err := os.ReadFile("ai/testdata/llk_1.png")
require.NoError(t, err)
imgBuf := bytes.NewBuffer(imgBytes)
point := image.Point{X: 500, Y: 500}
outputPath := "ai/testdata/output.png"
err = SaveImageWithCircleMarker(imgBuf, point, outputPath)
require.NoError(t, err)
params := struct {
InputImgBase64 string
Rect struct {
X float64
Y float64
}
OutputPath string
}{
InputImgBase64: imageBase64,
Rect: struct {
X float64
Y float64
}{
X: 500,
Y: 500,
},
OutputPath: "ai/testdata/output.png",
}
err = SavePositionImg(params)
require.NoError(t, err)
// cleanup
defer os.Remove(params.OutputPath)
defer os.Remove(outputPath)
}
// TestSaveImageWithArrow checks that an arrow can be drawn onto a sample
// screenshot and written out as a PNG without error.
func TestSaveImageWithArrow(t *testing.T) {
	imgBytes, err := os.ReadFile("ai/testdata/llk_1.png")
	require.NoError(t, err)

	outputPath := "ai/testdata/output.png"
	// Register cleanup before writing: require.NoError aborts the test on
	// failure, so a defer placed after it would never be registered and a
	// partially written file would be left in testdata.
	t.Cleanup(func() { _ = os.Remove(outputPath) })

	imgBuf := bytes.NewBuffer(imgBytes)
	from := image.Point{X: 500, Y: 500}
	to := image.Point{X: 1000, Y: 1000}
	err = SaveImageWithArrowMarker(imgBuf, from, to, outputPath)
	require.NoError(t, err)
}
// TestMarkOperation runs taps and swipes with operation marking enabled and
// expects every call to succeed.
func TestMarkOperation(t *testing.T) {
	driver := setupDriverExt(t)
	markOpts := []option.ActionOption{option.WithMarkOperationEnabled(true)}

	// taps, through both TapXY and TapAbsXY
	assert.Nil(t, driver.TapXY(0.5, 0.5, markOpts...))
	assert.Nil(t, driver.TapAbsXY(500, 800, markOpts...))

	// swipes, one along the x axis and one along the y axis
	assert.Nil(t, driver.Swipe(0.2, 0.5, 0.8, 0.5, markOpts...))
	assert.Nil(t, driver.Swipe(0.3, 0.7, 0.3, 0.3, markOpts...))
}

View File

@@ -161,8 +161,17 @@ func (hd *HDCDriver) TapAbsXY(x, y float64, opts ...option.ActionOption) error {
startTime := int(time.Now().UnixMilli())
hd.points = append(hd.points, ExportPoint{Start: startTime, End: startTime + 100, Ext: actionOptions.Identifier, RunTime: 100})
}
return hd.uiDriver.InjectGesture(
err := hd.uiDriver.InjectGesture(
ghdc.NewGesture().Start(ghdc.Point{X: int(x), Y: int(y)}).Pause(100))
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(hd, ACTION_TapAbsXY, []float64{x, y}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark tap operation")
}
}
return err
}
func (hd *HDCDriver) DoubleTap(x, y float64, opts ...option.ActionOption) error {
@@ -197,9 +206,18 @@ func (hd *HDCDriver) Swipe(fromX, fromY, toX, toY float64, opts ...option.Action
startTime := int(time.Now().UnixMilli())
hd.points = append(hd.points, ExportPoint{Start: startTime, End: startTime + 100, Ext: actionOptions.Identifier, RunTime: 100})
}
return hd.uiDriver.InjectGesture(
err = hd.uiDriver.InjectGesture(
ghdc.NewGesture().Start(ghdc.Point{X: int(fromX), Y: int(fromY)}).
MoveTo(ghdc.Point{X: int(toX), Y: int(toY)}, duration))
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(hd, ACTION_Swipe, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark swipe operation")
}
}
return err
}
func (hd *HDCDriver) SetIme(ime string) error {

View File

@@ -603,6 +603,14 @@ func (wd *WDADriver) TapAbsXY(x, y float64, opts ...option.ActionOption) error {
urlStr := fmt.Sprintf("/session/%s/wda/tap/0", wd.Session.ID)
_, err := wd.Session.POST(data, urlStr)
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(wd, ACTION_TapAbsXY, []float64{x, y}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark tap operation")
}
}
return err
}
@@ -666,6 +674,14 @@ func (wd *WDADriver) Drag(fromX, fromY, toX, toY float64, opts ...option.ActionO
urlStr := fmt.Sprintf("/session/%s/wda/dragfromtoforduration", wd.Session.ID)
_, err = wd.Session.POST(data, urlStr)
// _, err = wd.Session.POST(data, "/session", wd.Session.ID, "/wda/drag")
// mark UI operation
if actionOptions.MarkOperationEnabled {
if markErr := MarkUIOperation(wd, ACTION_Drag, []float64{fromX, fromY, toX, toY}); markErr != nil {
log.Warn().Err(markErr).Msg("Failed to mark drag operation")
}
}
return err
}

View File

@@ -132,6 +132,7 @@ func (o *ActionOptions) Options() []ActionOption {
options = append(options, o.GetScreenShotOptions()...)
options = append(options, o.GetScreenRecordOptions()...)
options = append(options, o.GetMarkOperationOptions()...)
return options
}

View File

@@ -8,6 +8,7 @@ type ScreenOptions struct {
ScreenShotOptions
ScreenRecordOptions
ScreenFilterOptions
MarkOperationOptions
}
type ScreenShotOptions struct {
@@ -273,3 +274,29 @@ func WithIndex(index int) ActionOption {
o.Index = index
}
}
// MarkOperationOptions holds the settings that control whether a UI operation
// is annotated on a screenshot. It is embedded in ScreenOptions.
type MarkOperationOptions struct {
// MarkOperationEnabled turns operation marking on (true) or off (false).
// The zero value leaves marking disabled, and the field is omitted from
// JSON/YAML output when false.
MarkOperationEnabled bool `json:"mark_operation_enabled,omitempty" yaml:"mark_operation_enabled,omitempty"`
}
// GetMarkOperationOptions expresses the receiver as a list of ActionOption
// values. The list holds WithMarkOperationEnabled(true) when marking is
// enabled, and is empty (but non-nil) when the receiver is nil or marking is
// disabled.
func (o *MarkOperationOptions) GetMarkOperationOptions() []ActionOption {
	if o == nil || !o.MarkOperationEnabled {
		return []ActionOption{}
	}
	return []ActionOption{WithMarkOperationEnabled(true)}
}
// WithMarkOperationEnabled returns an ActionOption that sets
// MarkOperationEnabled on the target ActionOptions to the given value.
func WithMarkOperationEnabled(enabled bool) ActionOption {
	setFlag := func(opts *ActionOptions) {
		opts.MarkOperationEnabled = enabled
	}
	return setFlag
}