From 02c7d3c6cfb3e3cfc7e1e36ccf6a4a790d868dea Mon Sep 17 00:00:00 2001
From: buyuxiang <347586493@qq.com>
Date: Mon, 31 Jul 2023 21:14:48 +0800
Subject: [PATCH] feat: `tap_cv` action supports ui type detection and tap

---
 docs/CHANGELOG.md              |   8 +-
 hrp/pkg/uixt/action.go         |  12 +++
 hrp/pkg/uixt/ocr_vedem.go      | 177 ++++++++++++++++++++++++++++++++-
 hrp/pkg/uixt/ocr_vedem_test.go |  17 +++
 hrp/pkg/uixt/tap.go            |  17 +++
 5 files changed, 227 insertions(+), 4 deletions(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 2116f3f4..dbf50fde 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,8 +1,14 @@
 # Release History
 
-## v4.3.6 (2023-07-24)
+## v4.3.6 (2023-07-31)
 
 - feat: support to reset driver (or session only) automatically when UIA2 / WDA crashed or WebDriver request failed
+- feat: `tap_cv` action supports ui type detection and tap
+- compatibility: support indicating options separately in `MobileAction` level
+- fix: use Override size if existed, otherwise use Physical size (android devices)
+- fix: add default options for `swipe_to_tap_app` action
+- refactor: ui validation methods
+- fix: reuse the same request body during `GetImage` retry
 
 ## v4.3.5 (2023-07-23)
 
diff --git a/hrp/pkg/uixt/action.go b/hrp/pkg/uixt/action.go
index 8e8459ec..12ab0a01 100644
--- a/hrp/pkg/uixt/action.go
+++ b/hrp/pkg/uixt/action.go
@@ -479,6 +479,18 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
 		if imagePath, ok := action.Params.(string); ok {
 			return dExt.TapByCV(imagePath, action.GetOptions()...)
 		}
+		// a list of UI type names selects detection-based tapping
+		if uiParams, ok := action.Params.([]interface{}); ok {
+			var uiTypes []string
+			for _, uiParam := range uiParams {
+				uiType, ok := uiParam.(string)
+				if !ok {
+					continue
+				}
+				uiTypes = append(uiTypes, uiType)
+			}
+			return dExt.TapByUIDetection(uiTypes, action.GetOptions()...)
+		}
 		return fmt.Errorf("invalid %s params: %v", ACTION_TapByCV, action.Params)
 	case ACTION_DoubleTapXY:
 		if location, ok := action.Params.([]interface{}); ok {
diff --git a/hrp/pkg/uixt/ocr_vedem.go b/hrp/pkg/uixt/ocr_vedem.go
index 9c2982b8..b1e20d2c 100644
--- a/hrp/pkg/uixt/ocr_vedem.go
+++ b/hrp/pkg/uixt/ocr_vedem.go
@@ -54,9 +54,10 @@ func (o OCRResults) ToOCRTexts() (ocrTexts OCRTexts) {
 
 type ImageResult struct {
 	imagePath string
-	URL       string     `json:"url"`       // image uploaded url
-	OCRResult OCRResults `json:"ocrResult"` // OCR texts
-	LiveType  string     `json:"liveType"`  // 直播间类型
+	URL       string      `json:"url"`       // image uploaded url
+	OCRResult OCRResults  `json:"ocrResult"` // OCR texts
+	LiveType  string      `json:"liveType"`  // live room type
+	UIResult  UIResultMap `json:"uiResult"`  // UI icon detection results
 }
 
 type APIResponseImage struct {
@@ -171,6 +172,13 @@ func newVEDEMImageService(actions ...string) (*veDEMImageService, error) {
 	}, nil
 }
 
+// WithUITypes sets the UI types sent with image requests and returns the
+// same service so that calls can be chained.
+func (s *veDEMImageService) WithUITypes(uiTypes ...string) *veDEMImageService {
+	s.uiTypes = uiTypes
+	return s
+}
+
 // veDEMImageService implements IImageService interface
 // actions:
 //
@@ -181,6 +189,7 @@ func newVEDEMImageService(actions ...string) (*veDEMImageService, error) {
 //	close - get close popup
 type veDEMImageService struct {
 	actions []string
+	uiTypes []string
 }
 
 func (s *veDEMImageService) GetImage(imageBuf *bytes.Buffer) (imageResult ImageResult, err error) {
@@ -189,6 +198,9 @@ func (s *veDEMImageService) GetImage(imageBuf *bytes.Buffer) (imageResult ImageR
 	for _, action := range s.actions {
 		bodyWriter.WriteField("actions", action)
 	}
+	for _, uiType := range s.uiTypes {
+		bodyWriter.WriteField("uiTypes", uiType)
+	}
 	bodyWriter.WriteField("ocrCluster", "highPrecision")
 
 	formWriter, err := bodyWriter.CreateFormFile("image", "screenshot.png")
@@ -401,3 +413,162 @@ func getRectangleCenterPoint(rect image.Rectangle) (point PointF) {
 	}
 	return point
 }
+
+// getCenterPoint returns point offset by half of width and height, i.e. the
+// center of the box that starts at point.
+func getCenterPoint(point PointF, width, height float64) PointF {
+	return PointF{
+		X: point.X + width*0.5,
+		Y: point.Y + height*0.5,
+	}
+}
+
+// UIResult is one detected UI element: the box starting at Point with the
+// given Width and Height.
+type UIResult struct {
+	Point  PointF  `json:"point"`
+	Width  float64 `json:"width"`
+	Height float64 `json:"height"`
+}
+
+// Center returns the center point of the detected box.
+func (u UIResult) Center() PointF {
+	return getCenterPoint(u.Point, u.Width, u.Height)
+}
+
+// UIResults is a list of detected UI elements.
+type UIResults []UIResult
+
+// FilterScope returns the results whose box lies entirely inside scope.
+// Filtering only applies when scope has exactly 4 elements; otherwise all
+// results are kept.
+func (u UIResults) FilterScope(scope AbsScope) (results UIResults) {
+	for _, uiResult := range u {
+		rect := image.Rectangle{
+			Min: image.Point{
+				X: int(uiResult.Point.X),
+				Y: int(uiResult.Point.Y),
+			},
+			Max: image.Point{
+				X: int(uiResult.Point.X + uiResult.Width),
+				Y: int(uiResult.Point.Y + uiResult.Height),
+			},
+		}
+
+		// check if ui result in scope
+		if len(scope) == 4 {
+			if rect.Min.X < scope[0] ||
+				rect.Min.Y < scope[1] ||
+				rect.Max.X > scope[2] ||
+				rect.Max.Y > scope[3] {
+				// not in scope
+				continue
+			}
+		}
+		results = append(results, uiResult)
+	}
+	return
+}
+
+// UIResultMap maps a UI type name to the results detected for that type.
+type UIResultMap map[string]UIResults
+
+// FilterUIResults returns the results of the first entry in uiTypes that has
+// at least one detection, or an error when none of them has any.
+func (u UIResultMap) FilterUIResults(uiTypes []string) (uiResults UIResults, err error) {
+	var ok bool
+	for _, uiType := range uiTypes {
+		uiResults, ok = u[uiType]
+		if ok && len(uiResults) != 0 {
+			return
+		}
+	}
+	err = errors.Errorf("UI types %v not detected", uiTypes)
+	return
+}
+
+// GetUIResult filters the results by the scope option and returns the one at
+// the index option; a negative index counts from the end.
+func (u UIResults) GetUIResult(options ...ActionOption) (UIResult, error) {
+	actionOptions := NewActionOptions(options...)
+
+	uiResults := u.FilterScope(actionOptions.AbsScope)
+	if len(uiResults) == 0 {
+		return UIResult{}, errors.Wrap(code.OCRTextNotFoundError,
+			"ui types not found in scope")
+	}
+	// get index
+	idx := actionOptions.Index
+	if idx < 0 {
+		idx = len(uiResults) + idx
+	}
+
+	// index out of range
+	if idx >= len(uiResults) || idx < 0 {
+		return UIResult{}, errors.Wrap(code.OCRTextNotFoundError,
+			fmt.Sprintf("ui types index %d out of range", idx))
+	}
+	return uiResults[idx], nil
+}
+
+// newVEDEMUIService returns an image service that requests the "ui" action
+// for the given UI types.
+func newVEDEMUIService(uiTypes []string) (*veDEMImageService, error) {
+	vedemUIService, err := newVEDEMImageService("ui")
+	if err != nil {
+		return nil, err
+	}
+	return vedemUIService.WithUITypes(uiTypes...), nil
+}
+
+// GetUIResultMap takes a screenshot, sends it to the image service together
+// with uiTypes and returns the detection results keyed by UI type.
+func (dExt *DriverExt) GetUIResultMap(uiTypes []string) (uiResultMap UIResultMap, err error) {
+	var bufSource *bytes.Buffer
+	var imagePath string
+	if bufSource, imagePath, err = dExt.takeScreenShot(
+		builtin.GenNameWithTimestamp("%d_ocr")); err != nil {
+		return
+	}
+
+	vedemUIService, err := newVEDEMUIService(uiTypes)
+	if err != nil {
+		return
+	}
+	imageResult, err := vedemUIService.GetImage(bufSource)
+	if err != nil {
+		log.Error().Err(err).Msg("GetImage from ImageService failed")
+		return
+	}
+
+	imageUrl := imageResult.URL
+	if imageUrl != "" {
+		dExt.cacheStepData.screenShotsUrls[imagePath] = imageUrl
+		log.Debug().Str("imagePath", imagePath).Str("imageUrl", imageUrl).Msg("log screenshot")
+	}
+	uiResultMap = imageResult.UIResult
+	return
+}
+
+// FindUIResult detects uiTypes on the current screen and returns the center
+// point of the selected match.
+func (dExt *DriverExt) FindUIResult(uiTypes []string, options ...ActionOption) (point PointF, err error) {
+	uiResultMap, err := dExt.GetUIResultMap(uiTypes)
+	if err != nil {
+		return
+	}
+	uiResults, err := uiResultMap.FilterUIResults(uiTypes)
+	if err != nil {
+		return
+	}
+	uiResult, err := uiResults.GetUIResult(dExt.ParseActionOptions(options...)...)
+	if err != nil {
+		// no match in scope or index out of range: do not report success
+		return
+	}
+	point = uiResult.Center()
+
+	log.Info().Interface("uiTypes", uiTypes).
+		Interface("point", point).Msg("FindUIResult success")
+	return
+}
diff --git a/hrp/pkg/uixt/ocr_vedem_test.go b/hrp/pkg/uixt/ocr_vedem_test.go
index a026aafc..1004855a 100644
--- a/hrp/pkg/uixt/ocr_vedem_test.go
+++ b/hrp/pkg/uixt/ocr_vedem_test.go
@@ -72,3 +72,20 @@ func TestMatchRegex(t *testing.T) {
 		}
 	}
 }
+
+func TestTapUIWithScreenshot(t *testing.T) {
+	serialNumber := os.Getenv("SERIAL_NUMBER")
+	device, err := NewAndroidDevice(WithSerialNumber(serialNumber))
+	if err != nil {
+		t.Fatal(err)
+	}
+	driver, err := device.NewDriver(nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	err = driver.TapByUIDetection([]string{"dyhouse", "shoppingbag"})
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/hrp/pkg/uixt/tap.go b/hrp/pkg/uixt/tap.go
index 5d0ea39b..83603ccf 100644
--- a/hrp/pkg/uixt/tap.go
+++ b/hrp/pkg/uixt/tap.go
@@ -49,6 +49,23 @@ func (dExt *DriverExt) TapByCV(imagePath string, options ...ActionOption) error
 	return dExt.TapAbsXY(point.X, point.Y, options...)
 }
 
+// TapByUIDetection taps the center of a UI element detected for one of
+// uiTypes. When the IgnoreNotFoundError option is set, any error from the
+// detection step is swallowed and nil is returned.
+func (dExt *DriverExt) TapByUIDetection(uiTypes []string, options ...ActionOption) error {
+	actionOptions := NewActionOptions(options...)
+
+	point, err := dExt.FindUIResult(uiTypes, options...)
+	if err != nil {
+		if actionOptions.IgnoreNotFoundError {
+			return nil
+		}
+		return err
+	}
+
+	return dExt.TapAbsXY(point.X, point.Y, options...)
+}
+
 func (dExt *DriverExt) Tap(param string, options ...ActionOption) error {
 	return dExt.TapOffset(param, 0, 0, options...)
 }