From 7b0a442a7aecd9f3276d664ed8c57f643141ad71 Mon Sep 17 00:00:00 2001 From: "xucong.053" Date: Sat, 15 Oct 2022 23:50:31 +0800 Subject: [PATCH] feat: get ocr position by given recognition area --- hrp/pkg/uixt/demo/main_test.go | 2 +- hrp/pkg/uixt/ext.go | 20 ++++-- hrp/pkg/uixt/ocr_vedem.go | 117 ++++++++++++++++++++++++++------- hrp/pkg/uixt/tap.go | 12 ++-- hrp/step.go | 1 + 5 files changed, 116 insertions(+), 36 deletions(-) diff --git a/hrp/pkg/uixt/demo/main_test.go b/hrp/pkg/uixt/demo/main_test.go index 6dac96d2..9456a550 100644 --- a/hrp/pkg/uixt/demo/main_test.go +++ b/hrp/pkg/uixt/demo/main_test.go @@ -32,7 +32,7 @@ func TestIOSDemo(t *testing.T) { // 持续监测手机屏幕,直到出现青少年模式弹窗后,点击「我知道了」 for { - points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"}) + points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"}, nil) if err != nil { time.Sleep(1 * time.Second) continue diff --git a/hrp/pkg/uixt/ext.go b/hrp/pkg/uixt/ext.go index 564bad59..4696c656 100644 --- a/hrp/pkg/uixt/ext.go +++ b/hrp/pkg/uixt/ext.go @@ -67,6 +67,7 @@ type MobileAction struct { Identifier string `json:"identifier,omitempty" yaml:"identifier,omitempty"` // used to identify the action in log MaxRetryTimes int `json:"max_retry_times,omitempty" yaml:"max_retry_times,omitempty"` // max retry times Direction interface{} `json:"direction,omitempty" yaml:"direction,omitempty"` // used by swipe to tap text or app + RecognitionArea []float64 `json:"recognition_area,omitempty" yaml:"recognition_area,omitempty"` // used by ocr to get text position in the recognition area Index int `json:"index,omitempty" yaml:"index,omitempty"` // index of the target element, should start from 1 Timeout int `json:"timeout,omitempty" yaml:"timeout,omitempty"` // TODO: wait timeout in seconds for mobile action IgnoreNotFoundError bool `json:"ignore_NotFoundError,omitempty" yaml:"ignore_NotFoundError,omitempty"` // ignore error if target element not found @@ -103,6 +104,13 @@ func WithCustomDirection(sx, sy, ex, ey float64) ActionOption { } } +// WithRecognitionArea inputs area of [(x1,y1), (x2,y2)] +func WithRecognitionArea(x1, y1, x2, y2 float64) ActionOption { + return func(o *MobileAction) { + o.RecognitionArea = []float64{x1, y1, x2, y2} + } +} + func WithText(text string) ActionOption { return func(o *MobileAction) { o.Text = text @@ -302,7 +310,7 @@ func (dExt *DriverExt) FindUIElement(param string) (ele WebElement, err error) { func (dExt *DriverExt) FindUIRectInUIKit(search string, index ...int) (x, y, width, height float64, err error) { // click on text, using OCR if !isPathExists(search) { - return dExt.FindTextByOCR(search, index...) + return dExt.FindTextByOCR(search, nil, index...) } // click on image, using opencv return dExt.FindImageRectInUIKit(search, index...) @@ -339,7 +347,7 @@ func (dExt *DriverExt) IsLabelExist(label string) bool { } func (dExt *DriverExt) IsOCRExist(text string) bool { - _, _, _, _, err := dExt.FindTextByOCR(text) + _, _, _, _, err := dExt.FindTextByOCR(text, nil) return err == nil } @@ -374,7 +382,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error { var point PointF findApp := func(d *DriverExt) error { var err error - point, err = d.GetTextXY(appName, action.Index) + point, err = d.GetTextXY(appName, action.RecognitionArea, action.Index) return err } foundAppAction := func(d *DriverExt) error { @@ -406,7 +414,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error { var point PointF findText := func(d *DriverExt) error { var err error - point, err = d.GetTextXY(text, action.Index) + point, err = d.GetTextXY(text, action.RecognitionArea, action.Index) return err } foundTextAction := func(d *DriverExt) error { @@ -439,7 +447,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error { var point PointF findText := func(d *DriverExt) error { var err error - points, err := d.GetTextXYs(texts) + points, err := d.GetTextXYs(texts, action.RecognitionArea) if err != nil { return err } @@ -511,7 +519,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error { return fmt.Errorf("invalid %s params: %v", ACTION_Tap, action.Params) case ACTION_TapByOCR: if ocrText, ok := action.Params.(string); ok { - return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.Index) + return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.RecognitionArea, action.Index) } return fmt.Errorf("invalid %s params: %v", ACTION_TapByOCR, action.Params) case ACTION_TapByCV: diff --git a/hrp/pkg/uixt/ocr_vedem.go b/hrp/pkg/uixt/ocr_vedem.go index 82d142dc..11388ba6 100644 --- a/hrp/pkg/uixt/ocr_vedem.go +++ b/hrp/pkg/uixt/ocr_vedem.go @@ -5,6 +5,7 @@ import ( "fmt" "image" "io/ioutil" + "math" "mime/multipart" "net/http" "os" @@ -109,7 +110,7 @@ func getLogID(header http.Header) string { return logID[0] } -func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error) { +func (s *veDEMOCRService) FindText(text string, imageBuf []byte, recAbsArea []int, index ...int) (rect image.Rectangle, err error) { if len(index) == 0 { index = []int{0} // index not specified } @@ -120,16 +121,25 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) ( return } + if len(recAbsArea) != 4 { + recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64} + } + + var minX, minY, maxX, maxY int + if recAbsArea[0] < recAbsArea[2] { + minX, maxX = recAbsArea[0], recAbsArea[2] + } else { + minX, maxX = recAbsArea[2], recAbsArea[0] + } + if recAbsArea[1] < recAbsArea[3] { + minY, maxY = recAbsArea[1], recAbsArea[3] + } else { + minY, maxY = recAbsArea[3], recAbsArea[1] + } + var rects []image.Rectangle var ocrTexts []string for _, ocrResult := range ocrResults { - ocrTexts = append(ocrTexts, ocrResult.Text) - - // not contains text - if !strings.Contains(ocrResult.Text, text) { - continue - } - rect = image.Rectangle{ // ocrResult.Points 顺序:左上 -> 右上 -> 右下 -> 左下 Min: image.Point{ @@ -141,7 +151,16 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) ( Y: int(ocrResult.Points[2].Y), }, } - rects = append(rects, rect) + if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY { + ocrTexts = append(ocrTexts, ocrResult.Text) + + // not contains text + if !strings.Contains(ocrResult.Text, text) { + continue + } + + rects = append(rects, rect) + } // contains text while not match exactly if ocrResult.Text != text { @@ -177,23 +196,36 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) ( return rects[idx], nil } -func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []image.Rectangle, err error) { +func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte, recAbsArea []int) (rects []image.Rectangle, err error) { ocrResults, err := s.getOCRResult(imageBuf) if err != nil { log.Error().Err(err).Msg("getOCRResult failed") return } + if len(recAbsArea) != 4 { + recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64} + } + + var minX, minY, maxX, maxY int + if recAbsArea[0] < recAbsArea[2] { + minX, maxX = recAbsArea[0], recAbsArea[2] + } else { + minX, maxX = recAbsArea[2], recAbsArea[0] + } + if recAbsArea[1] < recAbsArea[3] { + minY, maxY = recAbsArea[1], recAbsArea[3] + } else { + minY, maxY = recAbsArea[3], recAbsArea[1] + } + + var success bool + var rect image.Rectangle + var ocrTexts []string for _, text := range texts { var found bool for _, ocrResult := range ocrResults { - // not contains text - if !strings.Contains(ocrResult.Text, text) { - continue - } - - found = true - rect := image.Rectangle{ + rect = image.Rectangle{ // ocrResult.Points 顺序:左上 -> 右上 -> 右下 -> 左下 Min: image.Point{ X: int(ocrResult.Points[0].X), @@ -204,12 +236,29 @@ func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []im Y: int(ocrResult.Points[2].Y), }, } - rects = append(rects, rect) - break + + if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY { + ocrTexts = append(ocrTexts, ocrResult.Text) + + // not contains text + if !strings.Contains(ocrResult.Text, text) { + continue + } + + found = true + rects = append(rects, rect) + break + } } if !found { rects = append(rects, image.Rectangle{}) } + success = found || success + } + + if !success { + return rects, + fmt.Errorf("texts %s not found in %v", texts, ocrTexts) } return rects, nil @@ -219,15 +268,26 @@ type OCRService interface { FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error) } -func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width, height float64, err error) { +func (dExt *DriverExt) FindTextByOCR(ocrText string, recognitionArea []float64, index ...int) (x, y, width, height float64, err error) { var bufSource *bytes.Buffer if bufSource, err = dExt.takeScreenShot(); err != nil { err = fmt.Errorf("takeScreenShot error: %v", err) return } + if len(recognitionArea) != 4 { + recognitionArea = []float64{0, 0, 1, 1} + } + + absArea := []int{ + int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale), + int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale), + int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale), + int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale), + } + service := &veDEMOCRService{} - rect, err := service.FindText(ocrText, bufSource.Bytes(), index...) + rect, err := service.FindText(ocrText, bufSource.Bytes(), absArea, index...) if err != nil { log.Warn().Msgf("FindText failed: %s", err.Error()) err = fmt.Errorf("FindText failed: %v", err) @@ -240,15 +300,26 @@ func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width, return } -func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string) (points [][]float64, err error) { +func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string, recognitionArea []float64) (points [][]float64, err error) { var bufSource *bytes.Buffer if bufSource, err = dExt.takeScreenShot(); err != nil { err = fmt.Errorf("takeScreenShot error: %v", err) return } + if len(recognitionArea) != 4 { + recognitionArea = []float64{0, 0, 1, 1} + } + + absArea := []int{ + int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale), + int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale), + int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale), + int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale), + } + service := &veDEMOCRService{} - rects, err := service.FindTexts(ocrTexts, bufSource.Bytes()) + rects, err := service.FindTexts(ocrTexts, bufSource.Bytes(), absArea) if err != nil { log.Warn().Msgf("FindTexts failed: %s", err.Error()) err = fmt.Errorf("FindTexts failed: %v", err) diff --git a/hrp/pkg/uixt/tap.go b/hrp/pkg/uixt/tap.go index 7957f3dd..611a5174 100644 --- a/hrp/pkg/uixt/tap.go +++ b/hrp/pkg/uixt/tap.go @@ -28,8 +28,8 @@ func (dExt *DriverExt) TapXY(x, y float64, identifier string) error { return dExt.TapAbsXY(x, y, identifier) } -func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, err error) { - x, y, width, height, err := dExt.FindTextByOCR(ocrText, index...) +func (dExt *DriverExt) GetTextXY(ocrText string, recognitionArea []float64, index ...int) (point PointF, err error) { + x, y, width, height, err := dExt.FindTextByOCR(ocrText, recognitionArea, index...) if err != nil { return PointF{}, err } @@ -41,8 +41,8 @@ func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, er return point, nil } -func (dExt *DriverExt) GetTextXYs(ocrText []string) (points []PointF, err error) { - ps, err := dExt.FindTextsByOCR(ocrText) +func (dExt *DriverExt) GetTextXYs(ocrText []string, recognitionArea []float64) (points []PointF, err error) { + ps, err := dExt.FindTextsByOCR(ocrText, recognitionArea) if err != nil { return nil, err } @@ -71,8 +71,8 @@ func (dExt *DriverExt) GetImageXY(imagePath string, index ...int) (point PointF, return point, nil } -func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, index ...int) error { - point, err := dExt.GetTextXY(ocrText, index...) +func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, recognitionArea []float64, index ...int) error { + point, err := dExt.GetTextXY(ocrText, recognitionArea, index...) if err != nil { if ignoreNotFoundError { return nil diff --git a/hrp/step.go b/hrp/step.go index ac7481f1..8873f947 100644 --- a/hrp/step.go +++ b/hrp/step.go @@ -31,6 +31,7 @@ var ( WithDescription = uixt.WithDescription WithDirection = uixt.WithDirection WithCustomDirection = uixt.WithCustomDirection + WithRecognitionArea = uixt.WithRecognitionArea ) var (