feat: get OCR text position within a given recognition area

This commit is contained in:
xucong.053
2022-10-15 23:50:31 +08:00
parent 3ccdb00fc2
commit 7b0a442a7a
5 changed files with 116 additions and 36 deletions

View File

@@ -32,7 +32,7 @@ func TestIOSDemo(t *testing.T) {
// Keep monitoring the phone screen until the teen-mode ("青少年模式") popup appears, then tap "我知道了" ("Got it").
for {
points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"})
points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"}, nil)
if err != nil {
time.Sleep(1 * time.Second)
continue

View File

@@ -67,6 +67,7 @@ type MobileAction struct {
Identifier string `json:"identifier,omitempty" yaml:"identifier,omitempty"` // used to identify the action in log
MaxRetryTimes int `json:"max_retry_times,omitempty" yaml:"max_retry_times,omitempty"` // max retry times
Direction interface{} `json:"direction,omitempty" yaml:"direction,omitempty"` // used by swipe to tap text or app
RecognitionArea []float64 `json:"recognition_area,omitempty" yaml:"recognition_area,omitempty"` // used by ocr to get text position in the recognition area
Index int `json:"index,omitempty" yaml:"index,omitempty"` // index of the target element, should start from 1
Timeout int `json:"timeout,omitempty" yaml:"timeout,omitempty"` // TODO: wait timeout in seconds for mobile action
IgnoreNotFoundError bool `json:"ignore_NotFoundError,omitempty" yaml:"ignore_NotFoundError,omitempty"` // ignore error if target element not found
@@ -103,6 +104,13 @@ func WithCustomDirection(sx, sy, ex, ey float64) ActionOption {
}
}
// WithRecognitionArea returns an ActionOption that records a rectangular
// recognition area on a MobileAction. The rectangle is described by two corner
// points, (x1, y1) and (x2, y2), and is stored in MobileAction.RecognitionArea
// as the four-element slice [x1, y1, x2, y2].
//
// NOTE(review): FindTextByOCR multiplies these values by the window width and
// height, so they look like fractions of the screen size — confirm with callers.
func WithRecognitionArea(x1, y1, x2, y2 float64) ActionOption {
	apply := func(action *MobileAction) {
		// Build a fresh backing array on every application so that actions
		// configured with the same option never share storage.
		corners := [...]float64{x1, y1, x2, y2}
		action.RecognitionArea = corners[:]
	}
	return apply
}
func WithText(text string) ActionOption {
return func(o *MobileAction) {
o.Text = text
@@ -302,7 +310,7 @@ func (dExt *DriverExt) FindUIElement(param string) (ele WebElement, err error) {
func (dExt *DriverExt) FindUIRectInUIKit(search string, index ...int) (x, y, width, height float64, err error) {
// click on text, using OCR
if !isPathExists(search) {
return dExt.FindTextByOCR(search, index...)
return dExt.FindTextByOCR(search, nil, index...)
}
// click on image, using opencv
return dExt.FindImageRectInUIKit(search, index...)
@@ -339,7 +347,7 @@ func (dExt *DriverExt) IsLabelExist(label string) bool {
}
// IsOCRExist reports whether text can be located through dExt.FindTextByOCR.
// It returns true exactly when that lookup returns a nil error; the position
// and size values returned by the lookup are discarded.
func (dExt *DriverExt) IsOCRExist(text string) bool {
_, _, _, _, err := dExt.FindTextByOCR(text)
// A nil recognition area makes FindTextByOCR fall back to its default
// area of {0, 0, 1, 1}.
_, _, _, _, err := dExt.FindTextByOCR(text, nil)
return err == nil
}
@@ -374,7 +382,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
var point PointF
findApp := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY(appName, action.Index)
point, err = d.GetTextXY(appName, action.RecognitionArea, action.Index)
return err
}
foundAppAction := func(d *DriverExt) error {
@@ -406,7 +414,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
var point PointF
findText := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY(text, action.Index)
point, err = d.GetTextXY(text, action.RecognitionArea, action.Index)
return err
}
foundTextAction := func(d *DriverExt) error {
@@ -439,7 +447,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
var point PointF
findText := func(d *DriverExt) error {
var err error
points, err := d.GetTextXYs(texts)
points, err := d.GetTextXYs(texts, action.RecognitionArea)
if err != nil {
return err
}
@@ -511,7 +519,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
return fmt.Errorf("invalid %s params: %v", ACTION_Tap, action.Params)
case ACTION_TapByOCR:
if ocrText, ok := action.Params.(string); ok {
return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.Index)
return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.RecognitionArea, action.Index)
}
return fmt.Errorf("invalid %s params: %v", ACTION_TapByOCR, action.Params)
case ACTION_TapByCV:

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"image"
"io/ioutil"
"math"
"mime/multipart"
"net/http"
"os"
@@ -109,7 +110,7 @@ func getLogID(header http.Header) string {
return logID[0]
}
func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error) {
func (s *veDEMOCRService) FindText(text string, imageBuf []byte, recAbsArea []int, index ...int) (rect image.Rectangle, err error) {
if len(index) == 0 {
index = []int{0} // index not specified
}
@@ -120,16 +121,25 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
return
}
if len(recAbsArea) != 4 {
recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64}
}
var minX, minY, maxX, maxY int
if recAbsArea[0] < recAbsArea[2] {
minX, maxX = recAbsArea[0], recAbsArea[2]
} else {
minX, maxX = recAbsArea[2], recAbsArea[0]
}
if recAbsArea[1] < recAbsArea[3] {
minY, maxY = recAbsArea[1], recAbsArea[3]
} else {
minY, maxY = recAbsArea[3], recAbsArea[1]
}
var rects []image.Rectangle
var ocrTexts []string
for _, ocrResult := range ocrResults {
ocrTexts = append(ocrTexts, ocrResult.Text)
// not contains text
if !strings.Contains(ocrResult.Text, text) {
continue
}
rect = image.Rectangle{
// Order of ocrResult.Points: top-left -> top-right -> bottom-right -> bottom-left
Min: image.Point{
@@ -141,7 +151,16 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
Y: int(ocrResult.Points[2].Y),
},
}
rects = append(rects, rect)
if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY {
ocrTexts = append(ocrTexts, ocrResult.Text)
// not contains text
if !strings.Contains(ocrResult.Text, text) {
continue
}
rects = append(rects, rect)
}
// contains the text but does not match it exactly
if ocrResult.Text != text {
@@ -177,23 +196,36 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
return rects[idx], nil
}
func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []image.Rectangle, err error) {
func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte, recAbsArea []int) (rects []image.Rectangle, err error) {
ocrResults, err := s.getOCRResult(imageBuf)
if err != nil {
log.Error().Err(err).Msg("getOCRResult failed")
return
}
if len(recAbsArea) != 4 {
recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64}
}
var minX, minY, maxX, maxY int
if recAbsArea[0] < recAbsArea[2] {
minX, maxX = recAbsArea[0], recAbsArea[2]
} else {
minX, maxX = recAbsArea[2], recAbsArea[0]
}
if recAbsArea[1] < recAbsArea[3] {
minY, maxY = recAbsArea[1], recAbsArea[3]
} else {
minY, maxY = recAbsArea[3], recAbsArea[1]
}
var success bool
var rect image.Rectangle
var ocrTexts []string
for _, text := range texts {
var found bool
for _, ocrResult := range ocrResults {
// not contains text
if !strings.Contains(ocrResult.Text, text) {
continue
}
found = true
rect := image.Rectangle{
rect = image.Rectangle{
// Order of ocrResult.Points: top-left -> top-right -> bottom-right -> bottom-left
Min: image.Point{
X: int(ocrResult.Points[0].X),
@@ -204,12 +236,29 @@ func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []im
Y: int(ocrResult.Points[2].Y),
},
}
rects = append(rects, rect)
break
if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY {
ocrTexts = append(ocrTexts, ocrResult.Text)
// not contains text
if !strings.Contains(ocrResult.Text, text) {
continue
}
found = true
rects = append(rects, rect)
break
}
}
if !found {
rects = append(rects, image.Rectangle{})
}
success = found || success
}
if !success {
return rects,
fmt.Errorf("texts %s not found in %v", texts, ocrTexts)
}
return rects, nil
@@ -219,15 +268,26 @@ type OCRService interface {
FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error)
}
func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width, height float64, err error) {
func (dExt *DriverExt) FindTextByOCR(ocrText string, recognitionArea []float64, index ...int) (x, y, width, height float64, err error) {
var bufSource *bytes.Buffer
if bufSource, err = dExt.takeScreenShot(); err != nil {
err = fmt.Errorf("takeScreenShot error: %v", err)
return
}
if len(recognitionArea) != 4 {
recognitionArea = []float64{0, 0, 1, 1}
}
absArea := []int{
int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale),
int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale),
int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale),
int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale),
}
service := &veDEMOCRService{}
rect, err := service.FindText(ocrText, bufSource.Bytes(), index...)
rect, err := service.FindText(ocrText, bufSource.Bytes(), absArea, index...)
if err != nil {
log.Warn().Msgf("FindText failed: %s", err.Error())
err = fmt.Errorf("FindText failed: %v", err)
@@ -240,15 +300,26 @@ func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width,
return
}
func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string) (points [][]float64, err error) {
func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string, recognitionArea []float64) (points [][]float64, err error) {
var bufSource *bytes.Buffer
if bufSource, err = dExt.takeScreenShot(); err != nil {
err = fmt.Errorf("takeScreenShot error: %v", err)
return
}
if len(recognitionArea) != 4 {
recognitionArea = []float64{0, 0, 1, 1}
}
absArea := []int{
int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale),
int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale),
int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale),
int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale),
}
service := &veDEMOCRService{}
rects, err := service.FindTexts(ocrTexts, bufSource.Bytes())
rects, err := service.FindTexts(ocrTexts, bufSource.Bytes(), absArea)
if err != nil {
log.Warn().Msgf("FindTexts failed: %s", err.Error())
err = fmt.Errorf("FindTexts failed: %v", err)

View File

@@ -28,8 +28,8 @@ func (dExt *DriverExt) TapXY(x, y float64, identifier string) error {
return dExt.TapAbsXY(x, y, identifier)
}
func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, err error) {
x, y, width, height, err := dExt.FindTextByOCR(ocrText, index...)
func (dExt *DriverExt) GetTextXY(ocrText string, recognitionArea []float64, index ...int) (point PointF, err error) {
x, y, width, height, err := dExt.FindTextByOCR(ocrText, recognitionArea, index...)
if err != nil {
return PointF{}, err
}
@@ -41,8 +41,8 @@ func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, er
return point, nil
}
func (dExt *DriverExt) GetTextXYs(ocrText []string) (points []PointF, err error) {
ps, err := dExt.FindTextsByOCR(ocrText)
func (dExt *DriverExt) GetTextXYs(ocrText []string, recognitionArea []float64) (points []PointF, err error) {
ps, err := dExt.FindTextsByOCR(ocrText, recognitionArea)
if err != nil {
return nil, err
}
@@ -71,8 +71,8 @@ func (dExt *DriverExt) GetImageXY(imagePath string, index ...int) (point PointF,
return point, nil
}
func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, index ...int) error {
point, err := dExt.GetTextXY(ocrText, index...)
func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, recognitionArea []float64, index ...int) error {
point, err := dExt.GetTextXY(ocrText, recognitionArea, index...)
if err != nil {
if ignoreNotFoundError {
return nil

View File

@@ -31,6 +31,7 @@ var (
WithDescription = uixt.WithDescription
WithDirection = uixt.WithDirection
WithCustomDirection = uixt.WithCustomDirection
WithRecognitionArea = uixt.WithRecognitionArea
)
var (