mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-13 17:29:56 +08:00
feat: get ocr position by given recognition area
This commit is contained in:
@@ -32,7 +32,7 @@ func TestIOSDemo(t *testing.T) {
|
||||
|
||||
// 持续监测手机屏幕,直到出现青少年模式弹窗后,点击「我知道了」
|
||||
for {
|
||||
points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"})
|
||||
points, err := driverExt.GetTextXYs([]string{"青少年模式", "我知道了"}, nil)
|
||||
if err != nil {
|
||||
time.Sleep(1 * time.Second)
|
||||
continue
|
||||
|
||||
@@ -67,6 +67,7 @@ type MobileAction struct {
|
||||
Identifier string `json:"identifier,omitempty" yaml:"identifier,omitempty"` // used to identify the action in log
|
||||
MaxRetryTimes int `json:"max_retry_times,omitempty" yaml:"max_retry_times,omitempty"` // max retry times
|
||||
Direction interface{} `json:"direction,omitempty" yaml:"direction,omitempty"` // used by swipe to tap text or app
|
||||
RecognitionArea []float64 `json:"recognition_area,omitempty" yaml:"recognition_area,omitempty"` // used by ocr to get text position in the recognition area
|
||||
Index int `json:"index,omitempty" yaml:"index,omitempty"` // index of the target element, should start from 1
|
||||
Timeout int `json:"timeout,omitempty" yaml:"timeout,omitempty"` // TODO: wait timeout in seconds for mobile action
|
||||
IgnoreNotFoundError bool `json:"ignore_NotFoundError,omitempty" yaml:"ignore_NotFoundError,omitempty"` // ignore error if target element not found
|
||||
@@ -103,6 +104,13 @@ func WithCustomDirection(sx, sy, ex, ey float64) ActionOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithRecognitionArea returns an ActionOption that sets the action's
// RecognitionArea to [x1, y1, x2, y2], i.e. the two corners (x1, y1) and
// (x2, y2) of the region used to filter OCR text results.
// FindTextByOCR and FindTextsByOCR multiply the x values by the window width
// and the y values by the window height (times the scale), so the values act
// as fractions of the screen; their fallback when no valid area is given is
// {0, 0, 1, 1}. The OCR service sorts each axis into min/max before
// filtering, so the two corners may be passed in either order.
|
||||
func WithRecognitionArea(x1, y1, x2, y2 float64) ActionOption {
|
||||
return func(o *MobileAction) {
|
||||
o.RecognitionArea = []float64{x1, y1, x2, y2}
|
||||
}
|
||||
}
|
||||
|
||||
func WithText(text string) ActionOption {
|
||||
return func(o *MobileAction) {
|
||||
o.Text = text
|
||||
@@ -302,7 +310,7 @@ func (dExt *DriverExt) FindUIElement(param string) (ele WebElement, err error) {
|
||||
func (dExt *DriverExt) FindUIRectInUIKit(search string, index ...int) (x, y, width, height float64, err error) {
|
||||
// click on text, using OCR
|
||||
if !isPathExists(search) {
|
||||
return dExt.FindTextByOCR(search, index...)
|
||||
return dExt.FindTextByOCR(search, nil, index...)
|
||||
}
|
||||
// click on image, using opencv
|
||||
return dExt.FindImageRectInUIKit(search, index...)
|
||||
@@ -339,7 +347,7 @@ func (dExt *DriverExt) IsLabelExist(label string) bool {
|
||||
}
|
||||
|
||||
// IsOCRExist reports whether FindTextByOCR can locate text: it returns true
// exactly when that call returns a nil error. The position values returned by
// FindTextByOCR are discarded.
// NOTE(review): this span shows two variants of the same call, which looks
// like the removed and added lines of a diff view — confirm which one is live.
func (dExt *DriverExt) IsOCRExist(text string) bool {
|
||||
_, _, _, _, err := dExt.FindTextByOCR(text)
|
||||
// A nil recognition area makes FindTextByOCR fall back to its default
// area of {0, 0, 1, 1}.
_, _, _, _, err := dExt.FindTextByOCR(text, nil)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
@@ -374,7 +382,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
|
||||
var point PointF
|
||||
findApp := func(d *DriverExt) error {
|
||||
var err error
|
||||
point, err = d.GetTextXY(appName, action.Index)
|
||||
point, err = d.GetTextXY(appName, action.RecognitionArea, action.Index)
|
||||
return err
|
||||
}
|
||||
foundAppAction := func(d *DriverExt) error {
|
||||
@@ -406,7 +414,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
|
||||
var point PointF
|
||||
findText := func(d *DriverExt) error {
|
||||
var err error
|
||||
point, err = d.GetTextXY(text, action.Index)
|
||||
point, err = d.GetTextXY(text, action.RecognitionArea, action.Index)
|
||||
return err
|
||||
}
|
||||
foundTextAction := func(d *DriverExt) error {
|
||||
@@ -439,7 +447,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
|
||||
var point PointF
|
||||
findText := func(d *DriverExt) error {
|
||||
var err error
|
||||
points, err := d.GetTextXYs(texts)
|
||||
points, err := d.GetTextXYs(texts, action.RecognitionArea)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -511,7 +519,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
|
||||
return fmt.Errorf("invalid %s params: %v", ACTION_Tap, action.Params)
|
||||
case ACTION_TapByOCR:
|
||||
if ocrText, ok := action.Params.(string); ok {
|
||||
return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.Index)
|
||||
return dExt.TapByOCR(ocrText, action.Identifier, action.IgnoreNotFoundError, action.RecognitionArea, action.Index)
|
||||
}
|
||||
return fmt.Errorf("invalid %s params: %v", ACTION_TapByOCR, action.Params)
|
||||
case ACTION_TapByCV:
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"image"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -109,7 +110,7 @@ func getLogID(header http.Header) string {
|
||||
return logID[0]
|
||||
}
|
||||
|
||||
func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error) {
|
||||
func (s *veDEMOCRService) FindText(text string, imageBuf []byte, recAbsArea []int, index ...int) (rect image.Rectangle, err error) {
|
||||
if len(index) == 0 {
|
||||
index = []int{0} // index not specified
|
||||
}
|
||||
@@ -120,16 +121,25 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
|
||||
return
|
||||
}
|
||||
|
||||
if len(recAbsArea) != 4 {
|
||||
recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64}
|
||||
}
|
||||
|
||||
var minX, minY, maxX, maxY int
|
||||
if recAbsArea[0] < recAbsArea[2] {
|
||||
minX, maxX = recAbsArea[0], recAbsArea[2]
|
||||
} else {
|
||||
minX, maxX = recAbsArea[2], recAbsArea[0]
|
||||
}
|
||||
if recAbsArea[1] < recAbsArea[3] {
|
||||
minY, maxY = recAbsArea[1], recAbsArea[3]
|
||||
} else {
|
||||
minY, maxY = recAbsArea[3], recAbsArea[1]
|
||||
}
|
||||
|
||||
var rects []image.Rectangle
|
||||
var ocrTexts []string
|
||||
for _, ocrResult := range ocrResults {
|
||||
ocrTexts = append(ocrTexts, ocrResult.Text)
|
||||
|
||||
// not contains text
|
||||
if !strings.Contains(ocrResult.Text, text) {
|
||||
continue
|
||||
}
|
||||
|
||||
rect = image.Rectangle{
|
||||
// ocrResult.Points 顺序:左上 -> 右上 -> 右下 -> 左下
|
||||
Min: image.Point{
|
||||
@@ -141,7 +151,16 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
|
||||
Y: int(ocrResult.Points[2].Y),
|
||||
},
|
||||
}
|
||||
rects = append(rects, rect)
|
||||
if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY {
|
||||
ocrTexts = append(ocrTexts, ocrResult.Text)
|
||||
|
||||
// not contains text
|
||||
if !strings.Contains(ocrResult.Text, text) {
|
||||
continue
|
||||
}
|
||||
|
||||
rects = append(rects, rect)
|
||||
}
|
||||
|
||||
// contains text while not match exactly
|
||||
if ocrResult.Text != text {
|
||||
@@ -177,23 +196,36 @@ func (s *veDEMOCRService) FindText(text string, imageBuf []byte, index ...int) (
|
||||
return rects[idx], nil
|
||||
}
|
||||
|
||||
func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []image.Rectangle, err error) {
|
||||
func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte, recAbsArea []int) (rects []image.Rectangle, err error) {
|
||||
ocrResults, err := s.getOCRResult(imageBuf)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("getOCRResult failed")
|
||||
return
|
||||
}
|
||||
|
||||
if len(recAbsArea) != 4 {
|
||||
recAbsArea = []int{0, 0, math.MaxInt64, math.MaxInt64}
|
||||
}
|
||||
|
||||
var minX, minY, maxX, maxY int
|
||||
if recAbsArea[0] < recAbsArea[2] {
|
||||
minX, maxX = recAbsArea[0], recAbsArea[2]
|
||||
} else {
|
||||
minX, maxX = recAbsArea[2], recAbsArea[0]
|
||||
}
|
||||
if recAbsArea[1] < recAbsArea[3] {
|
||||
minY, maxY = recAbsArea[1], recAbsArea[3]
|
||||
} else {
|
||||
minY, maxY = recAbsArea[3], recAbsArea[1]
|
||||
}
|
||||
|
||||
var success bool
|
||||
var rect image.Rectangle
|
||||
var ocrTexts []string
|
||||
for _, text := range texts {
|
||||
var found bool
|
||||
for _, ocrResult := range ocrResults {
|
||||
// not contains text
|
||||
if !strings.Contains(ocrResult.Text, text) {
|
||||
continue
|
||||
}
|
||||
|
||||
found = true
|
||||
rect := image.Rectangle{
|
||||
rect = image.Rectangle{
|
||||
// ocrResult.Points 顺序:左上 -> 右上 -> 右下 -> 左下
|
||||
Min: image.Point{
|
||||
X: int(ocrResult.Points[0].X),
|
||||
@@ -204,12 +236,29 @@ func (s *veDEMOCRService) FindTexts(texts []string, imageBuf []byte) (rects []im
|
||||
Y: int(ocrResult.Points[2].Y),
|
||||
},
|
||||
}
|
||||
rects = append(rects, rect)
|
||||
break
|
||||
|
||||
if rect.Min.X > minX && rect.Max.X < maxX && rect.Min.Y < maxY && rect.Max.Y > minY {
|
||||
ocrTexts = append(ocrTexts, ocrResult.Text)
|
||||
|
||||
// not contains text
|
||||
if !strings.Contains(ocrResult.Text, text) {
|
||||
continue
|
||||
}
|
||||
|
||||
found = true
|
||||
rects = append(rects, rect)
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
rects = append(rects, image.Rectangle{})
|
||||
}
|
||||
success = found || success
|
||||
}
|
||||
|
||||
if !success {
|
||||
return rects,
|
||||
fmt.Errorf("texts %s not found in %v", texts, ocrTexts)
|
||||
}
|
||||
|
||||
return rects, nil
|
||||
@@ -219,15 +268,26 @@ type OCRService interface {
|
||||
FindText(text string, imageBuf []byte, index ...int) (rect image.Rectangle, err error)
|
||||
}
|
||||
|
||||
func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width, height float64, err error) {
|
||||
func (dExt *DriverExt) FindTextByOCR(ocrText string, recognitionArea []float64, index ...int) (x, y, width, height float64, err error) {
|
||||
var bufSource *bytes.Buffer
|
||||
if bufSource, err = dExt.takeScreenShot(); err != nil {
|
||||
err = fmt.Errorf("takeScreenShot error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(recognitionArea) != 4 {
|
||||
recognitionArea = []float64{0, 0, 1, 1}
|
||||
}
|
||||
|
||||
absArea := []int{
|
||||
int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale),
|
||||
int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale),
|
||||
int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale),
|
||||
int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale),
|
||||
}
|
||||
|
||||
service := &veDEMOCRService{}
|
||||
rect, err := service.FindText(ocrText, bufSource.Bytes(), index...)
|
||||
rect, err := service.FindText(ocrText, bufSource.Bytes(), absArea, index...)
|
||||
if err != nil {
|
||||
log.Warn().Msgf("FindText failed: %s", err.Error())
|
||||
err = fmt.Errorf("FindText failed: %v", err)
|
||||
@@ -240,15 +300,26 @@ func (dExt *DriverExt) FindTextByOCR(ocrText string, index ...int) (x, y, width,
|
||||
return
|
||||
}
|
||||
|
||||
func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string) (points [][]float64, err error) {
|
||||
func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string, recognitionArea []float64) (points [][]float64, err error) {
|
||||
var bufSource *bytes.Buffer
|
||||
if bufSource, err = dExt.takeScreenShot(); err != nil {
|
||||
err = fmt.Errorf("takeScreenShot error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(recognitionArea) != 4 {
|
||||
recognitionArea = []float64{0, 0, 1, 1}
|
||||
}
|
||||
|
||||
absArea := []int{
|
||||
int(recognitionArea[0] * float64(dExt.windowSize.Width) * dExt.scale),
|
||||
int(recognitionArea[1] * float64(dExt.windowSize.Height) * dExt.scale),
|
||||
int(recognitionArea[2] * float64(dExt.windowSize.Width) * dExt.scale),
|
||||
int(recognitionArea[3] * float64(dExt.windowSize.Height) * dExt.scale),
|
||||
}
|
||||
|
||||
service := &veDEMOCRService{}
|
||||
rects, err := service.FindTexts(ocrTexts, bufSource.Bytes())
|
||||
rects, err := service.FindTexts(ocrTexts, bufSource.Bytes(), absArea)
|
||||
if err != nil {
|
||||
log.Warn().Msgf("FindTexts failed: %s", err.Error())
|
||||
err = fmt.Errorf("FindTexts failed: %v", err)
|
||||
|
||||
@@ -28,8 +28,8 @@ func (dExt *DriverExt) TapXY(x, y float64, identifier string) error {
|
||||
return dExt.TapAbsXY(x, y, identifier)
|
||||
}
|
||||
|
||||
func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, err error) {
|
||||
x, y, width, height, err := dExt.FindTextByOCR(ocrText, index...)
|
||||
func (dExt *DriverExt) GetTextXY(ocrText string, recognitionArea []float64, index ...int) (point PointF, err error) {
|
||||
x, y, width, height, err := dExt.FindTextByOCR(ocrText, recognitionArea, index...)
|
||||
if err != nil {
|
||||
return PointF{}, err
|
||||
}
|
||||
@@ -41,8 +41,8 @@ func (dExt *DriverExt) GetTextXY(ocrText string, index ...int) (point PointF, er
|
||||
return point, nil
|
||||
}
|
||||
|
||||
func (dExt *DriverExt) GetTextXYs(ocrText []string) (points []PointF, err error) {
|
||||
ps, err := dExt.FindTextsByOCR(ocrText)
|
||||
func (dExt *DriverExt) GetTextXYs(ocrText []string, recognitionArea []float64) (points []PointF, err error) {
|
||||
ps, err := dExt.FindTextsByOCR(ocrText, recognitionArea)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -71,8 +71,8 @@ func (dExt *DriverExt) GetImageXY(imagePath string, index ...int) (point PointF,
|
||||
return point, nil
|
||||
}
|
||||
|
||||
func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, index ...int) error {
|
||||
point, err := dExt.GetTextXY(ocrText, index...)
|
||||
func (dExt *DriverExt) TapByOCR(ocrText string, identifier string, ignoreNotFoundError bool, recognitionArea []float64, index ...int) error {
|
||||
point, err := dExt.GetTextXY(ocrText, recognitionArea, index...)
|
||||
if err != nil {
|
||||
if ignoreNotFoundError {
|
||||
return nil
|
||||
|
||||
@@ -31,6 +31,7 @@ var (
|
||||
WithDescription = uixt.WithDescription
|
||||
WithDirection = uixt.WithDirection
|
||||
WithCustomDirection = uixt.WithCustomDirection
|
||||
WithRecognitionArea = uixt.WithRecognitionArea
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
Reference in New Issue
Block a user