refactor: simplify OCR APIs

This commit is contained in:
lilong.129
2023-04-25 22:03:55 +08:00
parent f9bebf7202
commit 3ec27b9afc
13 changed files with 231 additions and 247 deletions

View File

@@ -150,7 +150,7 @@ func NewWorldCupLive(device uixt.Device, matchName, bundleID string, duration, i
func (wc *WorldCupLive) getCurrentLiveTime(utcTime time.Time) error {
utcTimeStr := utcTime.Format("15:04:05")
ocrTexts, err := wc.driver.GetTextsByOCR()
ocrTexts, err := wc.driver.GetScreenTextsByOCR()
if err != nil {
log.Error().Err(err).Msg("get ocr texts failed")
return err
@@ -212,8 +212,10 @@ func (wc *WorldCupLive) EnterLive(bundleID string) error {
time.Sleep(5 * time.Second)
// 青少年弹窗处理
if points, err := wc.driver.GetTextXYs([]string{"青少年模式", "我知道了"}); err == nil {
_ = wc.driver.TapAbsXY(points[1].X, points[1].Y)
if ocrTexts, err := wc.driver.GetScreenTextsByOCR(); err == nil {
if points, err := ocrTexts.FindTexts([]string{"青少年模式", "我知道了"}); err == nil {
_ = wc.driver.TapAbsXY(points[1].X, points[1].Y)
}
}
// 进入世界杯 tab

View File

@@ -222,7 +222,7 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
// }
findTextCondition := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY(text, indexOption, scopeOption)
point, err = d.FindScreenTextByOCR(text, indexOption, scopeOption)
return err
}
foundTextAction := func(d *DriverExt) error {
@@ -268,7 +268,11 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
var point PointF
findTexts := func(d *DriverExt) error {
var err error
points, err := d.GetTextXYs(texts, scopeOption)
ocrTexts, err := d.GetScreenTextsByOCR(scopeOption)
if err != nil {
return err
}
points, err := ocrTexts.FindTexts(texts, scopeOption)
if err != nil {
return err
}
@@ -455,7 +459,12 @@ func (dExt *DriverExt) DoAction(action MobileAction) error {
if !ok {
return fmt.Errorf("invalid video crawler params: %v(%T)", action.Params, action.Params)
}
return dExt.VideoCrawler(params)
data, _ := json.Marshal(params)
configs := &VideoCrawlerConfigs{}
if err := json.Unmarshal(data, configs); err != nil {
return errors.Wrapf(err, "invalid video crawler params: %v(%T)", action.Params, action.Params)
}
return dExt.VideoCrawler(configs)
}
return nil
}

View File

@@ -17,14 +17,12 @@ func (dExt *DriverExt) DragOffsetFloat(pathname string, toX, toY, xOffset, yOffs
pressForDuration = []float64{1.0}
}
var x, y, width, height float64
if x, y, width, height, err = dExt.FindUIRectInUIKit(pathname); err != nil {
point, err := dExt.FindUIRectInUIKit(pathname)
if err != nil {
return err
}
fromX := x + width*xOffset
fromY := y + height*yOffset
return dExt.Driver.DragFloat(fromX, fromY, toX, toY,
// FIXME: handle offset
return dExt.Driver.DragFloat(point.X+xOffset, point.Y+yOffset, toX, toY,
WithDataPressDuration(pressForDuration[0]))
}

View File

@@ -52,8 +52,8 @@ type DriverExt struct {
frame *bytes.Buffer
doneMjpegStream chan bool
scale float64
ocrService OCRService // used to get text from image
screenShots []string // cache screenshot paths
OCRService IOCRService // used to get text from image
screenShots []string // cache screenshot paths
CVArgs
}
@@ -75,7 +75,7 @@ func NewDriverExt(device Device, driver WebDriver) (dExt *DriverExt, err error)
return nil, err
}
if dExt.ocrService, err = newVEDEMOCRService(); err != nil {
if dExt.OCRService, err = newVEDEMOCRService(); err != nil {
return nil, err
}
@@ -178,10 +178,10 @@ func init() {
rand.Seed(time.Now().UnixNano())
}
func (dExt *DriverExt) FindUIRectInUIKit(search string, options ...DataOption) (x, y, width, height float64, err error) {
func (dExt *DriverExt) FindUIRectInUIKit(search string, options ...DataOption) (point PointF, err error) {
// click on text, using OCR
if !isPathExists(search) {
return dExt.FindTextByOCR(search, options...)
return dExt.FindScreenTextByOCR(search, options...)
}
// click on image, using opencv
return dExt.FindImageRectInUIKit(search, options...)
@@ -194,12 +194,12 @@ func (dExt *DriverExt) MappingToRectInUIKit(rect image.Rectangle) (x, y, width,
}
func (dExt *DriverExt) IsOCRExist(text string) bool {
_, _, _, _, err := dExt.FindTextByOCR(text)
_, err := dExt.FindScreenTextByOCR(text)
return err == nil
}
func (dExt *DriverExt) IsImageExist(text string) bool {
_, _, _, _, err := dExt.FindImageRectInUIKit(text)
_, err := dExt.FindImageRectInUIKit(text)
return err == nil
}

View File

@@ -10,9 +10,9 @@ func TestDriverExtOCR(t *testing.T) {
driverExt, err := iosDevice.NewDriver(nil)
checkErr(t, err)
x, y, width, height, err := driverExt.FindTextByOCR("抖音")
point, err := driverExt.FindScreenTextByOCR("抖音")
checkErr(t, err)
t.Logf("x: %v, y: %v, width: %v, height: %v", x, y, width, height)
driverExt.Driver.TapFloat(x+width*0.5, y+height*0.5-20)
t.Logf("point.X: %v, point.Y: %v", point.X, point.Y)
driverExt.Driver.TapFloat(point.X, point.Y-20)
}

View File

@@ -34,7 +34,85 @@ type ResponseOCR struct {
OCRResult []OCRResult `json:"ocrResult"`
}
// veDEMOCRService is a field-less receiver type for the VEDEM OCR methods
// (getOCRResult, GetTexts); instances are created by newVEDEMOCRService.
type veDEMOCRService struct{}
// OCRText is one piece of text recognized by OCR.
type OCRText struct {
	// Text is the recognized string.
	Text string
	// Rect is the rectangle reported for the text (presumably its bounding
	// box in the source image; confirm against the OCR service).
	Rect image.Rectangle
}

// OCRTexts is an ordered list of OCR results.
type OCRTexts []OCRText

// texts returns the Text of every entry in order.
// The result is nil when t has no entries.
func (t OCRTexts) texts() (texts []string) {
	for i := range t {
		texts = append(texts, t[i].Text)
	}
	return texts
}
// FindText returns the center point of an OCR entry whose text contains text.
//
// Every entry that contains text as a substring is a candidate, kept in OCR
// order. Selection then depends on the Index data option (the only option
// consulted here):
//   - Index == 0 (not specified): the first entry that equals text exactly is
//     returned; if no entry matches exactly, the first candidate is returned.
//   - Index > 0: the Index-th candidate is returned (1-based).
//   - Index < 0: candidates are counted from the end (-1 is the last one).
//
// A wrapped code.OCRTextNotFoundError is returned when there is no candidate
// or when the requested index lies outside the candidate list.
func (t OCRTexts) FindText(text string, options ...DataOption) (
	point PointF, err error) {

	dataOptions := NewDataOptions(options...)

	var rects []image.Rectangle
	for _, ocrText := range t {
		rect := ocrText.Rect

		// not contains text
		if !strings.Contains(ocrText.Text, text) {
			continue
		}

		rects = append(rects, rect)

		// contains text while not match exactly
		if ocrText.Text != text {
			continue
		}

		// match exactly, and not specify index, return the first one
		if dataOptions.Index == 0 {
			return getRectangleCenterPoint(rect), nil
		}
	}

	if len(rects) == 0 {
		return PointF{}, errors.Wrap(code.OCRTextNotFoundError,
			fmt.Sprintf("text %s not found in %v", text, t.texts()))
	}

	// get index
	idx := dataOptions.Index
	if idx > 0 {
		// NOTICE: index start from 1
		idx = idx - 1
	} else if idx < 0 {
		idx = len(rects) + idx
	}

	// index out of range: a negative index with a magnitude larger than the
	// number of candidates is still negative here and must be rejected too,
	// otherwise rects[idx] panics.
	if idx < 0 || idx >= len(rects) {
		return PointF{}, errors.Wrap(code.OCRTextNotFoundError,
			fmt.Sprintf("text %s found %d, index %d out of range", text, len(rects), idx))
	}

	return getRectangleCenterPoint(rects[idx]), nil
}
// FindTexts looks up every entry of texts with FindText, passing options
// through unchanged, and returns the resulting center points in the same
// order as texts.
//
// All texts must be found: if any lookup fails, no points are returned and
// the error is a wrapped code.OCRTextNotFoundError listing the requested
// texts and the texts that were available.
func (t OCRTexts) FindTexts(texts []string, options ...DataOption) (points []PointF, err error) {
	for i := range texts {
		center, findErr := t.FindText(texts[i], options...)
		if findErr == nil {
			points = append(points, center)
		}
	}

	// every requested text must have produced exactly one point
	if len(points) == len(texts) {
		return points, nil
	}

	return nil, errors.Wrap(code.OCRTextNotFoundError,
		fmt.Sprintf("texts %s not found in %v", texts, t.texts()))
}
func newVEDEMOCRService() (*veDEMOCRService, error) {
if err := checkEnv(); err != nil {
@@ -43,18 +121,7 @@ func newVEDEMOCRService() (*veDEMOCRService, error) {
return &veDEMOCRService{}, nil
}
// checkEnv verifies that the VEDEM OCR settings are present.
// It checks env.VEDEM_OCR_URL, env.VEDEM_OCR_AK and env.VEDEM_OCR_SK in that
// order and returns a wrapped code.OCREnvMissedError naming the first one
// that is empty, or nil when all three are set.
func checkEnv() error {
	switch {
	case env.VEDEM_OCR_URL == "":
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_URL missed")
	case env.VEDEM_OCR_AK == "":
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_AK missed")
	case env.VEDEM_OCR_SK == "":
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_SK missed")
	}
	return nil
}
// veDEMOCRService is a field-less receiver type for the VEDEM OCR methods
// (getOCRResult, GetTexts); instances are created by newVEDEMOCRService.
type veDEMOCRService struct{}
func (s *veDEMOCRService) getOCRResult(imageBuf *bytes.Buffer) ([]OCRResult, error) {
bodyBuf := &bytes.Buffer{}
@@ -138,32 +205,6 @@ func (s *veDEMOCRService) getOCRResult(imageBuf *bytes.Buffer) ([]OCRResult, err
return ocrResult.OCRResult, nil
}
// getLogID returns the first value of the "X-Tt-Logid" header, or "" when
// the header map is nil/empty or carries no value under that key.
func getLogID(header http.Header) string {
	// "X-Tt-Logid" is already in canonical MIME form, so Header.Get performs
	// the same key lookup as indexing the map directly and yields "" for a
	// missing or empty value list.
	return header.Get("X-Tt-Logid")
}
// OCRText is one piece of text recognized by OCR.
type OCRText struct {
	// Text is the recognized string.
	Text string
	// Rect is the rectangle reported for the text (presumably its bounding
	// box in the source image; confirm against the OCR service).
	Rect image.Rectangle
}

// OCRTexts is an ordered list of OCR results.
type OCRTexts []OCRText

// Texts returns the Text of every entry in order.
// The result is nil when t has no entries.
func (t OCRTexts) Texts() (texts []string) {
	for i := range t {
		texts = append(texts, t[i].Text)
	}
	return texts
}
func (s *veDEMOCRService) GetTexts(imageBuf *bytes.Buffer, options ...DataOption) (
ocrTexts OCRTexts, err error) {
@@ -203,157 +244,74 @@ func (s *veDEMOCRService) GetTexts(imageBuf *bytes.Buffer, options ...DataOption
return
}
func (s *veDEMOCRService) FindText(text string, imageBuf *bytes.Buffer, options ...DataOption) (
rect image.Rectangle, err error) {
ocrTexts, err := s.GetTexts(imageBuf, options...)
if err != nil {
log.Error().Err(err).Msg("GetTexts failed")
return
func checkEnv() error {
if env.VEDEM_OCR_URL == "" {
return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_URL missed")
}
dataOptions := NewDataOptions(options...)
var rects []image.Rectangle
for _, ocrText := range ocrTexts {
rect = ocrText.Rect
// not contains text
if !strings.Contains(ocrText.Text, text) {
continue
}
rects = append(rects, rect)
// contains text while not match exactly
if ocrText.Text != text {
continue
}
// match exactly, and not specify index, return the first one
if dataOptions.Index == 0 {
return rect, nil
}
if env.VEDEM_OCR_AK == "" {
return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_AK missed")
}
if len(rects) == 0 {
return image.Rectangle{}, errors.Wrap(code.OCRTextNotFoundError,
fmt.Sprintf("text %s not found in %v", text, ocrTexts.Texts()))
if env.VEDEM_OCR_SK == "" {
return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_SK missed")
}
// get index
idx := dataOptions.Index
if idx > 0 {
// NOTICE: index start from 1
idx = idx - 1
} else if idx < 0 {
idx = len(rects) + idx
}
// index out of range
if idx >= len(rects) {
return image.Rectangle{}, errors.Wrap(code.OCRTextNotFoundError,
fmt.Sprintf("text %s found %d, index %d out of range", text, len(rects), idx))
}
return rects[idx], nil
return nil
}
func (s *veDEMOCRService) FindTexts(texts []string, imageBuf *bytes.Buffer, options ...DataOption) (
rects []image.Rectangle, err error) {
ocrTexts, err := s.GetTexts(imageBuf, options...)
if err != nil {
log.Error().Err(err).Msg("GetTexts failed")
return
func getLogID(header http.Header) string {
if len(header) == 0 {
return ""
}
var success bool
for _, text := range texts {
var found bool
for _, ocrText := range ocrTexts {
rect := ocrText.Rect
// not contains text
if !strings.Contains(ocrText.Text, text) {
continue
}
found = true
rects = append(rects, rect)
break
}
if !found {
rects = append(rects, image.Rectangle{})
}
success = found || success
logID, ok := header["X-Tt-Logid"]
if !ok || len(logID) == 0 {
return ""
}
if !success {
return rects, errors.Wrap(code.OCRTextNotFoundError,
fmt.Sprintf("texts %s not found in %v", texts, ocrTexts.Texts()))
}
return rects, nil
return logID[0]
}
type OCRService interface {
type IOCRService interface {
GetTexts(imageBuf *bytes.Buffer, options ...DataOption) (ocrTexts OCRTexts, err error)
FindText(text string, imageBuf *bytes.Buffer, options ...DataOption) (rect image.Rectangle, err error)
FindTexts(texts []string, imageBuf *bytes.Buffer, options ...DataOption) (rects []image.Rectangle, err error)
}
func (dExt *DriverExt) GetTextsByOCR(options ...DataOption) (texts OCRTexts, err error) {
func (dExt *DriverExt) GetScreenTextsByOCR(options ...DataOption) (texts OCRTexts, err error) {
var bufSource *bytes.Buffer
if bufSource, err = dExt.TakeScreenShot(builtin.GenNameWithTimestamp("step_%d_ocr")); err != nil {
if bufSource, err = dExt.TakeScreenShot(
builtin.GenNameWithTimestamp("screenshot_%d_ocr")); err != nil {
return
}
ocrTexts, err := dExt.ocrService.GetTexts(bufSource, options...)
ocrTexts, err := dExt.OCRService.GetTexts(bufSource, options...)
if err != nil {
log.Error().Err(err).Msg("GetTexts failed")
log.Error().Err(err).Msg("GetScreenTextsByOCR failed")
return
}
log.Debug().Interface("texts", ocrTexts).Msg("get screen texts by OCR")
return ocrTexts, nil
}
func (dExt *DriverExt) FindTextByOCR(ocrText string, options ...DataOption) (x, y, width, height float64, err error) {
var bufSource *bytes.Buffer
if bufSource, err = dExt.TakeScreenShot(builtin.GenNameWithTimestamp("step_%d_ocr")); err != nil {
func (dExt *DriverExt) FindScreenTextByOCR(text string, options ...DataOption) (point PointF, err error) {
ocrTexts, err := dExt.GetScreenTextsByOCR(options...)
if err != nil {
return
}
rect, err := dExt.ocrService.FindText(ocrText, bufSource, options...)
point, err = ocrTexts.FindText(text, options...)
if err != nil {
log.Warn().Msgf("FindText failed: %s", err.Error())
return
}
log.Info().Str("ocrText", ocrText).
Interface("rect", rect).Msgf("FindTextByOCR success")
x, y, width, height = dExt.MappingToRectInUIKit(rect)
log.Info().Str("text", text).
Interface("point", point).Msgf("FindScreenTextByOCR success")
return
}
func (dExt *DriverExt) FindTextsByOCR(ocrTexts []string, options ...DataOption) (points [][]float64, err error) {
var bufSource *bytes.Buffer
if bufSource, err = dExt.TakeScreenShot(builtin.GenNameWithTimestamp("step_%d_ocr")); err != nil {
return
func getRectangleCenterPoint(rect image.Rectangle) (point PointF) {
x, y := float64(rect.Min.X), float64(rect.Min.Y)
width, height := float64(rect.Dx()), float64(rect.Dy())
point = PointF{
X: x + width*0.5,
Y: y + height*0.5,
}
rects, err := dExt.ocrService.FindTexts(ocrTexts, bufSource, options...)
if err != nil {
log.Warn().Msgf("FindTexts failed: %s", err.Error())
return
}
log.Info().Interface("ocrTexts", ocrTexts).
Interface("rects", rects).Msgf("FindTextsByOCR success")
for _, rect := range rects {
x, y, width, height := dExt.MappingToRectInUIKit(rect)
points = append(points, []float64{x, y, width, height})
}
return
return point
}

View File

@@ -113,25 +113,19 @@ func (dExt *DriverExt) FindAllImageRect(search string) (rects []image.Rectangle,
return
}
func (dExt *DriverExt) FindImageRectInUIKit(imagePath string, options ...DataOption) (x, y, width, height float64, err error) {
func (dExt *DriverExt) FindImageRectInUIKit(imagePath string, options ...DataOption) (point PointF, err error) {
var bufSource, bufSearch *bytes.Buffer
if bufSearch, err = getBufFromDisk(imagePath); err != nil {
return 0, 0, 0, 0, err
}
if bufSource, err = dExt.TakeScreenShot(builtin.GenNameWithTimestamp("step_%d_cv")); err != nil {
return 0, 0, 0, 0, err
return PointF{}, err
}
var rect image.Rectangle
if rect, err = FindImageRectFromRaw(bufSource, bufSearch, float32(dExt.threshold), TemplateMatchMode(dExt.matchMode)); err != nil {
return 0, 0, 0, 0, err
return PointF{}, err
}
// if rect, err = dExt.findImgRect(search); err != nil {
// return 0, 0, 0, 0, err
// }
x, y, width, height = dExt.MappingToRectInUIKit(rect)
return
point = getRectangleCenterPoint(rect)
return point, nil
}
func getBufFromDisk(name string) (*bytes.Buffer, error) {

View File

@@ -17,7 +17,7 @@ func (dExt *DriverExt) FindAllImageRect(search string) (rects []image.Rectangle,
return
}
func (dExt *DriverExt) FindImageRectInUIKit(imagePath string, options ...DataOption) (x, y, width, height float64, err error) {
func (dExt *DriverExt) FindImageRectInUIKit(imagePath string, options ...DataOption) (point PointF, err error) {
log.Fatal().Msg("opencv is not supported")
return
}

View File

@@ -151,7 +151,7 @@ func (dExt *DriverExt) swipeToTapApp(appName string, action MobileAction) error
}
findAppCondition := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY(appName, scopeOption, indexOption)
point, err = d.FindScreenTextByOCR(appName, scopeOption, indexOption)
return err
}
foundAppAction := func(d *DriverExt) error {

View File

@@ -13,7 +13,7 @@ func TestSwipeUntil(t *testing.T) {
var point PointF
findApp := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY("抖音")
point, err = d.FindScreenTextByOCR("抖音")
return err
}
foundAppAction := func(d *DriverExt) error {
@@ -34,7 +34,7 @@ func TestSwipeUntil(t *testing.T) {
findLive := func(d *DriverExt) error {
var err error
point, err = d.GetTextXY("点击进入直播间")
point, err = d.FindScreenTextByOCR("点击进入直播间")
return err
}
foundLiveAction := func(d *DriverExt) error {

View File

@@ -21,53 +21,10 @@ func (dExt *DriverExt) TapXY(x, y float64, options ...DataOption) error {
return dExt.TapAbsXY(x, y, options...)
}
func (dExt *DriverExt) GetTextXY(ocrText string, options ...DataOption) (point PointF, err error) {
x, y, width, height, err := dExt.FindTextByOCR(ocrText, options...)
if err != nil {
return PointF{}, err
}
point = PointF{
X: x + width*0.5,
Y: y + height*0.5,
}
return point, nil
}
func (dExt *DriverExt) GetTextXYs(ocrText []string, options ...DataOption) (points []PointF, err error) {
ps, err := dExt.FindTextsByOCR(ocrText, options...)
if err != nil {
return nil, err
}
for _, point := range ps {
pointF := PointF{
X: point[0] + point[2]*0.5,
Y: point[1] + point[3]*0.5,
}
points = append(points, pointF)
}
return points, nil
}
func (dExt *DriverExt) GetImageXY(imagePath string, options ...DataOption) (point PointF, err error) {
x, y, width, height, err := dExt.FindImageRectInUIKit(imagePath, options...)
if err != nil {
return PointF{}, err
}
point = PointF{
X: x + width*0.5,
Y: y + height*0.5,
}
return point, nil
}
func (dExt *DriverExt) TapByOCR(ocrText string, options ...DataOption) error {
dataOptions := NewDataOptions(options...)
point, err := dExt.GetTextXY(ocrText, options...)
point, err := dExt.FindScreenTextByOCR(ocrText, options...)
if err != nil {
if dataOptions.IgnoreNotFoundError {
return nil
@@ -81,7 +38,7 @@ func (dExt *DriverExt) TapByOCR(ocrText string, options ...DataOption) error {
func (dExt *DriverExt) TapByCV(imagePath string, options ...DataOption) error {
dataOptions := NewDataOptions(options...)
point, err := dExt.GetImageXY(imagePath, options...)
point, err := dExt.FindImageRectInUIKit(imagePath, options...)
if err != nil {
if dataOptions.IgnoreNotFoundError {
return nil
@@ -99,7 +56,7 @@ func (dExt *DriverExt) Tap(param string, options ...DataOption) error {
func (dExt *DriverExt) TapOffset(param string, xOffset, yOffset float64, options ...DataOption) (err error) {
dataOptions := NewDataOptions(options...)
x, y, width, height, err := dExt.FindUIRectInUIKit(param, options...)
point, err := dExt.FindUIRectInUIKit(param, options...)
if err != nil {
if dataOptions.IgnoreNotFoundError {
return nil
@@ -107,7 +64,8 @@ func (dExt *DriverExt) TapOffset(param string, xOffset, yOffset float64, options
return err
}
return dExt.TapAbsXY(x+width*xOffset, y+height*yOffset, options...)
// FIXME: handle offset
return dExt.TapAbsXY(point.X+xOffset, point.Y+yOffset, options...)
}
func (dExt *DriverExt) DoubleTapXY(x, y float64) error {
@@ -126,10 +84,11 @@ func (dExt *DriverExt) DoubleTap(param string) (err error) {
}
func (dExt *DriverExt) DoubleTapOffset(param string, xOffset, yOffset float64) (err error) {
var x, y, width, height float64
if x, y, width, height, err = dExt.FindUIRectInUIKit(param); err != nil {
point, err := dExt.FindUIRectInUIKit(param)
if err != nil {
return err
}
return dExt.Driver.DoubleTapFloat(x+width*xOffset, y+height*yOffset)
// FIXME: handle offset
return dExt.Driver.DoubleTapFloat(point.X+xOffset, point.Y+yOffset)
}

View File

@@ -1,9 +1,50 @@
package uixt
import (
"time"
"github.com/rs/zerolog/log"
)
// VideoCrawlerConfigs holds the parameters of a video crawler run.
// DoAction fills it by round-tripping the action params through JSON, so the
// json tags below define the accepted parameter names.
type VideoCrawlerConfigs struct {
	// Target is an empty placeholder with no json tag.
	// NOTE(review): it carries no data yet; confirm the intended shape.
	Target struct{}

	// AppPackageName is the app to launch before crawling; when empty,
	// VideoCrawler skips the launch step.
	AppPackageName string `json:"app_package_name"`

	// TargetFeedCount and TargetLiveCount are presumably the number of feed
	// and live videos to collect; VideoCrawler does not read them yet.
	TargetFeedCount int `json:"target_feed_count"`
	TargetLiveCount int `json:"target_live_count"`
}
func (dExt *DriverExt) VideoCrawler(params map[string]interface{}) error {
return nil
// VideoCrawler optionally launches the configured app and then repeatedly
// reads the screen texts by OCR and swipes up to the next video.
//
// When configs.AppPackageName is non-empty the app is launched first and the
// crawler waits 5 seconds for it to settle. After every swipe it waits
// another 5 seconds.
//
// NOTE: the loop has no success exit yet; the function only returns when the
// app launch, the OCR call or the swipe fails, and it returns that error.
// The target counts in configs are not consulted.
func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
	// launch app
	if pkg := configs.AppPackageName; pkg != "" {
		if err = dExt.Driver.AppLaunch(pkg); err != nil {
			return err
		}
		time.Sleep(5 * time.Second)
	}

	// loop until target count achieved
	for {
		// take screenshot and get screen texts by OCR; the texts themselves
		// are not used yet, only the error is checked
		if _, ocrErr := dExt.GetScreenTextsByOCR(); ocrErr != nil {
			log.Error().Err(ocrErr).Msg("OCR GetTexts failed")
			return ocrErr
		}

		// TODO: check if text popup exists

		// TODO: check if live video

		// assert feed video type

		// swipe to next video
		err = dExt.SwipeUp()
		if err != nil {
			log.Error().Err(err).Msg("swipe up failed")
			return err
		}
		time.Sleep(5 * time.Second)
	}
}

View File

@@ -0,0 +1,23 @@
//go:build localtest
package uixt
import "testing"
// TestVideoCrawler runs the video crawler against a locally attached Android
// device. The enclosing file is guarded by the "localtest" build tag.
// VideoCrawler only returns on error, so this test ends via t.Fatal.
func TestVideoCrawler(t *testing.T) {
	device, err := NewAndroidDevice()
	if err != nil {
		t.Fatal(err)
	}

	driver, err := device.NewDriver(nil)
	if err != nil {
		t.Fatal(err)
	}

	crawlErr := driver.VideoCrawler(&VideoCrawlerConfigs{
		AppPackageName: "com.ss.android.ugc.aweme",
	})
	if crawlErr != nil {
		t.Fatal(crawlErr)
	}
}