From 7f69052be6289f2ebe23a8116bbc1f7b6f7514bb Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Mon, 17 Mar 2025 15:09:04 +0800 Subject: [PATCH] change: FindScreenText return text rect --- internal/version/VERSION | 2 +- uixt/ai/cv.go | 7 ++----- uixt/driver_ext_screenshot.go | 9 ++++----- uixt/driver_ext_tap.go | 3 ++- uixt/driver_ext_test.go | 9 ++++++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index fe8b7add..cbeff748 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2503171436 +v5.0.0-beta-2503171509 diff --git a/uixt/ai/cv.go b/uixt/ai/cv.go index fa314855..0a6ff51d 100644 --- a/uixt/ai/cv.go +++ b/uixt/ai/cv.go @@ -86,13 +86,10 @@ func (t OCRText) Size() types.Size { } func (t OCRText) Center() PointF { - return getRectangleCenterPoint(t.Rect) -} - -func getRectangleCenterPoint(rect image.Rectangle) (point PointF) { + rect := t.Rect x, y := float64(rect.Min.X), float64(rect.Min.Y) width, height := float64(rect.Dx()), float64(rect.Dy()) - point = PointF{ + point := PointF{ X: x + width*0.5, Y: y + height*0.5, } diff --git a/uixt/driver_ext_screenshot.go b/uixt/driver_ext_screenshot.go index 2ace7882..854baeb6 100644 --- a/uixt/driver_ext_screenshot.go +++ b/uixt/driver_ext_screenshot.go @@ -145,7 +145,7 @@ func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.O return screenResult.Texts, nil } -func (dExt *XTDriver) FindScreenText(text string, opts ...option.ActionOption) (point ai.PointF, err error) { +func (dExt *XTDriver) FindScreenText(text string, opts ...option.ActionOption) (textRect ai.OCRText, err error) { options := option.NewActionOptions(opts...) if options.ScreenShotFileName == "" { opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("find_screen_text_%s", text))) @@ -155,16 +155,15 @@ func (dExt *XTDriver) FindScreenText(text string, opts ...option.ActionOption) ( return } - result, err := ocrTexts.FindText(text, opts...) + textRect, err = ocrTexts.FindText(text, opts...) if err != nil { log.Warn().Msgf("FindText failed: %s", err.Error()) return } - point = result.Center() log.Info().Str("text", text). - Interface("point", point).Msgf("FindScreenText success") - return + Interface("textRect", textRect).Msgf("FindScreenText success") + return textRect, nil } func (dExt *XTDriver) FindUIResult(opts ...option.ActionOption) (point ai.PointF, err error) { diff --git a/uixt/driver_ext_tap.go b/uixt/driver_ext_tap.go index b4e8f151..8881fc38 100644 --- a/uixt/driver_ext_tap.go +++ b/uixt/driver_ext_tap.go @@ -12,13 +12,14 @@ func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error { opts = append(opts, option.WithScreenShotFileName(fmt.Sprintf("tap_by_ocr_%s", text))) } - point, err := dExt.FindScreenText(text, opts...) + textRect, err := dExt.FindScreenText(text, opts...) if err != nil { if actionOptions.IgnoreNotFoundError { return nil } return err } + point := textRect.Center() return dExt.TapAbsXY(point.X, point.Y, opts...) } diff --git a/uixt/driver_ext_test.go b/uixt/driver_ext_test.go index a50465ba..dec94fb2 100644 --- a/uixt/driver_ext_test.go +++ b/uixt/driver_ext_test.go @@ -47,8 +47,8 @@ func TestDriverExt(t *testing.T) { driverExt.TapByOCR("推荐") texts, _ := driverExt.GetScreenTexts() t.Log(texts) - point, _ := driverExt.FindScreenText("hello") - t.Log(point) + textRect, _ := driverExt.FindScreenText("hello") + t.Log(textRect) err := driverExt.TapByCV( option.WithScreenShotUITypes("deepseek_send"), @@ -98,13 +98,14 @@ func TestDriverExt_FindScreenText(t *testing.T) { func TestDriverExt_Seek(t *testing.T) { driver := setupDriverExt(t) - point, err := driver.FindScreenText("首页") + textRect, err := driver.FindScreenText("首页") assert.Nil(t, err) size, err := driver.WindowSize() assert.Nil(t, err) width := size.Width + point := textRect.Center() y := point.Y - 40 for i := 0; i < 5; i++ { err := driver.Swipe(0.5, 0.8, 0.5, 0.2) @@ -180,4 +181,6 @@ func TestDriverExt_Action_Risk(t *testing.T) { err = driver.Swipe(0.5, 0.5, 0.5, 0.9, option.WithSwipeOffset(-50, 50, -50, 50)) assert.Nil(t, err) + + err = driver.TapByOCR("首页", option.WithTapOffset(-10, 10)) }