Files
httprunner/hrp/pkg/uixt/ocr_vedem.go
2023-07-21 10:32:48 +08:00

403 lines
9.6 KiB
Go

package uixt
import (
"bytes"
"fmt"
"image"
"io/ioutil"
"mime/multipart"
"net/http"
"regexp"
"time"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v4/hrp/internal/builtin"
"github.com/httprunner/httprunner/v4/hrp/internal/code"
"github.com/httprunner/httprunner/v4/hrp/internal/env"
"github.com/httprunner/httprunner/v4/hrp/internal/json"
)
// client is the package-level HTTP client used by GetImage to call the image
// service; every request made through it is bounded by a 10-second timeout.
var client = &http.Client{
Timeout: time.Second * 10,
}
// OCRResult is one recognized text entry as decoded from the image service
// JSON response.
type OCRResult struct {
Text string `json:"text"` // recognized text content
Points []PointF `json:"points"` // corner points, ordered top-left -> top-right -> bottom-right -> bottom-left
}
// OCRResults is the list of raw OCR entries returned for a single image.
type OCRResults []OCRResult
// ToOCRTexts converts the raw OCR results into OCRTexts, turning each entry's
// corner points into an image.Rectangle.
//
// Entries carrying fewer than three points cannot describe a bounding box;
// they are skipped with a warning instead of causing an index-out-of-range
// panic. The returned slice is nil when no entry could be converted.
func (o OCRResults) ToOCRTexts() (ocrTexts OCRTexts) {
	for _, ocrResult := range o {
		// Points order: top-left -> top-right -> bottom-right -> bottom-left,
		// so index 0 and index 2 are the two opposite corners of the box.
		if len(ocrResult.Points) < 3 {
			log.Warn().
				Str("text", ocrResult.Text).
				Int("points", len(ocrResult.Points)).
				Msg("skip OCR result with incomplete points")
			continue
		}
		topLeft, bottomRight := ocrResult.Points[0], ocrResult.Points[2]
		ocrTexts = append(ocrTexts, OCRText{
			Text: ocrResult.Text,
			Rect: image.Rectangle{
				Min: image.Point{X: int(topLeft.X), Y: int(topLeft.Y)},
				Max: image.Point{X: int(bottomRight.X), Y: int(bottomRight.Y)},
			},
		})
	}
	return ocrTexts
}
// ImageResult is the recognition payload decoded from the image service
// response for one image.
type ImageResult struct {
imagePath string // local screenshot path; assigned by GetScreenResult after the service call
URL string `json:"url"` // image uploaded url
OCRResult OCRResults `json:"ocrResult"` // OCR texts
LiveType string `json:"liveType"` // live room type
}
// APIResponseImage is the top-level JSON envelope that GetImage decodes from
// the image service response body.
type APIResponseImage struct {
Code int `json:"code"` // service status code; GetImage logs an error when it is non-zero
Message string `json:"message"` // message accompanying the status code
Result ImageResult `json:"result"` // recognition payload
}
// OCRText is a recognized text together with its bounding rectangle.
type OCRText struct {
Text string // recognized text content
Rect image.Rectangle // bounding box; Min is the top-left corner, Max the bottom-right corner
}
// Center returns the center point of the text's bounding rectangle.
func (t OCRText) Center() PointF {
	center := getRectangleCenterPoint(t.Rect)
	return center
}
// OCRTexts is a list of recognized texts with their bounding rectangles.
type OCRTexts []OCRText
// texts returns the plain text of every entry, in order. The result is nil
// when t is empty.
func (t OCRTexts) texts() (texts []string) {
	for i := range t {
		texts = append(texts, t[i].Text)
	}
	return
}
// FilterScope keeps only the texts whose rectangle lies entirely inside scope,
// where scope is read as [minX, minY, maxX, maxY]. When scope does not have
// exactly four elements no filtering is applied and every text is kept.
func (t OCRTexts) FilterScope(scope AbsScope) (results OCRTexts) {
	bounded := len(scope) == 4
	for _, candidate := range t {
		if bounded {
			box := candidate.Rect
			inside := box.Min.X >= scope[0] &&
				box.Min.Y >= scope[1] &&
				box.Max.X <= scope[2] &&
				box.Max.Y <= scope[3]
			if !inside {
				// not in scope
				continue
			}
		}
		results = append(results, candidate)
	}
	return results
}
// FindText returns the OCR text matching text among the entries of t that lie
// inside the scope given by options.
//
// With the Regex option enabled, text is compiled once as a regular expression
// and matched against each entry; otherwise an entry matches only when its
// text equals text exactly. When several entries match, the Index option
// selects one of them; a negative index counts from the end.
//
// It returns an error wrapping code.OCRTextNotFoundError when nothing matches
// or the index is out of range, and an error wrapping the compile failure when
// text is not a valid regular expression (instead of panicking).
func (t OCRTexts) FindText(text string, options ...ActionOption) (
	result OCRText, err error) {
	actionOptions := NewActionOptions(options...)

	// compile the pattern once up front rather than once per candidate
	var pattern *regexp.Regexp
	if actionOptions.Regex {
		pattern, err = regexp.Compile(text)
		if err != nil {
			return OCRText{}, errors.Wrap(err,
				fmt.Sprintf("compile regex %q failed", text))
		}
	}

	var results []OCRText
	for _, ocrText := range t.FilterScope(actionOptions.AbsScope) {
		if pattern != nil {
			// regex on, check if match regex
			if !pattern.MatchString(ocrText.Text) {
				continue
			}
		} else if ocrText.Text != text {
			// regex off, check if match exactly
			continue
		}
		results = append(results, ocrText)
	}

	if len(results) == 0 {
		return OCRText{}, errors.Wrap(code.OCRTextNotFoundError,
			fmt.Sprintf("text %s not found in %v", text, t.texts()))
	}

	// resolve the index; negative values count from the end
	idx := actionOptions.Index
	if idx < 0 {
		idx = len(results) + idx
	}

	// index out of range: report the index the caller actually asked for
	if idx >= len(results) || idx < 0 {
		return OCRText{}, errors.Wrap(code.OCRTextNotFoundError,
			fmt.Sprintf("text %s found %d, index %d out of range",
				text, len(results), actionOptions.Index))
	}

	return results[idx], nil
}
// FindTexts looks up every entry of texts with FindText, using the same
// options for each lookup. It succeeds only when all of them are found, and
// then returns the matches in the order of texts; otherwise it returns an
// error wrapping code.OCRTextNotFoundError.
func (t OCRTexts) FindTexts(texts []string, options ...ActionOption) (
	results OCRTexts, err error) {
	for i := range texts {
		// a failed lookup is not fatal here; it is detected by the count check below
		if found, findErr := t.FindText(texts[i], options...); findErr == nil {
			results = append(results, found)
		}
	}

	if len(results) == len(texts) {
		return results, nil
	}
	return nil, errors.Wrap(code.OCRTextNotFoundError,
		fmt.Sprintf("texts %s not found in %v", texts, t.texts()))
}
// newVEDEMImageService builds a veDEMImageService after verifying the required
// environment settings with checkEnv. When no actions are given, the service
// is configured with the single action "ocr".
func newVEDEMImageService(actions ...string) (*veDEMImageService, error) {
	err := checkEnv()
	if err != nil {
		return nil, err
	}

	service := &veDEMImageService{actions: actions}
	if len(service.actions) == 0 {
		service.actions = []string{"ocr"}
	}
	return service, nil
}
// veDEMImageService implements the IImageService interface. Each entry of
// actions is sent to the service as an "actions" form field by GetImage.
//
// actions:
//
//	ocr - get ocr texts
//	upload - get image uploaded url
//	liveType - get live type
//	popup - get popup windows
//	close - get close popup
type veDEMImageService struct {
actions []string // actions requested on every GetImage call
}
// GetImage uploads the image in imageBuf to the veDEM image service as a
// multipart form, together with the configured actions, and returns the decoded
// recognition result.
//
// The request is attempted up to three times, one second apart. Before each
// retry the request body is rewound, because the previous attempt has already
// consumed it, and the failed attempt's response body is closed so the
// connection is not leaked.
//
// Errors wrap code.OCRRequestError when the request cannot be built, and
// code.OCRResponseError when the response cannot be read, has a non-200 status,
// or cannot be decoded. code.OCRServiceConnectionError is returned when no
// response was received at all. A non-zero service code in an otherwise valid
// response is only logged; the result is still returned.
func (s *veDEMImageService) GetImage(imageBuf *bytes.Buffer) (
	imageResult ImageResult, err error) {
	bodyBuf := &bytes.Buffer{}
	bodyWriter := multipart.NewWriter(bodyBuf)
	for _, action := range s.actions {
		if err = bodyWriter.WriteField("actions", action); err != nil {
			err = errors.Wrap(code.OCRRequestError,
				fmt.Sprintf("write actions field error: %v", err))
			return
		}
	}
	if err = bodyWriter.WriteField("ocrCluster", "highPrecision"); err != nil {
		err = errors.Wrap(code.OCRRequestError,
			fmt.Sprintf("write ocrCluster field error: %v", err))
		return
	}

	formWriter, err := bodyWriter.CreateFormFile("image", "screenshot.png")
	if err != nil {
		err = errors.Wrap(code.OCRRequestError,
			fmt.Sprintf("create form file error: %v", err))
		return
	}
	size, err := formWriter.Write(imageBuf.Bytes())
	if err != nil {
		err = errors.Wrap(code.OCRRequestError,
			fmt.Sprintf("write form error: %v", err))
		return
	}

	// closing the writer appends the terminating multipart boundary
	err = bodyWriter.Close()
	if err != nil {
		err = errors.Wrap(code.OCRRequestError,
			fmt.Sprintf("close body writer error: %v", err))
		return
	}

	req, err := http.NewRequest("POST", env.VEDEM_IMAGE_URL, bodyBuf)
	if err != nil {
		err = errors.Wrap(code.OCRRequestError,
			fmt.Sprintf("construct request error: %v", err))
		return
	}

	signToken := "UNSIGNED-PAYLOAD"
	token := builtin.Sign("auth-v2", env.VEDEM_IMAGE_AK, env.VEDEM_IMAGE_SK, []byte(signToken))

	req.Header.Add("Agw-Auth", token)
	req.Header.Add("Agw-Auth-Content", signToken)
	req.Header.Add("Content-Type", bodyWriter.FormDataContentType())

	const maxAttempts = 3
	var resp *http.Response
	for i := 1; i <= maxAttempts; i++ {
		if i > 1 && req.GetBody != nil {
			// the previous attempt drained req.Body; get a fresh reader over
			// the same bytes so the retry does not send an empty body
			if req.Body, err = req.GetBody(); err != nil {
				err = errors.Wrap(code.OCRRequestError,
					fmt.Sprintf("reset request body error: %v", err))
				return
			}
		}

		start := time.Now()
		resp, err = client.Do(req)
		elapsed := time.Since(start)

		var logID string
		if resp != nil {
			logID = getLogID(resp.Header)
		}
		if err == nil && resp.StatusCode == http.StatusOK {
			log.Debug().
				Str("X-TT-LOGID", logID).
				Int("image_bytes", size).
				Float64("elapsed(s)", elapsed.Seconds()).
				Msg("request OCR service success")
			break
		}
		log.Error().Err(err).
			Str("X-TT-LOGID", logID).
			Int("imageBufSize", size).
			Msgf("request veDEM OCR service failed, retry %d", i)

		if i == maxAttempts {
			// keep the last response so its status and body can be reported
			// below, and do not sleep when no further attempt follows
			break
		}
		if resp != nil {
			// release the failed attempt's connection before retrying
			resp.Body.Close()
		}
		time.Sleep(1 * time.Second)
	}
	if resp == nil {
		err = code.OCRServiceConnectionError
		return
	}

	defer resp.Body.Close()

	results, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		err = errors.Wrap(code.OCRResponseError,
			fmt.Sprintf("read response body error: %v", err))
		return
	}

	if resp.StatusCode != http.StatusOK {
		err = errors.Wrap(code.OCRResponseError,
			fmt.Sprintf("unexpected response status code: %d, results: %v",
				resp.StatusCode, string(results)))
		return
	}

	var imageResponse APIResponseImage
	err = json.Unmarshal(results, &imageResponse)
	if err != nil {
		log.Error().Err(err).
			Str("response", string(results)).
			Msg("json unmarshal veDEM image response body failed")
		err = errors.Wrap(code.OCRResponseError,
			"json unmarshal veDEM image response body error")
		return
	}

	// a non-zero service code is reported in the log only; the decoded result
	// is still handed back to the caller
	if imageResponse.Code != 0 {
		log.Error().
			Int("code", imageResponse.Code).
			Str("message", imageResponse.Message).
			Msg("request veDEM OCR service failed")
	}

	imageResult = imageResponse.Result
	log.Debug().Interface("imageResult", imageResult).Msg("get image data by veDEM")
	return imageResult, nil
}
// checkEnv verifies that the service URL, access key and secret key are all
// configured, in that order, and returns an error wrapping
// code.OCREnvMissedError for the first one that is empty. The URL is logged
// once it is known to be set.
func checkEnv() error {
	if env.VEDEM_IMAGE_URL == "" {
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_IMAGE_URL missed")
	}
	log.Info().Str("VEDEM_IMAGE_URL", env.VEDEM_IMAGE_URL).Msg("get env")

	switch {
	case env.VEDEM_IMAGE_AK == "":
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_IMAGE_AK missed")
	case env.VEDEM_IMAGE_SK == "":
		return errors.Wrap(code.OCREnvMissedError, "VEDEM_IMAGE_SK missed")
	}
	return nil
}
// getLogID returns the first value of the "X-Tt-Logid" header, or an empty
// string when the header is nil, empty, or does not carry that key.
func getLogID(header http.Header) string {
	// "X-Tt-Logid" is already in canonical MIME form, so Get looks up exactly
	// that map key and yields "" when it is absent or has no values.
	return header.Get("X-Tt-Logid")
}
// IImageService abstracts an image recognition backend. veDEMImageService is
// the implementation defined in this file.
type IImageService interface {
// GetImage returns image result including ocr texts, uploaded image url, etc
GetImage(imageBuf *bytes.Buffer) (imageResult ImageResult, err error)
}
// GetScreenResult takes a screenshot, sends it to the configured ImageService
// and returns the image recognition result.
//
// When the service reports an uploaded URL it is recorded in the step cache
// under the screenshot path; the screen result itself is cached under the same
// path. On failure the returned result is nil.
func (dExt *DriverExt) GetScreenResult() (screenResult *ScreenResult, err error) {
	var (
		screenshot *bytes.Buffer
		imagePath  string
	)
	screenshot, imagePath, err = dExt.takeScreenShot(
		builtin.GenNameWithTimestamp("%d_ocr"))
	if err != nil {
		return nil, err
	}

	imageResult, err := dExt.ImageService.GetImage(screenshot)
	if err != nil {
		log.Error().Err(err).Msg("GetImage from ImageService failed")
		return nil, err
	}
	imageResult.imagePath = imagePath

	if uploadedURL := imageResult.URL; uploadedURL != "" {
		dExt.cacheStepData.screenShotsUrls[imagePath] = uploadedURL
		log.Debug().Str("imagePath", imagePath).Str("imageUrl", uploadedURL).Msg("log screenshot")
	}

	// Tags and Popularity start at their zero values; Tags is filled in only
	// when the service reported a live type.
	screenResult = &ScreenResult{
		Texts: imageResult.OCRResult.ToOCRTexts(),
	}
	if liveType := imageResult.LiveType; liveType != "" {
		screenResult.Tags = []string{liveType}
	}

	dExt.cacheStepData.screenResults[imagePath] = screenResult
	return screenResult, nil
}
// GetScreenTexts takes a screenshot via GetScreenResult and returns only the
// recognized texts. On failure the returned texts are nil.
func (dExt *DriverExt) GetScreenTexts() (ocrTexts OCRTexts, err error) {
	var screenResult *ScreenResult
	if screenResult, err = dExt.GetScreenResult(); err != nil {
		return nil, err
	}
	ocrTexts = screenResult.Texts
	return ocrTexts, nil
}
// FindScreenText recognizes the texts on the current screen and returns the
// center point of the entry matching text. The options are passed through
// ParseActionOptions before being handed to FindText. On failure the returned
// point is the zero value.
func (dExt *DriverExt) FindScreenText(text string, options ...ActionOption) (point PointF, err error) {
	ocrTexts, err := dExt.GetScreenTexts()
	if err != nil {
		return point, err
	}

	parsedOptions := dExt.ParseActionOptions(options...)
	matched, err := ocrTexts.FindText(text, parsedOptions...)
	if err != nil {
		log.Warn().Msgf("FindText failed: %s", err.Error())
		return point, err
	}

	point = matched.Center()
	log.Info().Str("text", text).
		Interface("point", point).Msgf("FindScreenText success")
	return point, nil
}
// getRectangleCenterPoint returns the center of rect, computed as the Min
// corner plus half of the rectangle's width and height.
func getRectangleCenterPoint(rect image.Rectangle) (point PointF) {
	origin := rect.Min
	halfWidth := float64(rect.Dx()) * 0.5
	halfHeight := float64(rect.Dy()) * 0.5
	return PointF{
		X: float64(origin.X) + halfWidth,
		Y: float64(origin.Y) + halfHeight,
	}
}