From 41e4d89c95a5ae18481030ac19b8a8bc3018a5f4 Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Tue, 9 May 2023 20:12:14 +0800 Subject: [PATCH] feat: request vedem ocr with uploading image --- examples/uitest/demo_android_live_swipe.json | 23 +++++--- hrp/pkg/uixt/ext.go | 15 +++--- hrp/pkg/uixt/ocr_vedem.go | 57 ++++++++++++++------ hrp/pkg/uixt/ocr_vedem_test.go | 11 ++-- 4 files changed, 71 insertions(+), 35 deletions(-) diff --git a/examples/uitest/demo_android_live_swipe.json b/examples/uitest/demo_android_live_swipe.json index bd79ad18..4c5edadc 100644 --- a/examples/uitest/demo_android_live_swipe.json +++ b/examples/uitest/demo_android_live_swipe.json @@ -45,7 +45,9 @@ { "method": "tap_ocr", "params": "我知道了", - "ignore_NotFoundError": true + "options": { + "ignore_NotFoundError": true + } } ] } @@ -57,8 +59,10 @@ { "method": "swipe_to_tap_text", "params": "点击进入直播间", - "identifier": "进入直播间", - "max_retry_times": 10 + "options": { + "identifier": "进入直播间", + "max_retry_times": 10 + } } ] } @@ -69,7 +73,10 @@ "actions": [ { "method": "swipe", - "params": "up" + "params": "up", + "options": { + + } }, { "method": "sleep_random", @@ -93,7 +100,9 @@ { "method": "swipe", "params": "up", - "identifier": "第一次上划" + "options": { + "identifier": "第一次上划" + } }, { "method": "sleep", @@ -105,7 +114,9 @@ { "method": "swipe", "params": "up", - "identifier": "第二次上划" + "options": { + "identifier": "第二次上划" + } }, { "method": "sleep", diff --git a/hrp/pkg/uixt/ext.go b/hrp/pkg/uixt/ext.go index 13d99aac..b774ffb7 100644 --- a/hrp/pkg/uixt/ext.go +++ b/hrp/pkg/uixt/ext.go @@ -64,7 +64,8 @@ type OcrResult struct { type cacheStepData struct { // cache step screenshot paths - ScreenShots []string + ScreenShots []string + screenShotsUrls map[string]string // map screenshot file path to uploaded url // cache step screenshot ocr results, key is image path, value is OcrResult OcrResults map[string]*OcrResult // cache feed/live video stat @@ -73,6 +74,7 @@ type cacheStepData struct { func (d *cacheStepData) reset() { d.ScreenShots = make([]string, 0) + d.screenShotsUrls = make(map[string]string) d.OcrResults = make(map[string]*OcrResult) d.VideoStat = nil } @@ -93,14 +95,12 @@ type DriverExt struct { func NewDriverExt(device Device, driver WebDriver) (dExt *DriverExt, err error) { dExt = &DriverExt{ - Device: device, - Driver: driver, - cacheStepData: cacheStepData{ - ScreenShots: make([]string, 0), - OcrResults: make(map[string]*OcrResult), - }, + Device: device, + Driver: driver, + cacheStepData: cacheStepData{}, interruptSignal: make(chan os.Signal, 1), } + dExt.cacheStepData.reset() signal.Notify(dExt.interruptSignal, syscall.SIGTERM, syscall.SIGINT) dExt.doneMjpegStream = make(chan bool, 1) @@ -208,6 +208,7 @@ func (dExt *DriverExt) GetStepCacheData() map[string]interface{} { cacheData := make(map[string]interface{}) cacheData["video_stat"] = dExt.cacheStepData.VideoStat cacheData["screenshots"] = dExt.cacheStepData.ScreenShots + cacheData["screenshots_urls"] = dExt.cacheStepData.screenShotsUrls ocrResults := make(map[string]interface{}) for imagePath, ocrResult := range dExt.cacheStepData.OcrResults { diff --git a/hrp/pkg/uixt/ocr_vedem.go b/hrp/pkg/uixt/ocr_vedem.go index 152b7f40..d2b1111f 100644 --- a/hrp/pkg/uixt/ocr_vedem.go +++ b/hrp/pkg/uixt/ocr_vedem.go @@ -31,6 +31,7 @@ type OCRResult struct { type ResponseOCR struct { Code int `json:"code"` Message string `json:"message"` + URL string `json:"url"` // image uploaded url OCRResult []OCRResult `json:"ocrResult"` } @@ -141,33 +142,40 @@ func newVEDEMOCRService() (*veDEMOCRService, error) { // veDEMOCRService implements IOCRService interface type veDEMOCRService struct{} -func (s *veDEMOCRService) getOCRResult(imageBuf *bytes.Buffer) ([]OCRResult, error) { +func (s *veDEMOCRService) getOCRResult(imageBuf *bytes.Buffer) ( + ocrResutls []OCRResult, url string, err error) { + bodyBuf := &bytes.Buffer{} bodyWriter := multipart.NewWriter(bodyBuf) bodyWriter.WriteField("withDet", "true") + bodyWriter.WriteField("upload", "true") // get image uploaded url // bodyWriter.WriteField("timestampOnly", "true") formWriter, err := bodyWriter.CreateFormFile("image", "screenshot.png") if err != nil { - return nil, errors.Wrap(code.OCRRequestError, + err = errors.Wrap(code.OCRRequestError, fmt.Sprintf("create form file error: %v", err)) + return } size, err := formWriter.Write(imageBuf.Bytes()) if err != nil { - return nil, errors.Wrap(code.OCRRequestError, + err = errors.Wrap(code.OCRRequestError, fmt.Sprintf("write form error: %v", err)) + return } err = bodyWriter.Close() if err != nil { - return nil, errors.Wrap(code.OCRRequestError, + err = errors.Wrap(code.OCRRequestError, fmt.Sprintf("close body writer error: %v", err)) + return } req, err := http.NewRequest("POST", env.VEDEM_OCR_URL, bodyBuf) if err != nil { - return nil, errors.Wrap(code.OCRRequestError, + err = errors.Wrap(code.OCRRequestError, fmt.Sprintf("construct request error: %v", err)) + return } token := builtin.Sign("auth-v2", env.VEDEM_OCR_AK, env.VEDEM_OCR_SK, bodyBuf.Bytes()) @@ -195,41 +203,52 @@ func (s *veDEMOCRService) getOCRResult(imageBuf *bytes.Buffer) ([]OCRResult, err log.Error().Err(err). Str("X-TT-LOGID", logID). Int("imageBufSize", size). - Msgf("request OCR service failed, retry %d", i) + Msgf("request veDEM OCR service failed, retry %d", i) time.Sleep(1 * time.Second) } if resp == nil { - return nil, code.OCRServiceConnectionError + err = code.OCRServiceConnectionError + return } defer resp.Body.Close() results, err := ioutil.ReadAll(resp.Body) if err != nil { - return nil, errors.Wrap(code.OCRResponseError, + err = errors.Wrap(code.OCRResponseError, fmt.Sprintf("read response body error: %v", err)) + return } if resp.StatusCode != http.StatusOK { - return nil, errors.Wrap(code.OCRResponseError, + err = errors.Wrap(code.OCRResponseError, fmt.Sprintf("unexpected response status code: %d, results: %v", resp.StatusCode, string(results))) + return } var ocrResult ResponseOCR err = json.Unmarshal(results, &ocrResult) if err != nil { - return nil, errors.Wrap(code.OCRResponseError, + err = errors.Wrap(code.OCRResponseError, fmt.Sprintf("json unmarshal response body error: %v", err)) + return } - return ocrResult.OCRResult, nil + if ocrResult.Code != 0 { + log.Error(). + Int("code", ocrResult.Code). + Str("message", ocrResult.Message). + Msg("request veDEM OCR service failed") + } + + return ocrResult.OCRResult, ocrResult.URL, nil } func (s *veDEMOCRService) GetTexts(imageBuf *bytes.Buffer) ( - ocrTexts OCRTexts, err error) { + ocrTexts OCRTexts, url string, err error) { - ocrResults, err := s.getOCRResult(imageBuf) + ocrResults, url, err := s.getOCRResult(imageBuf) if err != nil { log.Error().Err(err).Msg("getOCRResult failed") return @@ -262,6 +281,7 @@ func checkEnv() error { if env.VEDEM_OCR_URL == "" { return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_URL missed") } + log.Info().Str("VEDEM_OCR_URL", env.VEDEM_OCR_URL).Msg("get env") if env.VEDEM_OCR_AK == "" { return errors.Wrap(code.OCREnvMissedError, "VEDEM_OCR_AK missed") } @@ -284,7 +304,8 @@ func getLogID(header http.Header) string { } type IOCRService interface { - GetTexts(imageBuf *bytes.Buffer) (texts OCRTexts, err error) + // GetTexts returns ocr texts and uploaded image url + GetTexts(imageBuf *bytes.Buffer) (texts OCRTexts, url string, err error) } // GetScreenTextsByOCR takes a screenshot, returns the image path and OCR texts. @@ -295,12 +316,18 @@ func (dExt *DriverExt) GetScreenTextsByOCR() (imagePath string, ocrTexts OCRText return } - ocrTexts, err = dExt.OCRService.GetTexts(bufSource) + var imageUrl string + ocrTexts, imageUrl, err = dExt.OCRService.GetTexts(bufSource) if err != nil { log.Error().Err(err).Msg("GetScreenTextsByOCR failed") return } + if imageUrl != "" { + dExt.cacheStepData.screenShotsUrls[imagePath] = imageUrl + log.Debug().Str("imagePath", imagePath).Str("imageUrl", imageUrl).Msg("log screenshot") + } + dExt.cacheStepData.OcrResults[imagePath] = &OcrResult{ Texts: ocrTexts, } diff --git a/hrp/pkg/uixt/ocr_vedem_test.go b/hrp/pkg/uixt/ocr_vedem_test.go index fa409fc9..afa2262f 100644 --- a/hrp/pkg/uixt/ocr_vedem_test.go +++ b/hrp/pkg/uixt/ocr_vedem_test.go @@ -14,22 +14,19 @@ func checkOCR(buff *bytes.Buffer) error { if err != nil { return err } - ocrResults, err := service.getOCRResult(buff) + ocrResults, url, err := service.getOCRResult(buff) if err != nil { return err } fmt.Println(ocrResults) + fmt.Println(url) return nil } func TestOCRWithScreenshot(t *testing.T) { - device, _ := NewAndroidDevice() - driver, err := device.NewDriver(nil) - if err != nil { - t.Fatal(err) - } + setupAndroid(t) - raw, err := driver.Driver.Screenshot() + raw, err := driverExt.Driver.Screenshot() if err != nil { t.Fatal(err) }