feat: add popularity data for feed

This commit is contained in:
lilong.129
2023-05-04 17:39:59 +08:00
parent ab8b5b133f
commit 038f4171ba
3 changed files with 53 additions and 21 deletions

View File

@@ -48,9 +48,17 @@ func WithThreshold(threshold float64) CVOption {
}
}
// Popularity holds the popularity counters of a video.
// Every counter is kept as a string; incrFeed fills them from OCR text.
type Popularity struct {
Stars string `json:"stars"` // number of likes
Comments string `json:"comments"` // number of comments
Favorites string `json:"favorites"` // number of favorites
Shares string `json:"shares"` // number of shares
}
// OcrResult groups what was extracted from one image: the OCR texts,
// the image tags and the video popularity data.
// NOTE(review): Texts and Tags are declared twice below with identical
// types and tags — presumably the removed and added sides of the diff
// rendered together; confirm which pair is meant to remain.
type OcrResult struct {
Texts OCRTexts `json:"texts"` // dumped OCRTexts
Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"]
Texts OCRTexts `json:"texts"` // dumped OCRTexts
Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"]
Popularity Popularity `json:"popularity"` // video popularity data
}
type cacheStepData struct {

View File

@@ -52,26 +52,33 @@ func (t OCRTexts) texts() (texts []string) {
return texts
}
// FilterScope returns the OCR texts whose rectangle lies inside scope.
//
// The scope is only applied when it has exactly four elements, which are
// compared as scope[0] <= Rect.Min.X, scope[1] <= Rect.Min.Y,
// Rect.Max.X <= scope[2] and Rect.Max.Y <= scope[3]. With any other length
// no filtering happens and every text is kept. Texts keep their original
// order; the result is nil when nothing is kept.
func (t OCRTexts) FilterScope(scope AbsScope) (results OCRTexts) {
	// Decide once whether the scope is usable instead of on every iteration.
	hasScope := len(scope) == 4

	for i := range t {
		item := t[i]
		if hasScope {
			box := item.Rect
			// A text is rejected as soon as any edge crosses the scope bounds.
			outside := box.Min.X < scope[0] ||
				box.Min.Y < scope[1] ||
				box.Max.X > scope[2] ||
				box.Max.Y > scope[3]
			if outside {
				continue
			}
		}
		results = append(results, item)
	}
	return results
}
func (t OCRTexts) FindText(text string, options ...ActionOption) (
result OCRText, err error) {
actionOptions := NewActionOptions(options...)
var results []OCRText
for _, ocrText := range t {
rect := ocrText.Rect
// check if text in scope
if len(actionOptions.AbsScope) == 4 {
if rect.Min.X < actionOptions.AbsScope[0] ||
rect.Min.Y < actionOptions.AbsScope[1] ||
rect.Max.X > actionOptions.AbsScope[2] ||
rect.Max.Y > actionOptions.AbsScope[3] {
// not in scope
continue
}
}
for _, ocrText := range t.FilterScope(actionOptions.AbsScope) {
if actionOptions.Regex {
// regex on, check if match regex
if !regexp.MustCompile(text).MatchString(ocrText.Text) {

View File

@@ -112,6 +112,21 @@ func (s *VideoStat) incrFeed(ocrResult *OcrResult, driverExt *DriverExt) error {
}
}
// add popularity data for feed
popularityData := ocrResult.Texts.FilterScope(driverExt.GenAbsScope(0.8, 0.5, 1, 0.8))
if len(popularityData) != 4 {
log.Warn().Interface("popularity", popularityData).Msg("get popularity data failed")
} else {
ocrResult.Popularity = Popularity{
Stars: popularityData[0].Text,
Comments: popularityData[1].Text,
Favorites: popularityData[2].Text,
Shares: popularityData[3].Text,
}
log.Info().Interface("popularity", ocrResult.Popularity).
Msg("found feed popularity success")
}
s.FeedCount++
return nil
}
@@ -308,7 +323,6 @@ func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
continue
}
ocrResult := dExt.cacheStepData.OcrResults[imagePath]
ocrResult.Tags = []string{"feed"}
// automatic handling of pop-up windows
if err := dExt.autoPopupHandler(ocrResult); err != nil {
@@ -328,11 +342,14 @@ func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
continue
}
}
}
ocrResult.Tags = []string{"live-preview"}
} else {
ocrResult.Tags = []string{"feed"}
// check feed type and incr feed count
if err := currVideoStat.incrFeed(ocrResult, dExt); err != nil {
log.Error().Err(err).Msg("incr feed failed")
// check feed type and incr feed count
if err := currVideoStat.incrFeed(ocrResult, dExt); err != nil {
log.Error().Err(err).Msg("incr feed failed")
}
}
// sleep custom random time