feat: check feed type and incr feed count

This commit is contained in:
lilong.129
2023-05-02 23:29:06 +08:00
parent 1c40f60f4b
commit 7b9e637d87
5 changed files with 130 additions and 10 deletions

View File

@@ -28,7 +28,59 @@
10,
0.3
],
"target_count": 5
"target_count": 5,
"target_labels": [
{
"regex": true,
"scope": [
0,
0.5,
1,
1
],
"text": "^广告$"
},
{
"regex": true,
"scope": [
0,
0.5,
1,
1
],
"text": "^图文$"
},
{
"regex": true,
"scope": [
0,
0.5,
1,
1
],
"text": "^特效\\|"
},
{
"regex": true,
"scope": [
0,
0.5,
1,
1
],
"text": "^模板\\|"
},
{
"regex": true,
"scope": [
0,
0.5,
1,
1
],
"text": "^购物\\|"
}
]
},
"live": {
"sleep_random": [

View File

@@ -23,6 +23,13 @@ func TestAndroidVideoCrawlerTest(t *testing.T) {
"app_package_name": "com.ss.android.ugc.aweme",
"feed": map[string]interface{}{
"target_count": 5,
"target_labels": []map[string]interface{}{
{"text": "^广告$", "scope": []float64{0, 0.5, 1, 1}, "regex": true},
{"text": "^图文$", "scope": []float64{0, 0.5, 1, 1}, "regex": true},
{"text": `^特效\|`, "scope": []float64{0, 0.5, 1, 1}, "regex": true},
{"text": `^模板\|`, "scope": []float64{0, 0.5, 1, 1}, "regex": true},
{"text": `^购物\|`, "scope": []float64{0, 0.5, 1, 1}, "regex": true},
},
"sleep_random": []float64{0, 5, 0.7, 5, 10, 0.3},
},
"live": map[string]interface{}{

View File

@@ -214,7 +214,8 @@ func (wc *WorldCupLive) EnterLive(bundleID string) error {
// 青少年弹窗处理
if ocrTexts, err := wc.driver.GetScreenTextsByOCR(); err == nil {
if points, err := ocrTexts.FindTexts([]string{"青少年模式", "我知道了"}); err == nil {
_ = wc.driver.TapAbsXY(points[1].X, points[1].Y)
point := points[1].Center()
_ = wc.driver.TapAbsXY(point.X, point.Y)
}
}

View File

@@ -12,13 +12,16 @@ import (
// VideoStat accumulates crawl progress for feeds and lives and keeps a
// reference to the crawler configuration that holds the target counts.
//
// Each counter/stat field is declared exactly once; a struct must not repeat
// a field name.
type VideoStat struct {
	// configs supplies Feed.TargetCount / Live.TargetCount and the feed
	// target labels used when classifying a feed.
	configs *VideoCrawlerConfigs

	// FeedCount is the total number of feeds counted so far.
	FeedCount int `json:"feed_count"`
	// FeedStat counts feeds per category (video / image-text / ad / effect /
	// template / shopping). incrFeed keys it by the matched label's Text.
	FeedStat map[string]int `json:"feed_stat"`
	// LiveCount is the total number of lives counted so far.
	LiveCount int `json:"live_count"`
	// LiveStat counts lives per category (showroom / game / e-commerce /
	// multi-person).
	LiveStat map[string]int `json:"live_stat"`
}
func (s *VideoStat) isFeedTargetAchieved() bool {
log.Info().
Int("count", s.FeedCount).
Interface("stat", s.FeedStat).
Int("target", s.configs.Feed.TargetCount).
Msg("current feed count")
@@ -28,6 +31,7 @@ func (s *VideoStat) isFeedTargetAchieved() bool {
func (s *VideoStat) isLiveTargetAchieved() bool {
	// Log live progress. The per-category breakdown attached as "stat" must be
	// the live statistics (LiveStat), not the feed statistics.
	log.Info().
		Int("count", s.LiveCount).
		Interface("stat", s.LiveStat).
		Int("target", s.configs.Live.TargetCount).
		Msg("current live count")
@@ -38,14 +42,55 @@ func (s *VideoStat) isTargetAchieved() bool {
return s.isFeedTargetAchieved() && s.isLiveTargetAchieved()
}
// incrFeed classifies the current feed from its OCR texts and updates the
// statistics: every configured target label found on screen bumps its entry
// in FeedStat (keyed by the label's Text), and FeedCount is always increased
// by one.
//
// texts are the OCR results of the current screen; driverExt is used to turn
// each scope into an absolute-scope search option.
//
// The returned error is currently always nil. A label that is not found, or
// whose scope is malformed, is skipped rather than reported as a failure so
// that the feed is still counted.
func (s *VideoStat) incrFeed(texts OCRTexts, driverExt *DriverExt) error {
	// Writing to a nil map panics; tolerate a VideoStat built without FeedStat.
	if s.FeedStat == nil {
		s.FeedStat = make(map[string]int)
	}

	// feed author: best-effort lookup, only used for logging
	authorOptions := []ActionOption{
		WithRegex(true),
		driverExt.GenAbsScope(0, 0.5, 1, 1).Option(),
	}
	if ocrText, err := texts.FindText("^@", authorOptions...); err == nil {
		log.Info().Str("author", ocrText.Text).Msg("found feed author")
	}

	for _, targetLabel := range s.configs.Feed.TargetLabels {
		scope := targetLabel.Scope
		// A scope needs four values; a label configured without a complete
		// "scope" would otherwise cause an index-out-of-range panic.
		if len(scope) < 4 {
			log.Error().Str("label", targetLabel.Text).
				Interface("scope", scope).Msg("invalid target label scope, skip")
			continue
		}

		labelOptions := []ActionOption{
			WithRegex(targetLabel.Regex),
			driverExt.GenAbsScope(scope[0], scope[1], scope[2], scope[3]).Option(),
		}
		ocrText, err := texts.FindText(targetLabel.Text, labelOptions...)
		if err != nil {
			// label not present on this feed
			continue
		}

		log.Info().Str("label", targetLabel.Text).
			Str("text", ocrText.Text).Msg("found feed success")
		// A missing key reads as zero, so a plain increment is sufficient.
		s.FeedStat[targetLabel.Text]++
	}

	s.FeedCount++
	return nil
}
// TargetLabel describes one on-screen label to look for in OCR results.
type TargetLabel struct {
	// Text is the text (or pattern, when Regex is set) searched for.
	Text string `json:"text"`
	// Scope restricts the search area; its four values are passed in order to
	// GenAbsScope. NOTE(review): the sample configs use values such as
	// 0, 0.5, 1, 1, presumably relative x1, y1, x2, y2 — confirm.
	Scope Scope `json:"scope"`
	// Regex selects regular-expression matching for Text.
	Regex bool `json:"regex"`
}
type FeedConfig struct {
TargetCount int `json:"target_count"`
SleepRandom []interface{} `json:"sleep_random"`
TargetCount int `json:"target_count"`
TargetLabels []TargetLabel `json:"target_labels"`
SleepRandom []interface{} `json:"sleep_random"`
}
type LiveConfig struct {
TargetCount int `json:"target_count"`
SleepRandom []interface{} `json:"sleep_random"`
TargetCount int `json:"target_count"`
TargetLabels []TargetLabel `json:"target_labels"`
SleepRandom []interface{} `json:"sleep_random"`
}
type VideoCrawlerConfigs struct {
@@ -156,6 +201,11 @@ func (l *LiveCrawler) exitLiveRoom() error {
func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
currVideoStat := &VideoStat{
configs: configs,
FeedCount: 0,
FeedStat: make(map[string]int),
LiveCount: 0,
LiveStat: make(map[string]int),
}
defer func() {
dExt.cacheStepData.VideoStat = currVideoStat
@@ -205,9 +255,11 @@ func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
}
}
// TODO: check feed type
// check feed type and incr feed count
if err := currVideoStat.incrFeed(texts, dExt); err != nil {
log.Error().Err(err).Msg("incr feed failed")
}
currVideoStat.FeedCount++
// sleep custom random time
if err := sleepRandom(configs.Feed.SleepRandom); err != nil {
log.Error().Err(err).Msg("sleep random failed")
@@ -225,6 +277,7 @@ func (dExt *DriverExt) VideoCrawler(configs *VideoCrawlerConfigs) (err error) {
log.Error().Err(err).Msg("swipe up failed")
return err
}
time.Sleep(1 * time.Second)
}
return nil

View File

@@ -12,6 +12,13 @@ func TestVideoCrawler(t *testing.T) {
Feed: FeedConfig{
TargetCount: 5,
TargetLabels: []TargetLabel{
{Text: `^广告$`, Scope: Scope{0, 0.5, 1, 1}, Regex: true},
{Text: `^图文$`, Scope: Scope{0, 0.5, 1, 1}, Regex: true},
{Text: `^特效\|`, Scope: Scope{0, 0.5, 1, 1}, Regex: true},
{Text: `^模板\|`, Scope: Scope{0, 0.5, 1, 1}, Regex: true},
{Text: `^购物\|`, Scope: Scope{0, 0.5, 1, 1}, Regex: true},
},
SleepRandom: []interface{}{0, 5, 0.7, 5, 10, 0.3},
},
Live: LiveConfig{