change: removeNonAlphanumeric for feed author name

This commit is contained in:
lilong.129
2023-08-19 11:48:29 +08:00
parent 521462f362
commit 5280b6bb5b
2 changed files with 33 additions and 2 deletions

View File

@@ -1,6 +1,8 @@
package uixt
import (
"fmt"
"regexp"
"strings"
"time"
@@ -96,7 +98,7 @@ func (s *VideoStat) incrFeed(screenResult *ScreenResult, driverExt *DriverExt) e
if err != nil {
return errors.Wrap(err, "find feed author failed")
}
author := ocrText.Text
author := fmt.Sprintf("@%s", removeNonAlphanumeric(ocrText.Text))
log.Info().Str("author", author).Msg("found feed author by OCR")
screenResult.Feed.UserName = author
@@ -473,6 +475,14 @@ func getFeedVideo(plugin funplugin.IPlugin, authorName string) (feedVideo *FeedV
return feedVideo, nil
}
// nonAlphanumericRe matches one or more consecutive characters that are
// neither Unicode letters (\p{L}) nor Unicode numbers (\p{N}). It is compiled
// once at package initialization instead of on every call.
var nonAlphanumericRe = regexp.MustCompile(`[^\p{L}\p{N}]+`)

// removeNonAlphanumeric returns input with every character that is not a
// Unicode letter or number removed. Letters of any script are kept (for
// example both Latin and CJK characters), while whitespace, punctuation,
// symbols, emoji and combining marks are dropped.
func removeNonAlphanumeric(input string) string {
	return nonAlphanumericRe.ReplaceAllString(input, "")
}
type FeedVideo struct {
// 视频基础数据
UserName string `json:"user_name"` // 视频作者

View File

@@ -2,7 +2,11 @@
package uixt
import "testing"
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestVideoCrawler(t *testing.T) {
setupAndroid(t)
@@ -30,3 +34,20 @@ func TestVideoCrawler(t *testing.T) {
err := driverExt.VideoCrawler(configs)
checkErr(t, err)
}
// TestRemoveNonAlphanumeric runs removeNonAlphanumeric over a table of sample
// author names and asserts that each result equals the expected string.
func TestRemoveNonAlphanumeric(t *testing.T) {
	type testCase struct {
		input  string
		expect string
	}

	cases := []testCase{
		{input: "@Hello 你好123#@", expect: "Hello你好123"},
		{input: "@夏夏在发呆。", expect: "夏夏在发呆"},
		{input: "@·霖霖", expect: "霖霖"},
		{input: "@我❤️小云朵", expect: "我小云朵"},
	}

	for i := range cases {
		got := removeNonAlphanumeric(cases[i].input)
		assert.Equal(t, cases[i].expect, got)
	}
}