feat: parse url with js plugins support (#96)
* feat: WIP. add parser functionality and text message handling * fix: use json to marshal js result * feat: add metadata handling and version validation for jsParser * refactor: rename parser package to parsers and restructure parser handling * refactor: core code struct and impl parse task handle * feat: impl parsed download * fix: seek cache file when processing tph picture * feat: implement parsed task handling and progress tracking * feat: enhance task processing with concurrency control and progress tracking * feat: add resource ID generation and improve resource processing handling * feat: improve message formatting in parsed text and progress completion * feat: add example js plugin * feat: implement Twitter parser * fix: twitter parse video json decode error * feat: impl stream mode for parse task
This commit is contained in:
129
core/tasks/batchtfile/execute.go
Normal file
129
core/tasks/batchtfile/execute.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package batchtfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/charmbracelet/log"
|
||||
"github.com/duke-git/lancet/v2/retry"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/ioutil"
|
||||
"github.com/krau/SaveAny-Bot/config"
|
||||
"github.com/krau/SaveAny-Bot/pkg/enums/ctxkey"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tfile"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
func (t *Task) Execute(ctx context.Context) error {
|
||||
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("batch_file[%s]", t.ID))
|
||||
logger.Info("Starting batch file task")
|
||||
t.Progress.OnStart(ctx, t)
|
||||
workers := config.Cfg.Workers
|
||||
eg, gctx := errgroup.WithContext(ctx)
|
||||
eg.SetLimit(workers)
|
||||
for _, elem := range t.Elems {
|
||||
eg.Go(func() error {
|
||||
t.processingMu.RLock()
|
||||
if t.processing[elem.ID] != nil {
|
||||
return fmt.Errorf("element with ID %s is already being processed", elem.ID)
|
||||
}
|
||||
t.processingMu.RUnlock()
|
||||
t.processingMu.Lock()
|
||||
t.processing[elem.ID] = &elem
|
||||
t.processingMu.Unlock()
|
||||
defer func() {
|
||||
t.processingMu.Lock()
|
||||
delete(t.processing, elem.ID)
|
||||
t.processingMu.Unlock()
|
||||
}()
|
||||
return t.processElement(gctx, elem)
|
||||
})
|
||||
}
|
||||
err := eg.Wait()
|
||||
if err != nil {
|
||||
logger.Errorf("Error during batch file processing: %v", err)
|
||||
} else {
|
||||
logger.Info("Batch file task completed successfully")
|
||||
}
|
||||
t.Progress.OnDone(ctx, t, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *Task) processElement(ctx context.Context, elem TaskElement) error {
|
||||
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("file[%s]", elem.File.Name()))
|
||||
if elem.stream {
|
||||
pr, pw := io.Pipe()
|
||||
defer pr.Close()
|
||||
errg, uploadCtx := errgroup.WithContext(ctx)
|
||||
errg.Go(func() error {
|
||||
return elem.Storage.Save(uploadCtx, pr, elem.Path)
|
||||
})
|
||||
wr := ioutil.NewProgressWriter(pw, func(n int) {
|
||||
t.downloaded.Add(int64(n))
|
||||
t.Progress.OnProgress(ctx, t)
|
||||
})
|
||||
errg.Go(func() error {
|
||||
defer pw.Close()
|
||||
logger.Info("Starting file download in stream mode")
|
||||
_, err := tfile.NewDownloader(elem.File).Stream(uploadCtx, wr)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to download file: %v", err)
|
||||
pw.CloseWithError(err)
|
||||
}
|
||||
return err
|
||||
})
|
||||
if err := errg.Wait(); err != nil {
|
||||
return fmt.Errorf("failed to download file in stream mode: %w", err)
|
||||
}
|
||||
logger.Info("File downloaded successfully in stream mode")
|
||||
return nil
|
||||
}
|
||||
logger.Info("Starting file download")
|
||||
localFile, err := fsutil.CreateFile(elem.localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create local file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := localFile.CloseAndRemove(); err != nil {
|
||||
logger.Errorf("Failed to close local file: %v", err)
|
||||
}
|
||||
}()
|
||||
wrAt := ioutil.NewProgressWriterAt(localFile, func(n int) {
|
||||
t.downloaded.Add(int64(n))
|
||||
t.Progress.OnProgress(ctx, t)
|
||||
})
|
||||
_, err = tfile.NewDownloader(elem.File).Parallel(ctx, wrAt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download file: %w", err)
|
||||
}
|
||||
logger.Info("File downloaded successfully")
|
||||
if path.Ext(elem.FileName()) == "" {
|
||||
ext := fsutil.DetectFileExt(elem.localPath)
|
||||
if ext != "" {
|
||||
elem.Path = elem.Path + ext
|
||||
}
|
||||
}
|
||||
var fileStat os.FileInfo
|
||||
fileStat, err = os.Stat(elem.localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get file stat: %w", err)
|
||||
}
|
||||
vctx := context.WithValue(ctx, ctxkey.ContentLength, fileStat.Size())
|
||||
err = retry.Retry(func() error {
|
||||
var file *os.File
|
||||
file, err = os.Open(elem.localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open cache file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err = elem.Storage.Save(vctx, file, elem.Path); err != nil {
|
||||
logger.Errorf("Failed to save file: %s, retrying...", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}, retry.Context(vctx), retry.RetryTimes(uint(config.Cfg.Retry)))
|
||||
return err
|
||||
}
|
||||
176
core/tasks/batchtfile/progress.go
Normal file
176
core/tasks/batchtfile/progress.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package batchtfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/charmbracelet/log"
|
||||
"github.com/duke-git/lancet/v2/slice"
|
||||
"github.com/gotd/td/telegram/message/entity"
|
||||
"github.com/gotd/td/telegram/message/styling"
|
||||
"github.com/gotd/td/tg"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/dlutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
)
|
||||
|
||||
type ProgressTracker interface {
|
||||
OnStart(ctx context.Context, info TaskInfo)
|
||||
OnProgress(ctx context.Context, info TaskInfo)
|
||||
OnDone(ctx context.Context, info TaskInfo, err error)
|
||||
}
|
||||
|
||||
type Progress struct {
|
||||
MessageID int
|
||||
ChatID int64
|
||||
start time.Time
|
||||
lastUpdatePercent atomic.Int32
|
||||
}
|
||||
|
||||
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
|
||||
p.start = time.Now()
|
||||
p.lastUpdatePercent.Store(0)
|
||||
log.FromContext(ctx).Debugf("Batch task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
|
||||
entityBuilder := entity.Builder{}
|
||||
var entities []tg.MessageEntityClass
|
||||
if err := styling.Perform(&entityBuilder,
|
||||
styling.Plain("开始执行批量下载任务\n总大小: "),
|
||||
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalSize())/(1024*1024), info.Count())),
|
||||
); err != nil {
|
||||
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
|
||||
return
|
||||
}
|
||||
text, entities := entityBuilder.Complete()
|
||||
req := &tg.MessagesEditMessageRequest{
|
||||
ID: p.MessageID,
|
||||
}
|
||||
req.SetMessage(text)
|
||||
req.SetEntities(entities)
|
||||
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
|
||||
Rows: []tg.KeyboardButtonRow{
|
||||
{
|
||||
Buttons: []tg.KeyboardButtonClass{
|
||||
tgutil.BuildCancelButton(info.TaskID()),
|
||||
},
|
||||
},
|
||||
}},
|
||||
)
|
||||
ext := tgutil.ExtFromContext(ctx)
|
||||
if ext != nil {
|
||||
ext.EditMessage(p.ChatID, req)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
|
||||
if !shouldUpdateProgress(info.TotalSize(), info.Downloaded(), int(p.lastUpdatePercent.Load())) {
|
||||
return
|
||||
}
|
||||
percent := int((info.Downloaded() * 100) / info.TotalSize())
|
||||
if p.lastUpdatePercent.Load() == int32(percent) {
|
||||
return
|
||||
}
|
||||
p.lastUpdatePercent.Store(int32(percent))
|
||||
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.Downloaded(), info.TotalSize())
|
||||
entityBuilder := entity.Builder{}
|
||||
var entities []tg.MessageEntityClass
|
||||
if err := styling.Perform(&entityBuilder,
|
||||
styling.Plain("正在处理批量下载任务\n总大小: "),
|
||||
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalSize())/(1024*1024), info.Count())),
|
||||
styling.Plain("\n正在处理:\n"),
|
||||
func() styling.StyledTextOption {
|
||||
var lines []string
|
||||
for _, elem := range info.Processing() {
|
||||
lines = append(lines, fmt.Sprintf(" - %s (%.2f MB)", elem.FileName(), float64(elem.FileSize())/(1024*1024)))
|
||||
}
|
||||
if len(lines) == 0 {
|
||||
lines = append(lines, " - 无")
|
||||
}
|
||||
return styling.Plain(slice.Join(lines, "\n"))
|
||||
}(),
|
||||
styling.Plain("\n平均速度: "),
|
||||
styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(info.Downloaded(), p.start)/(1024*1024))),
|
||||
styling.Plain("\n当前进度: "),
|
||||
styling.Bold(fmt.Sprintf("%.2f%%", float64(info.Downloaded())/float64(info.TotalSize())*100)),
|
||||
); err != nil {
|
||||
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
|
||||
return
|
||||
}
|
||||
text, entities := entityBuilder.Complete()
|
||||
req := &tg.MessagesEditMessageRequest{
|
||||
ID: p.MessageID,
|
||||
}
|
||||
req.SetMessage(text)
|
||||
req.SetEntities(entities)
|
||||
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
|
||||
Rows: []tg.KeyboardButtonRow{
|
||||
{
|
||||
Buttons: []tg.KeyboardButtonClass{
|
||||
tgutil.BuildCancelButton(info.TaskID()),
|
||||
},
|
||||
},
|
||||
}},
|
||||
)
|
||||
ext := tgutil.ExtFromContext(ctx)
|
||||
if ext != nil {
|
||||
ext.EditMessage(p.ChatID, req)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
|
||||
if err != nil {
|
||||
log.FromContext(ctx).Errorf("Batch task %s failed: %s", info.TaskID(), err)
|
||||
} else {
|
||||
log.FromContext(ctx).Debugf("Batch task %s completed successfully", info.TaskID())
|
||||
}
|
||||
entityBuilder := entity.Builder{}
|
||||
var stylingErr error
|
||||
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
stylingErr = styling.Perform(&entityBuilder,
|
||||
styling.Plain("任务已取消"),
|
||||
)
|
||||
} else {
|
||||
stylingErr = styling.Perform(&entityBuilder,
|
||||
styling.Plain("处理失败, 错误:\n "),
|
||||
styling.Code(err.Error()),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
stylingErr = styling.Perform(&entityBuilder,
|
||||
styling.Plain("处理完成\n文件数: "),
|
||||
styling.Code(strconv.Itoa(info.Count())),
|
||||
styling.Plain("\n总大小: "),
|
||||
styling.Code(fmt.Sprintf("%.2f MB", float64(info.TotalSize())/(1024*1024))),
|
||||
)
|
||||
}
|
||||
|
||||
if stylingErr != nil {
|
||||
log.FromContext(ctx).Errorf("Failed to build entities: %s", stylingErr)
|
||||
return
|
||||
}
|
||||
|
||||
text, entities := entityBuilder.Complete()
|
||||
req := &tg.MessagesEditMessageRequest{
|
||||
ID: p.MessageID,
|
||||
}
|
||||
req.SetMessage(text)
|
||||
req.SetEntities(entities)
|
||||
|
||||
ext := tgutil.ExtFromContext(ctx)
|
||||
if ext != nil {
|
||||
ext.EditMessage(p.ChatID, req)
|
||||
}
|
||||
}
|
||||
|
||||
func NewProgressTracker(messageID int, chatID int64) ProgressTracker {
|
||||
return &Progress{
|
||||
MessageID: messageID,
|
||||
ChatID: chatID,
|
||||
}
|
||||
}
|
||||
98
core/tasks/batchtfile/task.go
Normal file
98
core/tasks/batchtfile/task.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package batchtfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/krau/SaveAny-Bot/config"
|
||||
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tfile"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
"github.com/rs/xid"
|
||||
)
|
||||
|
||||
type TaskElement struct {
|
||||
ID string
|
||||
Storage storage.Storage
|
||||
Path string
|
||||
File tfile.TGFile
|
||||
localPath string
|
||||
stream bool
|
||||
}
|
||||
|
||||
type Task struct {
|
||||
ID string
|
||||
Ctx context.Context
|
||||
Elems []TaskElement
|
||||
Progress ProgressTracker
|
||||
IgnoreErrors bool // if true, errors during processing will be ignored
|
||||
downloaded atomic.Int64
|
||||
totalSize int64
|
||||
processing map[string]TaskElementInfo
|
||||
processingMu sync.RWMutex
|
||||
failed map[string]error // [TODO] errors for each element
|
||||
}
|
||||
|
||||
func (t *Task) Type() tasktype.TaskType {
|
||||
return tasktype.TaskTypeTgfiles
|
||||
}
|
||||
|
||||
func NewTaskElement(
|
||||
stor storage.Storage,
|
||||
path string,
|
||||
file tfile.TGFile,
|
||||
) (*TaskElement, error) {
|
||||
id := xid.New().String()
|
||||
_, ok := stor.(storage.StorageCannotStream)
|
||||
if !config.Cfg.Stream || ok {
|
||||
cachePath, err := filepath.Abs(filepath.Join(config.Cfg.Temp.BasePath, fmt.Sprintf("%s_%s", id, file.Name())))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get absolute path for cache: %w", err)
|
||||
}
|
||||
return &TaskElement{
|
||||
ID: id,
|
||||
Storage: stor,
|
||||
Path: path,
|
||||
File: file,
|
||||
localPath: cachePath,
|
||||
}, nil
|
||||
}
|
||||
return &TaskElement{
|
||||
ID: id,
|
||||
Storage: stor,
|
||||
Path: path,
|
||||
File: file,
|
||||
stream: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NewBatchTGFileTask(
|
||||
id string,
|
||||
ctx context.Context,
|
||||
files []TaskElement,
|
||||
progress ProgressTracker,
|
||||
ignoreErrors bool,
|
||||
) *Task {
|
||||
task := &Task{
|
||||
ID: id,
|
||||
Ctx: ctx,
|
||||
Elems: files,
|
||||
Progress: progress,
|
||||
downloaded: atomic.Int64{},
|
||||
totalSize: func() int64 {
|
||||
var total int64
|
||||
for _, elem := range files {
|
||||
total += elem.File.Size()
|
||||
}
|
||||
return total
|
||||
}(),
|
||||
processing: make(map[string]TaskElementInfo),
|
||||
IgnoreErrors: ignoreErrors,
|
||||
processingMu: sync.RWMutex{},
|
||||
failed: make(map[string]error),
|
||||
}
|
||||
return task
|
||||
}
|
||||
56
core/tasks/batchtfile/taskinfo.go
Normal file
56
core/tasks/batchtfile/taskinfo.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package batchtfile
|
||||
|
||||
type TaskElementInfo interface {
|
||||
FileName() string
|
||||
FileSize() int64
|
||||
StoragePath() string
|
||||
StorageName() string
|
||||
}
|
||||
|
||||
func (e *TaskElement) FileName() string {
|
||||
return e.File.Name()
|
||||
}
|
||||
|
||||
func (e *TaskElement) FileSize() int64 {
|
||||
return e.File.Size()
|
||||
}
|
||||
|
||||
func (e *TaskElement) StoragePath() string {
|
||||
return e.Path
|
||||
}
|
||||
|
||||
func (e *TaskElement) StorageName() string {
|
||||
return e.Storage.Name()
|
||||
}
|
||||
|
||||
type TaskInfo interface {
|
||||
TaskID() string
|
||||
TotalSize() int64
|
||||
Downloaded() int64
|
||||
Count() int
|
||||
Processing() []TaskElementInfo
|
||||
}
|
||||
|
||||
func (t *Task) TaskID() string {
|
||||
return t.ID
|
||||
}
|
||||
|
||||
func (t *Task) TotalSize() int64 {
|
||||
return t.totalSize
|
||||
}
|
||||
|
||||
func (t *Task) Downloaded() int64 {
|
||||
return t.downloaded.Load()
|
||||
}
|
||||
|
||||
func (t *Task) Count() int {
|
||||
return len(t.Elems)
|
||||
}
|
||||
|
||||
func (t *Task) Processing() []TaskElementInfo {
|
||||
processing := make([]TaskElementInfo, 0, len(t.Elems))
|
||||
for _, elem := range t.processing {
|
||||
processing = append(processing, elem)
|
||||
}
|
||||
return processing
|
||||
}
|
||||
32
core/tasks/batchtfile/utils.go
Normal file
32
core/tasks/batchtfile/utils.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package batchtfile
|
||||
|
||||
var progressUpdatesLevels = []struct {
|
||||
size int64 // 文件大小阈值
|
||||
stepPercent int // 每多少 % 更新一次
|
||||
}{
|
||||
{10 << 20, 100},
|
||||
{50 << 20, 20},
|
||||
{200 << 20, 10},
|
||||
{500 << 20, 5},
|
||||
}
|
||||
|
||||
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
|
||||
if total <= 0 || downloaded <= 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
percent := int((downloaded * 100) / total)
|
||||
if percent <= lastUpdatePercent {
|
||||
return false
|
||||
}
|
||||
|
||||
step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
|
||||
for _, lvl := range progressUpdatesLevels {
|
||||
if total < lvl.size {
|
||||
step = lvl.stepPercent
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return percent >= lastUpdatePercent+step
|
||||
}
|
||||
Reference in New Issue
Block a user