feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: core code struct and impl parse task handle

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: twitter parse video json decode error

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

View File

@@ -0,0 +1,129 @@
package batchtfile
import (
"context"
"fmt"
"io"
"os"
"path"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/retry"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/common/utils/ioutil"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/ctxkey"
"github.com/krau/SaveAny-Bot/pkg/tfile"
"golang.org/x/sync/errgroup"
)
func (t *Task) Execute(ctx context.Context) error {
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("batch_file[%s]", t.ID))
logger.Info("Starting batch file task")
t.Progress.OnStart(ctx, t)
workers := config.Cfg.Workers
eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(workers)
for _, elem := range t.Elems {
eg.Go(func() error {
t.processingMu.RLock()
if t.processing[elem.ID] != nil {
return fmt.Errorf("element with ID %s is already being processed", elem.ID)
}
t.processingMu.RUnlock()
t.processingMu.Lock()
t.processing[elem.ID] = &elem
t.processingMu.Unlock()
defer func() {
t.processingMu.Lock()
delete(t.processing, elem.ID)
t.processingMu.Unlock()
}()
return t.processElement(gctx, elem)
})
}
err := eg.Wait()
if err != nil {
logger.Errorf("Error during batch file processing: %v", err)
} else {
logger.Info("Batch file task completed successfully")
}
t.Progress.OnDone(ctx, t, err)
return err
}
func (t *Task) processElement(ctx context.Context, elem TaskElement) error {
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("file[%s]", elem.File.Name()))
if elem.stream {
pr, pw := io.Pipe()
defer pr.Close()
errg, uploadCtx := errgroup.WithContext(ctx)
errg.Go(func() error {
return elem.Storage.Save(uploadCtx, pr, elem.Path)
})
wr := ioutil.NewProgressWriter(pw, func(n int) {
t.downloaded.Add(int64(n))
t.Progress.OnProgress(ctx, t)
})
errg.Go(func() error {
defer pw.Close()
logger.Info("Starting file download in stream mode")
_, err := tfile.NewDownloader(elem.File).Stream(uploadCtx, wr)
if err != nil {
logger.Errorf("Failed to download file: %v", err)
pw.CloseWithError(err)
}
return err
})
if err := errg.Wait(); err != nil {
return fmt.Errorf("failed to download file in stream mode: %w", err)
}
logger.Info("File downloaded successfully in stream mode")
return nil
}
logger.Info("Starting file download")
localFile, err := fsutil.CreateFile(elem.localPath)
if err != nil {
return fmt.Errorf("failed to create local file: %w", err)
}
defer func() {
if err := localFile.CloseAndRemove(); err != nil {
logger.Errorf("Failed to close local file: %v", err)
}
}()
wrAt := ioutil.NewProgressWriterAt(localFile, func(n int) {
t.downloaded.Add(int64(n))
t.Progress.OnProgress(ctx, t)
})
_, err = tfile.NewDownloader(elem.File).Parallel(ctx, wrAt)
if err != nil {
return fmt.Errorf("failed to download file: %w", err)
}
logger.Info("File downloaded successfully")
if path.Ext(elem.FileName()) == "" {
ext := fsutil.DetectFileExt(elem.localPath)
if ext != "" {
elem.Path = elem.Path + ext
}
}
var fileStat os.FileInfo
fileStat, err = os.Stat(elem.localPath)
if err != nil {
return fmt.Errorf("failed to get file stat: %w", err)
}
vctx := context.WithValue(ctx, ctxkey.ContentLength, fileStat.Size())
err = retry.Retry(func() error {
var file *os.File
file, err = os.Open(elem.localPath)
if err != nil {
return fmt.Errorf("failed to open cache file: %w", err)
}
defer file.Close()
if err = elem.Storage.Save(vctx, file, elem.Path); err != nil {
logger.Errorf("Failed to save file: %s, retrying...", err)
return err
}
return nil
}, retry.Context(vctx), retry.RetryTimes(uint(config.Cfg.Retry)))
return err
}

View File

@@ -0,0 +1,176 @@
package batchtfile
import (
"context"
"errors"
"fmt"
"strconv"
"sync/atomic"
"time"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/slice"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/dlutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
type ProgressTracker interface {
OnStart(ctx context.Context, info TaskInfo)
OnProgress(ctx context.Context, info TaskInfo)
OnDone(ctx context.Context, info TaskInfo, err error)
}
type Progress struct {
MessageID int
ChatID int64
start time.Time
lastUpdatePercent atomic.Int32
}
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
p.start = time.Now()
p.lastUpdatePercent.Store(0)
log.FromContext(ctx).Debugf("Batch task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("开始执行批量下载任务\n总大小: "),
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalSize())/(1024*1024), info.Count())),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
if !shouldUpdateProgress(info.TotalSize(), info.Downloaded(), int(p.lastUpdatePercent.Load())) {
return
}
percent := int((info.Downloaded() * 100) / info.TotalSize())
if p.lastUpdatePercent.Load() == int32(percent) {
return
}
p.lastUpdatePercent.Store(int32(percent))
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.Downloaded(), info.TotalSize())
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("正在处理批量下载任务\n总大小: "),
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalSize())/(1024*1024), info.Count())),
styling.Plain("\n正在处理:\n"),
func() styling.StyledTextOption {
var lines []string
for _, elem := range info.Processing() {
lines = append(lines, fmt.Sprintf(" - %s (%.2f MB)", elem.FileName(), float64(elem.FileSize())/(1024*1024)))
}
if len(lines) == 0 {
lines = append(lines, " - 无")
}
return styling.Plain(slice.Join(lines, "\n"))
}(),
styling.Plain("\n平均速度: "),
styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(info.Downloaded(), p.start)/(1024*1024))),
styling.Plain("\n当前进度: "),
styling.Bold(fmt.Sprintf("%.2f%%", float64(info.Downloaded())/float64(info.TotalSize())*100)),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
if err != nil {
log.FromContext(ctx).Errorf("Batch task %s failed: %s", info.TaskID(), err)
} else {
log.FromContext(ctx).Debugf("Batch task %s completed successfully", info.TaskID())
}
entityBuilder := entity.Builder{}
var stylingErr error
if err != nil {
if errors.Is(err, context.Canceled) {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("任务已取消"),
)
} else {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("处理失败, 错误:\n "),
styling.Code(err.Error()),
)
}
} else {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("处理完成\n文件数: "),
styling.Code(strconv.Itoa(info.Count())),
styling.Plain("\n总大小: "),
styling.Code(fmt.Sprintf("%.2f MB", float64(info.TotalSize())/(1024*1024))),
)
}
if stylingErr != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", stylingErr)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
}
}
func NewProgressTracker(messageID int, chatID int64) ProgressTracker {
return &Progress{
MessageID: messageID,
ChatID: chatID,
}
}

View File

@@ -0,0 +1,98 @@
package batchtfile
import (
"context"
"fmt"
"path/filepath"
"sync"
"sync/atomic"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/tfile"
"github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid"
)
type TaskElement struct {
ID string
Storage storage.Storage
Path string
File tfile.TGFile
localPath string
stream bool
}
type Task struct {
ID string
Ctx context.Context
Elems []TaskElement
Progress ProgressTracker
IgnoreErrors bool // if true, errors during processing will be ignored
downloaded atomic.Int64
totalSize int64
processing map[string]TaskElementInfo
processingMu sync.RWMutex
failed map[string]error // [TODO] errors for each element
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeTgfiles
}
func NewTaskElement(
stor storage.Storage,
path string,
file tfile.TGFile,
) (*TaskElement, error) {
id := xid.New().String()
_, ok := stor.(storage.StorageCannotStream)
if !config.Cfg.Stream || ok {
cachePath, err := filepath.Abs(filepath.Join(config.Cfg.Temp.BasePath, fmt.Sprintf("%s_%s", id, file.Name())))
if err != nil {
return nil, fmt.Errorf("failed to get absolute path for cache: %w", err)
}
return &TaskElement{
ID: id,
Storage: stor,
Path: path,
File: file,
localPath: cachePath,
}, nil
}
return &TaskElement{
ID: id,
Storage: stor,
Path: path,
File: file,
stream: true,
}, nil
}
func NewBatchTGFileTask(
id string,
ctx context.Context,
files []TaskElement,
progress ProgressTracker,
ignoreErrors bool,
) *Task {
task := &Task{
ID: id,
Ctx: ctx,
Elems: files,
Progress: progress,
downloaded: atomic.Int64{},
totalSize: func() int64 {
var total int64
for _, elem := range files {
total += elem.File.Size()
}
return total
}(),
processing: make(map[string]TaskElementInfo),
IgnoreErrors: ignoreErrors,
processingMu: sync.RWMutex{},
failed: make(map[string]error),
}
return task
}

View File

@@ -0,0 +1,56 @@
package batchtfile
type TaskElementInfo interface {
FileName() string
FileSize() int64
StoragePath() string
StorageName() string
}
func (e *TaskElement) FileName() string {
return e.File.Name()
}
func (e *TaskElement) FileSize() int64 {
return e.File.Size()
}
func (e *TaskElement) StoragePath() string {
return e.Path
}
func (e *TaskElement) StorageName() string {
return e.Storage.Name()
}
type TaskInfo interface {
TaskID() string
TotalSize() int64
Downloaded() int64
Count() int
Processing() []TaskElementInfo
}
func (t *Task) TaskID() string {
return t.ID
}
func (t *Task) TotalSize() int64 {
return t.totalSize
}
func (t *Task) Downloaded() int64 {
return t.downloaded.Load()
}
func (t *Task) Count() int {
return len(t.Elems)
}
func (t *Task) Processing() []TaskElementInfo {
processing := make([]TaskElementInfo, 0, len(t.Elems))
for _, elem := range t.processing {
processing = append(processing, elem)
}
return processing
}

View File

@@ -0,0 +1,32 @@
package batchtfile
var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值
stepPercent int // 每多少 % 更新一次
}{
{10 << 20, 100},
{50 << 20, 20},
{200 << 20, 10},
{500 << 20, 5},
}
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
if total <= 0 || downloaded <= 0 {
return false
}
percent := int((downloaded * 100) / total)
if percent <= lastUpdatePercent {
return false
}
step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
for _, lvl := range progressUpdatesLevels {
if total < lvl.size {
step = lvl.stepPercent
break
}
}
return percent >= lastUpdatePercent+step
}

View File

@@ -0,0 +1,139 @@
package parsed
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"path"
"path/filepath"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/retry"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/common/utils/ioutil"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/ctxkey"
"github.com/krau/SaveAny-Bot/pkg/parser"
"golang.org/x/sync/errgroup"
)
func (t *Task) Execute(ctx context.Context) error {
logger := log.FromContext(ctx)
logger.Infof("Starting Parsed item task %s", t.item.Title)
if t.progress != nil {
t.progress.OnStart(ctx, t)
}
eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(config.Cfg.Workers)
for _, resource := range t.item.Resources {
eg.Go(func() error {
t.processingMu.RLock()
if t.processing[resource.ID()] != nil {
return fmt.Errorf("resource %s is already being processed", resource.ID())
}
t.processingMu.RUnlock()
t.processingMu.Lock()
t.processing[resource.ID()] = &resource
t.processingMu.Unlock()
defer func() {
t.processingMu.Lock()
delete(t.processing, resource.URL)
t.processingMu.Unlock()
}()
err := t.processResource(gctx, resource)
t.downloaded.Add(1)
if errors.Is(err, context.Canceled) {
logger.Debug("Resource processing canceled")
return err
}
if err != nil {
logger.Errorf("Error processing resource %s: %v", resource.URL, err)
return fmt.Errorf("failed to process resource %s: %w", resource.URL, err)
}
return nil
})
}
err := eg.Wait()
if err != nil {
logger.Errorf("Error during Parsed item task execution: %v", err)
} else {
logger.Infof("Parsed item task %s completed successfully", t.item.Title)
}
if t.progress != nil {
t.progress.OnDone(ctx, t, err)
}
return err
}
func (t *Task) processResource(ctx context.Context, resource parser.Resource) error {
logger := log.FromContext(ctx)
err := retry.Retry(func() error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, resource.URL, nil)
if err != nil {
return err
}
if resource.Headers != nil {
for k, v := range resource.Headers {
req.Header.Set(k, v)
}
}
resp, err := t.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to download resource %s: %w", resource.URL, err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("failed to download resource %s: %s", resource.URL, resp.Status)
}
ctx = context.WithValue(ctx, ctxkey.ContentLength, func() int64 {
if resource.Size > 0 {
return resource.Size
}
return resp.ContentLength
}())
if t.stream {
return t.Stor.Save(ctx, resp.Body, path.Join(t.StorPath, resource.Filename))
}
cacheFile, err := fsutil.CreateFile(filepath.Join(config.Cfg.Temp.BasePath,
fmt.Sprintf("resource_%s_%s", t.ID, resource.Filename)))
if err != nil {
return fmt.Errorf("failed to create cache file for resource %s: %w", resource.URL, err)
}
defer func() {
if err := cacheFile.CloseAndRemove(); err != nil {
logger.Errorf("Failed to close and remove cache file: %v", err)
}
}()
wr := ioutil.NewProgressWriter(cacheFile, func(n int) {
t.downloadedBytes.Add(int64(n))
if t.progress != nil {
t.progress.OnProgress(ctx, t)
}
})
copyResultCh := make(chan error, 1)
go func() {
_, err := io.Copy(wr, resp.Body)
copyResultCh <- err
}()
select {
case err := <-copyResultCh:
if err != nil {
return fmt.Errorf("failed to copy resource %s to cache file: %w", resource.URL, err)
}
case <-ctx.Done():
return ctx.Err()
}
_, err = cacheFile.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek cache file for resource %s: %w", resource.URL, err)
}
return t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, resource.Filename))
}, retry.Context(ctx), retry.RetryTimes(uint(config.Cfg.Retry)))
if ctx.Err() != nil {
return ctx.Err()
}
return err
}

View File

@@ -0,0 +1,209 @@
package parsed
import (
"context"
"errors"
"fmt"
"sync/atomic"
"time"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/slice"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/dlutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值
stepPercent int // 每多少 % 更新一次
}{
{10 << 20, 100},
{50 << 20, 50},
{200 << 20, 20},
{500 << 20, 10},
}
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
if total <= 0 || downloaded <= 0 {
return false
}
percent := int((downloaded * 100) / total)
if percent <= lastUpdatePercent {
return false
}
step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
for _, lvl := range progressUpdatesLevels {
if total < lvl.size {
step = lvl.stepPercent
break
}
}
return percent >= lastUpdatePercent+step
}
type ProgressTracker interface {
OnStart(ctx context.Context, info TaskInfo)
OnProgress(ctx context.Context, info TaskInfo)
OnDone(ctx context.Context, info TaskInfo, err error)
}
type Progress struct {
MessageID int
ChatID int64
start time.Time
lastUpdatePercent atomic.Int32
}
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
logger := log.FromContext(ctx)
p.start = time.Now()
p.lastUpdatePercent.Store(0)
logger.Debugf("Parsed task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain(fmt.Sprintf("开始下载 %s 的资源\n总大小: ", info.Site())),
styling.Code(fmt.Sprintf("%.2f MB (%d个资源)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
if !shouldUpdateProgress(info.TotalBytes(), info.DownloadedBytes(), int(p.lastUpdatePercent.Load())) {
return
}
percent := int((info.DownloadedBytes() * 100) / info.TotalBytes())
if p.lastUpdatePercent.Load() == int32(percent) {
return
}
p.lastUpdatePercent.Store(int32(percent))
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.DownloadedBytes(), info.TotalBytes())
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("正在下载\n总大小: "),
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())),
styling.Plain("\n正在处理:\n"),
func() styling.StyledTextOption {
var lines []string
for _, elem := range info.Processing() {
lines = append(lines, fmt.Sprintf(" - %s (%.2f MB)", elem.FileName(), float64(elem.FileSize())/(1024*1024)))
}
if len(lines) == 0 {
lines = append(lines, " - 无")
}
return styling.Plain(slice.Join(lines, "\n"))
}(),
styling.Plain("\n平均速度: "),
styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(info.DownloadedBytes(), p.start)/(1024*1024))),
styling.Plain("\n当前进度: "),
styling.Bold(fmt.Sprintf("%.2f%%", float64(info.DownloadedBytes())/float64(info.TotalBytes())*100)),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
logger := log.FromContext(ctx)
if err != nil {
if errors.Is(err, context.Canceled) {
logger.Infof("Parsed task %s was canceled", info.TaskID())
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理已取消: %s", info.TaskID()),
})
}
} else {
logger.Errorf("Parsed task %s failed: %s", info.TaskID(), err)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理失败: %s", err.Error()),
})
}
}
return
}
logger.Infof("Parsed task %s completed successfully", info.TaskID())
entityBuilder := entity.Builder{}
if err := styling.Perform(&entityBuilder,
styling.Plain("处理完成, 资源数量: "),
styling.Code(fmt.Sprintf("%d", info.TotalResources())),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
); err != nil {
logger.Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
}
}
func NewProgress(messageID int, chatID int64) *Progress {
return &Progress{
MessageID: messageID,
ChatID: chatID,
}
}

84
core/tasks/parsed/task.go Normal file
View File

@@ -0,0 +1,84 @@
package parsed
import (
"context"
"net/http"
"sync"
"sync/atomic"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/storage"
)
type Task struct {
ID string
Ctx context.Context
Stor storage.Storage
StorPath string
item *parser.Item
httpClient *http.Client
progress ProgressTracker
stream bool
totalResources int64
downloaded atomic.Int64 // downloaded resources count
totalBytes int64 // total bytes to download
downloadedBytes atomic.Int64 // downloaded bytes count
processing map[string]ResourceInfo
processingMu sync.RWMutex
failed map[string]error // [TODO] errors for each resource
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeParseditem
}
func (t *Task) TaskID() string {
return t.ID
}
func NewTask(
id string,
ctx context.Context,
stor storage.Storage,
storPath string,
item *parser.Item,
progressTracker ProgressTracker,
) *Task {
client := &http.Client{
Transport: &http.Transport{
// [TODO] configure it via config
Proxy: http.ProxyFromEnvironment,
},
}
_, ok := stor.(storage.StorageCannotStream)
stream := config.Cfg.Stream && !ok
return &Task{
ID: id,
Ctx: ctx,
Stor: stor,
StorPath: storPath,
item: item,
totalResources: int64(len(item.Resources)),
downloaded: atomic.Int64{},
totalBytes: func() int64 {
var total int64
for _, res := range item.Resources {
if res.Size < 0 {
continue // skip resources with unknown size
}
total += res.Size
}
return total
}(),
stream: stream,
downloadedBytes: atomic.Int64{},
httpClient: client,
progress: progressTracker,
processing: make(map[string]ResourceInfo),
processingMu: sync.RWMutex{},
failed: make(map[string]error),
}
}

View File

@@ -0,0 +1,51 @@
package parsed
type TaskInfo interface {
TaskID() string
Site() string
TotalResources() int64
Downloaded() int64
TotalBytes() int64
DownloadedBytes() int64
Processing() map[string]ResourceInfo
StorageName() string
StoragePath() string
}
func (t *Task) StoragePath() string {
return t.StorPath
}
func (t *Task) TotalResources() int64 {
return t.totalResources
}
func (t *Task) Downloaded() int64 {
return t.downloaded.Load()
}
func (t *Task) StorageName() string {
return t.Stor.Name()
}
func (t *Task) Site() string {
return t.item.Site
}
func (t *Task) TotalBytes() int64 {
return t.totalBytes
}
func (t *Task) DownloadedBytes() int64 {
return t.downloadedBytes.Load()
}
func (t *Task) Processing() map[string]ResourceInfo {
t.processingMu.RLock()
defer t.processingMu.RUnlock()
return t.processing
}
type ResourceInfo interface {
FileName() string
FileSize() int64
}

View File

@@ -0,0 +1,97 @@
package telegraph
import (
"context"
"fmt"
"io"
"path"
"path/filepath"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/retry"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/config"
"go.uber.org/multierr"
"golang.org/x/sync/errgroup"
)
func (t *Task) Execute(ctx context.Context) error {
logger := log.FromContext(ctx)
logger.Infof("Starting Telegraph task %s", t.PhPath)
t.progress.OnStart(ctx, t)
eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(config.Cfg.Workers)
for i, pic := range t.Pics {
eg.Go(func() error {
err := t.processPic(gctx, pic, i)
if err != nil {
logger.Errorf("Error processing picture %s: %v", pic, err)
return fmt.Errorf("failed to process picture %s: %w", pic, err)
}
t.downloaded.Add(1)
t.progress.OnProgress(gctx, t)
return nil
})
}
err := eg.Wait()
if err != nil {
logger.Errorf("Error during Telegraph task execution: %v", err)
} else {
logger.Infof("Telegraph task %s completed successfully", t.PhPath)
}
t.progress.OnDone(ctx, t, err)
return err
}
func (t *Task) processPic(ctx context.Context, picUrl string, index int) error {
retryOpts := []retry.Option{
retry.Context(ctx),
retry.RetryTimes(uint(config.Cfg.Retry)),
}
var lastErr error
err := retry.Retry(func() error {
var body io.ReadCloser
body, lastErr = t.client.Download(ctx, picUrl)
if lastErr != nil {
lastErr = fmt.Errorf("failed to download picture %s: %w", picUrl, lastErr)
return lastErr
}
defer body.Close()
filename := fmt.Sprintf("%d%s", index+1, path.Ext(picUrl))
if t.cannotStream {
cacheFile, err := fsutil.CreateFile(filepath.Join(config.Cfg.Temp.BasePath,
fmt.Sprintf("tph_%s_%s", t.TaskID(), filename),
))
if err != nil {
lastErr = fmt.Errorf("failed to create cache file for picture %s: %w", filename, err)
return lastErr
}
defer func() {
if err := cacheFile.CloseAndRemove(); err != nil {
logger := log.FromContext(ctx)
logger.Errorf("Failed to close and remove cache file for picture %s: %v", filename, err)
}
}()
_, lastErr = io.Copy(cacheFile, body)
if lastErr != nil {
lastErr = fmt.Errorf("failed to copy picture %s to cache file: %w", filename, lastErr)
return lastErr
}
_, err = cacheFile.Seek(0, 0)
if err != nil {
lastErr = fmt.Errorf("failed to seek cache file for picture %s: %w", filename, err)
return lastErr
}
lastErr = t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, filename))
} else {
lastErr = t.Stor.Save(ctx, body, path.Join(t.StorPath, filename))
}
if lastErr != nil {
lastErr = fmt.Errorf("failed to save picture %s: %w", filename, lastErr)
return lastErr
}
return nil
}, retryOpts...)
return multierr.Combine(err, lastErr)
}

View File

@@ -0,0 +1,150 @@
package telegraph
import (
"context"
"errors"
"fmt"
"github.com/charmbracelet/log"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
type ProgressTracker interface {
OnStart(ctx context.Context, info TaskInfo)
OnProgress(ctx context.Context, info TaskInfo)
OnDone(ctx context.Context, info TaskInfo, err error)
}
type Progress struct {
MessageID int
ChatID int64
}
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
logger := log.FromContext(ctx)
logger.Debugf("Telegraph task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("开始下载Telegraph\n图片数量: "),
styling.Code(fmt.Sprintf("%d", info.TotalPics())),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
if !shouldUpdateProgress(info.Downloaded(), int64(info.TotalPics())) {
return
}
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.Downloaded(), info.TotalPics())
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("正在下载\n当前进度: "),
styling.Code(fmt.Sprintf("%d/%d", info.Downloaded(), info.TotalPics())),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
logger := log.FromContext(ctx)
if err != nil {
if errors.Is(err, context.Canceled) {
logger.Infof("Telegraph task %s was canceled", info.TaskID())
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理已取消: %s", info.TaskID()),
})
}
} else {
logger.Errorf("Telegraph task %s failed: %s", info.TaskID(), err)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理失败: %s", err.Error()),
})
}
}
return
}
logger.Infof("Telegraph task %s completed successfully", info.TaskID())
entityBuilder := entity.Builder{}
if err := styling.Perform(&entityBuilder,
styling.Plain("处理完成\n图片数量: "),
styling.Code(fmt.Sprintf("%d", info.TotalPics())),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
); err != nil {
logger.Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
}
}
func NewProgress(messageID int, chatID int64) *Progress {
return &Progress{
MessageID: messageID,
ChatID: chatID,
}
}

View File

@@ -0,0 +1,56 @@
package telegraph
import (
"context"
"sync/atomic"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/telegraph"
"github.com/krau/SaveAny-Bot/storage"
)
type Task struct {
ID string
Ctx context.Context
PhPath string
Pics []string
Stor storage.Storage
StorPath string
client *telegraph.Client
progress ProgressTracker
cannotStream bool
totalpics int
downloaded atomic.Int64
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeTphpics
}
func NewTask(
id string,
ctx context.Context,
phPath string,
pics []string,
stor storage.Storage,
storPath string,
client *telegraph.Client,
progress ProgressTracker,
) *Task {
_, cannotStream := stor.(storage.StorageCannotStream)
telegraph := &Task{
ID: id,
Ctx: ctx,
PhPath: phPath,
Pics: pics,
Stor: stor,
StorPath: storPath,
client: client,
progress: progress,
cannotStream: cannotStream,
totalpics: len(pics),
downloaded: atomic.Int64{},
}
return telegraph
}

View File

@@ -0,0 +1,34 @@
package telegraph
type TaskInfo interface {
TaskID() string
Phpath() string
TotalPics() int
Downloaded() int64
StorageName() string
StoragePath() string
}
func (t *Task) TaskID() string {
return t.ID
}
func (t *Task) Phpath() string {
return t.PhPath
}
func (t *Task) TotalPics() int {
return t.totalpics
}
func (t *Task) Downloaded() int64 {
return t.downloaded.Load()
}
func (t *Task) StorageName() string {
return t.Stor.Name()
}
func (t *Task) StoragePath() string {
return t.StorPath
}

View File

@@ -0,0 +1,13 @@
package telegraph
func shouldUpdateProgress(downloaded int64, total int64) bool {
if total <= 0 || downloaded <= 0 {
return false
}
step := int64(10)
if downloaded < step {
return downloaded == total
}
return downloaded%step == 0 || downloaded == total
}

View File

@@ -0,0 +1,86 @@
package tfile
import (
"context"
"fmt"
"os"
"path"
"time"
"github.com/charmbracelet/log"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/ctxkey"
"github.com/krau/SaveAny-Bot/pkg/tfile"
)
func (t *Task) Execute(ctx context.Context) error {
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("file[%s]", t.File.Name()))
if t.Progress != nil {
t.Progress.OnStart(ctx, t)
}
if t.stream {
return executeStream(ctx, t)
}
logger.Info("Starting file download")
localFile, err := fsutil.CreateFile(t.localPath)
if err != nil {
return fmt.Errorf("failed to create local file: %w", err)
}
defer func() {
if err := localFile.CloseAndRemove(); err != nil {
logger.Errorf("Failed to close local file: %v", err)
}
}()
wrAt := newWriterAt(ctx, localFile, t.Progress, t)
defer func() {
if t.Progress != nil {
t.Progress.OnDone(ctx, t, err)
}
}()
_, err = tfile.NewDownloader(t.File).Parallel(ctx, wrAt)
if err != nil {
return fmt.Errorf("failed to download file: %w", err)
}
logger.Infof("File downloaded successfully")
if path.Ext(t.File.Name()) == "" {
ext := fsutil.DetectFileExt(t.localPath)
if ext != "" {
t.Path = t.Path + ext
}
}
var fileStat os.FileInfo
fileStat, err = os.Stat(t.localPath)
if err != nil {
return fmt.Errorf("failed to get file stat: %w", err)
}
vctx := context.WithValue(ctx, ctxkey.ContentLength, fileStat.Size())
for i := range config.Cfg.Retry + 1 {
if err = vctx.Err(); err != nil {
return fmt.Errorf("context canceled while saving file: %w", err)
}
var file *os.File
file, err = os.Open(t.localPath)
if err != nil {
return fmt.Errorf("failed to open cache file: %w", err)
}
defer file.Close()
if err = t.Storage.Save(vctx, file, t.Path); err != nil {
if i == config.Cfg.Retry {
return fmt.Errorf("failed to save file: %w", err)
}
logger.Errorf("Failed to save file: %s, retrying...", err)
select {
case <-vctx.Done():
return fmt.Errorf("context canceled during retry delay: %w", vctx.Err())
case <-time.After(time.Duration(i*500) * time.Millisecond):
}
continue
}
return nil
}
return fmt.Errorf("failed to save file after retries")
}

View File

@@ -0,0 +1,186 @@
package tfile
import (
"context"
"errors"
"fmt"
"sync/atomic"
"time"
"github.com/charmbracelet/log"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/dlutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
type ProgressTracker interface {
OnStart(ctx context.Context, info TaskInfo)
OnProgress(ctx context.Context, info TaskInfo, downloaded, total int64)
OnDone(ctx context.Context, info TaskInfo, err error)
}
type Progress struct {
MessageID int
ChatID int64
start time.Time
lastUpdatePercent atomic.Int32
}
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
p.start = time.Now()
p.lastUpdatePercent.Store(0)
log.FromContext(ctx).Debugf("Progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("开始下载\n文件名: "),
styling.Code(info.FileName()),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
styling.Plain("\n文件大小: "),
styling.Code(fmt.Sprintf("%.2f MB", float64(info.FileSize())/(1024*1024))),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo, downloaded, total int64) {
if !shouldUpdateProgress(total, downloaded, int(p.lastUpdatePercent.Load())) {
return
}
percent := int32((downloaded * 100) / total)
if p.lastUpdatePercent.Load() == percent {
return
}
p.lastUpdatePercent.Store(percent)
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.FileName(), downloaded, total)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("正在处理下载任务\n文件名: "),
styling.Code(info.FileName()),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
styling.Plain("\n文件大小: "),
styling.Code(fmt.Sprintf("%.2f MB", float64(total)/(1024*1024))),
styling.Plain("\n平均速度: "),
styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(downloaded, p.start)/(1024*1024))),
styling.Plain("\n当前进度: "),
styling.Bold(fmt.Sprintf("%.2f%%", float64(downloaded)/float64(total)*100)),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
if err != nil {
log.FromContext(ctx).Errorf("Progress error for file [%s]: %v", info.FileName(), err)
} else {
log.FromContext(ctx).Debugf("Progress done for file [%s]", info.FileName())
}
entityBuilder := entity.Builder{}
var stylingErr error
if err != nil {
if errors.Is(err, context.Canceled) {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("任务已取消\n文件名: "),
styling.Code(info.FileName()),
)
} else {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("下载失败\n文件名: "),
styling.Code(info.FileName()),
styling.Plain("\n错误: "),
styling.Bold(err.Error()),
)
}
} else {
stylingErr = styling.Perform(&entityBuilder,
styling.Plain("下载完成\n文件名: "),
styling.Code(info.FileName()),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
)
}
if stylingErr != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", stylingErr)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
}
}
type ProgressOption func(*Progress)
func NewProgressTrack(
messageID int,
chatID int64,
opts ...ProgressOption,
) ProgressTracker {
p := &Progress{
MessageID: messageID,
ChatID: chatID,
}
for _, opt := range opts {
opt(p)
}
return p
}

View File

@@ -0,0 +1,44 @@
package tfile
import (
"context"
"fmt"
"io"
"github.com/charmbracelet/log"
"github.com/krau/SaveAny-Bot/pkg/tfile"
"golang.org/x/sync/errgroup"
)
func executeStream(ctx context.Context, task *Task) error {
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("file[%s]", task.File.Name()))
pr, pw := io.Pipe()
defer pr.Close()
errg, uploadCtx := errgroup.WithContext(ctx)
errg.Go(func() error {
return task.Storage.Save(uploadCtx, pr, task.Path)
})
wr := newWriter(ctx, pw, task.Progress, task)
errg.Go(func() error {
defer pw.Close()
logger.Info("Starting file download in stream mode")
_, err := tfile.NewDownloader(task.File).Stream(uploadCtx, wr)
if err != nil {
logger.Errorf("Failed to download file: %v", err)
pw.CloseWithError(err)
}
return err
})
var err error
defer func() {
if task.Progress != nil {
task.Progress.OnDone(ctx, task, err)
}
}()
if err = errg.Wait(); err != nil {
return err
}
logger.Info("File downloaded successfully in stream mode")
return nil
}

View File

@@ -0,0 +1,29 @@
package tfile
type TaskInfo interface {
TaskID() string
FileName() string
FileSize() int64
StoragePath() string
StorageName() string
}
func (t *Task) TaskID() string {
return t.ID
}
func (t *Task) FileName() string {
return t.File.Name()
}
func (t *Task) FileSize() int64 {
return t.File.Size()
}
func (t *Task) StoragePath() string {
return t.Path
}
func (t *Task) StorageName() string {
return t.Storage.Name()
}

View File

@@ -0,0 +1,64 @@
package tfile
import (
"context"
"fmt"
"path/filepath"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/tfile"
"github.com/krau/SaveAny-Bot/storage"
)
type Task struct {
ID string
Ctx context.Context
File tfile.TGFile
Storage storage.Storage
Path string
Progress ProgressTracker
stream bool // true if the file should be downloaded in stream mode
localPath string
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeTgfiles
}
func NewTGFileTask(
id string,
ctx context.Context,
file tfile.TGFile,
stor storage.Storage,
path string,
progress ProgressTracker,
) (*Task, error) {
_, ok := stor.(storage.StorageCannotStream)
if !config.Cfg.Stream || ok {
cachePath, err := filepath.Abs(filepath.Join(config.Cfg.Temp.BasePath, fmt.Sprintf("%s_%s", id, file.Name())))
if err != nil {
return nil, fmt.Errorf("failed to get absolute path for cache: %w", err)
}
tfile := &Task{
ID: id,
Ctx: ctx,
File: file,
Storage: stor,
Path: path,
Progress: progress,
localPath: cachePath,
}
return tfile, nil
}
tfileTask := &Task{
ID: id,
Ctx: ctx,
File: file,
Storage: stor,
Path: path,
Progress: progress,
stream: true,
}
return tfileTask, nil
}

32
core/tasks/tfile/util.go Normal file
View File

@@ -0,0 +1,32 @@
package tfile
var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值
stepPercent int // 每多少 % 更新一次
}{
{10 << 20, 100},
{50 << 20, 20},
{200 << 20, 10},
{500 << 20, 5},
}
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
if total <= 0 || downloaded <= 0 {
return false
}
percent := int((downloaded * 100) / total)
if percent <= lastUpdatePercent {
return false
}
step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
for _, lvl := range progressUpdatesLevels {
if total < lvl.size {
step = lvl.stepPercent
break
}
}
return percent >= lastUpdatePercent+step
}

View File

@@ -0,0 +1,79 @@
package tfile
import (
"context"
"io"
"sync/atomic"
)
type ProgressWriterAt struct {
ctx context.Context
wrAt io.WriterAt
progress ProgressTracker
downloaded *atomic.Int64
total int64
info TaskInfo
}
func (w *ProgressWriterAt) WriteAt(p []byte, off int64) (int, error) {
at, err := w.wrAt.WriteAt(p, off)
if err != nil {
return 0, err
}
if w.progress != nil {
w.progress.OnProgress(w.ctx, w.info, w.downloaded.Add(int64(at)), w.total)
}
return at, nil
}
func newWriterAt(
ctx context.Context,
wrAt io.WriterAt,
progress ProgressTracker,
taskInfo TaskInfo,
) *ProgressWriterAt {
return &ProgressWriterAt{
ctx: ctx,
progress: progress,
downloaded: &atomic.Int64{},
total: taskInfo.FileSize(),
wrAt: wrAt,
info: taskInfo,
}
}
type ProgressWriter struct {
ctx context.Context
wrAt io.Writer
progress ProgressTracker
downloaded *atomic.Int64
total int64
info TaskInfo
}
func (w *ProgressWriter) Write(p []byte) (int, error) {
at, err := w.wrAt.Write(p)
if err != nil {
return 0, err
}
if w.progress != nil {
w.progress.OnProgress(w.ctx, w.info, w.downloaded.Add(int64(at)), w.total)
}
return at, nil
}
func newWriter(
ctx context.Context,
wr io.Writer,
progress ProgressTracker,
taskInfo TaskInfo,
) *ProgressWriter {
return &ProgressWriter{
ctx: ctx,
progress: progress,
downloaded: &atomic.Int64{},
total: taskInfo.FileSize(),
wrAt: wr,
info: taskInfo,
}
}