feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: restructure core code and implement parse task handling

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: JSON decode error when parsing Twitter videos

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

View File

@@ -0,0 +1,97 @@
package telegraph
import (
"context"
"fmt"
"io"
"path"
"path/filepath"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/retry"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/config"
"go.uber.org/multierr"
"golang.org/x/sync/errgroup"
)
// Execute downloads and stores every picture in t.Pics using a bounded pool of
// workers, reporting lifecycle events to the task's ProgressTracker.
//
// OnStart is called before any work is scheduled, OnProgress after each
// picture that was processed successfully, and OnDone exactly once with the
// final result. The returned error is the first error reported by a worker, or
// nil when every picture was processed.
func (t *Task) Execute(ctx context.Context) error {
	logger := log.FromContext(ctx)
	logger.Infof("Starting Telegraph task %s", t.PhPath)
	t.progress.OnStart(ctx, t)

	// errgroup treats a limit of zero as "no goroutine may ever be added",
	// which would make the first eg.Go call below block forever. Fall back to
	// a single worker in that case; negative values keep meaning "no limit".
	workers := config.Cfg.Workers
	if workers == 0 {
		workers = 1
	}

	eg, gctx := errgroup.WithContext(ctx)
	eg.SetLimit(workers)
	for i, pic := range t.Pics {
		eg.Go(func() error {
			err := t.processPic(gctx, pic, i)
			if err != nil {
				logger.Errorf("Error processing picture %s: %v", pic, err)
				return fmt.Errorf("failed to process picture %s: %w", pic, err)
			}
			t.downloaded.Add(1)
			t.progress.OnProgress(gctx, t)
			return nil
		})
	}

	err := eg.Wait()
	if err != nil {
		logger.Errorf("Error during Telegraph task execution: %v", err)
	} else {
		logger.Infof("Telegraph task %s completed successfully", t.PhPath)
	}
	// Report completion on the caller's context: gctx is always canceled once
	// Wait has returned.
	t.progress.OnDone(ctx, t, err)
	return err
}
// processPic downloads the picture at picUrl and saves it under t.StorPath as
// "<index+1><ext>", where ext is the extension of picUrl. The whole
// download-and-save sequence is retried up to config.Cfg.Retry times and is
// bound to ctx.
//
// When t.cannotStream is set, the picture is first copied into a temporary
// cache file below config.Cfg.Temp.BasePath and saved from there; the cache
// file is closed and removed at the end of every attempt. Otherwise the
// download body is handed to the storage directly.
//
// The result combines the error returned by the retry helper with the error
// of the last attempt; it is nil when an attempt succeeded.
func (t *Task) processPic(ctx context.Context, picUrl string, index int) error {
	// Both names depend only on the arguments, so they are computed once
	// instead of on every attempt.
	filename := fmt.Sprintf("%d%s", index+1, path.Ext(picUrl))
	target := path.Join(t.StorPath, filename)

	var lastErr error
	// fail records e as the outcome of the current attempt and returns it so
	// the retry helper sees the same error.
	fail := func(e error) error {
		lastErr = e
		return e
	}

	attempt := func() error {
		var (
			body  io.ReadCloser
			dlErr error
		)
		body, dlErr = t.client.Download(ctx, picUrl)
		if dlErr != nil {
			return fail(fmt.Errorf("failed to download picture %s: %w", picUrl, dlErr))
		}
		defer body.Close()

		var saveErr error
		if t.cannotStream {
			cachePath := filepath.Join(
				config.Cfg.Temp.BasePath,
				fmt.Sprintf("tph_%s_%s", t.TaskID(), filename),
			)
			cacheFile, createErr := fsutil.CreateFile(cachePath)
			if createErr != nil {
				return fail(fmt.Errorf("failed to create cache file for picture %s: %w", filename, createErr))
			}
			defer func() {
				if cleanupErr := cacheFile.CloseAndRemove(); cleanupErr != nil {
					log.FromContext(ctx).Errorf("Failed to close and remove cache file for picture %s: %v", filename, cleanupErr)
				}
			}()

			if _, copyErr := io.Copy(cacheFile, body); copyErr != nil {
				return fail(fmt.Errorf("failed to copy picture %s to cache file: %w", filename, copyErr))
			}
			// Rewind so the storage reads the cached picture from its start.
			if _, seekErr := cacheFile.Seek(0, 0); seekErr != nil {
				return fail(fmt.Errorf("failed to seek cache file for picture %s: %w", filename, seekErr))
			}
			saveErr = t.Stor.Save(ctx, cacheFile, target)
		} else {
			saveErr = t.Stor.Save(ctx, body, target)
		}
		if saveErr != nil {
			return fail(fmt.Errorf("failed to save picture %s: %w", filename, saveErr))
		}

		// A successful attempt clears whatever an earlier attempt recorded.
		lastErr = nil
		return nil
	}

	retryErr := retry.Retry(
		attempt,
		retry.Context(ctx),
		retry.RetryTimes(uint(config.Cfg.Retry)),
	)
	return multierr.Combine(retryErr, lastErr)
}

View File

@@ -0,0 +1,150 @@
package telegraph
import (
"context"
"errors"
"fmt"
"github.com/charmbracelet/log"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
// ProgressTracker receives lifecycle notifications for a Telegraph task.
// Task.Execute calls OnStart once before scheduling downloads, OnProgress
// after each successfully processed picture, and OnDone once with the final
// result.
type ProgressTracker interface {
// OnStart is called when the task begins.
OnStart(ctx context.Context, info TaskInfo)
// OnProgress is called after a picture has been processed; Execute invokes
// it from its worker goroutines.
OnProgress(ctx context.Context, info TaskInfo)
// OnDone is called when the task has finished; err is nil on success.
OnDone(ctx context.Context, info TaskInfo, err error)
}
// Progress is a ProgressTracker that reports task state by editing one
// existing Telegram message.
type Progress struct {
// MessageID is the ID of the message that gets edited.
MessageID int
// ChatID is the chat passed to EditMessage together with the edit request.
ChatID int64
}
// OnStart edits the tracked message to announce that the download has begun,
// showing the total number of pictures and attaching a cancel button for the
// task. Nothing is sent when the message entities cannot be built or when ctx
// carries no ext context.
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
	logger := log.FromContext(ctx)
	logger.Debugf("Telegraph task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)

	var builder entity.Builder
	buildErr := styling.Perform(&builder,
		styling.Plain("开始下载Telegraph\n图片数量: "),
		styling.Code(fmt.Sprintf("%d", info.TotalPics())),
	)
	if buildErr != nil {
		log.FromContext(ctx).Errorf("Failed to build entities: %s", buildErr)
		return
	}
	text, entities := builder.Complete()

	req := &tg.MessagesEditMessageRequest{ID: p.MessageID}
	req.SetMessage(text)
	req.SetEntities(entities)
	// A single-row inline keyboard holding only the cancel button.
	req.SetReplyMarkup(&tg.ReplyInlineMarkup{
		Rows: []tg.KeyboardButtonRow{
			{Buttons: []tg.KeyboardButtonClass{tgutil.BuildCancelButton(info.TaskID())}},
		},
	})

	if ext := tgutil.ExtFromContext(ctx); ext != nil {
		ext.EditMessage(p.ChatID, req)
	}
}
// OnProgress edits the tracked message to show "downloaded/total" and keeps
// the cancel button attached. Updates are throttled by shouldUpdateProgress,
// so most calls return without touching the message. Nothing is sent when the
// message entities cannot be built or when ctx carries no ext context.
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
	// Snapshot the counters once. Execute calls OnProgress from concurrent
	// workers, so re-reading Downloaded() for the log line and the message
	// could show a different value than the one that passed the throttle.
	downloaded, total := info.Downloaded(), info.TotalPics()
	if !shouldUpdateProgress(downloaded, int64(total)) {
		return
	}

	logger := log.FromContext(ctx)
	logger.Debugf("Progress update: %s, %d/%d", info.TaskID(), downloaded, total)

	var builder entity.Builder
	if err := styling.Perform(&builder,
		styling.Plain("正在下载\n当前进度: "),
		styling.Code(fmt.Sprintf("%d/%d", downloaded, total)),
	); err != nil {
		logger.Errorf("Failed to build entities: %s", err)
		return
	}
	text, entities := builder.Complete()

	req := &tg.MessagesEditMessageRequest{ID: p.MessageID}
	req.SetMessage(text)
	req.SetEntities(entities)
	// A single-row inline keyboard holding only the cancel button.
	req.SetReplyMarkup(&tg.ReplyInlineMarkup{
		Rows: []tg.KeyboardButtonRow{
			{Buttons: []tg.KeyboardButtonClass{tgutil.BuildCancelButton(info.TaskID())}},
		},
	})

	if ext := tgutil.ExtFromContext(ctx); ext != nil {
		ext.EditMessage(p.ChatID, req)
	}
}
// OnDone edits the tracked message with the final outcome of the task.
//
// A non-nil err yields a plain-text notice: a cancellation message when err
// matches context.Canceled, otherwise a failure message carrying the error
// text. On success the message shows the picture count and the storage name
// and path. No edit is sent when ctx carries no ext context.
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
	logger := log.FromContext(ctx)

	if err != nil {
		var notice string
		if errors.Is(err, context.Canceled) {
			logger.Infof("Telegraph task %s was canceled", info.TaskID())
			notice = fmt.Sprintf("处理已取消: %s", info.TaskID())
		} else {
			logger.Errorf("Telegraph task %s failed: %s", info.TaskID(), err)
			notice = fmt.Sprintf("处理失败: %s", err.Error())
		}
		if ext := tgutil.ExtFromContext(ctx); ext != nil {
			ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
				ID:      p.MessageID,
				Message: notice,
			})
		}
		return
	}

	logger.Infof("Telegraph task %s completed successfully", info.TaskID())

	var builder entity.Builder
	buildErr := styling.Perform(&builder,
		styling.Plain("处理完成\n图片数量: "),
		styling.Code(fmt.Sprintf("%d", info.TotalPics())),
		styling.Plain("\n保存路径: "),
		styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
	)
	if buildErr != nil {
		logger.Errorf("Failed to build entities: %s", buildErr)
		return
	}
	text, entities := builder.Complete()

	// No reply markup is set on the final message.
	req := &tg.MessagesEditMessageRequest{ID: p.MessageID}
	req.SetMessage(text)
	req.SetEntities(entities)

	if ext := tgutil.ExtFromContext(ctx); ext != nil {
		ext.EditMessage(p.ChatID, req)
	}
}
// NewProgress returns a Progress that reports by editing the message
// identified by messageID in the chat identified by chatID.
func NewProgress(messageID int, chatID int64) *Progress {
	p := new(Progress)
	p.MessageID = messageID
	p.ChatID = chatID
	return p
}

View File

@@ -0,0 +1,56 @@
package telegraph
import (
"context"
"sync/atomic"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/telegraph"
"github.com/krau/SaveAny-Bot/storage"
)
// Task downloads the pictures of one Telegraph page and saves them to a
// storage backend. Create it with NewTask.
type Task struct {
// ID identifies the task; it is returned by TaskID.
ID string
// Ctx is the context handed to NewTask.
// NOTE(review): Execute works with its own ctx argument, not this field.
Ctx context.Context
// PhPath is the value returned by Phpath and used in log messages;
// presumably the path of the Telegraph page — confirm against callers.
PhPath string
// Pics lists the picture URLs to download, in output order.
Pics []string
// Stor is the storage backend the pictures are saved to.
Stor storage.Storage
// StorPath is the directory inside Stor that receives the pictures.
StorPath string
// client performs the picture downloads.
client *telegraph.Client
// progress receives start/progress/done notifications from Execute.
progress ProgressTracker
// cannotStream is true when Stor implements storage.StorageCannotStream;
// pictures are then written to a temporary cache file before being saved.
cannotStream bool
// totalpics is len(Pics) captured at construction time.
totalpics int
// downloaded counts the pictures processed successfully so far.
downloaded atomic.Int64
}
// Type reports the task kind, which is always tasktype.TaskTypeTphpics.
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeTphpics
}
// NewTask builds a Task that downloads pics with client and saves them under
// storPath in stor, reporting to progress.
//
// The picture total is fixed to len(pics) at construction time. If stor
// implements storage.StorageCannotStream, the task is flagged so that each
// picture is written to a temporary cache file before it is saved.
func NewTask(
	id string,
	ctx context.Context,
	phPath string,
	pics []string,
	stor storage.Storage,
	storPath string,
	client *telegraph.Client,
	progress ProgressTracker,
) *Task {
	_, needsCache := stor.(storage.StorageCannotStream)
	// The download counter is left at its zero value.
	return &Task{
		ID:           id,
		Ctx:          ctx,
		PhPath:       phPath,
		Pics:         pics,
		Stor:         stor,
		StorPath:     storPath,
		client:       client,
		progress:     progress,
		cannotStream: needsCache,
		totalpics:    len(pics),
	}
}

View File

@@ -0,0 +1,34 @@
package telegraph
// TaskInfo is the read-only view of a Telegraph task that is handed to a
// ProgressTracker.
type TaskInfo interface {
// TaskID returns the task's identifier.
TaskID() string
// Phpath returns the task's PhPath; presumably the path of the Telegraph
// page — confirm against callers.
Phpath() string
// TotalPics returns the number of pictures the task has to process.
TotalPics() int
// Downloaded returns the number of pictures processed successfully so far.
Downloaded() int64
// StorageName returns the name of the destination storage.
StorageName() string
// StoragePath returns the destination path inside the storage.
StoragePath() string
}
// TaskID returns the task's ID field.
func (t *Task) TaskID() string {
return t.ID
}
// Phpath returns the task's PhPath field.
func (t *Task) Phpath() string {
return t.PhPath
}
// TotalPics returns the picture count recorded when the task was created.
func (t *Task) TotalPics() int {
return t.totalpics
}
// Downloaded returns the current value of the atomic counter of successfully
// processed pictures.
func (t *Task) Downloaded() int64 {
return t.downloaded.Load()
}
// StorageName returns the name reported by the task's storage backend.
func (t *Task) StorageName() string {
return t.Stor.Name()
}
// StoragePath returns the task's StorPath field.
func (t *Task) StoragePath() string {
return t.StorPath
}

View File

@@ -0,0 +1,13 @@
package telegraph
// shouldUpdateProgress reports whether a progress message should be sent for
// the given counters. It returns false when either value is not positive;
// otherwise it returns true when downloaded equals total or is a multiple of
// ten.
func shouldUpdateProgress(downloaded int64, total int64) bool {
	const step int64 = 10
	switch {
	case total <= 0, downloaded <= 0:
		return false
	case downloaded == total:
		// Always report the final picture, whatever the count.
		return true
	default:
		return downloaded%step == 0
	}
}