feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: core code struct and impl parse task handle

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: twitter parse video json decode error

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

View File

@@ -72,7 +72,9 @@ func handleAddCallback(ctx *ext.Context, update *ext.Update) error {
}
return shortcut.CreateAndAddTGFileTaskWithEdit(ctx, userID, selectedStorage, dirPath, data.Files[0], msgID)
case tasktype.TaskTypeTphpics:
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID)
return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID)
case tasktype.TaskTypeParseditem:
shortcut.CreateAndAddParsedTaskWithEdit(ctx, selectedStorage, dirPath, data.ParsedItem, msgID, userID)
default:
log.FromContext(ctx).Errorf("Unsupported task type: %s", data.TaskType)
}

View File

@@ -0,0 +1,104 @@
// 处理任意文本消息, 用于通用地从外部源下载文件
package handlers
import (
"errors"
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
"github.com/charmbracelet/log"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut"
"github.com/krau/SaveAny-Bot/parsers"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
"github.com/krau/SaveAny-Bot/storage"
)
func handleTextMessage(ctx *ext.Context, u *ext.Update) error {
logger := log.FromContext(ctx)
text := u.EffectiveMessage.Text
item, err := parsers.ParseWithContext(ctx, text)
if errors.Is(err, parsers.ErrNoParserFound) {
return dispatcher.EndGroups
}
if err != nil {
logger.Error("Failed to parse text", "error", err)
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
return dispatcher.EndGroups
}
logger.Debug("Parsed item from text message", "text", text, "item", item)
userID := u.GetUserChat().GetID()
markup, err := msgelem.BuildAddSelectStorageKeyboard(storage.GetUserStorages(ctx, userID), tcbdata.Add{
TaskType: tasktype.TaskTypeParseditem,
ParsedItem: item,
})
if err != nil {
logger.Errorf("Failed to build storage selection keyboard: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build storage selection keyboard: "+err.Error()), nil)
return dispatcher.EndGroups
}
text, entities, err := msgelem.BuildParsedTextEntity(*item)
if err != nil {
logger.Errorf("Failed to build parsed text entity: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
return dispatcher.EndGroups
}
ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
Message: text,
ReplyMarkup: markup,
Entities: entities,
ReplyTo: &tg.InputReplyToMessage{
ReplyToMsgID: u.EffectiveMessage.ID,
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
},
})
return dispatcher.EndGroups
}
func handleSilentSaveText(ctx *ext.Context, u *ext.Update) error {
logger := log.FromContext(ctx)
stor := storage.FromContext(ctx)
if stor == nil {
logger.Warn("Context storage is nil")
ctx.Reply(u, ext.ReplyTextString("未找到存储"), nil)
return dispatcher.EndGroups
}
text := u.EffectiveMessage.Text
if text == "" {
return dispatcher.EndGroups
}
item, err := parsers.ParseWithContext(ctx, text)
if errors.Is(err, parsers.ErrNoParserFound) {
return dispatcher.EndGroups
}
if err != nil {
logger.Error("Failed to parse text", "error", err)
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
return dispatcher.EndGroups
}
logger.Debug("Parsed item from text message", "text", text, "item", item)
userID := u.GetUserChat().GetID()
text, entities, err := msgelem.BuildParsedTextEntity(*item)
if err != nil {
logger.Errorf("Failed to build parsed text entity: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
return dispatcher.EndGroups
}
msg, err := ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
Message: text,
Entities: entities,
ReplyTo: &tg.InputReplyToMessage{
ReplyToMsgID: u.EffectiveMessage.ID,
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
},
})
if err != nil {
logger.Errorf("Failed to send message: %s", err)
return dispatcher.EndGroups
}
return shortcut.CreateAndAddParsedTaskWithEdit(ctx, stor, "", item, msg.ID, userID)
}

View File

@@ -16,7 +16,7 @@ import (
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tftask"
"github.com/krau/SaveAny-Bot/core/tasks/tfile"
"github.com/krau/SaveAny-Bot/database"
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
"github.com/krau/SaveAny-Bot/storage"
@@ -54,6 +54,7 @@ func Register(disp dispatcher.Dispatcher) {
}
disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia)))
disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText)))
if config.Cfg.Telegram.Userbot.Enable {
go listenMediaMessageEvent(userclient.GetMediaMessageCh())
@@ -122,7 +123,7 @@ func listenMediaMessageEvent(ch chan userclient.MediaMessageEvent) {
storagePath := stor.JoinStoragePath(path.Join(dirPath, file.Name()))
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
taskid := xid.New().String()
task, err := tftask.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil)
task, err := tfile.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil)
if err != nil {
logger.Errorf("create task failed: %s", err)
continue

View File

@@ -71,6 +71,6 @@ func handleSilentSaveTelegraph(ctx *ext.Context, update *ext.Update) error {
return err
}
userID := update.GetUserChat().GetID()
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID)
return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID)
}

View File

@@ -0,0 +1,38 @@
package msgelem
import (
"fmt"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
func BuildParsedTextEntity(item parser.Item) (string, []tg.MessageEntityClass, error) {
eb := entity.Builder{}
if err := styling.Perform(&eb,
styling.Bold(fmt.Sprintf("[%s]%s", item.Site, item.Title)),
styling.Plain("\n链接: "),
styling.Code(item.URL),
styling.Plain("\n作者: "),
styling.Code(item.Author),
styling.Plain("\n描述: "),
styling.Code(item.Description),
styling.Plain("\n文件数量: "),
styling.Code(fmt.Sprintf("%d", len(item.Resources))),
styling.Plain("\n预计总大小: "),
styling.Code(fmt.Sprintf("%.2f MB", func() float64 {
var totalSize int64
for _, res := range item.Resources {
totalSize += res.Size
}
return float64(totalSize) / 1024 / 1024
}())),
styling.Plain("\n请选择存储位置"),
); err != nil {
return "", nil, fmt.Errorf("构建消息失败: %w", err)
}
text, entities := eb.Complete()
return text, entities, nil
}

View File

@@ -24,6 +24,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
taskType = tasktype.TaskTypeTgfiles
} else if adddata.TphPageNode != nil {
taskType = tasktype.TaskTypeTphpics
} else if adddata.ParsedItem != nil {
taskType = tasktype.TaskTypeParseditem
} else {
return nil, fmt.Errorf("unknown task type: %s", taskType)
}
@@ -41,6 +43,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
TphPageNode: adddata.TphPageNode,
TphPics: adddata.TphPics,
TphDirPath: adddata.TphDirPath,
ParsedItem: adddata.ParsedItem,
}
dataid := xid.New().String()
err := cache.Set(dataid, data)

View File

@@ -32,8 +32,7 @@ func GetFileFromMessageWithReply(ctx *ext.Context, update *ext.Update, message *
media := message.Media
supported := mediautil.IsSupported(media)
if !supported {
ctx.Reply(update, ext.ReplyTextString("不支持的消息类型"), nil)
return nil, nil, dispatcher.EndGroups
return nil, nil, dispatcher.ContinueGroups
}
replied, err = ctx.Reply(update, ext.ReplyTextString("正在获取文件信息..."), nil)

View File

@@ -0,0 +1,35 @@
package shortcut
import (
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
"github.com/charmbracelet/log"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tasks/parsed"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid"
)
func CreateAndAddParsedTaskWithEdit(ctx *ext.Context, stor storage.Storage, dirPath string, item *parser.Item, msgID int, userID int64) error {
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
task := parsed.NewTask(xid.New().String(), injectCtx, stor, stor.JoinStoragePath(dirPath), item, parsed.NewProgress(msgID, userID))
if err := core.AddTask(injectCtx, task); err != nil {
log.FromContext(ctx).Errorf("Failed to add task: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
ID: msgID,
Message: "任务添加失败: " + err.Error(),
})
return dispatcher.EndGroups
}
text, entities := msgelem.BuildTaskAddedEntities(ctx, item.Title, core.GetLength(ctx))
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
ID: msgID,
Message: text,
Entities: entities,
})
return dispatcher.EndGroups
}

View File

@@ -13,15 +13,15 @@ import (
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/batchtftask"
"github.com/krau/SaveAny-Bot/core/tftask"
"github.com/krau/SaveAny-Bot/core/tasks/batchtfile"
tftask "github.com/krau/SaveAny-Bot/core/tasks/tfile"
"github.com/krau/SaveAny-Bot/database"
"github.com/krau/SaveAny-Bot/pkg/tfile"
"github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid"
)
// 创建一个 tftask.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
// 创建一个 tfile.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, file tfile.TGFileMessage, trackMsgID int) error {
logger := log.FromContext(ctx)
user, err := database.GetUserByChatID(ctx, userID)
@@ -82,7 +82,7 @@ func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage
return dispatcher.EndGroups
}
// 创建一个 batchtftask.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
// 创建一个 batchtfile.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, files []tfile.TGFileMessage, trackMsgID int) error {
logger := log.FromContext(ctx)
user, err := database.GetUserByChatID(ctx, userID)
@@ -110,7 +110,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
return storname, dirP
}
elems := make([]batchtftask.TaskElement, 0, len(files))
elems := make([]batchtfile.TaskElement, 0, len(files))
type albumFile struct {
file tfile.TGFileMessage
storage storage.Storage
@@ -132,7 +132,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
}
if !dirPath.NeedNewForAlbum() {
storPath := fileStor.JoinStoragePath(path.Join(dirPath.String(), file.Name()))
elem, err := batchtftask.NewTaskElement(fileStor, storPath, file)
elem, err := batchtfile.NewTaskElement(fileStor, storPath, file)
if err != nil {
logger.Errorf("Failed to create task element: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
@@ -167,7 +167,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
albumStor := afiles[0].storage
for _, af := range afiles {
afstorPath := af.storage.JoinStoragePath(path.Join(dirPath, albumDir, af.file.Name()))
elem, err := batchtftask.NewTaskElement(albumStor, afstorPath, af.file)
elem, err := batchtfile.NewTaskElement(albumStor, afstorPath, af.file)
if err != nil {
logger.Errorf("Failed to create task element for album file: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
@@ -182,7 +182,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
taskid := xid.New().String()
task := batchtftask.NewBatchTGFileTask(taskid, injectCtx, elems, batchtftask.NewProgressTracker(trackMsgID, userID), true)
task := batchtfile.NewBatchTGFileTask(taskid, injectCtx, elems, batchtfile.NewProgressTracker(trackMsgID, userID), true)
if err := core.AddTask(injectCtx, task); err != nil {
logger.Errorf("Failed to add batch task: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{

View File

@@ -9,19 +9,21 @@ import (
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/common/utils/tphutil"
"github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tphtask"
tphtask "github.com/krau/SaveAny-Bot/core/tasks/telegraph"
"github.com/krau/SaveAny-Bot/pkg/telegraph"
"github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid"
)
func CreateAndAddTphTaskWithEdit(ctx *ext.Context,
func CreateAndAddtelegraphWithEdit(
ctx *ext.Context,
userID int64,
tphpage *telegraph.Page,
dirPath string, // unescaped ph path for file storage
pics []string,
stor storage.Storage,
trackMsgID int) error {
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
task := tphtask.NewTask(xid.New().String(),
injectCtx,