feat: parse url with js plugins support (#96)
* feat: WIP. add parser functionality and text message handling
* fix: use json to marshal js result
* feat: add metadata handling and version validation for jsParser
* refactor: rename parser package to parsers and restructure parser handling
* refactor: core code struct and impl parse task handle
* feat: impl parsed download
* fix: seek cache file when processing tph picture
* feat: implement parsed task handling and progress tracking
* feat: enhance task processing with concurrency control and progress tracking
* feat: add resource ID generation and improve resource processing handling
* feat: improve message formatting in parsed text and progress completion
* feat: add example js plugin
* feat: implement Twitter parser
* fix: twitter parse video json decode error
* feat: impl stream mode for parse task
This commit is contained in:
@@ -72,7 +72,9 @@ func handleAddCallback(ctx *ext.Context, update *ext.Update) error {
|
||||
}
|
||||
return shortcut.CreateAndAddTGFileTaskWithEdit(ctx, userID, selectedStorage, dirPath, data.Files[0], msgID)
|
||||
case tasktype.TaskTypeTphpics:
|
||||
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID)
|
||||
return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID)
|
||||
case tasktype.TaskTypeParseditem:
|
||||
shortcut.CreateAndAddParsedTaskWithEdit(ctx, selectedStorage, dirPath, data.ParsedItem, msgID, userID)
|
||||
default:
|
||||
log.FromContext(ctx).Errorf("Unsupported task type: %s", data.TaskType)
|
||||
}
|
||||
|
||||
104
client/bot/handlers/parse.go
Normal file
104
client/bot/handlers/parse.go
Normal file
@@ -0,0 +1,104 @@
|
||||
// 处理任意文本消息, 用于通用地从外部源下载文件
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/celestix/gotgproto/dispatcher"
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
"github.com/charmbracelet/log"
|
||||
"github.com/gotd/td/tg"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut"
|
||||
"github.com/krau/SaveAny-Bot/parsers"
|
||||
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
)
|
||||
|
||||
func handleTextMessage(ctx *ext.Context, u *ext.Update) error {
|
||||
logger := log.FromContext(ctx)
|
||||
text := u.EffectiveMessage.Text
|
||||
item, err := parsers.ParseWithContext(ctx, text)
|
||||
if errors.Is(err, parsers.ErrNoParserFound) {
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
if err != nil {
|
||||
logger.Error("Failed to parse text", "error", err)
|
||||
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
logger.Debug("Parsed item from text message", "text", text, "item", item)
|
||||
userID := u.GetUserChat().GetID()
|
||||
markup, err := msgelem.BuildAddSelectStorageKeyboard(storage.GetUserStorages(ctx, userID), tcbdata.Add{
|
||||
TaskType: tasktype.TaskTypeParseditem,
|
||||
ParsedItem: item,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to build storage selection keyboard: %s", err)
|
||||
ctx.Reply(u, ext.ReplyTextString("Failed to build storage selection keyboard: "+err.Error()), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
text, entities, err := msgelem.BuildParsedTextEntity(*item)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to build parsed text entity: %s", err)
|
||||
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
|
||||
Message: text,
|
||||
ReplyMarkup: markup,
|
||||
Entities: entities,
|
||||
ReplyTo: &tg.InputReplyToMessage{
|
||||
ReplyToMsgID: u.EffectiveMessage.ID,
|
||||
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
|
||||
},
|
||||
})
|
||||
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
|
||||
func handleSilentSaveText(ctx *ext.Context, u *ext.Update) error {
|
||||
logger := log.FromContext(ctx)
|
||||
stor := storage.FromContext(ctx)
|
||||
if stor == nil {
|
||||
logger.Warn("Context storage is nil")
|
||||
ctx.Reply(u, ext.ReplyTextString("未找到存储"), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
text := u.EffectiveMessage.Text
|
||||
if text == "" {
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
item, err := parsers.ParseWithContext(ctx, text)
|
||||
if errors.Is(err, parsers.ErrNoParserFound) {
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
if err != nil {
|
||||
logger.Error("Failed to parse text", "error", err)
|
||||
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
logger.Debug("Parsed item from text message", "text", text, "item", item)
|
||||
userID := u.GetUserChat().GetID()
|
||||
text, entities, err := msgelem.BuildParsedTextEntity(*item)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to build parsed text entity: %s", err)
|
||||
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
msg, err := ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
|
||||
Message: text,
|
||||
Entities: entities,
|
||||
ReplyTo: &tg.InputReplyToMessage{
|
||||
ReplyToMsgID: u.EffectiveMessage.ID,
|
||||
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to send message: %s", err)
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
return shortcut.CreateAndAddParsedTaskWithEdit(ctx, stor, "", item, msg.ID, userID)
|
||||
}
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
"github.com/krau/SaveAny-Bot/config"
|
||||
"github.com/krau/SaveAny-Bot/core"
|
||||
"github.com/krau/SaveAny-Bot/core/tftask"
|
||||
"github.com/krau/SaveAny-Bot/core/tasks/tfile"
|
||||
"github.com/krau/SaveAny-Bot/database"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
@@ -54,6 +54,7 @@ func Register(disp dispatcher.Dispatcher) {
|
||||
}
|
||||
disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
|
||||
disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia)))
|
||||
disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText)))
|
||||
|
||||
if config.Cfg.Telegram.Userbot.Enable {
|
||||
go listenMediaMessageEvent(userclient.GetMediaMessageCh())
|
||||
@@ -122,7 +123,7 @@ func listenMediaMessageEvent(ch chan userclient.MediaMessageEvent) {
|
||||
storagePath := stor.JoinStoragePath(path.Join(dirPath, file.Name()))
|
||||
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
|
||||
taskid := xid.New().String()
|
||||
task, err := tftask.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil)
|
||||
task, err := tfile.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("create task failed: %s", err)
|
||||
continue
|
||||
|
||||
@@ -71,6 +71,6 @@ func handleSilentSaveTelegraph(ctx *ext.Context, update *ext.Update) error {
|
||||
return err
|
||||
}
|
||||
userID := update.GetUserChat().GetID()
|
||||
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID)
|
||||
return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID)
|
||||
|
||||
}
|
||||
|
||||
38
client/bot/handlers/utils/msgelem/parse.go
Normal file
38
client/bot/handlers/utils/msgelem/parse.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package msgelem
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/gotd/td/telegram/message/entity"
|
||||
"github.com/gotd/td/telegram/message/styling"
|
||||
"github.com/gotd/td/tg"
|
||||
"github.com/krau/SaveAny-Bot/pkg/parser"
|
||||
)
|
||||
|
||||
func BuildParsedTextEntity(item parser.Item) (string, []tg.MessageEntityClass, error) {
|
||||
eb := entity.Builder{}
|
||||
if err := styling.Perform(&eb,
|
||||
styling.Bold(fmt.Sprintf("[%s]%s", item.Site, item.Title)),
|
||||
styling.Plain("\n链接: "),
|
||||
styling.Code(item.URL),
|
||||
styling.Plain("\n作者: "),
|
||||
styling.Code(item.Author),
|
||||
styling.Plain("\n描述: "),
|
||||
styling.Code(item.Description),
|
||||
styling.Plain("\n文件数量: "),
|
||||
styling.Code(fmt.Sprintf("%d", len(item.Resources))),
|
||||
styling.Plain("\n预计总大小: "),
|
||||
styling.Code(fmt.Sprintf("%.2f MB", func() float64 {
|
||||
var totalSize int64
|
||||
for _, res := range item.Resources {
|
||||
totalSize += res.Size
|
||||
}
|
||||
return float64(totalSize) / 1024 / 1024
|
||||
}())),
|
||||
styling.Plain("\n请选择存储位置"),
|
||||
); err != nil {
|
||||
return "", nil, fmt.Errorf("构建消息失败: %w", err)
|
||||
}
|
||||
text, entities := eb.Complete()
|
||||
return text, entities, nil
|
||||
}
|
||||
@@ -24,6 +24,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
|
||||
taskType = tasktype.TaskTypeTgfiles
|
||||
} else if adddata.TphPageNode != nil {
|
||||
taskType = tasktype.TaskTypeTphpics
|
||||
} else if adddata.ParsedItem != nil {
|
||||
taskType = tasktype.TaskTypeParseditem
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown task type: %s", taskType)
|
||||
}
|
||||
@@ -41,6 +43,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
|
||||
TphPageNode: adddata.TphPageNode,
|
||||
TphPics: adddata.TphPics,
|
||||
TphDirPath: adddata.TphDirPath,
|
||||
|
||||
ParsedItem: adddata.ParsedItem,
|
||||
}
|
||||
dataid := xid.New().String()
|
||||
err := cache.Set(dataid, data)
|
||||
|
||||
@@ -32,8 +32,7 @@ func GetFileFromMessageWithReply(ctx *ext.Context, update *ext.Update, message *
|
||||
media := message.Media
|
||||
supported := mediautil.IsSupported(media)
|
||||
if !supported {
|
||||
ctx.Reply(update, ext.ReplyTextString("不支持的消息类型"), nil)
|
||||
return nil, nil, dispatcher.EndGroups
|
||||
return nil, nil, dispatcher.ContinueGroups
|
||||
}
|
||||
|
||||
replied, err = ctx.Reply(update, ext.ReplyTextString("正在获取文件信息..."), nil)
|
||||
|
||||
35
client/bot/handlers/utils/shortcut/parsed.go
Normal file
35
client/bot/handlers/utils/shortcut/parsed.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package shortcut
|
||||
|
||||
import (
|
||||
"github.com/celestix/gotgproto/dispatcher"
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
"github.com/charmbracelet/log"
|
||||
"github.com/gotd/td/tg"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
"github.com/krau/SaveAny-Bot/core"
|
||||
"github.com/krau/SaveAny-Bot/core/tasks/parsed"
|
||||
"github.com/krau/SaveAny-Bot/pkg/parser"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
"github.com/rs/xid"
|
||||
)
|
||||
|
||||
func CreateAndAddParsedTaskWithEdit(ctx *ext.Context, stor storage.Storage, dirPath string, item *parser.Item, msgID int, userID int64) error {
|
||||
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
|
||||
task := parsed.NewTask(xid.New().String(), injectCtx, stor, stor.JoinStoragePath(dirPath), item, parsed.NewProgress(msgID, userID))
|
||||
if err := core.AddTask(injectCtx, task); err != nil {
|
||||
log.FromContext(ctx).Errorf("Failed to add task: %s", err)
|
||||
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
|
||||
ID: msgID,
|
||||
Message: "任务添加失败: " + err.Error(),
|
||||
})
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
text, entities := msgelem.BuildTaskAddedEntities(ctx, item.Title, core.GetLength(ctx))
|
||||
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
|
||||
ID: msgID,
|
||||
Message: text,
|
||||
Entities: entities,
|
||||
})
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
@@ -13,15 +13,15 @@ import (
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
"github.com/krau/SaveAny-Bot/core"
|
||||
"github.com/krau/SaveAny-Bot/core/batchtftask"
|
||||
"github.com/krau/SaveAny-Bot/core/tftask"
|
||||
"github.com/krau/SaveAny-Bot/core/tasks/batchtfile"
|
||||
tftask "github.com/krau/SaveAny-Bot/core/tasks/tfile"
|
||||
"github.com/krau/SaveAny-Bot/database"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tfile"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
"github.com/rs/xid"
|
||||
)
|
||||
|
||||
// 创建一个 tftask.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
|
||||
// 创建一个 tfile.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
|
||||
func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, file tfile.TGFileMessage, trackMsgID int) error {
|
||||
logger := log.FromContext(ctx)
|
||||
user, err := database.GetUserByChatID(ctx, userID)
|
||||
@@ -82,7 +82,7 @@ func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage
|
||||
return dispatcher.EndGroups
|
||||
}
|
||||
|
||||
// 创建一个 batchtftask.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
|
||||
// 创建一个 batchtfile.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
|
||||
func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, files []tfile.TGFileMessage, trackMsgID int) error {
|
||||
logger := log.FromContext(ctx)
|
||||
user, err := database.GetUserByChatID(ctx, userID)
|
||||
@@ -110,7 +110,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
|
||||
return storname, dirP
|
||||
}
|
||||
|
||||
elems := make([]batchtftask.TaskElement, 0, len(files))
|
||||
elems := make([]batchtfile.TaskElement, 0, len(files))
|
||||
type albumFile struct {
|
||||
file tfile.TGFileMessage
|
||||
storage storage.Storage
|
||||
@@ -132,7 +132,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
|
||||
}
|
||||
if !dirPath.NeedNewForAlbum() {
|
||||
storPath := fileStor.JoinStoragePath(path.Join(dirPath.String(), file.Name()))
|
||||
elem, err := batchtftask.NewTaskElement(fileStor, storPath, file)
|
||||
elem, err := batchtfile.NewTaskElement(fileStor, storPath, file)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to create task element: %s", err)
|
||||
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
|
||||
@@ -167,7 +167,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
|
||||
albumStor := afiles[0].storage
|
||||
for _, af := range afiles {
|
||||
afstorPath := af.storage.JoinStoragePath(path.Join(dirPath, albumDir, af.file.Name()))
|
||||
elem, err := batchtftask.NewTaskElement(albumStor, afstorPath, af.file)
|
||||
elem, err := batchtfile.NewTaskElement(albumStor, afstorPath, af.file)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to create task element for album file: %s", err)
|
||||
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
|
||||
@@ -182,7 +182,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
|
||||
|
||||
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
|
||||
taskid := xid.New().String()
|
||||
task := batchtftask.NewBatchTGFileTask(taskid, injectCtx, elems, batchtftask.NewProgressTracker(trackMsgID, userID), true)
|
||||
task := batchtfile.NewBatchTGFileTask(taskid, injectCtx, elems, batchtfile.NewProgressTracker(trackMsgID, userID), true)
|
||||
if err := core.AddTask(injectCtx, task); err != nil {
|
||||
logger.Errorf("Failed to add batch task: %s", err)
|
||||
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
|
||||
|
||||
@@ -9,19 +9,21 @@ import (
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tphutil"
|
||||
"github.com/krau/SaveAny-Bot/core"
|
||||
"github.com/krau/SaveAny-Bot/core/tphtask"
|
||||
tphtask "github.com/krau/SaveAny-Bot/core/tasks/telegraph"
|
||||
"github.com/krau/SaveAny-Bot/pkg/telegraph"
|
||||
"github.com/krau/SaveAny-Bot/storage"
|
||||
"github.com/rs/xid"
|
||||
)
|
||||
|
||||
func CreateAndAddTphTaskWithEdit(ctx *ext.Context,
|
||||
func CreateAndAddtelegraphWithEdit(
|
||||
ctx *ext.Context,
|
||||
userID int64,
|
||||
tphpage *telegraph.Page,
|
||||
dirPath string, // unescaped ph path for file storage
|
||||
pics []string,
|
||||
stor storage.Storage,
|
||||
trackMsgID int) error {
|
||||
|
||||
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
|
||||
task := tphtask.NewTask(xid.New().String(),
|
||||
injectCtx,
|
||||
|
||||
Reference in New Issue
Block a user