From 302db2fe756ec38d41d0aaae4e51fa673e73ea00 Mon Sep 17 00:00:00 2001 From: Krau <71133316+krau@users.noreply.github.com> Date: Thu, 21 Aug 2025 23:48:17 +0800 Subject: [PATCH] feat: parse url with js plugins support (#96) * feat: WIP. add parser functionality and text message handling * fix: use json to marshal js result * feat: add metadata handling and version validation for jsParser * refactor: rename parser package to parsers and restructure parser handling * refactor: core code struct and impl parse task handle * feat: impl parsed download * fix: seek cache file when processing tph picture * feat: implement parsed task handling and progress tracking * feat: enhance task processing with concurrency control and progress tracking * feat: add resource ID generation and improve resource processing handling * feat: improve message formatting in parsed text and progress completion * feat: add example js plugin * feat: implement Twitter parser * fix: twitter parse video json decode error * feat: impl stream mode for parse task --- .gitignore | 2 +- client/bot/handlers/add_task.go | 4 +- client/bot/handlers/parse.go | 104 +++++++++ client/bot/handlers/register.go | 5 +- client/bot/handlers/telegraph.go | 2 +- client/bot/handlers/utils/msgelem/parse.go | 38 ++++ client/bot/handlers/utils/msgelem/storage.go | 4 + client/bot/handlers/utils/shortcut/message.go | 3 +- client/bot/handlers/utils/shortcut/parsed.go | 35 +++ client/bot/handlers/utils/shortcut/tftask.go | 16 +- client/bot/handlers/utils/shortcut/tphtask.go | 6 +- cmd/run.go | 10 + config/parser.go | 6 + config/tg.go | 2 +- config/viper.go | 1 + .../batchtfile}/execute.go | 9 +- .../batchtfile}/progress.go | 2 +- .../{batchtftask => tasks/batchtfile}/task.go | 7 +- .../batchtfile}/taskinfo.go | 2 +- .../batchtfile}/utils.go | 2 +- core/tasks/parsed/execute.go | 139 ++++++++++++ core/tasks/parsed/progress.go | 209 ++++++++++++++++++ core/tasks/parsed/task.go | 84 +++++++ core/tasks/parsed/taskinfo.go | 51 +++++ core/{tphtask => tasks/telegraph}/execute.go | 9 +- core/{tphtask => tasks/telegraph}/progress.go | 2 +- core/{tphtask => tasks/telegraph}/task.go | 6 +- core/{tphtask => tasks/telegraph}/taskinfo.go | 2 +- core/{tphtask => tasks/telegraph}/utils.go | 2 +- core/{tftask => tasks/tfile}/execute.go | 2 +- core/{tftask => tasks/tfile}/progress.go | 2 +- core/{tftask => tasks/tfile}/stream.go | 2 +- core/{tftask => tasks/tfile}/taskinfo.go | 2 +- core/{tftask => tasks/tfile}/tftask.go | 6 +- core/{tftask => tasks/tfile}/util.go | 2 +- core/{tftask => tasks/tfile}/writer.go | 2 +- go.mod | 5 +- go.sum | 6 + parsers/js.go | 184 +++++++++++++++ parsers/parser.go | 65 ++++++ parsers/twitter/parser.go | 83 +++++++ parsers/twitter/types.go | 122 ++++++++++ pkg/enums/tasktype/tasktype.go | 2 +- pkg/enums/tasktype/tasktype_enum.go | 9 +- pkg/parser/parser.go | 63 ++++++ pkg/tcbdata/data.go | 4 + plugins/example_parser.js | 70 ++++++ 47 files changed, 1348 insertions(+), 47 deletions(-) create mode 100644 client/bot/handlers/parse.go create mode 100644 client/bot/handlers/utils/msgelem/parse.go create mode 100644 client/bot/handlers/utils/shortcut/parsed.go create mode 100644 config/parser.go rename core/{batchtftask => tasks/batchtfile}/execute.go (95%) rename core/{batchtftask => tasks/batchtfile}/progress.go (99%) rename core/{batchtftask => tasks/batchtfile}/task.go (92%) rename core/{batchtftask => tasks/batchtfile}/taskinfo.go (97%) rename core/{batchtftask => tasks/batchtfile}/utils.go (97%) create mode 100644 core/tasks/parsed/execute.go create mode 100644 core/tasks/parsed/progress.go create mode 100644 core/tasks/parsed/task.go create mode 100644 core/tasks/parsed/taskinfo.go rename core/{tphtask => tasks/telegraph}/execute.go (93%) rename core/{tphtask => tasks/telegraph}/progress.go (99%) rename core/{tphtask => tasks/telegraph}/task.go (94%) rename core/{tphtask => tasks/telegraph}/taskinfo.go (96%) rename core/{tphtask => tasks/telegraph}/utils.go (93%) rename core/{tftask => tasks/tfile}/execute.go (99%) rename core/{tftask => tasks/tfile}/progress.go (99%) rename core/{tftask => tasks/tfile}/stream.go (98%) rename core/{tftask => tasks/tfile}/taskinfo.go (96%) rename core/{tftask => tasks/tfile}/tftask.go (96%) rename core/{tftask => tasks/tfile}/util.go (97%) rename core/{tftask => tasks/tfile}/writer.go (99%) create mode 100644 parsers/js.go create mode 100644 parsers/parser.go create mode 100644 parsers/twitter/parser.go create mode 100644 parsers/twitter/types.go create mode 100644 pkg/parser/parser.go create mode 100644 plugins/example_parser.js diff --git a/.gitignore b/.gitignore index 3b51b80..81eb713 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ session.* cache.db .vscode/ temp/ -.hugo_build.lock \ No newline at end of file +.hugo_build.lock diff --git a/client/bot/handlers/add_task.go b/client/bot/handlers/add_task.go index 40c348c..578a2c4 100644 --- a/client/bot/handlers/add_task.go +++ b/client/bot/handlers/add_task.go @@ -72,7 +72,9 @@ func handleAddCallback(ctx *ext.Context, update *ext.Update) error { } return shortcut.CreateAndAddTGFileTaskWithEdit(ctx, userID, selectedStorage, dirPath, data.Files[0], msgID) case tasktype.TaskTypeTphpics: - return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID) + return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID) + case tasktype.TaskTypeParseditem: + shortcut.CreateAndAddParsedTaskWithEdit(ctx, selectedStorage, dirPath, data.ParsedItem, msgID, userID) default: log.FromContext(ctx).Errorf("Unsupported task type: %s", data.TaskType) } diff --git a/client/bot/handlers/parse.go b/client/bot/handlers/parse.go new file mode 100644 index 0000000..a732c19 --- /dev/null +++ b/client/bot/handlers/parse.go @@ -0,0 +1,104 @@ +// 处理任意文本消息, 用于通用地从外部源下载文件 + +package handlers + +import ( + "errors" + + "github.com/celestix/gotgproto/dispatcher" + "github.com/celestix/gotgproto/ext" + "github.com/charmbracelet/log" + "github.com/gotd/td/tg" + "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem" + "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut" + "github.com/krau/SaveAny-Bot/parsers" + "github.com/krau/SaveAny-Bot/pkg/enums/tasktype" + "github.com/krau/SaveAny-Bot/pkg/tcbdata" + "github.com/krau/SaveAny-Bot/storage" +) + +func handleTextMessage(ctx *ext.Context, u *ext.Update) error { + logger := log.FromContext(ctx) + text := u.EffectiveMessage.Text + item, err := parsers.ParseWithContext(ctx, text) + if errors.Is(err, parsers.ErrNoParserFound) { + return dispatcher.EndGroups + } + if err != nil { + logger.Error("Failed to parse text", "error", err) + ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil) + return dispatcher.EndGroups + } + logger.Debug("Parsed item from text message", "text", text, "item", item) + userID := u.GetUserChat().GetID() + markup, err := msgelem.BuildAddSelectStorageKeyboard(storage.GetUserStorages(ctx, userID), tcbdata.Add{ + TaskType: tasktype.TaskTypeParseditem, + ParsedItem: item, + }) + if err != nil { + logger.Errorf("Failed to build storage selection keyboard: %s", err) + ctx.Reply(u, ext.ReplyTextString("Failed to build storage selection keyboard: "+err.Error()), nil) + return dispatcher.EndGroups + } + text, entities, err := msgelem.BuildParsedTextEntity(*item) + if err != nil { + logger.Errorf("Failed to build parsed text entity: %s", err) + ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil) + return dispatcher.EndGroups + } + ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{ + Message: text, + ReplyMarkup: markup, + Entities: entities, + ReplyTo: &tg.InputReplyToMessage{ + ReplyToMsgID: u.EffectiveMessage.ID, + ReplyToPeerID: u.GetUserChat().AsInputPeer(), + }, + }) + + return dispatcher.EndGroups +} + +func handleSilentSaveText(ctx *ext.Context, u *ext.Update) error { + logger := log.FromContext(ctx) + stor := storage.FromContext(ctx) + if stor == nil { + logger.Warn("Context storage is nil") + ctx.Reply(u, ext.ReplyTextString("未找到存储"), nil) + return dispatcher.EndGroups + } + text := u.EffectiveMessage.Text + if text == "" { + return dispatcher.EndGroups + } + item, err := parsers.ParseWithContext(ctx, text) + if errors.Is(err, parsers.ErrNoParserFound) { + return dispatcher.EndGroups + } + if err != nil { + logger.Error("Failed to parse text", "error", err) + ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil) + return dispatcher.EndGroups + } + logger.Debug("Parsed item from text message", "text", text, "item", item) + userID := u.GetUserChat().GetID() + text, entities, err := msgelem.BuildParsedTextEntity(*item) + if err != nil { + logger.Errorf("Failed to build parsed text entity: %s", err) + ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil) + return dispatcher.EndGroups + } + msg, err := ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{ + Message: text, + Entities: entities, + ReplyTo: &tg.InputReplyToMessage{ + ReplyToMsgID: u.EffectiveMessage.ID, + ReplyToPeerID: u.GetUserChat().AsInputPeer(), + }, + }) + if err != nil { + logger.Errorf("Failed to send message: %s", err) + return dispatcher.EndGroups + } + return shortcut.CreateAndAddParsedTaskWithEdit(ctx, stor, "", item, msg.ID, userID) +} diff --git a/client/bot/handlers/register.go b/client/bot/handlers/register.go index 5e6c775..c4243f7 100644 --- a/client/bot/handlers/register.go +++ b/client/bot/handlers/register.go @@ -16,7 +16,7 @@ import ( "github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/config" "github.com/krau/SaveAny-Bot/core" - "github.com/krau/SaveAny-Bot/core/tftask" + "github.com/krau/SaveAny-Bot/core/tasks/tfile" "github.com/krau/SaveAny-Bot/database" "github.com/krau/SaveAny-Bot/pkg/tcbdata" "github.com/krau/SaveAny-Bot/storage" @@ -54,6 +54,7 @@ func Register(disp dispatcher.Dispatcher) { } disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph))) disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia))) + disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText))) if config.Cfg.Telegram.Userbot.Enable { go listenMediaMessageEvent(userclient.GetMediaMessageCh()) @@ -122,7 +123,7 @@ func listenMediaMessageEvent(ch chan userclient.MediaMessageEvent) { storagePath := stor.JoinStoragePath(path.Join(dirPath, file.Name())) injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) taskid := xid.New().String() - task, err := tftask.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil) + task, err := tfile.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil) if err != nil { logger.Errorf("create task failed: %s", err) continue diff --git a/client/bot/handlers/telegraph.go b/client/bot/handlers/telegraph.go index b239949..6a660f6 100644 --- a/client/bot/handlers/telegraph.go +++ b/client/bot/handlers/telegraph.go @@ -71,6 +71,6 @@ func handleSilentSaveTelegraph(ctx *ext.Context, update *ext.Update) error { return err } userID := update.GetUserChat().GetID() - return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID) + return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID) } diff --git a/client/bot/handlers/utils/msgelem/parse.go b/client/bot/handlers/utils/msgelem/parse.go new file mode 100644 index 0000000..1211227 --- /dev/null +++ b/client/bot/handlers/utils/msgelem/parse.go @@ -0,0 +1,38 @@ +package msgelem + +import ( + "fmt" + + "github.com/gotd/td/telegram/message/entity" + "github.com/gotd/td/telegram/message/styling" + "github.com/gotd/td/tg" + "github.com/krau/SaveAny-Bot/pkg/parser" +) + +func BuildParsedTextEntity(item parser.Item) (string, []tg.MessageEntityClass, error) { + eb := entity.Builder{} + if err := styling.Perform(&eb, + styling.Bold(fmt.Sprintf("[%s]%s", item.Site, item.Title)), + styling.Plain("\n链接: "), + styling.Code(item.URL), + styling.Plain("\n作者: "), + styling.Code(item.Author), + styling.Plain("\n描述: "), + styling.Code(item.Description), + styling.Plain("\n文件数量: "), + styling.Code(fmt.Sprintf("%d", len(item.Resources))), + styling.Plain("\n预计总大小: "), + styling.Code(fmt.Sprintf("%.2f MB", func() float64 { + var totalSize int64 + for _, res := range item.Resources { + totalSize += res.Size + } + return float64(totalSize) / 1024 / 1024 + }())), + styling.Plain("\n请选择存储位置"), + ); err != nil { + return "", nil, fmt.Errorf("构建消息失败: %w", err) + } + text, entities := eb.Complete() + return text, entities, nil +} diff --git a/client/bot/handlers/utils/msgelem/storage.go b/client/bot/handlers/utils/msgelem/storage.go index cd81775..c8d0822 100644 --- a/client/bot/handlers/utils/msgelem/storage.go +++ b/client/bot/handlers/utils/msgelem/storage.go @@ -24,6 +24,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add) taskType = tasktype.TaskTypeTgfiles } else if adddata.TphPageNode != nil { taskType = tasktype.TaskTypeTphpics + } else if adddata.ParsedItem != nil { + taskType = tasktype.TaskTypeParseditem } else { return nil, fmt.Errorf("unknown task type: %s", taskType) } @@ -41,6 +43,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add) TphPageNode: adddata.TphPageNode, TphPics: adddata.TphPics, TphDirPath: adddata.TphDirPath, + + ParsedItem: adddata.ParsedItem, } dataid := xid.New().String() err := cache.Set(dataid, data) diff --git a/client/bot/handlers/utils/shortcut/message.go b/client/bot/handlers/utils/shortcut/message.go index 5b2f0b7..1563707 100644 --- a/client/bot/handlers/utils/shortcut/message.go +++ b/client/bot/handlers/utils/shortcut/message.go @@ -32,8 +32,7 @@ func GetFileFromMessageWithReply(ctx *ext.Context, update *ext.Update, message * media := message.Media supported := mediautil.IsSupported(media) if !supported { - ctx.Reply(update, ext.ReplyTextString("不支持的消息类型"), nil) - return nil, nil, dispatcher.EndGroups + return nil, nil, dispatcher.ContinueGroups } replied, err = ctx.Reply(update, ext.ReplyTextString("正在获取文件信息..."), nil) diff --git a/client/bot/handlers/utils/shortcut/parsed.go b/client/bot/handlers/utils/shortcut/parsed.go new file mode 100644 index 0000000..83d79ce --- /dev/null +++ b/client/bot/handlers/utils/shortcut/parsed.go @@ -0,0 +1,35 @@ +package shortcut + +import ( + "github.com/celestix/gotgproto/dispatcher" + "github.com/celestix/gotgproto/ext" + "github.com/charmbracelet/log" + "github.com/gotd/td/tg" + "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem" + "github.com/krau/SaveAny-Bot/common/utils/tgutil" + "github.com/krau/SaveAny-Bot/core" + "github.com/krau/SaveAny-Bot/core/tasks/parsed" + "github.com/krau/SaveAny-Bot/pkg/parser" + "github.com/krau/SaveAny-Bot/storage" + "github.com/rs/xid" +) + +func CreateAndAddParsedTaskWithEdit(ctx *ext.Context, stor storage.Storage, dirPath string, item *parser.Item, msgID int, userID int64) error { + injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) + task := parsed.NewTask(xid.New().String(), injectCtx, stor, stor.JoinStoragePath(dirPath), item, parsed.NewProgress(msgID, userID)) + if err := core.AddTask(injectCtx, task); err != nil { + log.FromContext(ctx).Errorf("Failed to add task: %s", err) + ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ + ID: msgID, + Message: "任务添加失败: " + err.Error(), + }) + return dispatcher.EndGroups + } + text, entities := msgelem.BuildTaskAddedEntities(ctx, item.Title, core.GetLength(ctx)) + ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ + ID: msgID, + Message: text, + Entities: entities, + }) + return dispatcher.EndGroups +} diff --git a/client/bot/handlers/utils/shortcut/tftask.go b/client/bot/handlers/utils/shortcut/tftask.go index 1208c20..bdd9687 100644 --- a/client/bot/handlers/utils/shortcut/tftask.go +++ b/client/bot/handlers/utils/shortcut/tftask.go @@ -13,15 +13,15 @@ import ( "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil" "github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/core" - "github.com/krau/SaveAny-Bot/core/batchtftask" - "github.com/krau/SaveAny-Bot/core/tftask" + "github.com/krau/SaveAny-Bot/core/tasks/batchtfile" + tftask "github.com/krau/SaveAny-Bot/core/tasks/tfile" "github.com/krau/SaveAny-Bot/database" "github.com/krau/SaveAny-Bot/pkg/tfile" "github.com/krau/SaveAny-Bot/storage" "github.com/rs/xid" ) -// 创建一个 tftask.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 +// 创建一个 tfile.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, file tfile.TGFileMessage, trackMsgID int) error { logger := log.FromContext(ctx) user, err := database.GetUserByChatID(ctx, userID) @@ -82,7 +82,7 @@ func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage return dispatcher.EndGroups } -// 创建一个 batchtftask.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 +// 创建一个 batchtfile.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, files []tfile.TGFileMessage, trackMsgID int) error { logger := log.FromContext(ctx) user, err := database.GetUserByChatID(ctx, userID) @@ -110,7 +110,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st return storname, dirP } - elems := make([]batchtftask.TaskElement, 0, len(files)) + elems := make([]batchtfile.TaskElement, 0, len(files)) type albumFile struct { file tfile.TGFileMessage storage storage.Storage @@ -132,7 +132,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st } if !dirPath.NeedNewForAlbum() { storPath := fileStor.JoinStoragePath(path.Join(dirPath.String(), file.Name())) - elem, err := batchtftask.NewTaskElement(fileStor, storPath, file) + elem, err := batchtfile.NewTaskElement(fileStor, storPath, file) if err != nil { logger.Errorf("Failed to create task element: %s", err) ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ @@ -167,7 +167,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st albumStor := afiles[0].storage for _, af := range afiles { afstorPath := af.storage.JoinStoragePath(path.Join(dirPath, albumDir, af.file.Name())) - elem, err := batchtftask.NewTaskElement(albumStor, afstorPath, af.file) + elem, err := batchtfile.NewTaskElement(albumStor, afstorPath, af.file) if err != nil { logger.Errorf("Failed to create task element for album file: %s", err) ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ @@ -182,7 +182,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) taskid := xid.New().String() - task := batchtftask.NewBatchTGFileTask(taskid, injectCtx, elems, batchtftask.NewProgressTracker(trackMsgID, userID), true) + task := batchtfile.NewBatchTGFileTask(taskid, injectCtx, elems, batchtfile.NewProgressTracker(trackMsgID, userID), true) if err := core.AddTask(injectCtx, task); err != nil { logger.Errorf("Failed to add batch task: %s", err) ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ diff --git a/client/bot/handlers/utils/shortcut/tphtask.go b/client/bot/handlers/utils/shortcut/tphtask.go index 4d1c1a2..b263595 100644 --- a/client/bot/handlers/utils/shortcut/tphtask.go +++ b/client/bot/handlers/utils/shortcut/tphtask.go @@ -9,19 +9,21 @@ import ( "github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/common/utils/tphutil" "github.com/krau/SaveAny-Bot/core" - "github.com/krau/SaveAny-Bot/core/tphtask" + tphtask "github.com/krau/SaveAny-Bot/core/tasks/telegraph" "github.com/krau/SaveAny-Bot/pkg/telegraph" "github.com/krau/SaveAny-Bot/storage" "github.com/rs/xid" ) -func CreateAndAddTphTaskWithEdit(ctx *ext.Context, +func CreateAndAddtelegraphWithEdit( + ctx *ext.Context, userID int64, tphpage *telegraph.Page, dirPath string, // unescaped ph path for file storage pics []string, stor storage.Storage, trackMsgID int) error { + injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) task := tphtask.NewTask(xid.New().String(), injectCtx, diff --git a/cmd/run.go b/cmd/run.go index 9976896..4059f23 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -19,6 +19,7 @@ import ( "github.com/krau/SaveAny-Bot/config" "github.com/krau/SaveAny-Bot/core" "github.com/krau/SaveAny-Bot/database" + "github.com/krau/SaveAny-Bot/parsers" "github.com/krau/SaveAny-Bot/storage" "github.com/spf13/cobra" ) @@ -53,6 +54,15 @@ func initAll(ctx context.Context) { logger.Info(i18n.T(i18nk.Initing)) database.Init(ctx) storage.LoadStorages(ctx) + if config.Cfg.Parser.PluginEnable { + for _, dir := range config.Cfg.Parser.PluginDirs { + if err := parsers.LoadPlugins(ctx, dir); err != nil { + logger.Error("Failed to load parser plugins", "dir", dir, "error", err) + } else { + logger.Debug("Loaded parser plugins", "dir", dir) + } + } + } if config.Cfg.Telegram.Userbot.Enable { _, err := userclient.Login(ctx) if err != nil { diff --git a/config/parser.go b/config/parser.go new file mode 100644 index 0000000..4d5cfc7 --- /dev/null +++ b/config/parser.go @@ -0,0 +1,6 @@ +package config + +type parserConfig struct { + PluginEnable bool `toml:"plugin_enable" mapstructure:"plugin_enable" json:"plugin_enable"` + PluginDirs []string `toml:"plugin_dirs" mapstructure:"plugin_dirs" json:"plugin_dirs"` +} diff --git a/config/tg.go b/config/tg.go index ba62174..2f15790 100644 --- a/config/tg.go +++ b/config/tg.go @@ -6,7 +6,7 @@ type telegramConfig struct { AppHash string `toml:"app_hash" mapstructure:"app_hash" json:"app_hash"` Proxy tgProxyConfig `toml:"proxy" mapstructure:"proxy"` RpcRetry int `toml:"rpc_retry" mapstructure:"rpc_retry" json:"rpc_retry"` - Userbot userbotConfig `toml:"userbot" mapstructure:"userbot" json:"userbot"` // [TODO] + Userbot userbotConfig `toml:"userbot" mapstructure:"userbot" json:"userbot"` } type userbotConfig struct { diff --git a/config/viper.go b/config/viper.go index 2049905..df8dde1 100644 --- a/config/viper.go +++ b/config/viper.go @@ -28,6 +28,7 @@ type Config struct { DB dbConfig `toml:"db" mapstructure:"db"` Telegram telegramConfig `toml:"telegram" mapstructure:"telegram"` Storages []storage.StorageConfig `toml:"-" mapstructure:"-" json:"storages"` + Parser parserConfig `toml:"parser" mapstructure:"parser" json:"parser"` Hook hookConfig `toml:"hook" mapstructure:"hook" json:"hook"` } diff --git a/core/batchtftask/execute.go b/core/tasks/batchtfile/execute.go similarity index 95% rename from core/batchtftask/execute.go rename to core/tasks/batchtfile/execute.go index 22c2212..893b9ac 100644 --- a/core/batchtftask/execute.go +++ b/core/tasks/batchtfile/execute.go @@ -1,4 +1,4 @@ -package batchtftask +package batchtfile import ( "context" @@ -25,14 +25,19 @@ func (t *Task) Execute(ctx context.Context) error { eg, gctx := errgroup.WithContext(ctx) eg.SetLimit(workers) for _, elem := range t.Elems { - elem := elem eg.Go(func() error { + t.processingMu.RLock() if t.processing[elem.ID] != nil { return fmt.Errorf("element with ID %s is already being processed", elem.ID) } + t.processingMu.RUnlock() + t.processingMu.Lock() t.processing[elem.ID] = &elem + t.processingMu.Unlock() defer func() { + t.processingMu.Lock() delete(t.processing, elem.ID) + t.processingMu.Unlock() }() return t.processElement(gctx, elem) }) diff --git a/core/batchtftask/progress.go b/core/tasks/batchtfile/progress.go similarity index 99% rename from core/batchtftask/progress.go rename to core/tasks/batchtfile/progress.go index 25ec691..d876325 100644 --- a/core/batchtftask/progress.go +++ b/core/tasks/batchtfile/progress.go @@ -1,4 +1,4 @@ -package batchtftask +package batchtfile import ( "context" diff --git a/core/batchtftask/task.go b/core/tasks/batchtfile/task.go similarity index 92% rename from core/batchtftask/task.go rename to core/tasks/batchtfile/task.go index a357f3a..cb7de1a 100644 --- a/core/batchtftask/task.go +++ b/core/tasks/batchtfile/task.go @@ -1,9 +1,10 @@ -package batchtftask +package batchtfile import ( "context" "fmt" "path/filepath" + "sync" "sync/atomic" "github.com/krau/SaveAny-Bot/config" @@ -31,7 +32,8 @@ type Task struct { downloaded atomic.Int64 totalSize int64 processing map[string]TaskElementInfo - failed map[string]error // errors for each element + processingMu sync.RWMutex + failed map[string]error // [TODO] errors for each element } func (t *Task) Type() tasktype.TaskType { @@ -89,6 +91,7 @@ func NewBatchTGFileTask( }(), processing: make(map[string]TaskElementInfo), IgnoreErrors: ignoreErrors, + processingMu: sync.RWMutex{}, failed: make(map[string]error), } return task diff --git a/core/batchtftask/taskinfo.go b/core/tasks/batchtfile/taskinfo.go similarity index 97% rename from core/batchtftask/taskinfo.go rename to core/tasks/batchtfile/taskinfo.go index c702da5..396b7f3 100644 --- a/core/batchtftask/taskinfo.go +++ b/core/tasks/batchtfile/taskinfo.go @@ -1,4 +1,4 @@ -package batchtftask +package batchtfile type TaskElementInfo interface { FileName() string diff --git a/core/batchtftask/utils.go b/core/tasks/batchtfile/utils.go similarity index 97% rename from core/batchtftask/utils.go rename to core/tasks/batchtfile/utils.go index 9c38ecf..b8c39cd 100644 --- a/core/batchtftask/utils.go +++ b/core/tasks/batchtfile/utils.go @@ -1,4 +1,4 @@ -package batchtftask +package batchtfile var progressUpdatesLevels = []struct { size int64 // 文件大小阈值 diff --git a/core/tasks/parsed/execute.go b/core/tasks/parsed/execute.go new file mode 100644 index 0000000..899921e --- /dev/null +++ b/core/tasks/parsed/execute.go @@ -0,0 +1,139 @@ +package parsed + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "path" + "path/filepath" + + "github.com/charmbracelet/log" + "github.com/duke-git/lancet/v2/retry" + "github.com/krau/SaveAny-Bot/common/utils/fsutil" + "github.com/krau/SaveAny-Bot/common/utils/ioutil" + "github.com/krau/SaveAny-Bot/config" + "github.com/krau/SaveAny-Bot/pkg/enums/ctxkey" + "github.com/krau/SaveAny-Bot/pkg/parser" + "golang.org/x/sync/errgroup" +) + +func (t *Task) Execute(ctx context.Context) error { + logger := log.FromContext(ctx) + logger.Infof("Starting Parsed item task %s", t.item.Title) + if t.progress != nil { + t.progress.OnStart(ctx, t) + } + eg, gctx := errgroup.WithContext(ctx) + eg.SetLimit(config.Cfg.Workers) + for _, resource := range t.item.Resources { + eg.Go(func() error { + t.processingMu.RLock() + if t.processing[resource.ID()] != nil { + return fmt.Errorf("resource %s is already being processed", resource.ID()) + } + t.processingMu.RUnlock() + t.processingMu.Lock() + t.processing[resource.ID()] = &resource + t.processingMu.Unlock() + defer func() { + t.processingMu.Lock() + delete(t.processing, resource.URL) + t.processingMu.Unlock() + }() + err := t.processResource(gctx, resource) + t.downloaded.Add(1) + if errors.Is(err, context.Canceled) { + logger.Debug("Resource processing canceled") + return err + } + if err != nil { + logger.Errorf("Error processing resource %s: %v", resource.URL, err) + return fmt.Errorf("failed to process resource %s: %w", resource.URL, err) + } + return nil + }) + } + err := eg.Wait() + if err != nil { + logger.Errorf("Error during Parsed item task execution: %v", err) + } else { + logger.Infof("Parsed item task %s completed successfully", t.item.Title) + } + if t.progress != nil { + t.progress.OnDone(ctx, t, err) + } + return err +} + +func (t *Task) processResource(ctx context.Context, resource parser.Resource) error { + logger := log.FromContext(ctx) + err := retry.Retry(func() error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, resource.URL, nil) + if err != nil { + return err + } + if resource.Headers != nil { + for k, v := range resource.Headers { + req.Header.Set(k, v) + } + } + resp, err := t.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to download resource %s: %w", resource.URL, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download resource %s: %s", resource.URL, resp.Status) + } + ctx = context.WithValue(ctx, ctxkey.ContentLength, func() int64 { + if resource.Size > 0 { + return resource.Size + } + return resp.ContentLength + }()) + if t.stream { + return t.Stor.Save(ctx, resp.Body, path.Join(t.StorPath, resource.Filename)) + } + cacheFile, err := fsutil.CreateFile(filepath.Join(config.Cfg.Temp.BasePath, + fmt.Sprintf("resource_%s_%s", t.ID, resource.Filename))) + if err != nil { + return fmt.Errorf("failed to create cache file for resource %s: %w", resource.URL, err) + } + defer func() { + if err := cacheFile.CloseAndRemove(); err != nil { + logger.Errorf("Failed to close and remove cache file: %v", err) + } + }() + wr := ioutil.NewProgressWriter(cacheFile, func(n int) { + t.downloadedBytes.Add(int64(n)) + if t.progress != nil { + t.progress.OnProgress(ctx, t) + } + }) + + copyResultCh := make(chan error, 1) + go func() { + _, err := io.Copy(wr, resp.Body) + copyResultCh <- err + }() + select { + case err := <-copyResultCh: + if err != nil { + return fmt.Errorf("failed to copy resource %s to cache file: %w", resource.URL, err) + } + case <-ctx.Done(): + return ctx.Err() + } + _, err = cacheFile.Seek(0, 0) + if err != nil { + return fmt.Errorf("failed to seek cache file for resource %s: %w", resource.URL, err) + } + return t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, resource.Filename)) + }, retry.Context(ctx), retry.RetryTimes(uint(config.Cfg.Retry))) + if ctx.Err() != nil { + return ctx.Err() + } + return err +} diff --git a/core/tasks/parsed/progress.go b/core/tasks/parsed/progress.go new file mode 100644 index 0000000..9daa192 --- /dev/null +++ b/core/tasks/parsed/progress.go @@ -0,0 +1,209 @@ +package parsed + +import ( + "context" + "errors" + "fmt" + "sync/atomic" + "time" + + "github.com/charmbracelet/log" + "github.com/duke-git/lancet/v2/slice" + "github.com/gotd/td/telegram/message/entity" + "github.com/gotd/td/telegram/message/styling" + "github.com/gotd/td/tg" + "github.com/krau/SaveAny-Bot/common/utils/dlutil" + "github.com/krau/SaveAny-Bot/common/utils/tgutil" +) + +var progressUpdatesLevels = []struct { + size int64 // 文件大小阈值 + stepPercent int // 每多少 % 更新一次 +}{ + {10 << 20, 100}, + {50 << 20, 50}, + {200 << 20, 20}, + {500 << 20, 10}, +} + +func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool { + if total <= 0 || downloaded <= 0 { + return false + } + + percent := int((downloaded * 100) / total) + if percent <= lastUpdatePercent { + return false + } + + step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent + for _, lvl := range progressUpdatesLevels { + if total < lvl.size { + step = lvl.stepPercent + break + } + } + + return percent >= lastUpdatePercent+step +} + +type ProgressTracker interface { + OnStart(ctx context.Context, info TaskInfo) + OnProgress(ctx context.Context, info TaskInfo) + OnDone(ctx context.Context, info TaskInfo, err error) +} + +type Progress struct { + MessageID int + ChatID int64 + start time.Time + lastUpdatePercent atomic.Int32 +} + +func (p *Progress) OnStart(ctx context.Context, info TaskInfo) { + logger := log.FromContext(ctx) + p.start = time.Now() + p.lastUpdatePercent.Store(0) + logger.Debugf("Parsed task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID) + entityBuilder := entity.Builder{} + var entities []tg.MessageEntityClass + if err := styling.Perform(&entityBuilder, + styling.Plain(fmt.Sprintf("开始下载 %s 的资源\n总大小: ", info.Site())), + styling.Code(fmt.Sprintf("%.2f MB (%d个资源)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())), + ); err != nil { + log.FromContext(ctx).Errorf("Failed to build entities: %s", err) + return + } + text, entities := entityBuilder.Complete() + req := &tg.MessagesEditMessageRequest{ + ID: p.MessageID, + } + req.SetMessage(text) + req.SetEntities(entities) + req.SetReplyMarkup(&tg.ReplyInlineMarkup{ + Rows: []tg.KeyboardButtonRow{ + { + Buttons: []tg.KeyboardButtonClass{ + tgutil.BuildCancelButton(info.TaskID()), + }, + }, + }}, + ) + ext := tgutil.ExtFromContext(ctx) + if ext != nil { + ext.EditMessage(p.ChatID, req) + return + } +} + +func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) { + if !shouldUpdateProgress(info.TotalBytes(), info.DownloadedBytes(), int(p.lastUpdatePercent.Load())) { + return + } + percent := int((info.DownloadedBytes() * 100) / info.TotalBytes()) + if p.lastUpdatePercent.Load() == int32(percent) { + return + } + p.lastUpdatePercent.Store(int32(percent)) + log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.DownloadedBytes(), info.TotalBytes()) + entityBuilder := entity.Builder{} + var entities []tg.MessageEntityClass + if err := styling.Perform(&entityBuilder, + styling.Plain("正在下载\n总大小: "), + styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())), + styling.Plain("\n正在处理:\n"), + func() styling.StyledTextOption { + var lines []string + for _, elem := range info.Processing() { + lines = append(lines, fmt.Sprintf(" - %s (%.2f MB)", elem.FileName(), float64(elem.FileSize())/(1024*1024))) + } + if len(lines) == 0 { + lines = append(lines, " - 无") + } + return styling.Plain(slice.Join(lines, "\n")) + }(), + styling.Plain("\n平均速度: "), + styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(info.DownloadedBytes(), p.start)/(1024*1024))), + styling.Plain("\n当前进度: "), + styling.Bold(fmt.Sprintf("%.2f%%", float64(info.DownloadedBytes())/float64(info.TotalBytes())*100)), + ); err != nil { + log.FromContext(ctx).Errorf("Failed to build entities: %s", err) + return + } + text, entities := entityBuilder.Complete() + req := &tg.MessagesEditMessageRequest{ + ID: p.MessageID, + } + req.SetMessage(text) + req.SetEntities(entities) + req.SetReplyMarkup(&tg.ReplyInlineMarkup{ + Rows: []tg.KeyboardButtonRow{ + { + Buttons: []tg.KeyboardButtonClass{ + tgutil.BuildCancelButton(info.TaskID()), + }, + }, + }}, + ) + ext := tgutil.ExtFromContext(ctx) + if ext != nil { + ext.EditMessage(p.ChatID, req) + return + } +} + +func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) { + logger := log.FromContext(ctx) + if err != nil { + if errors.Is(err, context.Canceled) { + logger.Infof("Parsed task %s was canceled", info.TaskID()) + ext := tgutil.ExtFromContext(ctx) + if ext != nil { + ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{ + ID: p.MessageID, + Message: fmt.Sprintf("处理已取消: %s", info.TaskID()), + }) + } + } else { + logger.Errorf("Parsed task %s failed: %s", info.TaskID(), err) + ext := tgutil.ExtFromContext(ctx) + if ext != nil { + ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{ + ID: p.MessageID, + Message: fmt.Sprintf("处理失败: %s", err.Error()), + }) + } + } + return + } + logger.Infof("Parsed task %s completed successfully", info.TaskID()) + + entityBuilder := entity.Builder{} + if err := styling.Perform(&entityBuilder, + styling.Plain("处理完成, 资源数量: "), + styling.Code(fmt.Sprintf("%d", info.TotalResources())), + styling.Plain("\n保存路径: "), + styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())), + ); err != nil { + logger.Errorf("Failed to build entities: %s", err) + return + } + text, entities := entityBuilder.Complete() + req := &tg.MessagesEditMessageRequest{ + ID: p.MessageID, + } + req.SetMessage(text) + req.SetEntities(entities) + + ext := tgutil.ExtFromContext(ctx) + if ext != nil { + ext.EditMessage(p.ChatID, req) + } +} + +func NewProgress(messageID int, chatID int64) *Progress { + return &Progress{ + MessageID: messageID, + ChatID: chatID, + } +} diff --git a/core/tasks/parsed/task.go b/core/tasks/parsed/task.go new file mode 100644 index 0000000..e6e221d --- /dev/null +++ b/core/tasks/parsed/task.go @@ -0,0 +1,84 @@ +package parsed + +import ( + "context" + "net/http" + "sync" + "sync/atomic" + + "github.com/krau/SaveAny-Bot/config" + "github.com/krau/SaveAny-Bot/pkg/enums/tasktype" + "github.com/krau/SaveAny-Bot/pkg/parser" + "github.com/krau/SaveAny-Bot/storage" +) + +type Task struct { + ID string + Ctx context.Context + Stor storage.Storage + StorPath string + item *parser.Item + httpClient *http.Client + progress ProgressTracker + stream bool + + totalResources int64 + downloaded atomic.Int64 // downloaded resources count + totalBytes int64 // total bytes to download + downloadedBytes atomic.Int64 // downloaded bytes count + processing map[string]ResourceInfo + processingMu sync.RWMutex + failed map[string]error // [TODO] errors for each resource +} + +func (t *Task) Type() tasktype.TaskType { + return tasktype.TaskTypeParseditem +} + +func (t *Task) TaskID() string { + return t.ID +} + +func NewTask( + id string, + ctx context.Context, + stor storage.Storage, + storPath string, + item *parser.Item, + progressTracker ProgressTracker, +) *Task { + client := &http.Client{ + Transport: &http.Transport{ + // [TODO] configure it via config + Proxy: http.ProxyFromEnvironment, + }, + } + _, ok := stor.(storage.StorageCannotStream) + stream := config.Cfg.Stream && !ok + return &Task{ + ID: id, + Ctx: ctx, + Stor: stor, + StorPath: storPath, + item: item, + totalResources: int64(len(item.Resources)), + downloaded: atomic.Int64{}, + totalBytes: func() int64 { + var total int64 + for _, res := range item.Resources { + if res.Size < 0 { + continue // skip resources with unknown size + } + total += res.Size + } + return total + }(), + stream: stream, + downloadedBytes: atomic.Int64{}, + httpClient: client, + progress: progressTracker, + processing: make(map[string]ResourceInfo), + processingMu: sync.RWMutex{}, + failed: make(map[string]error), + } +} diff --git a/core/tasks/parsed/taskinfo.go b/core/tasks/parsed/taskinfo.go new file mode 100644 index 0000000..cba7f7b --- /dev/null +++ b/core/tasks/parsed/taskinfo.go @@ -0,0 +1,51 @@ +package parsed + +type TaskInfo interface { + TaskID() string + Site() string + TotalResources() int64 + Downloaded() int64 + TotalBytes() int64 + DownloadedBytes() int64 + Processing() map[string]ResourceInfo + StorageName() string + StoragePath() string +} + +func (t *Task) StoragePath() string { + return t.StorPath +} +func (t *Task) TotalResources() int64 { + return t.totalResources +} + +func (t *Task) Downloaded() int64 { + return t.downloaded.Load() +} + +func (t *Task) StorageName() string { + return t.Stor.Name() +} + +func (t *Task) Site() string { + return t.item.Site +} + +func (t *Task) TotalBytes() int64 { + return t.totalBytes +} + +func (t *Task) DownloadedBytes() int64 { + return t.downloadedBytes.Load() +} + +func (t *Task) Processing() map[string]ResourceInfo { + t.processingMu.RLock() + defer t.processingMu.RUnlock() + return t.processing +} + +type ResourceInfo interface { + FileName() string + FileSize() int64 +} diff --git a/core/tphtask/execute.go b/core/tasks/telegraph/execute.go similarity index 93% rename from core/tphtask/execute.go rename to core/tasks/telegraph/execute.go index 53edc95..d9b1375 100644 --- a/core/tphtask/execute.go +++ b/core/tasks/telegraph/execute.go @@ -1,4 +1,4 @@ -package tphtask +package telegraph import ( "context" @@ -22,8 +22,6 @@ func (t *Task) Execute(ctx context.Context) error { eg, gctx := errgroup.WithContext(ctx) eg.SetLimit(config.Cfg.Workers) for i, pic := range t.Pics { - pic := pic - i := i eg.Go(func() error { err := t.processPic(gctx, pic, i) if err != nil { @@ -79,6 +77,11 @@ func (t *Task) processPic(ctx context.Context, picUrl string, index int) error { lastErr = fmt.Errorf("failed to copy picture %s to cache file: %w", filename, lastErr) return lastErr } + _, err = cacheFile.Seek(0, 0) + if err != nil { + lastErr = fmt.Errorf("failed to seek cache file for picture %s: %w", filename, err) + return lastErr + } lastErr = t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, filename)) } else { lastErr = t.Stor.Save(ctx, body, path.Join(t.StorPath, filename)) diff --git a/core/tphtask/progress.go b/core/tasks/telegraph/progress.go similarity index 99% rename from core/tphtask/progress.go rename to core/tasks/telegraph/progress.go index df0b7fd..e664ea3 100644 --- a/core/tphtask/progress.go +++ b/core/tasks/telegraph/progress.go @@ -1,4 +1,4 @@ -package tphtask +package telegraph import ( "context" diff --git a/core/tphtask/task.go b/core/tasks/telegraph/task.go similarity index 94% rename from core/tphtask/task.go rename to core/tasks/telegraph/task.go index d232440..36d36e8 100644 --- a/core/tphtask/task.go +++ b/core/tasks/telegraph/task.go @@ -1,4 +1,4 @@ -package tphtask +package telegraph import ( "context" @@ -39,7 +39,7 @@ func NewTask( progress ProgressTracker, ) *Task { _, cannotStream := stor.(storage.StorageCannotStream) - tphtask := &Task{ + telegraph := &Task{ ID: id, Ctx: ctx, PhPath: phPath, @@ -52,5 +52,5 @@ func NewTask( totalpics: len(pics), downloaded: atomic.Int64{}, } - return tphtask + return telegraph } diff --git a/core/tphtask/taskinfo.go b/core/tasks/telegraph/taskinfo.go similarity index 96% rename from core/tphtask/taskinfo.go rename to core/tasks/telegraph/taskinfo.go index abc33cd..d4910fa 100644 --- a/core/tphtask/taskinfo.go +++ b/core/tasks/telegraph/taskinfo.go @@ -1,4 +1,4 @@ -package tphtask +package telegraph type TaskInfo interface { TaskID() string diff --git a/core/tphtask/utils.go b/core/tasks/telegraph/utils.go similarity index 93% rename from core/tphtask/utils.go rename to core/tasks/telegraph/utils.go index c2e83a1..ce109eb 100644 --- a/core/tphtask/utils.go +++ b/core/tasks/telegraph/utils.go @@ -1,4 +1,4 @@ -package tphtask +package telegraph func shouldUpdateProgress(downloaded int64, total int64) bool { if total <= 0 || downloaded <= 0 { diff --git a/core/tftask/execute.go b/core/tasks/tfile/execute.go similarity index 99% rename from core/tftask/execute.go rename to core/tasks/tfile/execute.go index 8125fad..8418654 100644 --- a/core/tftask/execute.go +++ b/core/tasks/tfile/execute.go @@ -1,4 +1,4 @@ -package tftask +package tfile import ( "context" diff --git a/core/tftask/progress.go b/core/tasks/tfile/progress.go similarity index 99% rename from core/tftask/progress.go rename to core/tasks/tfile/progress.go index 3b5cf9b..23fb1c2 100644 --- a/core/tftask/progress.go +++ b/core/tasks/tfile/progress.go @@ -1,4 +1,4 @@ -package tftask +package tfile import ( "context" diff --git a/core/tftask/stream.go b/core/tasks/tfile/stream.go similarity index 98% rename from core/tftask/stream.go rename to core/tasks/tfile/stream.go index 0485088..7bff591 100644 --- a/core/tftask/stream.go +++ b/core/tasks/tfile/stream.go @@ -1,4 +1,4 @@ -package tftask +package tfile import ( "context" diff --git a/core/tftask/taskinfo.go b/core/tasks/tfile/taskinfo.go similarity index 96% rename from core/tftask/taskinfo.go rename to core/tasks/tfile/taskinfo.go index 891580f..06ca8f9 100644 --- a/core/tftask/taskinfo.go +++ b/core/tasks/tfile/taskinfo.go @@ -1,4 +1,4 @@ -package tftask +package tfile type TaskInfo interface { TaskID() string diff --git a/core/tftask/tftask.go b/core/tasks/tfile/tftask.go similarity index 96% rename from core/tftask/tftask.go rename to core/tasks/tfile/tftask.go index a8032b9..82718d7 100644 --- a/core/tftask/tftask.go +++ b/core/tasks/tfile/tftask.go @@ -1,4 +1,4 @@ -package tftask +package tfile import ( "context" @@ -40,7 +40,7 @@ func NewTGFileTask( if err != nil { return nil, fmt.Errorf("failed to get absolute path for cache: %w", err) } - tftask := &Task{ + tfile := &Task{ ID: id, Ctx: ctx, File: file, @@ -49,7 +49,7 @@ func NewTGFileTask( Progress: progress, localPath: cachePath, } - return tftask, nil + return tfile, nil } tfileTask := &Task{ ID: id, diff --git a/core/tftask/util.go b/core/tasks/tfile/util.go similarity index 97% rename from core/tftask/util.go rename to core/tasks/tfile/util.go index 15cff77..81862d9 100644 --- a/core/tftask/util.go +++ b/core/tasks/tfile/util.go @@ -1,4 +1,4 @@ -package tftask +package tfile var progressUpdatesLevels = []struct { size int64 // 文件大小阈值 diff --git a/core/tftask/writer.go b/core/tasks/tfile/writer.go similarity index 99% rename from core/tftask/writer.go rename to core/tasks/tfile/writer.go index 7c1d765..7d76ee5 100644 --- a/core/tftask/writer.go +++ b/core/tasks/tfile/writer.go @@ -1,4 +1,4 @@ -package tftask +package tfile import ( "context" diff --git a/go.mod b/go.mod index 185abbc..b00bb06 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,6 @@ require ( github.com/charmbracelet/log v0.4.2 github.com/fatih/color v1.18.0 github.com/gabriel-vasile/mimetype v1.4.9 - github.com/go-faster/errors v0.7.1 github.com/gotd/contrib v0.21.0 github.com/gotd/td v0.129.0 github.com/minio/minio-go/v7 v7.0.95 @@ -42,15 +41,18 @@ require ( github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/glebarez/go-sqlite v1.22.0 // indirect + github.com/go-faster/errors v0.7.1 // indirect github.com/go-faster/jx v1.1.0 // indirect github.com/go-faster/xor v1.0.0 // indirect github.com/go-faster/yaml v0.4.6 // indirect github.com/go-ini/ini v1.67.0 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect + github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/google/go-github/v30 v30.1.0 // indirect github.com/google/go-querystring v1.1.0 // indirect + github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect github.com/google/uuid v1.6.0 // indirect github.com/gotd/ige v0.2.2 // indirect github.com/gotd/neo v0.1.5 // indirect @@ -102,6 +104,7 @@ require ( require ( github.com/dgraph-io/ristretto/v2 v2.2.0 + github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994 github.com/duke-git/lancet/v2 v2.3.7 github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/glebarez/sqlite v1.11.0 // indirect diff --git a/go.sum b/go.sum index 5dd1b34..0c098e7 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= +github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= +github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= @@ -61,6 +63,8 @@ github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa5 github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994 h1:aQYWswi+hRL2zJqGacdCZx32XjKYV8ApXFGntw79XAM= +github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4= github.com/duke-git/lancet/v2 v2.3.7 h1:nnNBA9KyoqwbPm4nFmEFVIbXeAmpqf6IDCH45+HHHNs= github.com/duke-git/lancet/v2 v2.3.7/go.mod h1:zGa2R4xswg6EG9I6WnyubDbFO/+A/RROxIbXcwryTsc= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -99,6 +103,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= +github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= diff --git a/parsers/js.go b/parsers/js.go new file mode 100644 index 0000000..b68ab9d --- /dev/null +++ b/parsers/js.go @@ -0,0 +1,184 @@ +package parsers + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/blang/semver" + "github.com/charmbracelet/log" + "github.com/dop251/goja" + "github.com/krau/SaveAny-Bot/pkg/parser" +) + +var ( + LatestParserVersion = semver.MustParse("1.0.0") + MinimumParserVersion = semver.MustParse("1.0.0") +) + +type PluginMeta struct { + Name string `json:"name"` + Version string `json:"version"` // [TODO] 分版本解析, 但是我们现在只有 v1 所以先不写 + Description string `json:"description"` + Author string `json:"author"` +} + +type jsParser struct { + meta PluginMeta + vm *goja.Runtime + reqCh chan jsParserReq +} + +type jsParserReq struct { + method string + url string + respCh chan jsParserResp +} + +type jsParserResp struct { + item *parser.Item + ok bool + err error +} + +func (p *jsParser) CanHandle(url string) bool { + respCh := make(chan jsParserResp, 1) + p.reqCh <- jsParserReq{method: "canHandle", url: url, respCh: respCh} + resp := <-respCh + return resp.ok && resp.err == nil +} + +func (p *jsParser) Parse(url string) (*parser.Item, error) { + respCh := make(chan jsParserResp, 1) + p.reqCh <- jsParserReq{method: "parse", url: url, respCh: respCh} + resp := <-respCh + return resp.item, resp.err +} + +func newJSParser(vm *goja.Runtime, canHandleFunc, parseFunc goja.Value, metadata PluginMeta) *jsParser { + p := &jsParser{ + vm: vm, + reqCh: make(chan jsParserReq, 10), + meta: metadata, + } + + go func() { + for req := range p.reqCh { + switch req.method { + case "canHandle": + fn, _ := goja.AssertFunction(canHandleFunc) + res, err := fn(goja.Undefined(), p.vm.ToValue(req.url)) + if err != nil { + req.respCh <- jsParserResp{ok: false, err: err} + continue + } + req.respCh <- jsParserResp{ok: res.ToBoolean()} + case "parse": + fn, _ := goja.AssertFunction(parseFunc) + result, err := fn(goja.Undefined(), p.vm.ToValue(req.url)) + if err != nil { + req.respCh <- jsParserResp{err: err} + continue + } + + var item parser.Item + if exported := result.Export(); exported != nil { + data, err := json.Marshal(exported) + if err != nil { + req.respCh <- jsParserResp{err: fmt.Errorf("failed to marshal result to JSON: %w", err)} + continue + } + + if err := json.Unmarshal(data, &item); err != nil { + req.respCh <- jsParserResp{err: fmt.Errorf("failed to unmarshal JSON to Item: %w", err)} + continue + } + } else { + req.respCh <- jsParserResp{err: fmt.Errorf("JS function returned null or undefined")} + continue + } + req.respCh <- jsParserResp{item: &item} + } + } + }() + + return p +} + +func registerParser(vm *goja.Runtime) func(call goja.FunctionCall) goja.Value { + return func(call goja.FunctionCall) goja.Value { + jsObj := call.Argument(0) + if jsObj == nil || goja.IsUndefined(jsObj) || goja.IsNull(jsObj) { + panic("registerParser expects an object { canHandle, parse }") + } + + obj := jsObj.ToObject(vm) + if obj == nil { + panic("registerParser: cannot convert argument to object") + } + metaValue := obj.Get("metadata") + if metaValue == nil || goja.IsUndefined(metaValue) { + panic("parser must provide metadata") + } + var metadata PluginMeta + if exported := metaValue.Export(); exported != nil { + data, err := json.Marshal(exported) + if err != nil { + panic(fmt.Sprintf("failed to marshal metadata to JSON: %v", err)) + } + if err := json.Unmarshal(data, &metadata); err != nil { + panic(fmt.Sprintf("failed to unmarshal JSON to PluginMeta: %v", err)) + } + } else { + panic("metadata cannot be null or undefined") + } + + pluginV := semver.MustParse(metadata.Version) + if pluginV.LT(MinimumParserVersion) || pluginV.GT(LatestParserVersion) { + panic(fmt.Sprintf("parser version %s is not supported, must be between %s and %s", metadata.Version, MinimumParserVersion, LatestParserVersion)) + } + + handleFn := obj.Get("canHandle") + parseFn := obj.Get("parse") + if parseFn == nil || goja.IsUndefined(parseFn) { + panic("parser must provide a parse function") + } + + parsers = append(parsers, newJSParser(vm, handleFn, parseFn, metadata)) + return goja.Undefined() + } +} + +func LoadPlugins(ctx context.Context, dir string) error { + entries, err := os.ReadDir(dir) + if err != nil { + return err + } + + for _, e := range entries { + if filepath.Ext(e.Name()) != ".js" { + continue + } + scriptPath := filepath.Join(dir, e.Name()) + code, err := os.ReadFile(scriptPath) + if err != nil { + return err + } + + vm := goja.New() + logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", e.Name())) + vm.Set("registerParser", registerParser(vm)) + vm.Set("console", map[string]any{ + "log": func(args ...any) { + logger.Info(fmt.Sprint(args...)) + }, + }) + + if _, err := vm.RunString(string(code)); err != nil { + return fmt.Errorf("error loading plugin %s: %w", e.Name(), err) + } + } + return nil +} diff --git a/parsers/parser.go b/parsers/parser.go new file mode 100644 index 0000000..d2e40aa --- /dev/null +++ b/parsers/parser.go @@ -0,0 +1,65 @@ +package parsers + +import ( + "context" + "fmt" + "sync" + + "github.com/krau/SaveAny-Bot/parsers/twitter" + "github.com/krau/SaveAny-Bot/pkg/parser" +) + +var ( + parsers []parser.Parser + parsersMu sync.Mutex +) + +func GetParsers() []parser.Parser { + parsersMu.Lock() + defer parsersMu.Unlock() + return parsers +} + +func AddParser(p parser.Parser) { + parsersMu.Lock() + defer parsersMu.Unlock() + parsers = append(parsers, p) +} + +func init() { + AddParser(new(twitter.TwitterParser)) +} + +var ( + ErrNoParserFound = fmt.Errorf("no parser found for the given URL") +) + +func ParseWithContext(ctx context.Context, url string) (*parser.Item, error) { + ch := make(chan *parser.Item, 1) + errCh := make(chan error, 1) + + go func() { + for _, pser := range parsers { + if !pser.CanHandle(url) { + continue + } + item, err := pser.Parse(url) + if err != nil { + errCh <- err + return + } + ch <- item + return + } + errCh <- ErrNoParserFound + }() + + select { + case item := <-ch: + return item, nil + case err := <-errCh: + return nil, err + case <-ctx.Done(): + return nil, ctx.Err() + } +} diff --git a/parsers/twitter/parser.go b/parsers/twitter/parser.go new file mode 100644 index 0000000..db09d97 --- /dev/null +++ b/parsers/twitter/parser.go @@ -0,0 +1,83 @@ +package twitter + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + "path" + "regexp" + "strings" + + "github.com/krau/SaveAny-Bot/pkg/parser" +) + +type TwitterParser struct { + client http.Client +} + +const ( + FxTwitterApi = "api.fxtwitter.com" +) + +var _ parser.Parser = (*TwitterParser)(nil) + +var ( + twitterSourceURLRegexp *regexp.Regexp = regexp.MustCompile(`(?:twitter|x)\.com/([^/]+)/status/(\d+)`) +) + +func getTweetID(sourceURL string) string { + matches := twitterSourceURLRegexp.FindStringSubmatch(sourceURL) + if len(matches) < 3 { + return "" + } + return matches[2] +} + +func (p *TwitterParser) Parse(u string) (*parser.Item, error) { + id := getTweetID(u) + if id == "" { + return nil, errors.New("invalid Twitter URL") + } + apiUrl := fmt.Sprintf("https://%s/_/status/%s", FxTwitterApi, id) + resp, err := p.client.Get(apiUrl) + if err != nil { + return nil, fmt.Errorf("failed to fetch Twitter API: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to fetch Twitter API, status code: %d", resp.StatusCode) + } + var fxResp FxTwitterApiResp + if err := json.NewDecoder(resp.Body).Decode(&fxResp); err != nil { + return nil, fmt.Errorf("failed to decode Twitter API response: %w", err) + } + if fxResp.Code != 200 { + return nil, fmt.Errorf("request twitter API error: %s", fxResp.Message) + } + if len(fxResp.Tweet.Media.All) == 0 { + return nil, errors.New("no media found in the tweet") + } + resources := make([]parser.Resource, 0, len(fxResp.Tweet.Media.All)) + for _, media := range fxResp.Tweet.Media.All { + resources = append(resources, parser.Resource{ + URL: media.URL, + Filename: path.Base(strings.Split(media.URL, "?")[0]), + }) + } + item := &parser.Item{ + Site: "Twitter", + Title: fmt.Sprintf("Tweet/%s", id), + URL: fxResp.Tweet.URL, + Description: fxResp.Tweet.Text, + Author: fxResp.Tweet.Author.Name, + Tags: make([]string, 0), + Extra: make(map[string]any), + Resources: resources, + } + return item, nil +} + +func (p *TwitterParser) CanHandle(u string) bool { + return twitterSourceURLRegexp.MatchString(u) +} diff --git a/parsers/twitter/types.go b/parsers/twitter/types.go new file mode 100644 index 0000000..255a31e --- /dev/null +++ b/parsers/twitter/types.go @@ -0,0 +1,122 @@ +package twitter + +// type AutoGenerated struct { +// Code int `json:"code"` +// Message string `json:"message"` +// Tweet struct { +// URL string `json:"url"` +// ID string `json:"id"` +// Text string `json:"text"` +// RawText struct { +// Text string `json:"text"` +// Facets []struct { +// Type string `json:"type"` +// Indices []int `json:"indices"` +// Original string `json:"original"` +// ID string `json:"id,omitempty"` +// Display string `json:"display,omitempty"` +// Replacement string `json:"replacement,omitempty"` +// } `json:"facets"` +// } `json:"raw_text"` +// Author struct { +// ID string `json:"id"` +// Name string `json:"name"` +// ScreenName string `json:"screen_name"` +// AvatarURL string `json:"avatar_url"` +// BannerURL interface{} `json:"banner_url"` +// Description string `json:"description"` +// Location string `json:"location"` +// URL string `json:"url"` +// Followers int `json:"followers"` +// Following int `json:"following"` +// Joined string `json:"joined"` +// Likes int `json:"likes"` +// MediaCount int `json:"media_count"` +// Protected bool `json:"protected"` +// Website struct { +// URL string `json:"url"` +// DisplayURL string `json:"display_url"` +// } `json:"website"` +// Tweets int `json:"tweets"` +// AvatarColor interface{} `json:"avatar_color"` +// } `json:"author"` +// Replies int `json:"replies"` +// Retweets int `json:"retweets"` +// Likes int `json:"likes"` +// Bookmarks int `json:"bookmarks"` +// CreatedAt string `json:"created_at"` +// CreatedTimestamp int `json:"created_timestamp"` +// PossiblySensitive bool `json:"possibly_sensitive"` +// Views int `json:"views"` +// IsNoteTweet bool `json:"is_note_tweet"` +// CommunityNote interface{} `json:"community_note"` +// Lang string `json:"lang"` +// ReplyingTo interface{} `json:"replying_to"` +// ReplyingToStatus interface{} `json:"replying_to_status"` +// Media struct { +// All []struct { +// URL string `json:"url"` +// ThumbnailURL string `json:"thumbnail_url"` +// Duration int `json:"duration"` +// Width int `json:"width"` +// Height int `json:"height"` +// Format string `json:"format"` +// Type string `json:"type"` +// Variants []struct { +// Bitrate int `json:"bitrate"` +// ContentType string `json:"content_type"` +// URL string `json:"url"` +// } `json:"variants"` +// } `json:"all"` +// Photos []struct { +// Type string `json:"type"` +// URL string `json:"url"` +// Width int `json:"width"` +// Height int `json:"height"` +// } `json:"photos"` +// Videos []struct { +// URL string `json:"url"` +// ThumbnailURL string `json:"thumbnail_url"` +// Duration int `json:"duration"` +// Width int `json:"width"` +// Height int `json:"height"` +// Format string `json:"format"` +// Type string `json:"type"` +// Variants []struct { +// Bitrate int `json:"bitrate"` +// ContentType string `json:"content_type"` +// URL string `json:"url"` +// } `json:"variants"` +// } `json:"videos"` +// } `json:"media"` +// Source string `json:"source"` +// TwitterCard string `json:"twitter_card"` +// Color interface{} `json:"color"` +// Provider string `json:"provider"` +// } `json:"tweet"` +// } + +type FxTwitterApiResp struct { + Code int `json:"code"` + Message string `json:"message"` + Tweet struct { + URL string `json:"url"` + ID string `json:"id"` + Text string `json:"text"` + Author struct { + ID string `json:"id"` + Name string `json:"name"` + ScreenName string `json:"screen_name"` + Protected bool `json:"protected"` + } `json:"author"` + PossiblySensitive bool `json:"possibly_sensitive"` + IsNoteTweet bool `json:"is_note_tweet"` + Lang string `json:"lang"` + Media struct { + All []struct { + URL string `json:"url"` + Type string `json:"type"` + } `json:"all"` + } `json:"media"` + } `json:"tweet"` +} diff --git a/pkg/enums/tasktype/tasktype.go b/pkg/enums/tasktype/tasktype.go index 26a3239..5621674 100644 --- a/pkg/enums/tasktype/tasktype.go +++ b/pkg/enums/tasktype/tasktype.go @@ -1,5 +1,5 @@ package tasktype //go:generate go-enum --values --names --flag --nocase -// ENUM(tgfiles,tphpics) +// ENUM(tgfiles,tphpics,parseditem) type TaskType string diff --git a/pkg/enums/tasktype/tasktype_enum.go b/pkg/enums/tasktype/tasktype_enum.go index f8c117f..09e5396 100644 --- a/pkg/enums/tasktype/tasktype_enum.go +++ b/pkg/enums/tasktype/tasktype_enum.go @@ -16,6 +16,8 @@ const ( TaskTypeTgfiles TaskType = "tgfiles" // TaskTypeTphpics is a TaskType of type tphpics. TaskTypeTphpics TaskType = "tphpics" + // TaskTypeParseditem is a TaskType of type parseditem. + TaskTypeParseditem TaskType = "parseditem" ) var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Join(_TaskTypeNames, ", ")) @@ -23,6 +25,7 @@ var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Jo var _TaskTypeNames = []string{ string(TaskTypeTgfiles), string(TaskTypeTphpics), + string(TaskTypeParseditem), } // TaskTypeNames returns a list of possible string values of TaskType. @@ -37,6 +40,7 @@ func TaskTypeValues() []TaskType { return []TaskType{ TaskTypeTgfiles, TaskTypeTphpics, + TaskTypeParseditem, } } @@ -53,8 +57,9 @@ func (x TaskType) IsValid() bool { } var _TaskTypeValue = map[string]TaskType{ - "tgfiles": TaskTypeTgfiles, - "tphpics": TaskTypeTphpics, + "tgfiles": TaskTypeTgfiles, + "tphpics": TaskTypeTphpics, + "parseditem": TaskTypeParseditem, } // ParseTaskType attempts to convert a string to a TaskType. diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go new file mode 100644 index 0000000..71e3153 --- /dev/null +++ b/pkg/parser/parser.go @@ -0,0 +1,63 @@ +package parser + +import ( + "crypto/md5" + "fmt" +) + +type Parser interface { + CanHandle(url string) bool + Parse(url string) (*Item, error) +} + +// Resource is a single downloadable resource with metadata. +type Resource struct { + URL string `json:"url"` + Filename string `json:"filename"` // with ext + MimeType string `json:"mime_type"` + Extension string `json:"extension"` + Size int64 `json:"size"` // 0 when unknown + Hash map[string]string `json:"hash"` // {"md5": "...", "sha256": "..."} + Headers map[string]string `json:"headers"` // HTTP headers when downloading + Extra map[string]any `json:"extra"` +} + +type Item struct { + Site string `json:"site"` + URL string `json:"url"` // original URL of the item + Title string `json:"title"` + Author string `json:"author"` + Description string `json:"description"` + Tags []string `json:"tags"` + Resources []Resource `json:"resources"` + Extra map[string]any `json:"extra"` +} + +func (r *Resource) FileName() string { + return r.Filename +} + +func (r *Resource) FileSize() int64 { + return r.Size +} + +func (r *Resource) ID() string { + h := md5.New() + h.Write([]byte(r.URL)) + h.Write([]byte(r.Filename)) + h.Write([]byte(r.MimeType)) + h.Write([]byte(r.Extension)) + h.Write([]byte(fmt.Sprintf("%d", r.Size))) + + for k, v := range r.Hash { + h.Write([]byte(k)) + h.Write([]byte(v)) + } + + for k, v := range r.Headers { + h.Write([]byte(k)) + h.Write([]byte(v)) + } + + return fmt.Sprintf("%x", h.Sum(nil)) +} diff --git a/pkg/tcbdata/data.go b/pkg/tcbdata/data.go index a256e8c..8b427ce 100644 --- a/pkg/tcbdata/data.go +++ b/pkg/tcbdata/data.go @@ -2,6 +2,7 @@ package tcbdata import ( "github.com/krau/SaveAny-Bot/pkg/enums/tasktype" + "github.com/krau/SaveAny-Bot/pkg/parser" "github.com/krau/SaveAny-Bot/pkg/telegraph" "github.com/krau/SaveAny-Bot/pkg/tfile" ) @@ -26,6 +27,7 @@ const ( // } type Add struct { + // [TODO] maybe we should to spilit this into different types... TaskType tasktype.TaskType SelectedStorName string DirID uint @@ -37,6 +39,8 @@ type Add struct { TphPageNode *telegraph.Page TphPics []string TphDirPath string // unescaped telegraph.Page.Path + // parseditem + ParsedItem *parser.Item } type SetDefaultStorage struct { diff --git a/plugins/example_parser.js b/plugins/example_parser.js new file mode 100644 index 0000000..a7b813d --- /dev/null +++ b/plugins/example_parser.js @@ -0,0 +1,70 @@ +// 这是一个示例解析器插件, 模拟处理 YouTube 的视频链接 + +// 你可以使用 console.log 来在终端中使用 go 的 logger 打印信息 +console.log("Example parser loaded"); + +/** + * 插件元数据 + * 版本号是 saveany-bot 本体支持的插件规范版本号, 必须提供 + */ +const metadata = { + name: "Example Parser", // 插件名称 + version: "1.0.0", // 插件版本号 + description: "A parser for example links", // 插件描述 + author: "Krau", // 插件作者 +} + +/** + * canHandle 函数用于判断当前解析器能否解析给定的 URL + */ +const canHandle = function (url) { + // 这里我们简单地检查 URL 是否包含 "youtube.com/watch?v" + return url.includes("youtube.com/watch?v"); +} + + +/** + * 解析 url 并返回一个 Item 对象, 类型定义在 pkg/parser.go 中 + */ +const parse = function (url) { + var result = { + // 元信息 + site: "YouTube", + url: url, + title: "测试 YouTube 视频", + author: "某视频作者", + description: "这是一个测试视频", + tags: ["test", "youtube"], + // 资源(可下载的文件)列表 + resources: [ + { + url: "https://example.com/video1.mp4", // 文件直链 + filename: "somevideo.mp4", // 文件名 + mime_type: "video/mp4", // 文件 MIME 类型, 可选 + extension: "mp4", // 文件扩展名, 可选 + size: 100 * 1024 * 1024, // 文件大小, 单位为字节, 未知可以设置为 0 + hash: {}, // 文件哈希, 可选, 格式为 {"md5": "xxx", "sha256": "xxx"} 等 + headers: {}, // 下载文件时所需的 HTTP 头部, 可选, 例如 {"User-Agent": "Mozilla/5.0"} + extra: {} // 额外信息, 可选, 可以包含任何自定义数据 + }, + { + url: "https://example.com/picture1.png", + filename: "picture1.png", + mime_type: "image/png", + extension: "png", + size: 1 * 1024 * 1024, + hash: {}, + headers: {}, + extra: {} + } + ], + extra: {} + }; + return result; +} + +registerParser({ + metadata, + canHandle, + parse +}); \ No newline at end of file