feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: core code struct and impl parse task handle

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: twitter parse video json decode error

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

2
.gitignore vendored
View File

@@ -7,4 +7,4 @@ session.*
cache.db cache.db
.vscode/ .vscode/
temp/ temp/
.hugo_build.lock .hugo_build.lock

View File

@@ -72,7 +72,9 @@ func handleAddCallback(ctx *ext.Context, update *ext.Update) error {
} }
return shortcut.CreateAndAddTGFileTaskWithEdit(ctx, userID, selectedStorage, dirPath, data.Files[0], msgID) return shortcut.CreateAndAddTGFileTaskWithEdit(ctx, userID, selectedStorage, dirPath, data.Files[0], msgID)
case tasktype.TaskTypeTphpics: case tasktype.TaskTypeTphpics:
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID) return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, data.TphPageNode, data.TphDirPath, data.TphPics, selectedStorage, msgID)
case tasktype.TaskTypeParseditem:
shortcut.CreateAndAddParsedTaskWithEdit(ctx, selectedStorage, dirPath, data.ParsedItem, msgID, userID)
default: default:
log.FromContext(ctx).Errorf("Unsupported task type: %s", data.TaskType) log.FromContext(ctx).Errorf("Unsupported task type: %s", data.TaskType)
} }

View File

@@ -0,0 +1,104 @@
// 处理任意文本消息, 用于通用地从外部源下载文件
package handlers
import (
"errors"
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
"github.com/charmbracelet/log"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut"
"github.com/krau/SaveAny-Bot/parsers"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
"github.com/krau/SaveAny-Bot/storage"
)
func handleTextMessage(ctx *ext.Context, u *ext.Update) error {
logger := log.FromContext(ctx)
text := u.EffectiveMessage.Text
item, err := parsers.ParseWithContext(ctx, text)
if errors.Is(err, parsers.ErrNoParserFound) {
return dispatcher.EndGroups
}
if err != nil {
logger.Error("Failed to parse text", "error", err)
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
return dispatcher.EndGroups
}
logger.Debug("Parsed item from text message", "text", text, "item", item)
userID := u.GetUserChat().GetID()
markup, err := msgelem.BuildAddSelectStorageKeyboard(storage.GetUserStorages(ctx, userID), tcbdata.Add{
TaskType: tasktype.TaskTypeParseditem,
ParsedItem: item,
})
if err != nil {
logger.Errorf("Failed to build storage selection keyboard: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build storage selection keyboard: "+err.Error()), nil)
return dispatcher.EndGroups
}
text, entities, err := msgelem.BuildParsedTextEntity(*item)
if err != nil {
logger.Errorf("Failed to build parsed text entity: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
return dispatcher.EndGroups
}
ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
Message: text,
ReplyMarkup: markup,
Entities: entities,
ReplyTo: &tg.InputReplyToMessage{
ReplyToMsgID: u.EffectiveMessage.ID,
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
},
})
return dispatcher.EndGroups
}
func handleSilentSaveText(ctx *ext.Context, u *ext.Update) error {
logger := log.FromContext(ctx)
stor := storage.FromContext(ctx)
if stor == nil {
logger.Warn("Context storage is nil")
ctx.Reply(u, ext.ReplyTextString("未找到存储"), nil)
return dispatcher.EndGroups
}
text := u.EffectiveMessage.Text
if text == "" {
return dispatcher.EndGroups
}
item, err := parsers.ParseWithContext(ctx, text)
if errors.Is(err, parsers.ErrNoParserFound) {
return dispatcher.EndGroups
}
if err != nil {
logger.Error("Failed to parse text", "error", err)
ctx.Reply(u, ext.ReplyTextString("Failed to parse text: "+err.Error()), nil)
return dispatcher.EndGroups
}
logger.Debug("Parsed item from text message", "text", text, "item", item)
userID := u.GetUserChat().GetID()
text, entities, err := msgelem.BuildParsedTextEntity(*item)
if err != nil {
logger.Errorf("Failed to build parsed text entity: %s", err)
ctx.Reply(u, ext.ReplyTextString("Failed to build parsed text entity: "+err.Error()), nil)
return dispatcher.EndGroups
}
msg, err := ctx.SendMessage(userID, &tg.MessagesSendMessageRequest{
Message: text,
Entities: entities,
ReplyTo: &tg.InputReplyToMessage{
ReplyToMsgID: u.EffectiveMessage.ID,
ReplyToPeerID: u.GetUserChat().AsInputPeer(),
},
})
if err != nil {
logger.Errorf("Failed to send message: %s", err)
return dispatcher.EndGroups
}
return shortcut.CreateAndAddParsedTaskWithEdit(ctx, stor, "", item, msg.ID, userID)
}

View File

@@ -16,7 +16,7 @@ import (
"github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/config" "github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/core" "github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tftask" "github.com/krau/SaveAny-Bot/core/tasks/tfile"
"github.com/krau/SaveAny-Bot/database" "github.com/krau/SaveAny-Bot/database"
"github.com/krau/SaveAny-Bot/pkg/tcbdata" "github.com/krau/SaveAny-Bot/pkg/tcbdata"
"github.com/krau/SaveAny-Bot/storage" "github.com/krau/SaveAny-Bot/storage"
@@ -54,6 +54,7 @@ func Register(disp dispatcher.Dispatcher) {
} }
disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph))) disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia))) disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia)))
disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText)))
if config.Cfg.Telegram.Userbot.Enable { if config.Cfg.Telegram.Userbot.Enable {
go listenMediaMessageEvent(userclient.GetMediaMessageCh()) go listenMediaMessageEvent(userclient.GetMediaMessageCh())
@@ -122,7 +123,7 @@ func listenMediaMessageEvent(ch chan userclient.MediaMessageEvent) {
storagePath := stor.JoinStoragePath(path.Join(dirPath, file.Name())) storagePath := stor.JoinStoragePath(path.Join(dirPath, file.Name()))
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
taskid := xid.New().String() taskid := xid.New().String()
task, err := tftask.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil) task, err := tfile.NewTGFileTask(taskid, injectCtx, file, stor, storagePath, nil)
if err != nil { if err != nil {
logger.Errorf("create task failed: %s", err) logger.Errorf("create task failed: %s", err)
continue continue

View File

@@ -71,6 +71,6 @@ func handleSilentSaveTelegraph(ctx *ext.Context, update *ext.Update) error {
return err return err
} }
userID := update.GetUserChat().GetID() userID := update.GetUserChat().GetID()
return shortcut.CreateAndAddTphTaskWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID) return shortcut.CreateAndAddtelegraphWithEdit(ctx, userID, result.Page, result.TphDir, result.Pics, stor, msg.ID)
} }

View File

@@ -0,0 +1,38 @@
package msgelem
import (
"fmt"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
func BuildParsedTextEntity(item parser.Item) (string, []tg.MessageEntityClass, error) {
eb := entity.Builder{}
if err := styling.Perform(&eb,
styling.Bold(fmt.Sprintf("[%s]%s", item.Site, item.Title)),
styling.Plain("\n链接: "),
styling.Code(item.URL),
styling.Plain("\n作者: "),
styling.Code(item.Author),
styling.Plain("\n描述: "),
styling.Code(item.Description),
styling.Plain("\n文件数量: "),
styling.Code(fmt.Sprintf("%d", len(item.Resources))),
styling.Plain("\n预计总大小: "),
styling.Code(fmt.Sprintf("%.2f MB", func() float64 {
var totalSize int64
for _, res := range item.Resources {
totalSize += res.Size
}
return float64(totalSize) / 1024 / 1024
}())),
styling.Plain("\n请选择存储位置"),
); err != nil {
return "", nil, fmt.Errorf("构建消息失败: %w", err)
}
text, entities := eb.Complete()
return text, entities, nil
}

View File

@@ -24,6 +24,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
taskType = tasktype.TaskTypeTgfiles taskType = tasktype.TaskTypeTgfiles
} else if adddata.TphPageNode != nil { } else if adddata.TphPageNode != nil {
taskType = tasktype.TaskTypeTphpics taskType = tasktype.TaskTypeTphpics
} else if adddata.ParsedItem != nil {
taskType = tasktype.TaskTypeParseditem
} else { } else {
return nil, fmt.Errorf("unknown task type: %s", taskType) return nil, fmt.Errorf("unknown task type: %s", taskType)
} }
@@ -41,6 +43,8 @@ func BuildAddSelectStorageKeyboard(stors []storage.Storage, adddata tcbdata.Add)
TphPageNode: adddata.TphPageNode, TphPageNode: adddata.TphPageNode,
TphPics: adddata.TphPics, TphPics: adddata.TphPics,
TphDirPath: adddata.TphDirPath, TphDirPath: adddata.TphDirPath,
ParsedItem: adddata.ParsedItem,
} }
dataid := xid.New().String() dataid := xid.New().String()
err := cache.Set(dataid, data) err := cache.Set(dataid, data)

View File

@@ -32,8 +32,7 @@ func GetFileFromMessageWithReply(ctx *ext.Context, update *ext.Update, message *
media := message.Media media := message.Media
supported := mediautil.IsSupported(media) supported := mediautil.IsSupported(media)
if !supported { if !supported {
ctx.Reply(update, ext.ReplyTextString("不支持的消息类型"), nil) return nil, nil, dispatcher.ContinueGroups
return nil, nil, dispatcher.EndGroups
} }
replied, err = ctx.Reply(update, ext.ReplyTextString("正在获取文件信息..."), nil) replied, err = ctx.Reply(update, ext.ReplyTextString("正在获取文件信息..."), nil)

View File

@@ -0,0 +1,35 @@
package shortcut
import (
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
"github.com/charmbracelet/log"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tasks/parsed"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid"
)
func CreateAndAddParsedTaskWithEdit(ctx *ext.Context, stor storage.Storage, dirPath string, item *parser.Item, msgID int, userID int64) error {
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
task := parsed.NewTask(xid.New().String(), injectCtx, stor, stor.JoinStoragePath(dirPath), item, parsed.NewProgress(msgID, userID))
if err := core.AddTask(injectCtx, task); err != nil {
log.FromContext(ctx).Errorf("Failed to add task: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
ID: msgID,
Message: "任务添加失败: " + err.Error(),
})
return dispatcher.EndGroups
}
text, entities := msgelem.BuildTaskAddedEntities(ctx, item.Title, core.GetLength(ctx))
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
ID: msgID,
Message: text,
Entities: entities,
})
return dispatcher.EndGroups
}

View File

@@ -13,15 +13,15 @@ import (
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil" "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/core" "github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/batchtftask" "github.com/krau/SaveAny-Bot/core/tasks/batchtfile"
"github.com/krau/SaveAny-Bot/core/tftask" tftask "github.com/krau/SaveAny-Bot/core/tasks/tfile"
"github.com/krau/SaveAny-Bot/database" "github.com/krau/SaveAny-Bot/database"
"github.com/krau/SaveAny-Bot/pkg/tfile" "github.com/krau/SaveAny-Bot/pkg/tfile"
"github.com/krau/SaveAny-Bot/storage" "github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid" "github.com/rs/xid"
) )
// 创建一个 tftask.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 // 创建一个 tfile.TGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, file tfile.TGFileMessage, trackMsgID int) error { func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, file tfile.TGFileMessage, trackMsgID int) error {
logger := log.FromContext(ctx) logger := log.FromContext(ctx)
user, err := database.GetUserByChatID(ctx, userID) user, err := database.GetUserByChatID(ctx, userID)
@@ -82,7 +82,7 @@ func CreateAndAddTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage
return dispatcher.EndGroups return dispatcher.EndGroups
} }
// 创建一个 batchtftask.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果 // 创建一个 batchtfile.BatchTGFileTask 并添加到任务队列中, 以编辑消息的方式反馈结果
func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, files []tfile.TGFileMessage, trackMsgID int) error { func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor storage.Storage, dirPath string, files []tfile.TGFileMessage, trackMsgID int) error {
logger := log.FromContext(ctx) logger := log.FromContext(ctx)
user, err := database.GetUserByChatID(ctx, userID) user, err := database.GetUserByChatID(ctx, userID)
@@ -110,7 +110,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
return storname, dirP return storname, dirP
} }
elems := make([]batchtftask.TaskElement, 0, len(files)) elems := make([]batchtfile.TaskElement, 0, len(files))
type albumFile struct { type albumFile struct {
file tfile.TGFileMessage file tfile.TGFileMessage
storage storage.Storage storage storage.Storage
@@ -132,7 +132,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
} }
if !dirPath.NeedNewForAlbum() { if !dirPath.NeedNewForAlbum() {
storPath := fileStor.JoinStoragePath(path.Join(dirPath.String(), file.Name())) storPath := fileStor.JoinStoragePath(path.Join(dirPath.String(), file.Name()))
elem, err := batchtftask.NewTaskElement(fileStor, storPath, file) elem, err := batchtfile.NewTaskElement(fileStor, storPath, file)
if err != nil { if err != nil {
logger.Errorf("Failed to create task element: %s", err) logger.Errorf("Failed to create task element: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
@@ -167,7 +167,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
albumStor := afiles[0].storage albumStor := afiles[0].storage
for _, af := range afiles { for _, af := range afiles {
afstorPath := af.storage.JoinStoragePath(path.Join(dirPath, albumDir, af.file.Name())) afstorPath := af.storage.JoinStoragePath(path.Join(dirPath, albumDir, af.file.Name()))
elem, err := batchtftask.NewTaskElement(albumStor, afstorPath, af.file) elem, err := batchtfile.NewTaskElement(albumStor, afstorPath, af.file)
if err != nil { if err != nil {
logger.Errorf("Failed to create task element for album file: %s", err) logger.Errorf("Failed to create task element for album file: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{
@@ -182,7 +182,7 @@ func CreateAndAddBatchTGFileTaskWithEdit(ctx *ext.Context, userID int64, stor st
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
taskid := xid.New().String() taskid := xid.New().String()
task := batchtftask.NewBatchTGFileTask(taskid, injectCtx, elems, batchtftask.NewProgressTracker(trackMsgID, userID), true) task := batchtfile.NewBatchTGFileTask(taskid, injectCtx, elems, batchtfile.NewProgressTracker(trackMsgID, userID), true)
if err := core.AddTask(injectCtx, task); err != nil { if err := core.AddTask(injectCtx, task); err != nil {
logger.Errorf("Failed to add batch task: %s", err) logger.Errorf("Failed to add batch task: %s", err)
ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{ ctx.EditMessage(userID, &tg.MessagesEditMessageRequest{

View File

@@ -9,19 +9,21 @@ import (
"github.com/krau/SaveAny-Bot/common/utils/tgutil" "github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/common/utils/tphutil" "github.com/krau/SaveAny-Bot/common/utils/tphutil"
"github.com/krau/SaveAny-Bot/core" "github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/core/tphtask" tphtask "github.com/krau/SaveAny-Bot/core/tasks/telegraph"
"github.com/krau/SaveAny-Bot/pkg/telegraph" "github.com/krau/SaveAny-Bot/pkg/telegraph"
"github.com/krau/SaveAny-Bot/storage" "github.com/krau/SaveAny-Bot/storage"
"github.com/rs/xid" "github.com/rs/xid"
) )
func CreateAndAddTphTaskWithEdit(ctx *ext.Context, func CreateAndAddtelegraphWithEdit(
ctx *ext.Context,
userID int64, userID int64,
tphpage *telegraph.Page, tphpage *telegraph.Page,
dirPath string, // unescaped ph path for file storage dirPath string, // unescaped ph path for file storage
pics []string, pics []string,
stor storage.Storage, stor storage.Storage,
trackMsgID int) error { trackMsgID int) error {
injectCtx := tgutil.ExtWithContext(ctx.Context, ctx) injectCtx := tgutil.ExtWithContext(ctx.Context, ctx)
task := tphtask.NewTask(xid.New().String(), task := tphtask.NewTask(xid.New().String(),
injectCtx, injectCtx,

View File

@@ -19,6 +19,7 @@ import (
"github.com/krau/SaveAny-Bot/config" "github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/core" "github.com/krau/SaveAny-Bot/core"
"github.com/krau/SaveAny-Bot/database" "github.com/krau/SaveAny-Bot/database"
"github.com/krau/SaveAny-Bot/parsers"
"github.com/krau/SaveAny-Bot/storage" "github.com/krau/SaveAny-Bot/storage"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@@ -53,6 +54,15 @@ func initAll(ctx context.Context) {
logger.Info(i18n.T(i18nk.Initing)) logger.Info(i18n.T(i18nk.Initing))
database.Init(ctx) database.Init(ctx)
storage.LoadStorages(ctx) storage.LoadStorages(ctx)
if config.Cfg.Parser.PluginEnable {
for _, dir := range config.Cfg.Parser.PluginDirs {
if err := parsers.LoadPlugins(ctx, dir); err != nil {
logger.Error("Failed to load parser plugins", "dir", dir, "error", err)
} else {
logger.Debug("Loaded parser plugins", "dir", dir)
}
}
}
if config.Cfg.Telegram.Userbot.Enable { if config.Cfg.Telegram.Userbot.Enable {
_, err := userclient.Login(ctx) _, err := userclient.Login(ctx)
if err != nil { if err != nil {

6
config/parser.go Normal file
View File

@@ -0,0 +1,6 @@
package config
type parserConfig struct {
PluginEnable bool `toml:"plugin_enable" mapstructure:"plugin_enable" json:"plugin_enable"`
PluginDirs []string `toml:"plugin_dirs" mapstructure:"plugin_dirs" json:"plugin_dirs"`
}

View File

@@ -6,7 +6,7 @@ type telegramConfig struct {
AppHash string `toml:"app_hash" mapstructure:"app_hash" json:"app_hash"` AppHash string `toml:"app_hash" mapstructure:"app_hash" json:"app_hash"`
Proxy tgProxyConfig `toml:"proxy" mapstructure:"proxy"` Proxy tgProxyConfig `toml:"proxy" mapstructure:"proxy"`
RpcRetry int `toml:"rpc_retry" mapstructure:"rpc_retry" json:"rpc_retry"` RpcRetry int `toml:"rpc_retry" mapstructure:"rpc_retry" json:"rpc_retry"`
Userbot userbotConfig `toml:"userbot" mapstructure:"userbot" json:"userbot"` // [TODO] Userbot userbotConfig `toml:"userbot" mapstructure:"userbot" json:"userbot"`
} }
type userbotConfig struct { type userbotConfig struct {

View File

@@ -28,6 +28,7 @@ type Config struct {
DB dbConfig `toml:"db" mapstructure:"db"` DB dbConfig `toml:"db" mapstructure:"db"`
Telegram telegramConfig `toml:"telegram" mapstructure:"telegram"` Telegram telegramConfig `toml:"telegram" mapstructure:"telegram"`
Storages []storage.StorageConfig `toml:"-" mapstructure:"-" json:"storages"` Storages []storage.StorageConfig `toml:"-" mapstructure:"-" json:"storages"`
Parser parserConfig `toml:"parser" mapstructure:"parser" json:"parser"`
Hook hookConfig `toml:"hook" mapstructure:"hook" json:"hook"` Hook hookConfig `toml:"hook" mapstructure:"hook" json:"hook"`
} }

View File

@@ -1,4 +1,4 @@
package batchtftask package batchtfile
import ( import (
"context" "context"
@@ -25,14 +25,19 @@ func (t *Task) Execute(ctx context.Context) error {
eg, gctx := errgroup.WithContext(ctx) eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(workers) eg.SetLimit(workers)
for _, elem := range t.Elems { for _, elem := range t.Elems {
elem := elem
eg.Go(func() error { eg.Go(func() error {
t.processingMu.RLock()
if t.processing[elem.ID] != nil { if t.processing[elem.ID] != nil {
return fmt.Errorf("element with ID %s is already being processed", elem.ID) return fmt.Errorf("element with ID %s is already being processed", elem.ID)
} }
t.processingMu.RUnlock()
t.processingMu.Lock()
t.processing[elem.ID] = &elem t.processing[elem.ID] = &elem
t.processingMu.Unlock()
defer func() { defer func() {
t.processingMu.Lock()
delete(t.processing, elem.ID) delete(t.processing, elem.ID)
t.processingMu.Unlock()
}() }()
return t.processElement(gctx, elem) return t.processElement(gctx, elem)
}) })

View File

@@ -1,4 +1,4 @@
package batchtftask package batchtfile
import ( import (
"context" "context"

View File

@@ -1,9 +1,10 @@
package batchtftask package batchtfile
import ( import (
"context" "context"
"fmt" "fmt"
"path/filepath" "path/filepath"
"sync"
"sync/atomic" "sync/atomic"
"github.com/krau/SaveAny-Bot/config" "github.com/krau/SaveAny-Bot/config"
@@ -31,7 +32,8 @@ type Task struct {
downloaded atomic.Int64 downloaded atomic.Int64
totalSize int64 totalSize int64
processing map[string]TaskElementInfo processing map[string]TaskElementInfo
failed map[string]error // errors for each element processingMu sync.RWMutex
failed map[string]error // [TODO] errors for each element
} }
func (t *Task) Type() tasktype.TaskType { func (t *Task) Type() tasktype.TaskType {
@@ -89,6 +91,7 @@ func NewBatchTGFileTask(
}(), }(),
processing: make(map[string]TaskElementInfo), processing: make(map[string]TaskElementInfo),
IgnoreErrors: ignoreErrors, IgnoreErrors: ignoreErrors,
processingMu: sync.RWMutex{},
failed: make(map[string]error), failed: make(map[string]error),
} }
return task return task

View File

@@ -1,4 +1,4 @@
package batchtftask package batchtfile
type TaskElementInfo interface { type TaskElementInfo interface {
FileName() string FileName() string

View File

@@ -1,4 +1,4 @@
package batchtftask package batchtfile
var progressUpdatesLevels = []struct { var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值 size int64 // 文件大小阈值

View File

@@ -0,0 +1,139 @@
package parsed
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"path"
"path/filepath"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/retry"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/common/utils/ioutil"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/ctxkey"
"github.com/krau/SaveAny-Bot/pkg/parser"
"golang.org/x/sync/errgroup"
)
func (t *Task) Execute(ctx context.Context) error {
logger := log.FromContext(ctx)
logger.Infof("Starting Parsed item task %s", t.item.Title)
if t.progress != nil {
t.progress.OnStart(ctx, t)
}
eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(config.Cfg.Workers)
for _, resource := range t.item.Resources {
eg.Go(func() error {
t.processingMu.RLock()
if t.processing[resource.ID()] != nil {
return fmt.Errorf("resource %s is already being processed", resource.ID())
}
t.processingMu.RUnlock()
t.processingMu.Lock()
t.processing[resource.ID()] = &resource
t.processingMu.Unlock()
defer func() {
t.processingMu.Lock()
delete(t.processing, resource.URL)
t.processingMu.Unlock()
}()
err := t.processResource(gctx, resource)
t.downloaded.Add(1)
if errors.Is(err, context.Canceled) {
logger.Debug("Resource processing canceled")
return err
}
if err != nil {
logger.Errorf("Error processing resource %s: %v", resource.URL, err)
return fmt.Errorf("failed to process resource %s: %w", resource.URL, err)
}
return nil
})
}
err := eg.Wait()
if err != nil {
logger.Errorf("Error during Parsed item task execution: %v", err)
} else {
logger.Infof("Parsed item task %s completed successfully", t.item.Title)
}
if t.progress != nil {
t.progress.OnDone(ctx, t, err)
}
return err
}
func (t *Task) processResource(ctx context.Context, resource parser.Resource) error {
logger := log.FromContext(ctx)
err := retry.Retry(func() error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, resource.URL, nil)
if err != nil {
return err
}
if resource.Headers != nil {
for k, v := range resource.Headers {
req.Header.Set(k, v)
}
}
resp, err := t.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to download resource %s: %w", resource.URL, err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("failed to download resource %s: %s", resource.URL, resp.Status)
}
ctx = context.WithValue(ctx, ctxkey.ContentLength, func() int64 {
if resource.Size > 0 {
return resource.Size
}
return resp.ContentLength
}())
if t.stream {
return t.Stor.Save(ctx, resp.Body, path.Join(t.StorPath, resource.Filename))
}
cacheFile, err := fsutil.CreateFile(filepath.Join(config.Cfg.Temp.BasePath,
fmt.Sprintf("resource_%s_%s", t.ID, resource.Filename)))
if err != nil {
return fmt.Errorf("failed to create cache file for resource %s: %w", resource.URL, err)
}
defer func() {
if err := cacheFile.CloseAndRemove(); err != nil {
logger.Errorf("Failed to close and remove cache file: %v", err)
}
}()
wr := ioutil.NewProgressWriter(cacheFile, func(n int) {
t.downloadedBytes.Add(int64(n))
if t.progress != nil {
t.progress.OnProgress(ctx, t)
}
})
copyResultCh := make(chan error, 1)
go func() {
_, err := io.Copy(wr, resp.Body)
copyResultCh <- err
}()
select {
case err := <-copyResultCh:
if err != nil {
return fmt.Errorf("failed to copy resource %s to cache file: %w", resource.URL, err)
}
case <-ctx.Done():
return ctx.Err()
}
_, err = cacheFile.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek cache file for resource %s: %w", resource.URL, err)
}
return t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, resource.Filename))
}, retry.Context(ctx), retry.RetryTimes(uint(config.Cfg.Retry)))
if ctx.Err() != nil {
return ctx.Err()
}
return err
}

View File

@@ -0,0 +1,209 @@
package parsed
import (
"context"
"errors"
"fmt"
"sync/atomic"
"time"
"github.com/charmbracelet/log"
"github.com/duke-git/lancet/v2/slice"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common/utils/dlutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值
stepPercent int // 每多少 % 更新一次
}{
{10 << 20, 100},
{50 << 20, 50},
{200 << 20, 20},
{500 << 20, 10},
}
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
if total <= 0 || downloaded <= 0 {
return false
}
percent := int((downloaded * 100) / total)
if percent <= lastUpdatePercent {
return false
}
step := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
for _, lvl := range progressUpdatesLevels {
if total < lvl.size {
step = lvl.stepPercent
break
}
}
return percent >= lastUpdatePercent+step
}
type ProgressTracker interface {
OnStart(ctx context.Context, info TaskInfo)
OnProgress(ctx context.Context, info TaskInfo)
OnDone(ctx context.Context, info TaskInfo, err error)
}
type Progress struct {
MessageID int
ChatID int64
start time.Time
lastUpdatePercent atomic.Int32
}
func (p *Progress) OnStart(ctx context.Context, info TaskInfo) {
logger := log.FromContext(ctx)
p.start = time.Now()
p.lastUpdatePercent.Store(0)
logger.Debugf("Parsed task progress tracking started for message %d in chat %d", p.MessageID, p.ChatID)
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain(fmt.Sprintf("开始下载 %s 的资源\n总大小: ", info.Site())),
styling.Code(fmt.Sprintf("%.2f MB (%d个资源)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnProgress(ctx context.Context, info TaskInfo) {
if !shouldUpdateProgress(info.TotalBytes(), info.DownloadedBytes(), int(p.lastUpdatePercent.Load())) {
return
}
percent := int((info.DownloadedBytes() * 100) / info.TotalBytes())
if p.lastUpdatePercent.Load() == int32(percent) {
return
}
p.lastUpdatePercent.Store(int32(percent))
log.FromContext(ctx).Debugf("Progress update: %s, %d/%d", info.TaskID(), info.DownloadedBytes(), info.TotalBytes())
entityBuilder := entity.Builder{}
var entities []tg.MessageEntityClass
if err := styling.Perform(&entityBuilder,
styling.Plain("正在下载\n总大小: "),
styling.Code(fmt.Sprintf("%.2f MB (%d个文件)", float64(info.TotalBytes())/(1024*1024), info.TotalResources())),
styling.Plain("\n正在处理:\n"),
func() styling.StyledTextOption {
var lines []string
for _, elem := range info.Processing() {
lines = append(lines, fmt.Sprintf(" - %s (%.2f MB)", elem.FileName(), float64(elem.FileSize())/(1024*1024)))
}
if len(lines) == 0 {
lines = append(lines, " - 无")
}
return styling.Plain(slice.Join(lines, "\n"))
}(),
styling.Plain("\n平均速度: "),
styling.Bold(fmt.Sprintf("%.2f MB/s", dlutil.GetSpeed(info.DownloadedBytes(), p.start)/(1024*1024))),
styling.Plain("\n当前进度: "),
styling.Bold(fmt.Sprintf("%.2f%%", float64(info.DownloadedBytes())/float64(info.TotalBytes())*100)),
); err != nil {
log.FromContext(ctx).Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
req.SetReplyMarkup(&tg.ReplyInlineMarkup{
Rows: []tg.KeyboardButtonRow{
{
Buttons: []tg.KeyboardButtonClass{
tgutil.BuildCancelButton(info.TaskID()),
},
},
}},
)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
return
}
}
func (p *Progress) OnDone(ctx context.Context, info TaskInfo, err error) {
logger := log.FromContext(ctx)
if err != nil {
if errors.Is(err, context.Canceled) {
logger.Infof("Parsed task %s was canceled", info.TaskID())
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理已取消: %s", info.TaskID()),
})
}
} else {
logger.Errorf("Parsed task %s failed: %s", info.TaskID(), err)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, &tg.MessagesEditMessageRequest{
ID: p.MessageID,
Message: fmt.Sprintf("处理失败: %s", err.Error()),
})
}
}
return
}
logger.Infof("Parsed task %s completed successfully", info.TaskID())
entityBuilder := entity.Builder{}
if err := styling.Perform(&entityBuilder,
styling.Plain("处理完成, 资源数量: "),
styling.Code(fmt.Sprintf("%d", info.TotalResources())),
styling.Plain("\n保存路径: "),
styling.Code(fmt.Sprintf("[%s]:%s", info.StorageName(), info.StoragePath())),
); err != nil {
logger.Errorf("Failed to build entities: %s", err)
return
}
text, entities := entityBuilder.Complete()
req := &tg.MessagesEditMessageRequest{
ID: p.MessageID,
}
req.SetMessage(text)
req.SetEntities(entities)
ext := tgutil.ExtFromContext(ctx)
if ext != nil {
ext.EditMessage(p.ChatID, req)
}
}
func NewProgress(messageID int, chatID int64) *Progress {
return &Progress{
MessageID: messageID,
ChatID: chatID,
}
}

84
core/tasks/parsed/task.go Normal file
View File

@@ -0,0 +1,84 @@
package parsed
import (
"context"
"net/http"
"sync"
"sync/atomic"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/storage"
)
type Task struct {
ID string
Ctx context.Context
Stor storage.Storage
StorPath string
item *parser.Item
httpClient *http.Client
progress ProgressTracker
stream bool
totalResources int64
downloaded atomic.Int64 // downloaded resources count
totalBytes int64 // total bytes to download
downloadedBytes atomic.Int64 // downloaded bytes count
processing map[string]ResourceInfo
processingMu sync.RWMutex
failed map[string]error // [TODO] errors for each resource
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeParseditem
}
func (t *Task) TaskID() string {
return t.ID
}
func NewTask(
id string,
ctx context.Context,
stor storage.Storage,
storPath string,
item *parser.Item,
progressTracker ProgressTracker,
) *Task {
client := &http.Client{
Transport: &http.Transport{
// [TODO] configure it via config
Proxy: http.ProxyFromEnvironment,
},
}
_, ok := stor.(storage.StorageCannotStream)
stream := config.Cfg.Stream && !ok
return &Task{
ID: id,
Ctx: ctx,
Stor: stor,
StorPath: storPath,
item: item,
totalResources: int64(len(item.Resources)),
downloaded: atomic.Int64{},
totalBytes: func() int64 {
var total int64
for _, res := range item.Resources {
if res.Size < 0 {
continue // skip resources with unknown size
}
total += res.Size
}
return total
}(),
stream: stream,
downloadedBytes: atomic.Int64{},
httpClient: client,
progress: progressTracker,
processing: make(map[string]ResourceInfo),
processingMu: sync.RWMutex{},
failed: make(map[string]error),
}
}

View File

@@ -0,0 +1,51 @@
package parsed
type TaskInfo interface {
TaskID() string
Site() string
TotalResources() int64
Downloaded() int64
TotalBytes() int64
DownloadedBytes() int64
Processing() map[string]ResourceInfo
StorageName() string
StoragePath() string
}
func (t *Task) StoragePath() string {
return t.StorPath
}
func (t *Task) TotalResources() int64 {
return t.totalResources
}
func (t *Task) Downloaded() int64 {
return t.downloaded.Load()
}
func (t *Task) StorageName() string {
return t.Stor.Name()
}
func (t *Task) Site() string {
return t.item.Site
}
func (t *Task) TotalBytes() int64 {
return t.totalBytes
}
func (t *Task) DownloadedBytes() int64 {
return t.downloadedBytes.Load()
}
func (t *Task) Processing() map[string]ResourceInfo {
t.processingMu.RLock()
defer t.processingMu.RUnlock()
return t.processing
}
type ResourceInfo interface {
FileName() string
FileSize() int64
}

View File

@@ -1,4 +1,4 @@
package tphtask package telegraph
import ( import (
"context" "context"
@@ -22,8 +22,6 @@ func (t *Task) Execute(ctx context.Context) error {
eg, gctx := errgroup.WithContext(ctx) eg, gctx := errgroup.WithContext(ctx)
eg.SetLimit(config.Cfg.Workers) eg.SetLimit(config.Cfg.Workers)
for i, pic := range t.Pics { for i, pic := range t.Pics {
pic := pic
i := i
eg.Go(func() error { eg.Go(func() error {
err := t.processPic(gctx, pic, i) err := t.processPic(gctx, pic, i)
if err != nil { if err != nil {
@@ -79,6 +77,11 @@ func (t *Task) processPic(ctx context.Context, picUrl string, index int) error {
lastErr = fmt.Errorf("failed to copy picture %s to cache file: %w", filename, lastErr) lastErr = fmt.Errorf("failed to copy picture %s to cache file: %w", filename, lastErr)
return lastErr return lastErr
} }
_, err = cacheFile.Seek(0, 0)
if err != nil {
lastErr = fmt.Errorf("failed to seek cache file for picture %s: %w", filename, err)
return lastErr
}
lastErr = t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, filename)) lastErr = t.Stor.Save(ctx, cacheFile, path.Join(t.StorPath, filename))
} else { } else {
lastErr = t.Stor.Save(ctx, body, path.Join(t.StorPath, filename)) lastErr = t.Stor.Save(ctx, body, path.Join(t.StorPath, filename))

View File

@@ -1,4 +1,4 @@
package tphtask package telegraph
import ( import (
"context" "context"

View File

@@ -1,4 +1,4 @@
package tphtask package telegraph
import ( import (
"context" "context"
@@ -39,7 +39,7 @@ func NewTask(
progress ProgressTracker, progress ProgressTracker,
) *Task { ) *Task {
_, cannotStream := stor.(storage.StorageCannotStream) _, cannotStream := stor.(storage.StorageCannotStream)
tphtask := &Task{ telegraph := &Task{
ID: id, ID: id,
Ctx: ctx, Ctx: ctx,
PhPath: phPath, PhPath: phPath,
@@ -52,5 +52,5 @@ func NewTask(
totalpics: len(pics), totalpics: len(pics),
downloaded: atomic.Int64{}, downloaded: atomic.Int64{},
} }
return tphtask return telegraph
} }

View File

@@ -1,4 +1,4 @@
package tphtask package telegraph
type TaskInfo interface { type TaskInfo interface {
TaskID() string TaskID() string

View File

@@ -1,4 +1,4 @@
package tphtask package telegraph
func shouldUpdateProgress(downloaded int64, total int64) bool { func shouldUpdateProgress(downloaded int64, total int64) bool {
if total <= 0 || downloaded <= 0 { if total <= 0 || downloaded <= 0 {

View File

@@ -1,4 +1,4 @@
package tftask package tfile
import ( import (
"context" "context"

View File

@@ -1,4 +1,4 @@
package tftask package tfile
import ( import (
"context" "context"

View File

@@ -1,4 +1,4 @@
package tftask package tfile
import ( import (
"context" "context"

View File

@@ -1,4 +1,4 @@
package tftask package tfile
type TaskInfo interface { type TaskInfo interface {
TaskID() string TaskID() string

View File

@@ -1,4 +1,4 @@
package tftask package tfile
import ( import (
"context" "context"
@@ -40,7 +40,7 @@ func NewTGFileTask(
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get absolute path for cache: %w", err) return nil, fmt.Errorf("failed to get absolute path for cache: %w", err)
} }
tftask := &Task{ tfile := &Task{
ID: id, ID: id,
Ctx: ctx, Ctx: ctx,
File: file, File: file,
@@ -49,7 +49,7 @@ func NewTGFileTask(
Progress: progress, Progress: progress,
localPath: cachePath, localPath: cachePath,
} }
return tftask, nil return tfile, nil
} }
tfileTask := &Task{ tfileTask := &Task{
ID: id, ID: id,

View File

@@ -1,4 +1,4 @@
package tftask package tfile
var progressUpdatesLevels = []struct { var progressUpdatesLevels = []struct {
size int64 // 文件大小阈值 size int64 // 文件大小阈值

View File

@@ -1,4 +1,4 @@
package tftask package tfile
import ( import (
"context" "context"

5
go.mod
View File

@@ -10,7 +10,6 @@ require (
github.com/charmbracelet/log v0.4.2 github.com/charmbracelet/log v0.4.2
github.com/fatih/color v1.18.0 github.com/fatih/color v1.18.0
github.com/gabriel-vasile/mimetype v1.4.9 github.com/gabriel-vasile/mimetype v1.4.9
github.com/go-faster/errors v0.7.1
github.com/gotd/contrib v0.21.0 github.com/gotd/contrib v0.21.0
github.com/gotd/td v0.129.0 github.com/gotd/td v0.129.0
github.com/minio/minio-go/v7 v7.0.95 github.com/minio/minio-go/v7 v7.0.95
@@ -42,15 +41,18 @@ require (
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/ghodss/yaml v1.0.0 // indirect github.com/ghodss/yaml v1.0.0 // indirect
github.com/glebarez/go-sqlite v1.22.0 // indirect github.com/glebarez/go-sqlite v1.22.0 // indirect
github.com/go-faster/errors v0.7.1 // indirect
github.com/go-faster/jx v1.1.0 // indirect github.com/go-faster/jx v1.1.0 // indirect
github.com/go-faster/xor v1.0.0 // indirect github.com/go-faster/xor v1.0.0 // indirect
github.com/go-faster/yaml v0.4.6 // indirect github.com/go-faster/yaml v0.4.6 // indirect
github.com/go-ini/ini v1.67.0 // indirect github.com/go-ini/ini v1.67.0 // indirect
github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect github.com/goccy/go-json v0.10.5 // indirect
github.com/google/go-github/v30 v30.1.0 // indirect github.com/google/go-github/v30 v30.1.0 // indirect
github.com/google/go-querystring v1.1.0 // indirect github.com/google/go-querystring v1.1.0 // indirect
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/gotd/ige v0.2.2 // indirect github.com/gotd/ige v0.2.2 // indirect
github.com/gotd/neo v0.1.5 // indirect github.com/gotd/neo v0.1.5 // indirect
@@ -102,6 +104,7 @@ require (
require ( require (
github.com/dgraph-io/ristretto/v2 v2.2.0 github.com/dgraph-io/ristretto/v2 v2.2.0
github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994
github.com/duke-git/lancet/v2 v2.3.7 github.com/duke-git/lancet/v2 v2.3.7
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/glebarez/sqlite v1.11.0 // indirect github.com/glebarez/sqlite v1.11.0 // indirect

6
go.sum
View File

@@ -4,6 +4,8 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
@@ -61,6 +63,8 @@ github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa5
github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994 h1:aQYWswi+hRL2zJqGacdCZx32XjKYV8ApXFGntw79XAM=
github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
github.com/duke-git/lancet/v2 v2.3.7 h1:nnNBA9KyoqwbPm4nFmEFVIbXeAmpqf6IDCH45+HHHNs= github.com/duke-git/lancet/v2 v2.3.7 h1:nnNBA9KyoqwbPm4nFmEFVIbXeAmpqf6IDCH45+HHHNs=
github.com/duke-git/lancet/v2 v2.3.7/go.mod h1:zGa2R4xswg6EG9I6WnyubDbFO/+A/RROxIbXcwryTsc= github.com/duke-git/lancet/v2 v2.3.7/go.mod h1:zGa2R4xswg6EG9I6WnyubDbFO/+A/RROxIbXcwryTsc=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
@@ -99,6 +103,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU=
github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=

184
parsers/js.go Normal file
View File

@@ -0,0 +1,184 @@
package parsers
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"github.com/blang/semver"
"github.com/charmbracelet/log"
"github.com/dop251/goja"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
var (
LatestParserVersion = semver.MustParse("1.0.0")
MinimumParserVersion = semver.MustParse("1.0.0")
)
type PluginMeta struct {
Name string `json:"name"`
Version string `json:"version"` // [TODO] 分版本解析, 但是我们现在只有 v1 所以先不写
Description string `json:"description"`
Author string `json:"author"`
}
type jsParser struct {
meta PluginMeta
vm *goja.Runtime
reqCh chan jsParserReq
}
type jsParserReq struct {
method string
url string
respCh chan jsParserResp
}
type jsParserResp struct {
item *parser.Item
ok bool
err error
}
func (p *jsParser) CanHandle(url string) bool {
respCh := make(chan jsParserResp, 1)
p.reqCh <- jsParserReq{method: "canHandle", url: url, respCh: respCh}
resp := <-respCh
return resp.ok && resp.err == nil
}
func (p *jsParser) Parse(url string) (*parser.Item, error) {
respCh := make(chan jsParserResp, 1)
p.reqCh <- jsParserReq{method: "parse", url: url, respCh: respCh}
resp := <-respCh
return resp.item, resp.err
}
func newJSParser(vm *goja.Runtime, canHandleFunc, parseFunc goja.Value, metadata PluginMeta) *jsParser {
p := &jsParser{
vm: vm,
reqCh: make(chan jsParserReq, 10),
meta: metadata,
}
go func() {
for req := range p.reqCh {
switch req.method {
case "canHandle":
fn, _ := goja.AssertFunction(canHandleFunc)
res, err := fn(goja.Undefined(), p.vm.ToValue(req.url))
if err != nil {
req.respCh <- jsParserResp{ok: false, err: err}
continue
}
req.respCh <- jsParserResp{ok: res.ToBoolean()}
case "parse":
fn, _ := goja.AssertFunction(parseFunc)
result, err := fn(goja.Undefined(), p.vm.ToValue(req.url))
if err != nil {
req.respCh <- jsParserResp{err: err}
continue
}
var item parser.Item
if exported := result.Export(); exported != nil {
data, err := json.Marshal(exported)
if err != nil {
req.respCh <- jsParserResp{err: fmt.Errorf("failed to marshal result to JSON: %w", err)}
continue
}
if err := json.Unmarshal(data, &item); err != nil {
req.respCh <- jsParserResp{err: fmt.Errorf("failed to unmarshal JSON to Item: %w", err)}
continue
}
} else {
req.respCh <- jsParserResp{err: fmt.Errorf("JS function returned null or undefined")}
continue
}
req.respCh <- jsParserResp{item: &item}
}
}
}()
return p
}
func registerParser(vm *goja.Runtime) func(call goja.FunctionCall) goja.Value {
return func(call goja.FunctionCall) goja.Value {
jsObj := call.Argument(0)
if jsObj == nil || goja.IsUndefined(jsObj) || goja.IsNull(jsObj) {
panic("registerParser expects an object { canHandle, parse }")
}
obj := jsObj.ToObject(vm)
if obj == nil {
panic("registerParser: cannot convert argument to object")
}
metaValue := obj.Get("metadata")
if metaValue == nil || goja.IsUndefined(metaValue) {
panic("parser must provide metadata")
}
var metadata PluginMeta
if exported := metaValue.Export(); exported != nil {
data, err := json.Marshal(exported)
if err != nil {
panic(fmt.Sprintf("failed to marshal metadata to JSON: %v", err))
}
if err := json.Unmarshal(data, &metadata); err != nil {
panic(fmt.Sprintf("failed to unmarshal JSON to PluginMeta: %v", err))
}
} else {
panic("metadata cannot be null or undefined")
}
pluginV := semver.MustParse(metadata.Version)
if pluginV.LT(MinimumParserVersion) || pluginV.GT(LatestParserVersion) {
panic(fmt.Sprintf("parser version %s is not supported, must be between %s and %s", metadata.Version, MinimumParserVersion, LatestParserVersion))
}
handleFn := obj.Get("canHandle")
parseFn := obj.Get("parse")
if parseFn == nil || goja.IsUndefined(parseFn) {
panic("parser must provide a parse function")
}
parsers = append(parsers, newJSParser(vm, handleFn, parseFn, metadata))
return goja.Undefined()
}
}
func LoadPlugins(ctx context.Context, dir string) error {
entries, err := os.ReadDir(dir)
if err != nil {
return err
}
for _, e := range entries {
if filepath.Ext(e.Name()) != ".js" {
continue
}
scriptPath := filepath.Join(dir, e.Name())
code, err := os.ReadFile(scriptPath)
if err != nil {
return err
}
vm := goja.New()
logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", e.Name()))
vm.Set("registerParser", registerParser(vm))
vm.Set("console", map[string]any{
"log": func(args ...any) {
logger.Info(fmt.Sprint(args...))
},
})
if _, err := vm.RunString(string(code)); err != nil {
return fmt.Errorf("error loading plugin %s: %w", e.Name(), err)
}
}
return nil
}

65
parsers/parser.go Normal file
View File

@@ -0,0 +1,65 @@
package parsers
import (
"context"
"fmt"
"sync"
"github.com/krau/SaveAny-Bot/parsers/twitter"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
var (
parsers []parser.Parser
parsersMu sync.Mutex
)
func GetParsers() []parser.Parser {
parsersMu.Lock()
defer parsersMu.Unlock()
return parsers
}
func AddParser(p parser.Parser) {
parsersMu.Lock()
defer parsersMu.Unlock()
parsers = append(parsers, p)
}
func init() {
AddParser(new(twitter.TwitterParser))
}
var (
ErrNoParserFound = fmt.Errorf("no parser found for the given URL")
)
func ParseWithContext(ctx context.Context, url string) (*parser.Item, error) {
ch := make(chan *parser.Item, 1)
errCh := make(chan error, 1)
go func() {
for _, pser := range parsers {
if !pser.CanHandle(url) {
continue
}
item, err := pser.Parse(url)
if err != nil {
errCh <- err
return
}
ch <- item
return
}
errCh <- ErrNoParserFound
}()
select {
case item := <-ch:
return item, nil
case err := <-errCh:
return nil, err
case <-ctx.Done():
return nil, ctx.Err()
}
}

83
parsers/twitter/parser.go Normal file
View File

@@ -0,0 +1,83 @@
package twitter
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"path"
"regexp"
"strings"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
type TwitterParser struct {
client http.Client
}
const (
FxTwitterApi = "api.fxtwitter.com"
)
var _ parser.Parser = (*TwitterParser)(nil)
var (
twitterSourceURLRegexp *regexp.Regexp = regexp.MustCompile(`(?:twitter|x)\.com/([^/]+)/status/(\d+)`)
)
func getTweetID(sourceURL string) string {
matches := twitterSourceURLRegexp.FindStringSubmatch(sourceURL)
if len(matches) < 3 {
return ""
}
return matches[2]
}
func (p *TwitterParser) Parse(u string) (*parser.Item, error) {
id := getTweetID(u)
if id == "" {
return nil, errors.New("invalid Twitter URL")
}
apiUrl := fmt.Sprintf("https://%s/_/status/%s", FxTwitterApi, id)
resp, err := p.client.Get(apiUrl)
if err != nil {
return nil, fmt.Errorf("failed to fetch Twitter API: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to fetch Twitter API, status code: %d", resp.StatusCode)
}
var fxResp FxTwitterApiResp
if err := json.NewDecoder(resp.Body).Decode(&fxResp); err != nil {
return nil, fmt.Errorf("failed to decode Twitter API response: %w", err)
}
if fxResp.Code != 200 {
return nil, fmt.Errorf("request twitter API error: %s", fxResp.Message)
}
if len(fxResp.Tweet.Media.All) == 0 {
return nil, errors.New("no media found in the tweet")
}
resources := make([]parser.Resource, 0, len(fxResp.Tweet.Media.All))
for _, media := range fxResp.Tweet.Media.All {
resources = append(resources, parser.Resource{
URL: media.URL,
Filename: path.Base(strings.Split(media.URL, "?")[0]),
})
}
item := &parser.Item{
Site: "Twitter",
Title: fmt.Sprintf("Tweet/%s", id),
URL: fxResp.Tweet.URL,
Description: fxResp.Tweet.Text,
Author: fxResp.Tweet.Author.Name,
Tags: make([]string, 0),
Extra: make(map[string]any),
Resources: resources,
}
return item, nil
}
func (p *TwitterParser) CanHandle(u string) bool {
return twitterSourceURLRegexp.MatchString(u)
}

122
parsers/twitter/types.go Normal file
View File

@@ -0,0 +1,122 @@
package twitter
// type AutoGenerated struct {
// Code int `json:"code"`
// Message string `json:"message"`
// Tweet struct {
// URL string `json:"url"`
// ID string `json:"id"`
// Text string `json:"text"`
// RawText struct {
// Text string `json:"text"`
// Facets []struct {
// Type string `json:"type"`
// Indices []int `json:"indices"`
// Original string `json:"original"`
// ID string `json:"id,omitempty"`
// Display string `json:"display,omitempty"`
// Replacement string `json:"replacement,omitempty"`
// } `json:"facets"`
// } `json:"raw_text"`
// Author struct {
// ID string `json:"id"`
// Name string `json:"name"`
// ScreenName string `json:"screen_name"`
// AvatarURL string `json:"avatar_url"`
// BannerURL interface{} `json:"banner_url"`
// Description string `json:"description"`
// Location string `json:"location"`
// URL string `json:"url"`
// Followers int `json:"followers"`
// Following int `json:"following"`
// Joined string `json:"joined"`
// Likes int `json:"likes"`
// MediaCount int `json:"media_count"`
// Protected bool `json:"protected"`
// Website struct {
// URL string `json:"url"`
// DisplayURL string `json:"display_url"`
// } `json:"website"`
// Tweets int `json:"tweets"`
// AvatarColor interface{} `json:"avatar_color"`
// } `json:"author"`
// Replies int `json:"replies"`
// Retweets int `json:"retweets"`
// Likes int `json:"likes"`
// Bookmarks int `json:"bookmarks"`
// CreatedAt string `json:"created_at"`
// CreatedTimestamp int `json:"created_timestamp"`
// PossiblySensitive bool `json:"possibly_sensitive"`
// Views int `json:"views"`
// IsNoteTweet bool `json:"is_note_tweet"`
// CommunityNote interface{} `json:"community_note"`
// Lang string `json:"lang"`
// ReplyingTo interface{} `json:"replying_to"`
// ReplyingToStatus interface{} `json:"replying_to_status"`
// Media struct {
// All []struct {
// URL string `json:"url"`
// ThumbnailURL string `json:"thumbnail_url"`
// Duration int `json:"duration"`
// Width int `json:"width"`
// Height int `json:"height"`
// Format string `json:"format"`
// Type string `json:"type"`
// Variants []struct {
// Bitrate int `json:"bitrate"`
// ContentType string `json:"content_type"`
// URL string `json:"url"`
// } `json:"variants"`
// } `json:"all"`
// Photos []struct {
// Type string `json:"type"`
// URL string `json:"url"`
// Width int `json:"width"`
// Height int `json:"height"`
// } `json:"photos"`
// Videos []struct {
// URL string `json:"url"`
// ThumbnailURL string `json:"thumbnail_url"`
// Duration int `json:"duration"`
// Width int `json:"width"`
// Height int `json:"height"`
// Format string `json:"format"`
// Type string `json:"type"`
// Variants []struct {
// Bitrate int `json:"bitrate"`
// ContentType string `json:"content_type"`
// URL string `json:"url"`
// } `json:"variants"`
// } `json:"videos"`
// } `json:"media"`
// Source string `json:"source"`
// TwitterCard string `json:"twitter_card"`
// Color interface{} `json:"color"`
// Provider string `json:"provider"`
// } `json:"tweet"`
// }
type FxTwitterApiResp struct {
Code int `json:"code"`
Message string `json:"message"`
Tweet struct {
URL string `json:"url"`
ID string `json:"id"`
Text string `json:"text"`
Author struct {
ID string `json:"id"`
Name string `json:"name"`
ScreenName string `json:"screen_name"`
Protected bool `json:"protected"`
} `json:"author"`
PossiblySensitive bool `json:"possibly_sensitive"`
IsNoteTweet bool `json:"is_note_tweet"`
Lang string `json:"lang"`
Media struct {
All []struct {
URL string `json:"url"`
Type string `json:"type"`
} `json:"all"`
} `json:"media"`
} `json:"tweet"`
}

View File

@@ -1,5 +1,5 @@
package tasktype package tasktype
//go:generate go-enum --values --names --flag --nocase //go:generate go-enum --values --names --flag --nocase
// ENUM(tgfiles,tphpics) // ENUM(tgfiles,tphpics,parseditem)
type TaskType string type TaskType string

View File

@@ -16,6 +16,8 @@ const (
TaskTypeTgfiles TaskType = "tgfiles" TaskTypeTgfiles TaskType = "tgfiles"
// TaskTypeTphpics is a TaskType of type tphpics. // TaskTypeTphpics is a TaskType of type tphpics.
TaskTypeTphpics TaskType = "tphpics" TaskTypeTphpics TaskType = "tphpics"
// TaskTypeParseditem is a TaskType of type parseditem.
TaskTypeParseditem TaskType = "parseditem"
) )
var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Join(_TaskTypeNames, ", ")) var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Join(_TaskTypeNames, ", "))
@@ -23,6 +25,7 @@ var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Jo
var _TaskTypeNames = []string{ var _TaskTypeNames = []string{
string(TaskTypeTgfiles), string(TaskTypeTgfiles),
string(TaskTypeTphpics), string(TaskTypeTphpics),
string(TaskTypeParseditem),
} }
// TaskTypeNames returns a list of possible string values of TaskType. // TaskTypeNames returns a list of possible string values of TaskType.
@@ -37,6 +40,7 @@ func TaskTypeValues() []TaskType {
return []TaskType{ return []TaskType{
TaskTypeTgfiles, TaskTypeTgfiles,
TaskTypeTphpics, TaskTypeTphpics,
TaskTypeParseditem,
} }
} }
@@ -53,8 +57,9 @@ func (x TaskType) IsValid() bool {
} }
var _TaskTypeValue = map[string]TaskType{ var _TaskTypeValue = map[string]TaskType{
"tgfiles": TaskTypeTgfiles, "tgfiles": TaskTypeTgfiles,
"tphpics": TaskTypeTphpics, "tphpics": TaskTypeTphpics,
"parseditem": TaskTypeParseditem,
} }
// ParseTaskType attempts to convert a string to a TaskType. // ParseTaskType attempts to convert a string to a TaskType.

63
pkg/parser/parser.go Normal file
View File

@@ -0,0 +1,63 @@
package parser
import (
"crypto/md5"
"fmt"
)
type Parser interface {
CanHandle(url string) bool
Parse(url string) (*Item, error)
}
// Resource is a single downloadable resource with metadata.
type Resource struct {
URL string `json:"url"`
Filename string `json:"filename"` // with ext
MimeType string `json:"mime_type"`
Extension string `json:"extension"`
Size int64 `json:"size"` // 0 when unknown
Hash map[string]string `json:"hash"` // {"md5": "...", "sha256": "..."}
Headers map[string]string `json:"headers"` // HTTP headers when downloading
Extra map[string]any `json:"extra"`
}
type Item struct {
Site string `json:"site"`
URL string `json:"url"` // original URL of the item
Title string `json:"title"`
Author string `json:"author"`
Description string `json:"description"`
Tags []string `json:"tags"`
Resources []Resource `json:"resources"`
Extra map[string]any `json:"extra"`
}
func (r *Resource) FileName() string {
return r.Filename
}
func (r *Resource) FileSize() int64 {
return r.Size
}
func (r *Resource) ID() string {
h := md5.New()
h.Write([]byte(r.URL))
h.Write([]byte(r.Filename))
h.Write([]byte(r.MimeType))
h.Write([]byte(r.Extension))
h.Write([]byte(fmt.Sprintf("%d", r.Size)))
for k, v := range r.Hash {
h.Write([]byte(k))
h.Write([]byte(v))
}
for k, v := range r.Headers {
h.Write([]byte(k))
h.Write([]byte(v))
}
return fmt.Sprintf("%x", h.Sum(nil))
}

View File

@@ -2,6 +2,7 @@ package tcbdata
import ( import (
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype" "github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/pkg/telegraph" "github.com/krau/SaveAny-Bot/pkg/telegraph"
"github.com/krau/SaveAny-Bot/pkg/tfile" "github.com/krau/SaveAny-Bot/pkg/tfile"
) )
@@ -26,6 +27,7 @@ const (
// } // }
type Add struct { type Add struct {
// [TODO] maybe we should to spilit this into different types...
TaskType tasktype.TaskType TaskType tasktype.TaskType
SelectedStorName string SelectedStorName string
DirID uint DirID uint
@@ -37,6 +39,8 @@ type Add struct {
TphPageNode *telegraph.Page TphPageNode *telegraph.Page
TphPics []string TphPics []string
TphDirPath string // unescaped telegraph.Page.Path TphDirPath string // unescaped telegraph.Page.Path
// parseditem
ParsedItem *parser.Item
} }
type SetDefaultStorage struct { type SetDefaultStorage struct {

70
plugins/example_parser.js Normal file
View File

@@ -0,0 +1,70 @@
// 这是一个示例解析器插件, 模拟处理 YouTube 的视频链接
// 你可以使用 console.log 来在终端中使用 go 的 logger 打印信息
console.log("Example parser loaded");
/**
* 插件元数据
* 版本号是 saveany-bot 本体支持的插件规范版本号, 必须提供
*/
const metadata = {
name: "Example Parser", // 插件名称
version: "1.0.0", // 插件版本号
description: "A parser for example links", // 插件描述
author: "Krau", // 插件作者
}
/**
* canHandle 函数用于判断当前解析器能否解析给定的 URL
*/
const canHandle = function (url) {
// 这里我们简单地检查 URL 是否包含 "youtube.com/watch?v"
return url.includes("youtube.com/watch?v");
}
/**
* 解析 url 并返回一个 Item 对象, 类型定义在 pkg/parser.go 中
*/
const parse = function (url) {
var result = {
// 元信息
site: "YouTube",
url: url,
title: "测试 YouTube 视频",
author: "某视频作者",
description: "这是一个测试视频",
tags: ["test", "youtube"],
// 资源(可下载的文件)列表
resources: [
{
url: "https://example.com/video1.mp4", // 文件直链
filename: "somevideo.mp4", // 文件名
mime_type: "video/mp4", // 文件 MIME 类型, 可选
extension: "mp4", // 文件扩展名, 可选
size: 100 * 1024 * 1024, // 文件大小, 单位为字节, 未知可以设置为 0
hash: {}, // 文件哈希, 可选, 格式为 {"md5": "xxx", "sha256": "xxx"} 等
headers: {}, // 下载文件时所需的 HTTP 头部, 可选, 例如 {"User-Agent": "Mozilla/5.0"}
extra: {} // 额外信息, 可选, 可以包含任何自定义数据
},
{
url: "https://example.com/picture1.png",
filename: "picture1.png",
mime_type: "image/png",
extension: "png",
size: 1 * 1024 * 1024,
hash: {},
headers: {},
extra: {}
}
],
extra: {}
};
return result;
}
registerParser({
metadata,
canHandle,
parse
});