mirror of
https://github.com/krau/SaveAny-Bot.git
synced 2026-06-25 17:23:50 +08:00
feat: parse url with js plugins support (#96)
* feat: WIP. add parser functionality and text message handling * fix: use json to marshal js result * feat: add metadata handling and version validation for jsParser * refactor: rename parser package to parsers and restructure parser handling * refactor: core code struct and impl parse task handle * feat: impl parsed download * fix: seek cache file when processing tph picture * feat: implement parsed task handling and progress tracking * feat: enhance task processing with concurrency control and progress tracking * feat: add resource ID generation and improve resource processing handling * feat: improve message formatting in parsed text and progress completion * feat: add example js plugin * feat: implement Twitter parser * fix: twitter parse video json decode error * feat: impl stream mode for parse task
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
package tasktype
|
||||
|
||||
//go:generate go-enum --values --names --flag --nocase
|
||||
// ENUM(tgfiles,tphpics)
|
||||
// ENUM(tgfiles,tphpics,parseditem)
|
||||
type TaskType string
|
||||
|
||||
@@ -16,6 +16,8 @@ const (
|
||||
TaskTypeTgfiles TaskType = "tgfiles"
|
||||
// TaskTypeTphpics is a TaskType of type tphpics.
|
||||
TaskTypeTphpics TaskType = "tphpics"
|
||||
// TaskTypeParseditem is a TaskType of type parseditem.
|
||||
TaskTypeParseditem TaskType = "parseditem"
|
||||
)
|
||||
|
||||
var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Join(_TaskTypeNames, ", "))
|
||||
@@ -23,6 +25,7 @@ var ErrInvalidTaskType = fmt.Errorf("not a valid TaskType, try [%s]", strings.Jo
|
||||
var _TaskTypeNames = []string{
|
||||
string(TaskTypeTgfiles),
|
||||
string(TaskTypeTphpics),
|
||||
string(TaskTypeParseditem),
|
||||
}
|
||||
|
||||
// TaskTypeNames returns a list of possible string values of TaskType.
|
||||
@@ -37,6 +40,7 @@ func TaskTypeValues() []TaskType {
|
||||
return []TaskType{
|
||||
TaskTypeTgfiles,
|
||||
TaskTypeTphpics,
|
||||
TaskTypeParseditem,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,8 +57,9 @@ func (x TaskType) IsValid() bool {
|
||||
}
|
||||
|
||||
var _TaskTypeValue = map[string]TaskType{
|
||||
"tgfiles": TaskTypeTgfiles,
|
||||
"tphpics": TaskTypeTphpics,
|
||||
"tgfiles": TaskTypeTgfiles,
|
||||
"tphpics": TaskTypeTphpics,
|
||||
"parseditem": TaskTypeParseditem,
|
||||
}
|
||||
|
||||
// ParseTaskType attempts to convert a string to a TaskType.
|
||||
|
||||
63
pkg/parser/parser.go
Normal file
63
pkg/parser/parser.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package parser
|
||||
|
||||
import (
	"crypto/md5"
	"fmt"
	"sort"
)
|
||||
|
||||
type Parser interface {
|
||||
CanHandle(url string) bool
|
||||
Parse(url string) (*Item, error)
|
||||
}
|
||||
|
||||
// Resource is a single downloadable resource with metadata.
type Resource struct {
	URL       string            `json:"url"`
	Filename  string            `json:"filename"` // with ext
	MimeType  string            `json:"mime_type"`
	Extension string            `json:"extension"`
	Size      int64             `json:"size"`    // 0 when unknown
	Hash      map[string]string `json:"hash"`    // {"md5": "...", "sha256": "..."}
	Headers   map[string]string `json:"headers"` // HTTP headers when downloading
	Extra     map[string]any    `json:"extra"`
}
|
||||
|
||||
type Item struct {
|
||||
Site string `json:"site"`
|
||||
URL string `json:"url"` // original URL of the item
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
Description string `json:"description"`
|
||||
Tags []string `json:"tags"`
|
||||
Resources []Resource `json:"resources"`
|
||||
Extra map[string]any `json:"extra"`
|
||||
}
|
||||
|
||||
func (r *Resource) FileName() string {
|
||||
return r.Filename
|
||||
}
|
||||
|
||||
func (r *Resource) FileSize() int64 {
|
||||
return r.Size
|
||||
}
|
||||
|
||||
func (r *Resource) ID() string {
|
||||
h := md5.New()
|
||||
h.Write([]byte(r.URL))
|
||||
h.Write([]byte(r.Filename))
|
||||
h.Write([]byte(r.MimeType))
|
||||
h.Write([]byte(r.Extension))
|
||||
h.Write([]byte(fmt.Sprintf("%d", r.Size)))
|
||||
|
||||
for k, v := range r.Hash {
|
||||
h.Write([]byte(k))
|
||||
h.Write([]byte(v))
|
||||
}
|
||||
|
||||
for k, v := range r.Headers {
|
||||
h.Write([]byte(k))
|
||||
h.Write([]byte(v))
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%x", h.Sum(nil))
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package tcbdata
|
||||
|
||||
import (
|
||||
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
|
||||
"github.com/krau/SaveAny-Bot/pkg/parser"
|
||||
"github.com/krau/SaveAny-Bot/pkg/telegraph"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tfile"
|
||||
)
|
||||
@@ -26,6 +27,7 @@ const (
|
||||
// }
|
||||
|
||||
type Add struct {
|
||||
// [TODO] maybe we should split this into different types...
|
||||
TaskType tasktype.TaskType
|
||||
SelectedStorName string
|
||||
DirID uint
|
||||
@@ -37,6 +39,8 @@ type Add struct {
|
||||
TphPageNode *telegraph.Page
|
||||
TphPics []string
|
||||
TphDirPath string // unescaped telegraph.Page.Path
|
||||
// parseditem
|
||||
ParsedItem *parser.Item
|
||||
}
|
||||
|
||||
type SetDefaultStorage struct {
|
||||
|
||||
Reference in New Issue
Block a user