feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: core code struct and impl parse task handle

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: twitter parse video json decode error

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

63
pkg/parser/parser.go Normal file
View File

@@ -0,0 +1,63 @@
package parser
import (
"crypto/md5"
"fmt"
)
type Parser interface {
CanHandle(url string) bool
Parse(url string) (*Item, error)
}
// Resource is a single downloadable resource with metadata.
type Resource struct {
URL string `json:"url"`
Filename string `json:"filename"` // with ext
MimeType string `json:"mime_type"`
Extension string `json:"extension"`
Size int64 `json:"size"` // 0 when unknown
Hash map[string]string `json:"hash"` // {"md5": "...", "sha256": "..."}
Headers map[string]string `json:"headers"` // HTTP headers when downloading
Extra map[string]any `json:"extra"`
}
type Item struct {
Site string `json:"site"`
URL string `json:"url"` // original URL of the item
Title string `json:"title"`
Author string `json:"author"`
Description string `json:"description"`
Tags []string `json:"tags"`
Resources []Resource `json:"resources"`
Extra map[string]any `json:"extra"`
}
func (r *Resource) FileName() string {
return r.Filename
}
func (r *Resource) FileSize() int64 {
return r.Size
}
func (r *Resource) ID() string {
h := md5.New()
h.Write([]byte(r.URL))
h.Write([]byte(r.Filename))
h.Write([]byte(r.MimeType))
h.Write([]byte(r.Extension))
h.Write([]byte(fmt.Sprintf("%d", r.Size)))
for k, v := range r.Hash {
h.Write([]byte(k))
h.Write([]byte(v))
}
for k, v := range r.Headers {
h.Write([]byte(k))
h.Write([]byte(v))
}
return fmt.Sprintf("%x", h.Sum(nil))
}