feat: parse url with js plugins support (#96)

* feat: WIP. add parser functionality and text message handling

* fix: use json to marshal js result

* feat: add metadata handling and version validation for jsParser

* refactor: rename parser package to parsers and restructure parser handling

* refactor: core code struct and impl parse task handle

* feat: impl parsed download

* fix: seek cache file when processing tph picture

* feat: implement parsed task handling and progress tracking

* feat: enhance task processing with concurrency control and progress tracking

* feat: add resource ID generation and improve resource processing handling

* feat: improve message formatting in parsed text and progress completion

* feat: add example js plugin

* feat: implement Twitter parser

* fix: twitter parse video json decode error

* feat: impl stream mode for parse task
This commit is contained in:
Krau
2025-08-21 23:48:17 +08:00
committed by GitHub
parent 79386bdd7d
commit 302db2fe75
47 changed files with 1348 additions and 47 deletions

84
core/tasks/parsed/task.go Normal file
View File

@@ -0,0 +1,84 @@
package parsed
import (
"context"
"net/http"
"sync"
"sync/atomic"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/parser"
"github.com/krau/SaveAny-Bot/storage"
)
type Task struct {
ID string
Ctx context.Context
Stor storage.Storage
StorPath string
item *parser.Item
httpClient *http.Client
progress ProgressTracker
stream bool
totalResources int64
downloaded atomic.Int64 // downloaded resources count
totalBytes int64 // total bytes to download
downloadedBytes atomic.Int64 // downloaded bytes count
processing map[string]ResourceInfo
processingMu sync.RWMutex
failed map[string]error // [TODO] errors for each resource
}
func (t *Task) Type() tasktype.TaskType {
return tasktype.TaskTypeParseditem
}
func (t *Task) TaskID() string {
return t.ID
}
func NewTask(
id string,
ctx context.Context,
stor storage.Storage,
storPath string,
item *parser.Item,
progressTracker ProgressTracker,
) *Task {
client := &http.Client{
Transport: &http.Transport{
// [TODO] configure it via config
Proxy: http.ProxyFromEnvironment,
},
}
_, ok := stor.(storage.StorageCannotStream)
stream := config.Cfg.Stream && !ok
return &Task{
ID: id,
Ctx: ctx,
Stor: stor,
StorPath: storPath,
item: item,
totalResources: int64(len(item.Resources)),
downloaded: atomic.Int64{},
totalBytes: func() int64 {
var total int64
for _, res := range item.Resources {
if res.Size < 0 {
continue // skip resources with unknown size
}
total += res.Size
}
return total
}(),
stream: stream,
downloadedBytes: atomic.Int64{},
httpClient: client,
progress: progressTracker,
processing: make(map[string]ResourceInfo),
processingMu: sync.RWMutex{},
failed: make(map[string]error),
}
}