refactor: js plugin api

This commit is contained in:
krau
2025-11-16 21:38:30 +08:00
parent 3f40acff55
commit 131dfeb4cd
14 changed files with 165 additions and 112 deletions

173
parsers/js/api.go Normal file
View File

@@ -0,0 +1,173 @@
package js
import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"github.com/blang/semver"
"github.com/charmbracelet/log"
"github.com/dop251/goja"
"github.com/krau/SaveAny-Bot/common/utils/netutil"
"github.com/krau/SaveAny-Bot/parsers/parsers"
)
// jsRegisterParser builds the native `registerParser` function that is
// injected into a plugin's goja runtime.
//
// The returned function expects one argument: an object carrying `metadata`,
// `canHandle` and `parse`. The metadata is round-tripped through JSON into a
// PluginMeta, its version is checked against MinimumParserVersion, and the
// resulting parser is registered with parsers.Add.
//
// On any validation failure the function returns an Error value created with
// vm.NewGoError as the call's result; on success it returns undefined.
func jsRegisterParser(vm *goja.Runtime) func(call goja.FunctionCall) goja.Value {
	return func(call goja.FunctionCall) goja.Value {
		jsObj := call.Argument(0)
		if jsObj == nil || goja.IsUndefined(jsObj) || goja.IsNull(jsObj) {
			return vm.NewGoError(errors.New("registerParser expects an object { canHandle, parse }"))
		}
		obj := jsObj.ToObject(vm)
		if obj == nil {
			return vm.NewGoError(errors.New("registerParser expects an object { canHandle, parse }"))
		}

		metaValue := obj.Get("metadata")
		if metaValue == nil || goja.IsUndefined(metaValue) {
			return vm.NewGoError(errors.New("parser must provide metadata"))
		}
		exported := metaValue.Export()
		if exported == nil {
			return vm.NewGoError(errors.New("metadata cannot be null or undefined"))
		}
		// Convert the exported JS value into PluginMeta via a JSON round trip.
		data, err := json.Marshal(exported)
		if err != nil {
			return vm.NewGoError(fmt.Errorf("failed to marshal metadata to JSON: %w", err))
		}
		var metadata PluginMeta
		if err := json.Unmarshal(data, &metadata); err != nil {
			return vm.NewGoError(fmt.Errorf("failed to unmarshal JSON to PluginMeta: %w", err))
		}

		// Parse instead of MustParse: the version string comes from the plugin
		// script, so a malformed value must be reported, not panic the host.
		pluginV, err := semver.Parse(metadata.Version)
		if err != nil {
			return vm.NewGoError(fmt.Errorf("invalid parser version %q: %w", metadata.Version, err))
		}
		if pluginV.LT(MinimumParserVersion) {
			return vm.NewGoError(fmt.Errorf("parser version %s is not supported, must be at least %s", metadata.Version, MinimumParserVersion))
		}
		if pluginV.Major > LatestParserVersion.Major {
			log.Printf("warning: parser major version %d is newer than latest supported major version %d", pluginV.Major, LatestParserVersion.Major)
		}

		// Both callbacks are invoked later by the parser; reject anything that
		// is not callable now rather than failing on first use.
		handleFn := obj.Get("canHandle")
		if _, ok := goja.AssertFunction(handleFn); !ok {
			return vm.NewGoError(errors.New("parser must provide a canHandle function"))
		}
		parseFn := obj.Get("parse")
		if _, ok := goja.AssertFunction(parseFn); !ok {
			return vm.NewGoError(errors.New("parser must provide a parse function"))
		}

		parsers.Add(newJSParser(vm, handleFn, parseFn, metadata))
		return goja.Undefined()
	}
}
// jsConsole returns the `console` object exposed to plugin scripts. It offers
// two functions, `log` and `error`, which forward to logger.Info and
// logger.Error respectively. The first argument is used as the message and
// any remaining arguments are passed on as the logger's variadic values.
// Calls without arguments are ignored.
var jsConsole = func(logger *log.Logger) map[string]any {
	// info backs console.log; the message is passed through unchanged.
	info := func(args ...any) {
		switch len(args) {
		case 0:
			// nothing to log
		case 1:
			logger.Info(args[0])
		default:
			logger.Info(args[0], args[1:]...)
		}
	}

	// fail backs console.error; the message is stringified with fmt.Sprint.
	fail := func(args ...any) {
		switch len(args) {
		case 0:
			// nothing to log
		case 1:
			logger.Error(fmt.Sprint(args[0]))
		default:
			logger.Error(fmt.Sprint(args[0]), args[1:]...)
		}
	}

	return map[string]any{
		"log":   info,
		"error": fail,
	}
}
/*
jsGhttp provides an HTTP helper object for JS plugins.

It exposes the following functions, all of which take the URL as their first
argument and report failures as an object with an "error" string (plus a
"status" code when the server answered with a non-200 status):

  - get(url): performs a GET request and returns the response body as a string
  - getJSON(url): performs a GET request and returns { data: <parsed JSON> };
    any top-level JSON value (object, array, scalar) is accepted
  - head(url): performs a HEAD request and returns { status, headers }, where
    headers maps each header name to its first value
*/
var jsGhttp = func(vm *goja.Runtime) *goja.Object {
	ghttp := vm.NewObject()
	client := netutil.DefaultParserHTTPClient()

	// fail builds the { error: msg } object returned to the script.
	fail := func(msg string) goja.Value {
		return vm.ToValue(map[string]any{
			"error": msg,
		})
	}

	// fetchBody performs the GET shared by get and getJSON. Exactly one of the
	// results is non-nil: the body on success, or the failure value to hand
	// back to the script.
	fetchBody := func(url string) ([]byte, goja.Value) {
		resp, err := client.Get(url)
		if err != nil {
			return nil, fail(fmt.Sprintf("failed to fetch %s: %v", url, err))
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			return nil, vm.ToValue(map[string]any{
				"error":  fmt.Sprintf("failed to fetch %s: %s", url, resp.Status),
				"status": resp.StatusCode,
			})
		}
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return nil, fail(fmt.Errorf("failed to read response body: %w", err).Error())
		}
		return body, nil
	}

	ghttp.Set("get", func(call goja.FunctionCall) goja.Value {
		body, failure := fetchBody(call.Argument(0).String())
		if failure != nil {
			return failure
		}
		return vm.ToValue(string(body))
	})

	ghttp.Set("getJSON", func(call goja.FunctionCall) goja.Value {
		body, failure := fetchBody(call.Argument(0).String())
		if failure != nil {
			return failure
		}
		// Decode into any so that documents whose top level is an array or a
		// scalar are accepted; objects still decode to map[string]any.
		var jsonData any
		if err := json.Unmarshal(body, &jsonData); err != nil {
			return fail(fmt.Errorf("failed to unmarshal JSON: %w", err).Error())
		}
		return vm.ToValue(map[string]any{
			"data": jsonData,
		})
	})

	ghttp.Set("head", func(call goja.FunctionCall) goja.Value {
		url := call.Argument(0).String()
		resp, err := client.Head(url)
		if err != nil {
			return fail(fmt.Sprintf("failed to fetch %s: %v", url, err))
		}
		defer resp.Body.Close()
		headers := make(map[string]string, len(resp.Header))
		for k, v := range resp.Header {
			// Guard against an empty value list so indexing cannot panic.
			if len(v) > 0 {
				headers[k] = v[0]
			}
		}
		return vm.ToValue(map[string]any{
			"status":  resp.StatusCode,
			"headers": headers,
		})
	})

	return ghttp
}

View File

@@ -0,0 +1,82 @@
package js
import (
"fmt"
"log/slog"
"sync"
"github.com/charmbracelet/log"
"github.com/dop251/goja"
"github.com/playwright-community/playwright-go"
)
// jsPlaywright returns the `playwright` object exposed to plugin scripts. It
// offers a single function, get(url), which installs the Chromium driver on
// first use, starts Playwright, opens url in a fresh browser page, waits for
// the network to become idle (60 s timeout) and returns the page HTML as a
// string. Failures are reported as an object with an "error" string.
//
// NOTE(review): a same-named stub is declared under the `no_playwright` build
// tag; confirm this file is excluded from such builds (e.g. with
// `//go:build !no_playwright`), otherwise the two declarations collide.
var jsPlaywright = func(vm *goja.Runtime, logger *log.Logger) *goja.Object {
	const driverDir = "./playwright"

	pwObj := vm.NewObject()
	slogger := slog.New(logger)

	// installErr lives next to installOnce so that a failed installation is
	// reported on every call, not only on the one that triggered it.
	var (
		installOnce sync.Once
		installErr  error
	)

	// fail builds the { error: msg } object returned to the script.
	fail := func(format string, err error) goja.Value {
		return vm.ToValue(map[string]any{
			"error": fmt.Sprintf(format, err),
		})
	}

	pwObj.Set("get", func(call goja.FunctionCall) goja.Value {
		url := call.Argument(0).String()

		installOnce.Do(func() {
			installErr = playwright.Install(&playwright.RunOptions{
				Browsers:        []string{"chromium"},
				DriverDirectory: driverDir,
				Logger:          slogger,
			})
		})
		if installErr != nil {
			return fail("failed to install playwright: %v", installErr)
		}

		pw, err := playwright.Run(&playwright.RunOptions{
			DriverDirectory: driverDir,
			Logger:          slogger,
		})
		if err != nil {
			return fail("failed to start playwright: %v", err)
		}
		defer pw.Stop()

		browser, err := pw.Chromium.Launch()
		if err != nil {
			return fail("failed to launch browser: %v", err)
		}
		defer browser.Close()

		page, err := browser.NewPage()
		if err != nil {
			return fail("failed to create page: %v", err)
		}

		resp, err := page.Goto(url, playwright.PageGotoOptions{
			WaitUntil: playwright.WaitUntilStateNetworkidle,
			Timeout:   playwright.Float(60000),
		})
		if err != nil {
			return fail("failed to navigate: %v", err)
		}
		if resp != nil && resp.Status() >= 400 {
			return vm.ToValue(map[string]any{
				"error": fmt.Sprintf("bad status code: %d", resp.Status()),
			})
		}

		content, err := page.Content()
		if err != nil {
			return fail("failed to get page content: %v", err)
		}
		return vm.ToValue(content)
	})

	return pwObj
}

View File

@@ -0,0 +1,19 @@
//go:build no_playwright
package js
import (
"github.com/charmbracelet/log"
"github.com/dop251/goja"
)
// jsPlaywright is the stub used when the binary is built without Playwright
// support. The returned object still has a get function so plugin scripts can
// call it, but every call yields the same { error: ... } value.
var jsPlaywright = func(vm *goja.Runtime, _ *log.Logger) *goja.Object {
	// Built once and shared by every call to get.
	notSupported := vm.ToValue(map[string]any{
		"error": "playwright is not supported in this build",
	})

	stub := vm.NewObject()
	stub.Set("get", func(goja.FunctionCall) goja.Value {
		return notSupported
	})
	return stub
}

172
parsers/js/js.go Normal file
View File

@@ -0,0 +1,172 @@
package js
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/charmbracelet/log"
"github.com/dop251/goja"
"github.com/krau/SaveAny-Bot/config"
"github.com/krau/SaveAny-Bot/pkg/parser"
)
// jsParser is a parser whose CanHandle and Parse logic is implemented by a
// JavaScript plugin. Calls are turned into requests on reqCh, which the
// goroutine started in newJSParser consumes one at a time.
type jsParser struct {
// meta is the metadata the plugin supplied when it registered itself.
meta PluginMeta
// vm is the goja runtime the plugin's functions are invoked in.
vm *goja.Runtime
// reqCh queues requests for the worker goroutine.
reqCh chan jsParserReq
}
// jsParserReq is a single request sent to a jsParser's worker goroutine.
type jsParserReq struct {
// method selects which plugin function to invoke.
method ParserMethod
// url is passed to the plugin function as its only argument.
url string
// respCh receives the reply for this request.
respCh chan jsParserResp
}
// jsParserResp is the worker goroutine's reply to a jsParserReq.
type jsParserResp struct {
// item is the parsed result; set for successful parse requests.
item *parser.Item
// ok is the boolean result of a canHandle request.
ok bool
// err is non-nil when the plugin call or result conversion failed.
err error
}
// CanHandle asks the plugin's canHandle function whether it supports url.
// It blocks until the worker goroutine replies and reports false when the
// plugin call returned an error.
func (p *jsParser) CanHandle(url string) bool {
	reply := make(chan jsParserResp, 1)
	p.reqCh <- jsParserReq{
		method: ParserMethodCanHandle,
		url:    url,
		respCh: reply,
	}

	answer := <-reply
	if answer.err != nil {
		return false
	}
	return answer.ok
}
// Parse asks the plugin's parse function to turn url into a parser.Item.
//
// The request is handed to the worker goroutine; Parse returns ctx.Err() if
// ctx is cancelled either while waiting for room in the request queue or
// while waiting for the reply. The reply channel is buffered, so the worker
// never blocks on a caller that has already given up.
func (p *jsParser) Parse(ctx context.Context, url string) (*parser.Item, error) {
	respCh := make(chan jsParserResp, 1)

	// Enqueue under ctx as well: the queue is bounded and the worker may be
	// busy, so an unconditional send could outlive the caller's deadline.
	select {
	case p.reqCh <- jsParserReq{method: ParserMethodParse, url: url, respCh: respCh}:
	case <-ctx.Done():
		return nil, ctx.Err()
	}

	select {
	case resp := <-respCh:
		return resp.item, resp.err
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}
// newJSParser creates a jsParser for the given plugin callbacks and starts the
// worker goroutine that serves its requests.
//
// canHandleFunc and parseFunc are the plugin's JS functions; metadata is the
// plugin's declared metadata. All calls into the plugin happen on the worker
// goroutine, one request at a time. The goroutine runs until reqCh is closed;
// nothing in this function closes it.
//
// Every request receives exactly one reply: if a callback is not callable, or
// the request names an unknown method, the reply carries an error instead of
// the worker panicking or the caller waiting forever.
func newJSParser(vm *goja.Runtime, canHandleFunc, parseFunc goja.Value, metadata PluginMeta) *jsParser {
	p := &jsParser{
		vm:    vm,
		reqCh: make(chan jsParserReq, 10),
		meta:  metadata,
	}

	// Resolve the callables once, on the caller's goroutine, and remember
	// whether each one is actually a function.
	canHandleFn, canHandleOK := goja.AssertFunction(canHandleFunc)
	parseFn, parseOK := goja.AssertFunction(parseFunc)

	// canHandle runs the plugin's canHandle function for url.
	canHandle := func(url string) jsParserResp {
		if !canHandleOK {
			return jsParserResp{ok: false, err: fmt.Errorf("canHandle is not a function")}
		}
		res, err := canHandleFn(goja.Undefined(), p.vm.ToValue(url))
		if err != nil {
			return jsParserResp{ok: false, err: err}
		}
		return jsParserResp{ok: res.ToBoolean()}
	}

	// parse runs the plugin's parse function for url and converts the result
	// into a parser.Item via a JSON round trip.
	parse := func(url string) jsParserResp {
		if !parseOK {
			return jsParserResp{err: fmt.Errorf("parse is not a function")}
		}
		result, err := parseFn(goja.Undefined(), p.vm.ToValue(url))
		if err != nil {
			return jsParserResp{err: err}
		}
		exported := result.Export()
		if exported == nil {
			return jsParserResp{err: fmt.Errorf("JS function returned null or undefined")}
		}
		data, err := json.Marshal(exported)
		if err != nil {
			return jsParserResp{err: fmt.Errorf("failed to marshal result to JSON: %w", err)}
		}
		var item parser.Item
		if err := json.Unmarshal(data, &item); err != nil {
			return jsParserResp{err: fmt.Errorf("failed to unmarshal JSON to Item: %w", err)}
		}
		return jsParserResp{item: &item}
	}

	go func() {
		for req := range p.reqCh {
			switch req.method {
			case ParserMethodCanHandle:
				req.respCh <- canHandle(req.url)
			case ParserMethodParse:
				req.respCh <- parse(req.url)
			default:
				// Always answer, otherwise the requester would block forever.
				req.respCh <- jsParserResp{err: fmt.Errorf("unknown parser method %d", req.method)}
			}
		}
	}()
	return p
}
// LoadPlugins loads every JS parser plugin found directly inside dir.
//
// Each regular entry whose name ends in ".js" is read and executed in its own
// goja runtime with `registerParser`, `console`, `ghttp` and `playwright`
// injected. Directories are skipped, including ones whose name ends in ".js".
// Loading stops at the first plugin that cannot be read or fails to run, and
// that error is returned.
func LoadPlugins(ctx context.Context, dir string) error {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	for _, e := range entries {
		// Only plain *.js files are plugins; a directory named "x.js" would
		// otherwise make os.ReadFile fail and abort the whole load.
		if e.IsDir() || filepath.Ext(e.Name()) != ".js" {
			continue
		}
		scriptPath := filepath.Join(dir, e.Name())
		code, err := os.ReadFile(scriptPath)
		if err != nil {
			return fmt.Errorf("error reading plugin %s: %w", e.Name(), err)
		}

		vm := goja.New()
		vm.Set("registerParser", jsRegisterParser(vm))
		// Inject some utils to vm
		logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", e.Name()))
		vm.Set("console", jsConsole(logger))
		// http fetch funcs
		vm.Set("ghttp", jsGhttp(vm))
		// playwright fetch func
		vm.Set("playwright", jsPlaywright(vm, logger))

		if _, err := vm.RunString(string(code)); err != nil {
			return fmt.Errorf("error loading plugin %s: %w", e.Name(), err)
		}
	}
	return nil
}
var (
// pluginNameMu maps a plugin name to the *sync.Mutex that AddPlugin holds
// while loading the plugin with that name, so that concurrent AddPlugin
// calls for the same name run one after another.
pluginNameMu sync.Map
)
// AddPlugin loads the plugin source in code under the given name. Calls that
// share a name are serialized through a per-name mutex kept in pluginNameMu;
// the actual work is done by addPlugin, whose error is returned unchanged.
func AddPlugin(ctx context.Context, code string, name string) error {
	// Fetch the mutex for this name, creating it on first use.
	entry, _ := pluginNameMu.LoadOrStore(name, new(sync.Mutex))
	nameLock := entry.(*sync.Mutex)

	nameLock.Lock()
	defer nameLock.Unlock()

	return addPlugin(ctx, code, name)
}
// addPlugin executes the plugin source in code inside a fresh goja runtime
// (with `registerParser`, `console`, `ghttp` and `playwright` injected) and
// then tries to persist the source to disk.
//
// The file is written to the first configured plugin directory, or to
// "plugins" when none is configured. Only the final path element of name is
// used as the file name so the write cannot escape that directory. Saving is
// best effort: failures are logged as warnings and do not affect the result.
//
// It returns an error only when running the script fails.
func addPlugin(ctx context.Context, code string, name string) error {
	logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", name))

	vm := goja.New()
	vm.Set("registerParser", jsRegisterParser(vm))
	vm.Set("console", jsConsole(logger))
	vm.Set("ghttp", jsGhttp(vm))
	vm.Set("playwright", jsPlaywright(vm, logger))
	if _, err := vm.RunString(code); err != nil {
		return fmt.Errorf("error loading plugin %s: %w", name, err)
	}

	dir := "plugins"
	if configuredDirs := config.C().Parser.PluginDirs; len(configuredDirs) > 0 {
		dir = configuredDirs[0]
	}

	// Strip any directory components ("../", absolute paths) from the name
	// before using it as a file name.
	fileName := filepath.Base(name)
	if fileName == "." || fileName == ".." || fileName == string(filepath.Separator) {
		logger.Warn("Failed to save plugin file: invalid plugin name")
		return nil
	}

	if err := os.MkdirAll(dir, 0755); err != nil {
		logger.Warn("Failed to create plugin directory: " + err.Error())
		return nil
	}
	if err := os.WriteFile(filepath.Join(dir, fileName), []byte(code), 0644); err != nil {
		logger.Warn("Failed to save plugin file: " + err.Error())
	}
	return nil
}

23
parsers/js/plugin.go Normal file
View File

@@ -0,0 +1,23 @@
package js
import "github.com/blang/semver"
// Parser plugin API versions understood by this build.
var (
// LatestParserVersion is the newest plugin API version known to this build.
LatestParserVersion = semver.MustParse("1.0.0")
// MinimumParserVersion is the oldest plugin API version still accepted.
MinimumParserVersion = semver.MustParse("1.0.0")
)
// PluginMeta is the metadata a JS plugin declares about itself. It is decoded
// from JSON using the field tags below.
type PluginMeta struct {
Name string `json:"name"`
Version string `json:"version"` // TODO: version-specific parsing; only v1 exists for now, so it is not implemented yet
Description string `json:"description"`
Author string `json:"author"`
}
// ParserMethod identifies which plugin function a request should invoke.
type ParserMethod uint
const (
// The zero value is skipped, so it names no method.
_ ParserMethod = iota
// ParserMethodCanHandle selects the plugin's canHandle function.
ParserMethodCanHandle
// ParserMethodParse selects the plugin's parse function.
ParserMethodParse
)