docs: update parser plugin readme and add example danbooru parser

feat: inject ghttp to js vm
2025-08-23 12:35:04 +08:00 · 2025-08-23 12:34:39 +08:00
4 changed files with 378 additions and 8 deletions
--- a/parsers/js.go
+++ b/parsers/js.go
@@ -4,6 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
+	"net/http"
 	"os"
 	"path/filepath"

@@ -168,13 +170,79 @@ func LoadPlugins(ctx context.Context, dir string) error {
 		}

 		vm := goja.New()
-		logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", e.Name()))
 		vm.Set("registerParser", registerParser(vm))
+		// Inject some utils to vm
+		logger := log.FromContext(ctx).WithPrefix(fmt.Sprintf("[plugin|parser]/%s", e.Name()))
 		vm.Set("console", map[string]any{
 			"log": func(args ...any) {
-				logger.Info(fmt.Sprint(args...))
+				if len(args) == 0 {
+					return
+				}
+				if len(args) > 1 {
+					logger.Info(args[0], args[1:]...)
+				} else {
+					logger.Info(args[0])
+				}
 			},
 		})
+		// http fetch funcs
+		ghttp := vm.NewObject()
+		ghttp.Set("get", func(call goja.FunctionCall) goja.Value {
+			url := call.Argument(0).String()
+			resp, err := http.Get(url)
+			if err != nil {
+				return vm.ToValue(map[string]any{
+					"error": fmt.Sprintf("failed to fetch %s: %v", url, err),
+				})
+			}
+			defer resp.Body.Close()
+			if resp.StatusCode != http.StatusOK {
+				return vm.ToValue(map[string]any{
+					"error":  fmt.Sprintf("failed to fetch %s: %s", url, resp.Status),
+					"status": resp.StatusCode,
+				})
+			}
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				return vm.ToValue(map[string]any{
+					"error": fmt.Errorf("failed to read response body: %w", err).Error(),
+				})
+			}
+			return vm.ToValue(string(body))
+		})
+		ghttp.Set("getJSON", func(call goja.FunctionCall) goja.Value {
+			url := call.Argument(0).String()
+
+			resp, err := http.Get(url)
+			if err != nil {
+				return vm.ToValue(map[string]any{
+					"error": fmt.Sprintf("failed to fetch %s: %v", url, err),
+				})
+			}
+			defer resp.Body.Close()
+			if resp.StatusCode != http.StatusOK {
+				return vm.ToValue(map[string]any{
+					"error":  fmt.Sprintf("failed to fetch %s: %s", url, resp.Status),
+					"status": resp.StatusCode,
+				})
+			}
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				return vm.ToValue(map[string]any{
+					"error": fmt.Errorf("failed to read response body: %w", err).Error(),
+				})
+			}
+			var jsonData map[string]any
+			if err := json.Unmarshal(body, &jsonData); err != nil {
+				return vm.ToValue(map[string]any{
+					"error": fmt.Errorf("failed to unmarshal JSON: %w", err).Error(),
+				})
+			}
+			return vm.ToValue(map[string]any{
+				"data": jsonData,
+			})
+		})
+		vm.Set("ghttp", ghttp)

 		if _, err := vm.RunString(string(code)); err != nil {
 			return fmt.Errorf("error loading plugin %s: %w", e.Name(), err)
--- a/plugins/README.md
+++ b/plugins/README.md
@@ -0,0 +1,161 @@
+# SaveAnyBot Plugins
+
+SaveAnyBot 可通过插件扩展功能, 目前仅支持 Parser (解析器)插件.
+
+## Parser
+
+解析器为 SaveAnyBot 提供了处理非 Telegram 文件的能力, 例如下载其他网站的图片或视频.
+
+当前解析器接口定义如下:
+
+```go
+type Parser interface {
+	CanHandle(url string) bool // 判断是否能处理给定的 URL
+	Parse(url string) (*Item, error) // 解析 URL, 返回 Item
+}
+
+// Resource is a single downloadable resource with metadata.
+type Resource struct {
+	URL       string            `json:"url"`
+	Filename  string            `json:"filename"` // with ext
+	MimeType  string            `json:"mime_type"`
+	Extension string            `json:"extension"`
+	Size      int64             `json:"size"`    // 0 when unknown
+	Hash      map[string]string `json:"hash"`    // {"md5": "...", "sha256": "..."}
+	Headers   map[string]string `json:"headers"` // HTTP headers when downloading
+	Extra     map[string]any    `json:"extra"`
+}
+
+type Item struct {
+	Site        string         `json:"site"`
+	URL         string         `json:"url"` // original URL of the item
+	Title       string         `json:"title"`
+	Author      string         `json:"author"`
+	Description string         `json:"description"`
+	Tags        []string       `json:"tags"`
+	Resources   []Resource     `json:"resources"`
+	Extra       map[string]any `json:"extra"`
+}
+```
+
+### Write a Parser Plugin
+
+解析器插件可使用 JavaScript 编写, SaveAnyBot 使用 [goja](https://github.com/dop251/goja) 提供运行时, 并向其中注入了以下全局函数或对象:
+
+- **registerParser**: 用于注册解析器, 每个插件必须调用此函数以注册
+- **console.log**: 调用 go 端的 logger 打印日志
+- **ghttp**: 提供 HTTP 请求功能
+
+插件需要提供元数据 `metadata` 并实现 `canHandle` 和 `parse` 两个函数, 最后调用 `registerParser` 注册解析器.
+
+#### Plugin Metadata
+
+插件元数据是一个 JavaScript 对象:
+
+```js
+const metadata = {
+    version: "1.0.0", // 插件版本号, 必须提供, 其他字段可选
+    name: "Example Parser", // 插件名称
+    description: "A parser for example links", // 插件描述
+    author: "Krau", // 插件作者
+}
+```
+
+#### canHandle Function
+
+`canHandle`: `canHandle(url: string): boolean` , 用于判断当前解析器能否解析给定的 URL, 返回布尔值, 例如:
+
+```js
+const canHandle = function (url) {
+	return url.includes("youtube.com/watch?v");
+};
+```
+
+这将让 SaveAnyBot 在遇到包含 `youtube.com/watch?v` 的 url 时调用当前解析器的 `parse`.
+
+#### parse Function
+
+`parse`: `parse(url: string): Item` , 是核心解析函数, 用于解析给定的 url, 返回一个 `Item` 对象, 例:
+
+```js
+const parse = function (url) {
+    var result = {
+        // 元信息
+        site: "YouTube",
+        url: url,
+        title: "测试 YouTube 视频",
+        author: "某视频作者",
+        description: "这是一个测试视频",
+        tags: ["test", "youtube"],
+        // 资源(可下载的文件)列表
+        resources: [
+            {
+                url: "https://example.com/video1.mp4", // 文件直链
+                filename: "somevideo.mp4", // 文件名
+                mime_type: "video/mp4", // 文件 MIME 类型, 可选
+                extension: "mp4", // 文件扩展名, 可选
+                size: 100 * 1024 * 1024, // 文件大小, 单位为字节, 未知可以设置为 0
+                hash: {}, // 文件哈希, 可选, 格式为 {"md5": "xxx", "sha256": "xxx"} 等
+                headers: {}, // 下载文件时所需的 HTTP 头部, 可选, 例如 {"User-Agent": "Mozilla/5.0"}
+                extra: {} // 额外信息, 可选, 可以包含任何自定义数据
+            },
+            {
+                url: "https://example.com/picture1.png",
+                filename: "picture1.png",
+                mime_type: "image/png",
+                extension: "png",
+                size: 1 * 1024 * 1024,
+                hash: {},
+                headers: {},
+                extra: {}
+            }
+        ],
+        extra: {}
+    };
+    return result;
+}
+```
+
+#### HTTP Requests
+
+使用 `ghttp` 对象以发起 HTTP 请求.
+
+**ghttp.get(url: string)** 发起 GET 请求, 成功时返回响应体字符串; 请求失败或响应状态码不为 200 时返回一个包含 `error` 字段的对象 (状态码不为 200 时还包含 `status` 字段):
+
+```js
+const response = ghttp.get("https://example.com/someapi");
+if (response.error) {
+	console.log("Request failed:", response.error);
+}
+if (response.status) {
+	console.log("Response status:", response.status);
+}
+```
+
+**ghttp.getJSON(url: string)** 发起 GET 请求并将响应体解析为 JSON 对象, 始终返回以下对象:
+
+```js
+{
+	data?: object, // 当请求成功且响应体为合法 JSON 对象时包含解析后的数据 (顶层为数组等非对象的 JSON 会解析失败)
+	error?: string, // 当请求失败, 响应状态码不为 200, 或响应体无法读取/无法解析为 JSON 对象时包含错误信息
+	status?: number, // 响应状态码, 仅当响应状态码不为 200 时包含
+}
+```
+
+---
+
+最后别忘了调用 `registerParser` 注册解析器:
+
+```js
+registerParser({
+	metadata,
+	canHandle,
+	parse
+});
+```
+
+### Examples
+
+请先查看 [example_parser_basic.js](./example_parser_basic.js) 了解最简示例解析器插件的实现.
+
+然后查看 [example_parser_danbooru.js](./example_parser_danbooru.js) , 这是一个可直接使用的插件, 用于解析 Danbooru 图片页面并提取图片资源.
--- a/plugins/example_parser_basic.js
+++ b/plugins/example_parser_basic.js
@@ -1,7 +1,5 @@
-// 这是一个示例解析器插件, 模拟处理 YouTube 的视频链接
-
-// 你可以使用 console.log 来在终端中使用 go 的 logger 打印信息
-console.log("Example parser loaded");
+// 这是一个最简示例解析器插件, 用于展示插件所需实现的基本功能
+// 此插件将会模拟处理 YouTube 的视频链接

 /**
 * 插件元数据
@@ -14,6 +12,9 @@ const metadata = {
    author: "Krau", // 插件作者
 }

+// 你可以使用 console.log 来在终端中使用 go 的 logger 打印信息
+console.log("Parser loaded", "name", metadata.name);
+
 /**
 * canHandle 函数用于判断当前解析器能否解析给定的 URL
 */
@@ -22,7 +23,6 @@ const canHandle = function (url) {
    return url.includes("youtube.com/watch?v");
 }

-
 /**
 * 解析 url 并返回一个 Item 对象, 类型定义在 pkg/parser.go 中
 */
@@ -63,8 +63,11 @@ const parse = function (url) {
    return result;
 }

+// 最后需要调用 registerParser 来注册这个解析器
 registerParser({
    metadata,
    canHandle,
    parse
-});
+});
+
+// 更进一步的插件编写信息, 请查看 plugins/example_parser_danbooru.js
--- a/plugins/example_parser_danbooru.js
+++ b/plugins/example_parser_danbooru.js
@@ -0,0 +1,138 @@
+// Danbooru post parser for SaveAnyBot
+// request https://danbooru.donmai.us/posts/{id}.json and parse the response
+
+// Plugin metadata handed to registerParser at the bottom of this file.
+// Per the plugin README, `version` is the only required field; `name`,
+// `description` and `author` are optional.
+const metadata = {
+    name: "Danbooru Post Parser",
+    version: "1.0.0",
+    description: "Parse Danbooru post links via official JSON API",
+    author: "Krau",
+};
+
+// some utils
+const danbooruSourceURLRegexp = /danbooru\.donmai\.us\/(posts|post\/show)\/(\d+)/;
+function getPostID(url) {
+    const m = url.match(danbooruSourceURLRegexp);
+    return m ? m[2] : "";
+}
+function normalizePostURL(id) {
+    return `https://danbooru.donmai.us/posts/${id}`;
+}
+function apiURLFor(id) {
+    return `https://danbooru.donmai.us/posts/${id}.json`;
+}
+
+
+function basenameFromURL(u) {
+    try {
+        const q = u.split("?")[0];
+        const parts = q.split("/");
+        const name = parts[parts.length - 1] || "";
+        return name || "file";
+    } catch (_) {
+        return "file";
+    }
+}
+function extFromFilename(name) {
+    const idx = name.lastIndexOf(".");
+    if (idx < 0) return "";
+    return name.slice(idx + 1).toLowerCase();
+}
+function mimeFromExt(ext) {
+    switch (ext) {
+        case "jpg":
+        case "jpeg":
+            return "image/jpeg";
+        case "png":
+            return "image/png";
+        case "gif":
+            return "image/gif";
+        default:
+            return "";
+    }
+}
+
+// implement canHandle and parse
+const canHandle = function (url) {
+    return danbooruSourceURLRegexp.test(url);
+};
+
+const parse = function (sourceURL) {
+    const id = getPostID(sourceURL);
+    if (!id) {
+        throw new Error("invalid danbooru post url");
+    }
+
+    const normURL = normalizePostURL(id);
+
+    const apiURL = apiURLFor(id);
+    console.log("Danbooru requesting", "url", apiURL);
+    // You can use ghttp.getJSON to fetch and parse JSON in one step.
+    // While the ghttp.get can be used to fetch raw response.
+    const data = ghttp.getJSON(apiURL);
+
+    if (data && data.error) {
+        throw new Error(data.message || "danbooru returned error");
+    }
+
+    const fileURL = data.file_url || "";
+    const largeURL = data.large_file_url || "";
+    const width = data.image_width || 0;
+    const height = data.image_height || 0;
+
+    if (!fileURL && !largeURL) {
+        throw new Error("danbooru response has no file_url / large_file_url");
+    }
+
+    const resources = [];
+    if (fileURL) {
+        const name = basenameFromURL(fileURL);
+        const ext = extFromFilename(name);
+        resources.push({
+            url: fileURL,
+            filename: name,
+            mime_type: mimeFromExt(ext),
+            extension: ext,
+            size: 0,
+            hash: {},
+            headers: {},
+            extra: { width, height, kind: "original" },
+        });
+    }
+    if (largeURL && largeURL !== fileURL) {
+        const name = basenameFromURL(largeURL);
+        const ext = extFromFilename(name);
+        resources.push({
+            url: largeURL,
+            filename: name,
+            mime_type: mimeFromExt(ext),
+            extension: ext,
+            size: 0,
+            hash: {},
+            headers: {},
+            extra: { width, height, kind: "large" },
+        });
+    }
+
+    const tags = (data.tag_string ? String(data.tag_string) : "")
+        .split(" ")
+        .filter(Boolean);
+
+    const item = {
+        site: "Danbooru",
+        url: normURL,
+        title: `Danbooru/${data.id || id}`,
+        author: "Danbooru",
+        description: "",
+        tags: tags,
+        resources: resources,
+        extra: {},
+    };
+
+    return item;
+};
+
+registerParser({
+    metadata,
+    canHandle,
+    parse,
+});
Author	SHA1	Message	Date
krau	231eb61d25	docs: update parser plugin readme and add example danbooru parser	2025-08-23 12:35:04 +08:00
krau	fd1b586b8d	feat: inject ghttp to js vm	2025-08-23 12:34:39 +08:00