From 347a60f1f74abd2b0a1ba56f9c79adf8d3a8e6f2 Mon Sep 17 00:00:00 2001 From: krau <71133316+krau@users.noreply.github.com> Date: Mon, 24 Mar 2025 22:04:55 +0800 Subject: [PATCH] fix: implement image extraction from Telegraph nodes --- core/core.go | 7 ++++ core/download.go | 31 +++++++++++++++++ core/download_test.go | 80 +++++++++++++++++++++++++++++++++++++++++++ core/downloader.go | 9 ----- 4 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 core/download_test.go delete mode 100644 core/downloader.go diff --git a/core/core.go b/core/core.go index 6620d4e..f76e8e8 100644 --- a/core/core.go +++ b/core/core.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/celestix/gotgproto/ext" + "github.com/gotd/td/telegram/downloader" "github.com/gotd/td/tg" "github.com/krau/SaveAny-Bot/common" "github.com/krau/SaveAny-Bot/config" @@ -13,6 +14,12 @@ import ( "github.com/krau/SaveAny-Bot/types" ) +var Downloader *downloader.Downloader + +func init() { + Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024) +} + func worker(queue *queue.TaskQueue, semaphore chan struct{}) { for { semaphore <- struct{}{} diff --git a/core/download.go b/core/download.go index f281bec..1c0384d 100644 --- a/core/download.go +++ b/core/download.go @@ -188,6 +188,13 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type common.Log.Errorf("Failed to unmarshal element: %s", err) continue } + + if len(node.Children) != 0 { + for _, child := range node.Children { + imgs = append(imgs, GetImages(child)...) + } + } + if node.Tag == "img" { if src, ok := node.Attrs["src"]; ok { imgs = append(imgs, src) @@ -258,3 +265,27 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type return cancelCtx.Err() } } + +func GetImages(node telegraph.Node) []string { + var srcs []string + + var nodeElement telegraph.NodeElement + data, err := json.Marshal(node) + if err != nil { + return srcs + } + err = json.Unmarshal(data, &nodeElement) + if err != nil { + return srcs + } + + if nodeElement.Tag == "img" { + if src, exists := nodeElement.Attrs["src"]; exists { + srcs = append(srcs, src) + } + } + for _, child := range nodeElement.Children { + srcs = append(srcs, GetImages(child)...) + } + return srcs +} diff --git a/core/download_test.go b/core/download_test.go new file mode 100644 index 0000000..bb2c28a --- /dev/null +++ b/core/download_test.go @@ -0,0 +1,80 @@ +package core + +import ( + "reflect" + "testing" + + "github.com/celestix/telegraph-go/v2" +) + +func TestGetImgSrcs(t *testing.T) { + complexStructure := telegraph.NodeElement{ + Tag: "div", + Children: []telegraph.Node{ + telegraph.NodeElement{ + Tag: "figure", + Children: []telegraph.Node{ + telegraph.NodeElement{ + Tag: "img", + Attrs: map[string]string{ + "src": "https://example.com/image1.png", + }, + }, + telegraph.NodeElement{ + Tag: "p", + Children: []telegraph.Node{ + "A text node", + }, + }, + telegraph.NodeElement{ + Tag: "figure", + Children: []telegraph.Node{ + telegraph.NodeElement{ + Tag: "img", + Attrs: map[string]string{ + "src": "https://example.com/image2.png", + }, + }, + }, + }, + }, + }, + telegraph.NodeElement{ + Tag: "img", + Attrs: map[string]string{ + "src": "https://example.com/image3.png", + }, + }, + "text node", + telegraph.NodeElement{ + Tag: "div", + Children: []telegraph.Node{ + telegraph.NodeElement{ + Tag: "span", + Children: []telegraph.Node{ + telegraph.NodeElement{ + Tag: "img", + Attrs: map[string]string{ + "src": "https://example.com/image4.png", + }, + }, + }, + }, + }, + }, + }, + } + + expected := []string{ + "https://example.com/image1.png", + "https://example.com/image2.png", + "https://example.com/image3.png", + "https://example.com/image4.png", + } + + got := GetImages(complexStructure) + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected %v,got %v", expected, got) + } +} diff --git a/core/downloader.go b/core/downloader.go deleted file mode 100644 index f9e10a4..0000000 --- a/core/downloader.go +++ /dev/null @@ -1,9 +0,0 @@ -package core - -import "github.com/gotd/td/telegram/downloader" - -var Downloader *downloader.Downloader - -func init() { - Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024) -}