fix: implement image extraction from Telegraph nodes

This commit is contained in:
krau
2025-03-24 22:04:55 +08:00
parent da69fe1354
commit 347a60f1f7
4 changed files with 118 additions and 9 deletions

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"github.com/celestix/gotgproto/ext"
"github.com/gotd/td/telegram/downloader"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common"
"github.com/krau/SaveAny-Bot/config"
@@ -13,6 +14,12 @@ import (
"github.com/krau/SaveAny-Bot/types"
)
var Downloader *downloader.Downloader
func init() {
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
}
func worker(queue *queue.TaskQueue, semaphore chan struct{}) {
for {
semaphore <- struct{}{}

View File

@@ -188,6 +188,13 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
common.Log.Errorf("Failed to unmarshal element: %s", err)
continue
}
if len(node.Children) != 0 {
for _, child := range node.Children {
imgs = append(imgs, GetImages(child)...)
}
}
if node.Tag == "img" {
if src, ok := node.Attrs["src"]; ok {
imgs = append(imgs, src)
@@ -258,3 +265,27 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
return cancelCtx.Err()
}
}
func GetImages(node telegraph.Node) []string {
var srcs []string
var nodeElement telegraph.NodeElement
data, err := json.Marshal(node)
if err != nil {
return srcs
}
err = json.Unmarshal(data, &nodeElement)
if err != nil {
return srcs
}
if nodeElement.Tag == "img" {
if src, exists := nodeElement.Attrs["src"]; exists {
srcs = append(srcs, src)
}
}
for _, child := range nodeElement.Children {
srcs = append(srcs, GetImages(child)...)
}
return srcs
}

80
core/download_test.go Normal file
View File

@@ -0,0 +1,80 @@
package core
import (
"reflect"
"testing"
"github.com/celestix/telegraph-go/v2"
)
func TestGetImgSrcs(t *testing.T) {
complexStructure := telegraph.NodeElement{
Tag: "div",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "figure",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image1.png",
},
},
telegraph.NodeElement{
Tag: "p",
Children: []telegraph.Node{
"A text node",
},
},
telegraph.NodeElement{
Tag: "figure",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image2.png",
},
},
},
},
},
},
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image3.png",
},
},
"text node",
telegraph.NodeElement{
Tag: "div",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "span",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image4.png",
},
},
},
},
},
},
},
}
expected := []string{
"https://example.com/image1.png",
"https://example.com/image2.png",
"https://example.com/image3.png",
"https://example.com/image4.png",
}
got := GetImages(complexStructure)
if !reflect.DeepEqual(expected, got) {
t.Errorf("expected %vgot %v", expected, got)
}
}

View File

@@ -1,9 +0,0 @@
package core
import "github.com/gotd/td/telegram/downloader"
var Downloader *downloader.Downloader
func init() {
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
}