fix: implement image extraction from Telegraph nodes
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
"github.com/gotd/td/telegram/downloader"
|
||||
"github.com/gotd/td/tg"
|
||||
"github.com/krau/SaveAny-Bot/common"
|
||||
"github.com/krau/SaveAny-Bot/config"
|
||||
@@ -13,6 +14,12 @@ import (
|
||||
"github.com/krau/SaveAny-Bot/types"
|
||||
)
|
||||
|
||||
var Downloader *downloader.Downloader
|
||||
|
||||
func init() {
|
||||
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
|
||||
}
|
||||
|
||||
func worker(queue *queue.TaskQueue, semaphore chan struct{}) {
|
||||
for {
|
||||
semaphore <- struct{}{}
|
||||
|
||||
@@ -188,6 +188,13 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
|
||||
common.Log.Errorf("Failed to unmarshal element: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(node.Children) != 0 {
|
||||
for _, child := range node.Children {
|
||||
imgs = append(imgs, GetImages(child)...)
|
||||
}
|
||||
}
|
||||
|
||||
if node.Tag == "img" {
|
||||
if src, ok := node.Attrs["src"]; ok {
|
||||
imgs = append(imgs, src)
|
||||
@@ -258,3 +265,27 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
|
||||
return cancelCtx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func GetImages(node telegraph.Node) []string {
|
||||
var srcs []string
|
||||
|
||||
var nodeElement telegraph.NodeElement
|
||||
data, err := json.Marshal(node)
|
||||
if err != nil {
|
||||
return srcs
|
||||
}
|
||||
err = json.Unmarshal(data, &nodeElement)
|
||||
if err != nil {
|
||||
return srcs
|
||||
}
|
||||
|
||||
if nodeElement.Tag == "img" {
|
||||
if src, exists := nodeElement.Attrs["src"]; exists {
|
||||
srcs = append(srcs, src)
|
||||
}
|
||||
}
|
||||
for _, child := range nodeElement.Children {
|
||||
srcs = append(srcs, GetImages(child)...)
|
||||
}
|
||||
return srcs
|
||||
}
|
||||
|
||||
80
core/download_test.go
Normal file
80
core/download_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/celestix/telegraph-go/v2"
|
||||
)
|
||||
|
||||
func TestGetImgSrcs(t *testing.T) {
|
||||
complexStructure := telegraph.NodeElement{
|
||||
Tag: "div",
|
||||
Children: []telegraph.Node{
|
||||
telegraph.NodeElement{
|
||||
Tag: "figure",
|
||||
Children: []telegraph.Node{
|
||||
telegraph.NodeElement{
|
||||
Tag: "img",
|
||||
Attrs: map[string]string{
|
||||
"src": "https://example.com/image1.png",
|
||||
},
|
||||
},
|
||||
telegraph.NodeElement{
|
||||
Tag: "p",
|
||||
Children: []telegraph.Node{
|
||||
"A text node",
|
||||
},
|
||||
},
|
||||
telegraph.NodeElement{
|
||||
Tag: "figure",
|
||||
Children: []telegraph.Node{
|
||||
telegraph.NodeElement{
|
||||
Tag: "img",
|
||||
Attrs: map[string]string{
|
||||
"src": "https://example.com/image2.png",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
telegraph.NodeElement{
|
||||
Tag: "img",
|
||||
Attrs: map[string]string{
|
||||
"src": "https://example.com/image3.png",
|
||||
},
|
||||
},
|
||||
"text node",
|
||||
telegraph.NodeElement{
|
||||
Tag: "div",
|
||||
Children: []telegraph.Node{
|
||||
telegraph.NodeElement{
|
||||
Tag: "span",
|
||||
Children: []telegraph.Node{
|
||||
telegraph.NodeElement{
|
||||
Tag: "img",
|
||||
Attrs: map[string]string{
|
||||
"src": "https://example.com/image4.png",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
"https://example.com/image1.png",
|
||||
"https://example.com/image2.png",
|
||||
"https://example.com/image3.png",
|
||||
"https://example.com/image4.png",
|
||||
}
|
||||
|
||||
got := GetImages(complexStructure)
|
||||
|
||||
if !reflect.DeepEqual(expected, got) {
|
||||
t.Errorf("expected %v,got %v", expected, got)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package core
|
||||
|
||||
import "github.com/gotd/td/telegram/downloader"
|
||||
|
||||
var Downloader *downloader.Downloader
|
||||
|
||||
func init() {
|
||||
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
|
||||
}
|
||||
Reference in New Issue
Block a user