Compare commits

...

2 Commits

Author SHA1 Message Date
krau
347a60f1f7 fix: implement image extraction from Telegraph nodes 2025-03-24 22:04:55 +08:00
krau
da69fe1354 feat: enhance file name generation to include media extensions 2025-03-24 21:36:13 +08:00
5 changed files with 150 additions and 9 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
"github.com/gabriel-vasile/mimetype"
"github.com/gotd/td/telegram/message/entity"
"github.com/gotd/td/telegram/message/styling"
"github.com/gotd/td/tg"
@@ -277,6 +278,19 @@ func GenFileNameFromMessage(message tg.Message, file *types.File) string {
if file.FileName != "" {
return file.FileName
}
fileName := genFileNameFromMessageText(message, file)
media, ok := message.GetMedia()
if !ok {
return fileName
}
ext, ok := extraMediaExt(media)
if ok {
return fileName + ext
}
return fileName
}
func genFileNameFromMessageText(message tg.Message, file *types.File) string {
text := strings.TrimSpace(message.GetMessage())
if text == "" {
return file.Hash()
@@ -288,3 +302,21 @@ func GenFileNameFromMessage(message tg.Message, file *types.File) string {
runes := []rune(text)
return string(runes[:min(128, len(runes))])
}
func extraMediaExt(media tg.MessageMediaClass) (string, bool) {
switch media := media.(type) {
case *tg.MessageMediaDocument:
doc, ok := media.Document.AsNotEmpty()
if !ok {
return "", false
}
ext := mimetype.Lookup(doc.MimeType).Extension()
if ext == "" {
return "", false
}
return ext, true
case *tg.MessageMediaPhoto:
return ".jpg", true
}
return "", false
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"github.com/celestix/gotgproto/ext"
"github.com/gotd/td/telegram/downloader"
"github.com/gotd/td/tg"
"github.com/krau/SaveAny-Bot/common"
"github.com/krau/SaveAny-Bot/config"
@@ -13,6 +14,12 @@ import (
"github.com/krau/SaveAny-Bot/types"
)
var Downloader *downloader.Downloader
func init() {
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
}
func worker(queue *queue.TaskQueue, semaphore chan struct{}) {
for {
semaphore <- struct{}{}

View File

@@ -188,6 +188,13 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
common.Log.Errorf("Failed to unmarshal element: %s", err)
continue
}
if len(node.Children) != 0 {
for _, child := range node.Children {
imgs = append(imgs, GetImages(child)...)
}
}
if node.Tag == "img" {
if src, ok := node.Attrs["src"]; ok {
imgs = append(imgs, src)
@@ -258,3 +265,27 @@ func processTelegraph(extCtx *ext.Context, cancelCtx context.Context, task *type
return cancelCtx.Err()
}
}
func GetImages(node telegraph.Node) []string {
var srcs []string
var nodeElement telegraph.NodeElement
data, err := json.Marshal(node)
if err != nil {
return srcs
}
err = json.Unmarshal(data, &nodeElement)
if err != nil {
return srcs
}
if nodeElement.Tag == "img" {
if src, exists := nodeElement.Attrs["src"]; exists {
srcs = append(srcs, src)
}
}
for _, child := range nodeElement.Children {
srcs = append(srcs, GetImages(child)...)
}
return srcs
}

80
core/download_test.go Normal file
View File

@@ -0,0 +1,80 @@
package core
import (
"reflect"
"testing"
"github.com/celestix/telegraph-go/v2"
)
func TestGetImgSrcs(t *testing.T) {
complexStructure := telegraph.NodeElement{
Tag: "div",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "figure",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image1.png",
},
},
telegraph.NodeElement{
Tag: "p",
Children: []telegraph.Node{
"A text node",
},
},
telegraph.NodeElement{
Tag: "figure",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image2.png",
},
},
},
},
},
},
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image3.png",
},
},
"text node",
telegraph.NodeElement{
Tag: "div",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "span",
Children: []telegraph.Node{
telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": "https://example.com/image4.png",
},
},
},
},
},
},
},
}
expected := []string{
"https://example.com/image1.png",
"https://example.com/image2.png",
"https://example.com/image3.png",
"https://example.com/image4.png",
}
got := GetImages(complexStructure)
if !reflect.DeepEqual(expected, got) {
t.Errorf("expected %vgot %v", expected, got)
}
}

View File

@@ -1,9 +0,0 @@
package core
import "github.com/gotd/td/telegram/downloader"
var Downloader *downloader.Downloader
func init() {
Downloader = downloader.NewDownloader().WithPartSize(1024 * 1024)
}