feat: enhance URL handling by adding utility functions and filters for message entities (#105)

This commit is contained in:
Krau
2025-09-09 20:16:56 +08:00
committed by GitHub
parent 9cb866de8c
commit f05dd883e3
5 changed files with 84 additions and 12 deletions

View File

@@ -4,6 +4,7 @@ package handlers
import (
"errors"
"strings"
"github.com/celestix/gotgproto/dispatcher"
"github.com/celestix/gotgproto/ext"
@@ -12,6 +13,7 @@ import (
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut"
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
"github.com/krau/SaveAny-Bot/parsers"
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
@@ -21,6 +23,10 @@ import (
func handleTextMessage(ctx *ext.Context, u *ext.Update) error {
logger := log.FromContext(ctx)
text := u.EffectiveMessage.Text
entityUrls := tgutil.ExtractMessageEntityUrls(u.EffectiveMessage.Message)
if len(entityUrls) > 0 {
text += "\n" + strings.Join(entityUrls, "\n")
}
ok, pser := parsers.CanHandle(text)
if !ok {
return dispatcher.EndGroups

View File

@@ -10,6 +10,7 @@ import (
"github.com/celestix/gotgproto/dispatcher/handlers/filters"
"github.com/celestix/gotgproto/ext"
"github.com/charmbracelet/log"
sabotfilters "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/filters"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/re"
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil"
userclient "github.com/krau/SaveAny-Bot/client/user"
@@ -47,16 +48,8 @@ func Register(disp dispatcher.Dispatcher) {
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeSetDefault), handleSetDefaultCallback))
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeCancel), handleCancelCallback))
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeConfig), handleConfigCallback))
linkRegexFilter, err := filters.Message.Regex(re.TgMessageLinkRegexString)
if err != nil {
panic("failed to create regex filter: " + err.Error())
}
disp.AddHandler(handlers.NewMessage(linkRegexFilter, handleSilentMode(handleMessageLink, handleSilentSaveLink)))
telegraphUrlRegexFilter, err := filters.Message.Regex(re.TelegraphUrlRegexString)
if err != nil {
panic("failed to create Telegraph URL regex filter: " + err.Error())
}
disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
disp.AddHandler(handlers.NewMessage(sabotfilters.RegexUrl(regexp.MustCompile(re.TgMessageLinkRegexString)), handleSilentMode(handleMessageLink, handleSilentSaveLink)))
disp.AddHandler(handlers.NewMessage(sabotfilters.RegexUrl(regexp.MustCompile(re.TelegraphUrlRegexString)), handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia)))
disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText)))

View File

@@ -0,0 +1,26 @@
package filters
import (
"regexp"
"slices"
"github.com/celestix/gotgproto/dispatcher/handlers/filters"
"github.com/celestix/gotgproto/types"
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
)
// RegexUrl returns a message filter that reports whether r matches a message.
//
// A message with empty Text never passes. Otherwise the filter passes when r
// matches the Text itself, or when r matches at least one of the URLs that
// tgutil.ExtractMessageEntityUrls extracts from the underlying message.
func RegexUrl(r *regexp.Regexp) filters.MessageFilter {
	return func(m *types.Message) bool {
		switch {
		case m.Text == "":
			return false
		case r.MatchString(m.Text):
			return true
		}
		// The visible text did not match; fall back to the URLs carried by
		// the message entities.
		for _, candidate := range tgutil.ExtractMessageEntityUrls(m.Message) {
			if r.MatchString(candidate) {
				return true
			}
		}
		return false
	}
}

View File

@@ -64,7 +64,7 @@ type EditMessageFunc func(text string, markup tg.ReplyMarkupClass)
// 获取链接中的文件并回复等待消息
func GetFilesFromUpdateLinkMessageWithReplyEdit(ctx *ext.Context, update *ext.Update) (replied *types.Message, files []tfile.TGFileMessage, editReplied EditMessageFunc, err error) {
logger := log.FromContext(ctx)
msgLinks := re.TgMessageLinkRegexp.FindAllString(update.EffectiveMessage.GetMessage(), -1)
msgLinks := re.TgMessageLinkRegexp.FindAllString(tgutil.ExtractMessageEntityUrlsText(update.EffectiveMessage.Message), -1)
if len(msgLinks) == 0 {
logger.Warn("no matched message links but called handleMessageLink")
return nil, nil, nil, dispatcher.EndGroups
@@ -178,7 +178,7 @@ type TelegraphResult struct {
// return replied message, image urls, telegraph path(unescaped), error
func GetTphPicsFromMessageWithReply(ctx *ext.Context, update *ext.Update) (*types.Message, *TelegraphResult, error) {
logger := log.FromContext(ctx)
tphurl := re.TelegraphUrlRegexp.FindString(update.EffectiveMessage.GetMessage()) // TODO: batch urls
tphurl := re.TelegraphUrlRegexp.FindString(tgutil.ExtractMessageEntityUrlsText(update.EffectiveMessage.Message))
if tphurl == "" {
logger.Warnf("No telegraph url found but called handleTelegraph")
return nil, nil, dispatcher.ContinueGroups

View File

@@ -5,6 +5,7 @@ import (
"strconv"
"strings"
"unicode"
"unicode/utf16"
"github.com/celestix/gotgproto/ext"
"github.com/duke-git/lancet/v2/maputil"
@@ -306,3 +307,49 @@ func GetGroupedMessages(ctx *ext.Context, chatID int64, msg *tg.Message) ([]*tg.
}
return groupedMessages, nil
}
// ExtractMessageEntityUrls collects the URLs referenced by the entities of msg.
//
// Two entity kinds contribute to the result, in entity order:
//   - *tg.MessageEntityTextURL: the URL attached to the entity (GetURL).
//   - *tg.MessageEntityURL: the part of the message text selected by the
//     entity's Offset and Length.
//
// Offset and Length are applied to the UTF-16 encoding of the message text,
// so the text is converted to UTF-16 code units before slicing and the slice
// is decoded back into a string. URL entities whose range is negative,
// inverted (negative Length) or extends past the end of the text are skipped
// instead of causing a slice-bounds panic.
//
// It returns nil when msg is nil, when msg has no entities, or when the
// message text is empty.
func ExtractMessageEntityUrls(msg *tg.Message) []string {
	if msg == nil || len(msg.Entities) == 0 {
		return nil
	}
	msgText := msg.GetMessage()
	if msgText == "" {
		return nil
	}
	utf16Codes := utf16.Encode([]rune(msgText))
	var urls []string
	for _, entity := range msg.Entities {
		switch ent := entity.(type) {
		case *tg.MessageEntityTextURL:
			urls = append(urls, ent.GetURL())
		case *tg.MessageEntityURL:
			start, end := ent.Offset, ent.Offset+ent.Length
			// end < start guards against a negative Length, which would
			// otherwise pass the other two checks and panic when slicing.
			if start < 0 || end < start || end > len(utf16Codes) {
				continue
			}
			urls = append(urls, string(utf16.Decode(utf16Codes[start:end])))
		}
	}
	return urls
}
// ExtractMessageEntityUrlsText returns a single string suitable for URL
// matching against msg.
//
// When ExtractMessageEntityUrls finds URLs in msg, the result is those URLs
// with a single space written after each one (so the string ends with a
// space). When no URLs are found, the plain message text is returned instead.
// A nil msg yields the empty string.
func ExtractMessageEntityUrlsText(msg *tg.Message) string {
	if msg == nil {
		return ""
	}
	if found := ExtractMessageEntityUrls(msg); len(found) > 0 {
		// Join puts a space between URLs; the extra one keeps the trailing
		// separator after the last URL.
		return strings.Join(found, " ") + " "
	}
	return msg.GetMessage()
}