feat: enhance URL handling by adding utility functions and filters for message entities (#105)
This commit is contained in:
@@ -4,6 +4,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/celestix/gotgproto/dispatcher"
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
@@ -12,6 +13,7 @@ import (
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/msgelem"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/shortcut"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/fsutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
"github.com/krau/SaveAny-Bot/parsers"
|
||||
"github.com/krau/SaveAny-Bot/pkg/enums/tasktype"
|
||||
"github.com/krau/SaveAny-Bot/pkg/tcbdata"
|
||||
@@ -21,6 +23,10 @@ import (
|
||||
func handleTextMessage(ctx *ext.Context, u *ext.Update) error {
|
||||
logger := log.FromContext(ctx)
|
||||
text := u.EffectiveMessage.Text
|
||||
entityUrls := tgutil.ExtractMessageEntityUrls(u.EffectiveMessage.Message)
|
||||
if len(entityUrls) > 0 {
|
||||
text += "\n" + strings.Join(entityUrls, "\n")
|
||||
}
|
||||
ok, pser := parsers.CanHandle(text)
|
||||
if !ok {
|
||||
return dispatcher.EndGroups
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/celestix/gotgproto/dispatcher/handlers/filters"
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
"github.com/charmbracelet/log"
|
||||
sabotfilters "github.com/krau/SaveAny-Bot/client/bot/handlers/utils/filters"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/re"
|
||||
"github.com/krau/SaveAny-Bot/client/bot/handlers/utils/ruleutil"
|
||||
userclient "github.com/krau/SaveAny-Bot/client/user"
|
||||
@@ -47,16 +48,8 @@ func Register(disp dispatcher.Dispatcher) {
|
||||
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeSetDefault), handleSetDefaultCallback))
|
||||
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeCancel), handleCancelCallback))
|
||||
disp.AddHandler(handlers.NewCallbackQuery(filters.CallbackQuery.Prefix(tcbdata.TypeConfig), handleConfigCallback))
|
||||
linkRegexFilter, err := filters.Message.Regex(re.TgMessageLinkRegexString)
|
||||
if err != nil {
|
||||
panic("failed to create regex filter: " + err.Error())
|
||||
}
|
||||
disp.AddHandler(handlers.NewMessage(linkRegexFilter, handleSilentMode(handleMessageLink, handleSilentSaveLink)))
|
||||
telegraphUrlRegexFilter, err := filters.Message.Regex(re.TelegraphUrlRegexString)
|
||||
if err != nil {
|
||||
panic("failed to create Telegraph URL regex filter: " + err.Error())
|
||||
}
|
||||
disp.AddHandler(handlers.NewMessage(telegraphUrlRegexFilter, handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
|
||||
disp.AddHandler(handlers.NewMessage(sabotfilters.RegexUrl(regexp.MustCompile(re.TgMessageLinkRegexString)), handleSilentMode(handleMessageLink, handleSilentSaveLink)))
|
||||
disp.AddHandler(handlers.NewMessage(sabotfilters.RegexUrl(regexp.MustCompile(re.TelegraphUrlRegexString)), handleSilentMode(handleTelegraphUrlMessage, handleSilentSaveTelegraph)))
|
||||
disp.AddHandler(handlers.NewMessage(filters.Message.Media, handleSilentMode(handleMediaMessage, handleSilentSaveMedia)))
|
||||
disp.AddHandler(handlers.NewMessage(filters.Message.Text, handleSilentMode(handleTextMessage, handleSilentSaveText)))
|
||||
|
||||
|
||||
26
client/bot/handlers/utils/filters/url.go
Normal file
26
client/bot/handlers/utils/filters/url.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"slices"
|
||||
|
||||
"github.com/celestix/gotgproto/dispatcher/handlers/filters"
|
||||
"github.com/celestix/gotgproto/types"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/tgutil"
|
||||
)
|
||||
|
||||
func RegexUrl(r *regexp.Regexp) filters.MessageFilter {
|
||||
return func(m *types.Message) bool {
|
||||
if m.Text == "" {
|
||||
return false
|
||||
}
|
||||
if r.MatchString(m.Text) {
|
||||
return true
|
||||
}
|
||||
urls := tgutil.ExtractMessageEntityUrls(m.Message)
|
||||
if len(urls) == 0 {
|
||||
return false
|
||||
}
|
||||
return slices.ContainsFunc(urls, r.MatchString)
|
||||
}
|
||||
}
|
||||
@@ -64,7 +64,7 @@ type EditMessageFunc func(text string, markup tg.ReplyMarkupClass)
|
||||
// 获取链接中的文件并回复等待消息
|
||||
func GetFilesFromUpdateLinkMessageWithReplyEdit(ctx *ext.Context, update *ext.Update) (replied *types.Message, files []tfile.TGFileMessage, editReplied EditMessageFunc, err error) {
|
||||
logger := log.FromContext(ctx)
|
||||
msgLinks := re.TgMessageLinkRegexp.FindAllString(update.EffectiveMessage.GetMessage(), -1)
|
||||
msgLinks := re.TgMessageLinkRegexp.FindAllString(tgutil.ExtractMessageEntityUrlsText(update.EffectiveMessage.Message), -1)
|
||||
if len(msgLinks) == 0 {
|
||||
logger.Warn("no matched message links but called handleMessageLink")
|
||||
return nil, nil, nil, dispatcher.EndGroups
|
||||
@@ -178,7 +178,7 @@ type TelegraphResult struct {
|
||||
// return replied message, image urls, telegraph path(unescaped), error
|
||||
func GetTphPicsFromMessageWithReply(ctx *ext.Context, update *ext.Update) (*types.Message, *TelegraphResult, error) {
|
||||
logger := log.FromContext(ctx)
|
||||
tphurl := re.TelegraphUrlRegexp.FindString(update.EffectiveMessage.GetMessage()) // TODO: batch urls
|
||||
tphurl := re.TelegraphUrlRegexp.FindString(tgutil.ExtractMessageEntityUrlsText(update.EffectiveMessage.Message))
|
||||
if tphurl == "" {
|
||||
logger.Warnf("No telegraph url found but called handleTelegraph")
|
||||
return nil, nil, dispatcher.ContinueGroups
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
|
||||
"github.com/celestix/gotgproto/ext"
|
||||
"github.com/duke-git/lancet/v2/maputil"
|
||||
@@ -306,3 +307,49 @@ func GetGroupedMessages(ctx *ext.Context, chatID int64, msg *tg.Message) ([]*tg.
|
||||
}
|
||||
return groupedMessages, nil
|
||||
}
|
||||
|
||||
func ExtractMessageEntityUrls(msg *tg.Message) []string {
|
||||
if len(msg.Entities) == 0 {
|
||||
return nil
|
||||
}
|
||||
msgText := msg.GetMessage()
|
||||
if msgText == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
runes := []rune(msgText)
|
||||
utf16Codes := utf16.Encode(runes)
|
||||
|
||||
var urls []string
|
||||
for _, entity := range msg.Entities {
|
||||
switch ent := entity.(type) {
|
||||
case *tg.MessageEntityTextURL:
|
||||
urls = append(urls, ent.GetURL())
|
||||
case *tg.MessageEntityURL:
|
||||
start := ent.Offset
|
||||
end := ent.Offset + ent.Length
|
||||
if start < 0 || end > len(utf16Codes) {
|
||||
continue
|
||||
}
|
||||
subRunes := utf16.Decode(utf16Codes[start:end])
|
||||
urls = append(urls, string(subRunes))
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
|
||||
func ExtractMessageEntityUrlsText(msg *tg.Message) string {
|
||||
if msg == nil {
|
||||
return ""
|
||||
}
|
||||
urls := ExtractMessageEntityUrls(msg)
|
||||
if len(urls) == 0 {
|
||||
return msg.GetMessage()
|
||||
}
|
||||
var sb strings.Builder
|
||||
for _, url := range urls {
|
||||
sb.WriteString(url)
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user