mirror of
https://github.com/krau/SaveAny-Bot.git
synced 2026-05-30 12:40:05 +08:00
refactor: js plugin api
This commit is contained in:
36
parsers/native/kemono/download.go
Normal file
36
parsers/native/kemono/download.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package kemono
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DownloadInfo holds the identifiers taken from the path of a Kemono page URL.
//
// PostID is left empty when the URL has no post part.
type DownloadInfo struct {
	ServiceName string
	UserID      string
	PostID      string
}
|
||||
|
||||
func extractDownloadInfoFromURL(u string) *DownloadInfo {
|
||||
if !strings.HasPrefix(u, "http://") && !strings.HasPrefix(u, "https://") {
|
||||
u = "https://" + u
|
||||
}
|
||||
url, err := url.Parse(u)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
parts := strings.Split(strings.Trim(url.Path, "/"), "/")
|
||||
if len(parts) == 3 {
|
||||
return &DownloadInfo{
|
||||
ServiceName: parts[0],
|
||||
UserID: parts[2],
|
||||
}
|
||||
} else if len(parts) == 5 && parts[3] == "post" {
|
||||
return &DownloadInfo{
|
||||
ServiceName: parts[0],
|
||||
UserID: parts[2],
|
||||
PostID: parts[4],
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
173
parsers/native/kemono/kemono.go
Normal file
173
parsers/native/kemono/kemono.go
Normal file
@@ -0,0 +1,173 @@
|
||||
package kemono
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/duke-git/lancet/v2/strutil"
|
||||
"github.com/krau/SaveAny-Bot/common/utils/netutil"
|
||||
"github.com/krau/SaveAny-Bot/pkg/parser"
|
||||
)
|
||||
|
||||
// KemonoParser is the parser for Kemono page URLs. It has no fields.
type KemonoParser struct{}
|
||||
|
||||
// kemonoDomains lists the domain names that mark a URL as a Kemono URL.
var kemonoDomains = []string{"kemono.su", "kemono.cr"}

// ErrFailedToExtractInfo is returned when no download info can be extracted
// from the URL handed to the parser.
var ErrFailedToExtractInfo = errors.New("failed to extract download info from URL")
|
||||
|
||||
// kemonoApiBase is the URL prefix used to build Kemono API endpoints.
const kemonoApiBase = "https://kemono.cr/api/v1"
|
||||
|
||||
func (k *KemonoParser) CanHandle(text string) bool {
|
||||
text = strings.TrimPrefix(text, "https://")
|
||||
text = strings.TrimPrefix(text, "http://")
|
||||
|
||||
var matchesDomain bool
|
||||
for _, domain := range kemonoDomains {
|
||||
if strings.Contains(text, domain) {
|
||||
matchesDomain = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matchesDomain {
|
||||
return false
|
||||
}
|
||||
|
||||
var path string
|
||||
for _, domain := range kemonoDomains {
|
||||
if idx := strings.Index(text, domain); idx != -1 {
|
||||
remaining := text[idx+len(domain):]
|
||||
if len(remaining) > 0 && remaining[0] == '/' {
|
||||
path = remaining[1:]
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if path == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
parts := strings.Split(path, "/")
|
||||
// servicename/user/id (user profile page)
|
||||
// servicename/user/id/post/id (post page)
|
||||
return len(parts) == 3 || (len(parts) == 5 && parts[3] == "post")
|
||||
}
|
||||
|
||||
func (k *KemonoParser) Parse(ctx context.Context, u string) (*parser.Item, error) {
|
||||
info := extractDownloadInfoFromURL(u)
|
||||
if info == nil {
|
||||
return nil, ErrFailedToExtractInfo
|
||||
}
|
||||
if info.PostID != "" {
|
||||
return k.parseOne(ctx, info)
|
||||
}
|
||||
return k.parseUserPage(ctx, info)
|
||||
}
|
||||
|
||||
func (k *KemonoParser) parseOne(ctx context.Context, info *DownloadInfo) (*parser.Item, error) {
|
||||
client := netutil.DefaultParserHTTPClient()
|
||||
endpoint := fmt.Sprintf("%s/%s/user/%s/post/%s", kemonoApiBase, info.ServiceName, info.UserID, info.PostID)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request to Kemono API: %w", err)
|
||||
}
|
||||
req.Header.Set("Accept", "text/css")
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch Kemono API: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("failed to fetch Kemono API, status code: %d", resp.StatusCode)
|
||||
}
|
||||
var postInfo PostInfo
|
||||
if err := json.NewDecoder(resp.Body).Decode(&postInfo); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode Kemono API response: %w", err)
|
||||
}
|
||||
item := &parser.Item{
|
||||
Site: "kemono",
|
||||
Title: postInfo.Post.Title,
|
||||
URL: fmt.Sprintf("https://kemono.cr/%s/user/%s/post/%s", info.ServiceName, info.UserID, info.PostID),
|
||||
Author: postInfo.Post.User, // [TODO] request user profile
|
||||
Description: postInfo.Post.Content,
|
||||
Tags: func() []string {
|
||||
if postInfo.Post.Tags != nil {
|
||||
return *postInfo.Post.Tags
|
||||
}
|
||||
return nil
|
||||
}(),
|
||||
}
|
||||
resources := make([]parser.Resource, 0)
|
||||
for _, attachment := range postInfo.Attachments {
|
||||
if attachment.Server == nil || attachment.Path == nil || attachment.Name == nil {
|
||||
continue
|
||||
}
|
||||
var size int64
|
||||
fileUrl := fmt.Sprintf("%s/data%s", *attachment.Server, *attachment.Path)
|
||||
headReq, err := http.NewRequestWithContext(ctx, http.MethodHead, fileUrl, nil)
|
||||
if err == nil {
|
||||
resp, err := client.Do(headReq)
|
||||
if err == nil {
|
||||
size = resp.ContentLength
|
||||
resp.Body.Close()
|
||||
}
|
||||
}
|
||||
resources = append(resources, parser.Resource{
|
||||
URL: fmt.Sprintf("%s/data%s", *attachment.Server, *attachment.Path),
|
||||
Filename: *attachment.Name,
|
||||
Size: size,
|
||||
})
|
||||
}
|
||||
picCdnMap := make(map[string]string)
|
||||
for _, preview := range postInfo.Previews {
|
||||
if preview.Type == nil || *preview.Type != "thumbnail" {
|
||||
continue
|
||||
}
|
||||
picCdnMap[*preview.Path] = *preview.Server
|
||||
}
|
||||
for _, attachment := range postInfo.Post.Attachments {
|
||||
if !isImageExt(*attachment.Path) {
|
||||
continue
|
||||
}
|
||||
picUrl, err := url.JoinPath(picCdnMap[*attachment.Path], "data", *attachment.Path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var size int64
|
||||
headReq, err := http.NewRequestWithContext(ctx, http.MethodHead, picUrl, nil)
|
||||
if err == nil {
|
||||
resp, err := client.Do(headReq)
|
||||
if err == nil {
|
||||
size = resp.ContentLength
|
||||
resp.Body.Close()
|
||||
}
|
||||
}
|
||||
resources = append(resources, parser.Resource{
|
||||
URL: picUrl,
|
||||
Filename: *attachment.Name,
|
||||
Size: size,
|
||||
})
|
||||
}
|
||||
item.Resources = resources
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (k *KemonoParser) parseUserPage(_ context.Context, _ *DownloadInfo) (*parser.Item, error) {
|
||||
return nil, errors.New("kemono user page not implemented")
|
||||
}
|
||||
|
||||
func isImageExt(attachmentPath string) bool {
|
||||
return strutil.HasSuffixAny(path.Ext(strings.Split(attachmentPath, "?")[0]), []string{".jpg", ".jpeg", ".png", ".webp"})
|
||||
}
|
||||
62
parsers/native/kemono/post_info.go
Normal file
62
parsers/native/kemono/post_info.go
Normal file
@@ -0,0 +1,62 @@
|
||||
// https://github.com/kemono-rs/kemono
|
||||
|
||||
package kemono
|
||||
|
||||
type PostInfo struct {
|
||||
Post Post `json:"post"`
|
||||
Attachments []AttachmentLike `json:"attachments"`
|
||||
Previews []AttachmentLike `json:"previews"`
|
||||
}
|
||||
|
||||
// AttachmentLike is the shape shared by entries of the "attachments" and
// "previews" arrays. Every field is optional and is nil when the JSON key is
// absent.
type AttachmentLike struct {
	Type   *string `json:"type,omitempty"`
	Server *string `json:"server,omitempty"`
	Name   *string `json:"name,omitempty"`
	Path   *string `json:"path,omitempty"`
}
|
||||
|
||||
type Post struct {
|
||||
ID string `json:"id"`
|
||||
User string `json:"user"`
|
||||
Service string `json:"service"`
|
||||
Title string `json:"title"`
|
||||
Content string `json:"content"`
|
||||
Embed Embed `json:"embed"`
|
||||
SharedFile bool `json:"shared_file"`
|
||||
Added *string `json:"added,omitempty"`
|
||||
Published string `json:"published"`
|
||||
Edited *string `json:"edited,omitempty"`
|
||||
File File `json:"file"`
|
||||
Attachments []AttachmentLike `json:"attachments"`
|
||||
Poll *Poll `json:"poll,omitempty"`
|
||||
Captions *string `json:"captions,omitempty"`
|
||||
Tags *[]string `json:"tags,omitempty"`
|
||||
Next *string `json:"next,omitempty"`
|
||||
Prev *string `json:"prev,omitempty"`
|
||||
}
|
||||
|
||||
// File is the "file" object of a Post; both fields are optional.
type File struct {
	Name *string `json:"name,omitempty"`
	Path *string `json:"path,omitempty"`
}
|
||||
|
||||
// Embed is the "embed" object of a Post; all fields are optional.
type Embed struct {
	URL         *string `json:"url,omitempty"`
	Subject     *string `json:"subject,omitempty"`
	Description *string `json:"description,omitempty"`
}
|
||||
|
||||
type Poll struct {
|
||||
Title string `json:"title"`
|
||||
Choices []Choice `json:"choices"`
|
||||
ClosesAt *string `json:"closes_at,omitempty"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
Description *string `json:"description,omitempty"`
|
||||
AllowsMultiple bool `json:"allows_multiple"`
|
||||
TotalVotes int64 `json:"total_votes"`
|
||||
}
|
||||
|
||||
// Choice is one entry of a Poll's "choices" array.
type Choice struct {
	Text  string `json:"text"`
	Votes int64  `json:"votes"`
}
|
||||
16
parsers/native/kemono/post_legacy.go
Normal file
16
parsers/native/kemono/post_legacy.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package kemono
|
||||
|
||||
type PostLegacy struct {
|
||||
Props Props `json:"props"`
|
||||
Results []Result `json:"results"`
|
||||
}
|
||||
|
||||
// Props is the "props" object of a PostLegacy document.
type Props struct {
	Count uint `json:"count"`
	Limit uint `json:"limit"`
}
|
||||
|
||||
// Result is one entry of a PostLegacy document's "results" array.
type Result struct {
	ID    string `json:"id"`
	Title string `json:"title"`
}
|
||||
8
parsers/native/kemono/user_profile.go
Normal file
8
parsers/native/kemono/user_profile.go
Normal file
@@ -0,0 +1,8 @@
|
||||
package kemono
|
||||
|
||||
// UserProfile is the JSON shape of a user profile; PublicID is optional.
type UserProfile struct {
	ID       string  `json:"id"`
	Name     string  `json:"name"`
	Service  string  `json:"service"`
	PublicID *string `json:"public_id,omitempty"`
}
|
||||
122
parsers/native/twitter/parser.go
Normal file
122
parsers/native/twitter/parser.go
Normal file
@@ -0,0 +1,122 @@
|
||||
package twitter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/krau/SaveAny-Bot/common/utils/netutil"
|
||||
"github.com/krau/SaveAny-Bot/pkg/parser"
|
||||
)
|
||||
|
||||
// TwitterParser resolves tweet URLs into downloadable media through an
// FxTwitter-style API.
type TwitterParser struct {
	// client performs all HTTP requests issued by the parser.
	client http.Client
	// apiDomain is the host name the status endpoint is requested from.
	apiDomain string
}
|
||||
|
||||
// fxTwitterApi is the API host used when no other domain is configured.
const fxTwitterApi = "api.fxtwitter.com"
|
||||
|
||||
// Compile-time check that *TwitterParser satisfies parser.ConfigurableParser.
var _ parser.ConfigurableParser = (*TwitterParser)(nil)
|
||||
|
||||
// twitterSourceURLRegexp matches "<twitter|x>.com/<account>/status/<digits>"
// anywhere in a string, capturing the account segment and the numeric status ID.
var twitterSourceURLRegexp = regexp.MustCompile(`(?:twitter|x)\.com/([^/]+)/status/(\d+)`)
|
||||
|
||||
func getTweetID(sourceURL string) string {
|
||||
matches := twitterSourceURLRegexp.FindStringSubmatch(sourceURL)
|
||||
if len(matches) < 3 {
|
||||
return ""
|
||||
}
|
||||
return matches[2]
|
||||
}
|
||||
|
||||
func (p *TwitterParser) Parse(ctx context.Context, u string) (*parser.Item, error) {
|
||||
id := getTweetID(u)
|
||||
if id == "" {
|
||||
return nil, errors.New("invalid Twitter URL")
|
||||
}
|
||||
apiUrl := fmt.Sprintf("https://%s/_/status/%s", p.apiDomain, id)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiUrl, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request to Twitter API: %w", err)
|
||||
}
|
||||
resp, err := p.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch Twitter API: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("failed to fetch Twitter API, status code: %d", resp.StatusCode)
|
||||
}
|
||||
var fxResp FxTwitterApiResp
|
||||
if err := json.NewDecoder(resp.Body).Decode(&fxResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode Twitter API response: %w", err)
|
||||
}
|
||||
if fxResp.Code != 200 {
|
||||
return nil, fmt.Errorf("request twitter API error: %s", fxResp.Message)
|
||||
}
|
||||
if len(fxResp.Tweet.Media.All) == 0 {
|
||||
return nil, errors.New("no media found in the tweet")
|
||||
}
|
||||
resources := make([]parser.Resource, 0, len(fxResp.Tweet.Media.All))
|
||||
for _, media := range fxResp.Tweet.Media.All {
|
||||
var size int64
|
||||
resp, err := p.client.Head(media.URL)
|
||||
if err == nil {
|
||||
size = resp.ContentLength
|
||||
resp.Body.Close()
|
||||
}
|
||||
resources = append(resources, parser.Resource{
|
||||
URL: media.URL,
|
||||
Filename: path.Base(strings.Split(media.URL, "?")[0]),
|
||||
Size: size,
|
||||
})
|
||||
}
|
||||
item := &parser.Item{
|
||||
Site: "Twitter",
|
||||
Title: fmt.Sprintf("Tweet/%s", id),
|
||||
URL: fxResp.Tweet.URL,
|
||||
Description: fxResp.Tweet.Text,
|
||||
Author: fxResp.Tweet.Author.Name,
|
||||
Tags: make([]string, 0),
|
||||
Extra: make(map[string]any),
|
||||
Resources: resources,
|
||||
}
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (p *TwitterParser) CanHandle(u string) bool {
|
||||
return twitterSourceURLRegexp.MatchString(u)
|
||||
}
|
||||
|
||||
func (p *TwitterParser) Name() string {
|
||||
return "twitter"
|
||||
}
|
||||
|
||||
func (p *TwitterParser) Configure(config map[string]any) error {
|
||||
if config == nil {
|
||||
p.apiDomain = fxTwitterApi
|
||||
p.client = *netutil.DefaultParserHTTPClient()
|
||||
return nil
|
||||
}
|
||||
if domain, ok := config["api_domain"].(string); ok && domain != "" {
|
||||
p.apiDomain = domain
|
||||
} else {
|
||||
p.apiDomain = fxTwitterApi
|
||||
}
|
||||
if proxyUrl, ok := config["proxy"].(string); ok && proxyUrl != "" {
|
||||
proxyClient, err := netutil.NewProxyHTTPClient(proxyUrl)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create proxy client: %w", err)
|
||||
}
|
||||
p.client = *proxyClient
|
||||
}
|
||||
return nil
|
||||
}
|
||||
122
parsers/native/twitter/types.go
Normal file
122
parsers/native/twitter/types.go
Normal file
@@ -0,0 +1,122 @@
|
||||
package twitter
|
||||
|
||||
// type AutoGenerated struct {
|
||||
// Code int `json:"code"`
|
||||
// Message string `json:"message"`
|
||||
// Tweet struct {
|
||||
// URL string `json:"url"`
|
||||
// ID string `json:"id"`
|
||||
// Text string `json:"text"`
|
||||
// RawText struct {
|
||||
// Text string `json:"text"`
|
||||
// Facets []struct {
|
||||
// Type string `json:"type"`
|
||||
// Indices []int `json:"indices"`
|
||||
// Original string `json:"original"`
|
||||
// ID string `json:"id,omitempty"`
|
||||
// Display string `json:"display,omitempty"`
|
||||
// Replacement string `json:"replacement,omitempty"`
|
||||
// } `json:"facets"`
|
||||
// } `json:"raw_text"`
|
||||
// Author struct {
|
||||
// ID string `json:"id"`
|
||||
// Name string `json:"name"`
|
||||
// ScreenName string `json:"screen_name"`
|
||||
// AvatarURL string `json:"avatar_url"`
|
||||
// BannerURL interface{} `json:"banner_url"`
|
||||
// Description string `json:"description"`
|
||||
// Location string `json:"location"`
|
||||
// URL string `json:"url"`
|
||||
// Followers int `json:"followers"`
|
||||
// Following int `json:"following"`
|
||||
// Joined string `json:"joined"`
|
||||
// Likes int `json:"likes"`
|
||||
// MediaCount int `json:"media_count"`
|
||||
// Protected bool `json:"protected"`
|
||||
// Website struct {
|
||||
// URL string `json:"url"`
|
||||
// DisplayURL string `json:"display_url"`
|
||||
// } `json:"website"`
|
||||
// Tweets int `json:"tweets"`
|
||||
// AvatarColor interface{} `json:"avatar_color"`
|
||||
// } `json:"author"`
|
||||
// Replies int `json:"replies"`
|
||||
// Retweets int `json:"retweets"`
|
||||
// Likes int `json:"likes"`
|
||||
// Bookmarks int `json:"bookmarks"`
|
||||
// CreatedAt string `json:"created_at"`
|
||||
// CreatedTimestamp int `json:"created_timestamp"`
|
||||
// PossiblySensitive bool `json:"possibly_sensitive"`
|
||||
// Views int `json:"views"`
|
||||
// IsNoteTweet bool `json:"is_note_tweet"`
|
||||
// CommunityNote interface{} `json:"community_note"`
|
||||
// Lang string `json:"lang"`
|
||||
// ReplyingTo interface{} `json:"replying_to"`
|
||||
// ReplyingToStatus interface{} `json:"replying_to_status"`
|
||||
// Media struct {
|
||||
// All []struct {
|
||||
// URL string `json:"url"`
|
||||
// ThumbnailURL string `json:"thumbnail_url"`
|
||||
// Duration int `json:"duration"`
|
||||
// Width int `json:"width"`
|
||||
// Height int `json:"height"`
|
||||
// Format string `json:"format"`
|
||||
// Type string `json:"type"`
|
||||
// Variants []struct {
|
||||
// Bitrate int `json:"bitrate"`
|
||||
// ContentType string `json:"content_type"`
|
||||
// URL string `json:"url"`
|
||||
// } `json:"variants"`
|
||||
// } `json:"all"`
|
||||
// Photos []struct {
|
||||
// Type string `json:"type"`
|
||||
// URL string `json:"url"`
|
||||
// Width int `json:"width"`
|
||||
// Height int `json:"height"`
|
||||
// } `json:"photos"`
|
||||
// Videos []struct {
|
||||
// URL string `json:"url"`
|
||||
// ThumbnailURL string `json:"thumbnail_url"`
|
||||
// Duration int `json:"duration"`
|
||||
// Width int `json:"width"`
|
||||
// Height int `json:"height"`
|
||||
// Format string `json:"format"`
|
||||
// Type string `json:"type"`
|
||||
// Variants []struct {
|
||||
// Bitrate int `json:"bitrate"`
|
||||
// ContentType string `json:"content_type"`
|
||||
// URL string `json:"url"`
|
||||
// } `json:"variants"`
|
||||
// } `json:"videos"`
|
||||
// } `json:"media"`
|
||||
// Source string `json:"source"`
|
||||
// TwitterCard string `json:"twitter_card"`
|
||||
// Color interface{} `json:"color"`
|
||||
// Provider string `json:"provider"`
|
||||
// } `json:"tweet"`
|
||||
// }
|
||||
|
||||
// FxTwitterApiResp is the subset of the status API response that the parser
// decodes: a result code and message, plus the tweet with its author and the
// list of all attached media.
type FxTwitterApiResp struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
	Tweet   struct {
		URL    string `json:"url"`
		ID     string `json:"id"`
		Text   string `json:"text"`
		Author struct {
			ID         string `json:"id"`
			Name       string `json:"name"`
			ScreenName string `json:"screen_name"`
			Protected  bool   `json:"protected"`
		} `json:"author"`
		PossiblySensitive bool   `json:"possibly_sensitive"`
		IsNoteTweet       bool   `json:"is_note_tweet"`
		Lang              string `json:"lang"`
		Media             struct {
			All []struct {
				URL  string `json:"url"`
				Type string `json:"type"`
			} `json:"all"`
		} `json:"media"`
	} `json:"tweet"`
}
|
||||
Reference in New Issue
Block a user