Files
SaveAny-Bot/core/tasks/directlinks/util.go
krau c21ff7e499 feat: add direct links download functionality
- Implemented a new task type for handling direct links downloads.
- Added command handler for downloading multiple links via /dl command.
- Introduced progress tracking for direct link downloads.
- Enhanced filename parsing to support various encoding scenarios.
- Updated enums to include direct links as a task type.
- Refactored existing task structures to accommodate new functionality.
- Improved error handling and logging throughout the download process.
2025-12-08 17:10:41 +08:00

206 lines
5.8 KiB
Go
Raw Blame History

package directlinks
import (
"mime"
"net/url"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding/simplifiedchinese"
)
// parseFilename returns the file name carried by a Content-Disposition header
// value, or "" when none can be found.
//
// Sources are tried in this order, and the first non-empty result wins:
//  1. The extended parameter, e.g. filename*=UTF-8''%E6%B5%8B%E8%AF%95.zip,
//     via parseFilenameExtended.
//  2. The regular filename parameter as parsed by mime.ParseMediaType, passed
//     through decodeFilenameParam (which deals with MIME encoded-words such as
//     "=?UTF-8?B?5rWL6K+VLnppcA==?=", percent-encoding and plain names).
//  3. A manual scan for filename= via parseFilenameFallback.
//
// The extended parameter is checked before mime.ParseMediaType on purpose:
// some servers send headers that mime.ParseMediaType rejects even though their
// filename*= parameter is still usable.
func parseFilename(contentDisposition string) string {
	name := parseFilenameExtended(contentDisposition)
	if name != "" {
		return name
	}

	_, params, err := mime.ParseMediaType(contentDisposition)
	if err != nil || params["filename"] == "" {
		// Header did not parse, or it has no usable filename parameter.
		return parseFilenameFallback(contentDisposition)
	}
	return decodeFilenameParam(params["filename"])
}
// parseFilenameExtended extracts and decodes the RFC 5987 / RFC 2231 extended
// parameter "filename*" from a Content-Disposition header value.
//
// The expected value form is charset'language'percent-encoded-value, for
// example UTF-8''%E6%B5%8B%E8%AF%95.zip or UTF-8'en'%E2%82%AC%20rates.txt.
// The parameter name is matched case-insensitively and the value ends at the
// next ';' or at the end of the header. Double quotes around the value, which
// some servers add, are dropped.
//
// The charset and language fields are not interpreted; the percent-decoded
// bytes are returned as they are. It returns "" when the parameter is absent,
// does not have the three-part form, or contains an invalid percent escape.
func parseFilenameExtended(cd string) string {
	const key = "filename*="

	// Scan cd itself instead of a strings.ToLower copy. Lowercasing can change
	// the byte length (for example, invalid UTF-8 bytes such as raw GBK are
	// replaced by the 3-byte U+FFFD), so an offset found in the copy may point
	// at the wrong place in cd, or past its end.
	start := -1
	for i := 0; i+len(key) <= len(cd); i++ {
		if strings.EqualFold(cd[i:i+len(key)], key) {
			start = i + len(key)
			break
		}
	}
	if start == -1 {
		return ""
	}

	value := cd[start:]
	if end := strings.IndexByte(value, ';'); end != -1 {
		value = value[:end]
	}
	value = strings.Trim(strings.TrimSpace(value), `"`)

	// charset ' [language] ' value — splitting on the first two apostrophes
	// covers both UTF-8''name and UTF-8'en'name, and keeps any later
	// apostrophes inside the value.
	parts := strings.SplitN(value, "'", 3)
	if len(parts) != 3 {
		return ""
	}

	// PathUnescape rather than QueryUnescape: the value is percent-encoded,
	// not form-encoded, so a literal '+' must stay a '+'.
	name, err := url.PathUnescape(parts[2])
	if err != nil {
		return ""
	}
	return name
}
// tryUrlQueryUnescape percent-decodes s with url.QueryUnescape.
//
// When s contains an invalid escape sequence the input is returned unchanged.
// Because query unescaping is used, a '+' in s comes back as a space.
func tryUrlQueryUnescape(s string) string {
	unescaped, err := url.QueryUnescape(s)
	if err != nil {
		return s
	}
	return unescaped
}
// decodeFilenameParam decodes the value of a plain "filename" parameter.
//
// Decoding is attempted in this order:
//  1. A value starting with "=?" is tried as a MIME encoded-word
//     (e.g. =?UTF-8?B?...?=); on success that result is returned.
//  2. Otherwise, or if step 1 fails, the value is percent-decoded with
//     tryUrlQueryUnescape.
//  3. If the step 2 result is not valid UTF-8 it is handed to tryDecodeGBK,
//     and a non-empty result from there is returned. This covers servers that
//     send GBK-encoded names, which would otherwise show up garbled.
//
// When step 3 does not apply or yields nothing, the step 2 result is returned.
func decodeFilenameParam(filename string) string {
	if strings.HasPrefix(filename, "=?") {
		word := filename
		// Some servers label the charset "UTF8" instead of "UTF-8". Rewrite
		// only the charset field, in any letter case; replacing the first
		// "UTF8" anywhere in the word could corrupt an encoded payload that
		// happens to contain those characters.
		const alias = "=?utf8?"
		if len(word) >= len(alias) && strings.EqualFold(word[:len(alias)], alias) {
			word = "=?UTF-8?" + word[len(alias):]
		}
		if decoded, err := new(mime.WordDecoder).Decode(word); err == nil {
			return decoded
		}
	}

	decoded := tryUrlQueryUnescape(filename)
	if utf8.ValidString(decoded) {
		return decoded
	}
	// Not UTF-8: see whether the bytes make sense as GBK.
	if gbkDecoded := tryDecodeGBK(decoded); gbkDecoded != "" {
		return gbkDecoded
	}
	return decoded
}
// gbkDecoder is the package-level GBK decoder used by tryDecodeGBK. It is
// created once and shared by every call.
//
// NOTE(review): no new decoder is constructed per call; if tryDecodeGBK can be
// reached from several goroutines at once, confirm that sharing this decoder
// is safe.
var gbkDecoder = simplifiedchinese.GBK.NewDecoder()

// tryDecodeGBK interprets the bytes of s as GBK and returns them converted to
// UTF-8.
//
// It returns "" when s is empty, when the decoder reports an error, or when the
// converted bytes are not valid UTF-8. An all-ASCII input is not special-cased;
// it is passed through the decoder like any other input.
//
// NOTE(review): the decoder may substitute U+FFFD for bytes it cannot map
// instead of returning an error — confirm against the x/text documentation
// before relying on "" as a "not GBK" signal.
func tryDecodeGBK(s string) string {
	if s == "" {
		return ""
	}

	converted, err := gbkDecoder.Bytes([]byte(s))
	if err != nil || !utf8.Valid(converted) {
		return ""
	}
	return string(converted)
}
// parseFilenameFallback pulls the filename= parameter out of a
// Content-Disposition header value by hand, for headers that
// mime.ParseMediaType cannot parse.
//
// The parameter name is matched case-insensitively. The value runs up to the
// next ';' (or the end of the header), is trimmed of surrounding whitespace,
// has one pair of matching double or single quotes removed, and is then passed
// through decodeFilenameParam. It returns "" when no filename= is present.
//
// Because the value is cut at the first ';' before quotes are removed, a
// quoted name that itself contains ';' is truncated.
func parseFilenameFallback(cd string) string {
	const key = "filename="

	// Scan cd itself instead of a strings.ToLower copy. Lowercasing can change
	// the byte length (for example, invalid UTF-8 bytes such as raw GBK are
	// replaced by the 3-byte U+FFFD), so an offset found in the copy may point
	// at the wrong place in cd, or past its end.
	start := -1
	for i := 0; i+len(key) <= len(cd); i++ {
		if strings.EqualFold(cd[i:i+len(key)], key) {
			start = i + len(key)
			break
		}
	}
	if start == -1 {
		return ""
	}

	value := cd[start:]
	if end := strings.Index(value, ";"); end != -1 {
		value = value[:end]
	}
	value = strings.TrimSpace(value)

	// Drop one pair of matching surrounding quotes, if present.
	if n := len(value); n >= 2 {
		first, last := value[0], value[n-1]
		if first == last && (first == '"' || first == '\'') {
			value = value[1 : n-1]
		}
	}
	return decodeFilenameParam(value)
}
// progressUpdatesLevels maps file sizes to how often progress is reported.
// Entries are ordered by ascending size; the first entry whose size is strictly
// greater than the file's total size supplies the step. Files at or above the
// largest threshold use the last entry's step.
var progressUpdatesLevels = []struct {
	size        int64 // file size threshold, in bytes
	stepPercent int   // report once every this many percent
}{
	{size: 10 << 20, stepPercent: 100},
	{size: 50 << 20, stepPercent: 50},
	{size: 200 << 20, stepPercent: 20},
	{size: 500 << 20, stepPercent: 10},
}

// shouldUpdateProgress reports whether a progress update is due.
//
// total is the full size and downloaded the amount received so far, in the same
// unit as the thresholds in progressUpdatesLevels; lastUpdatePercent is the
// percentage at which the previous update was made. The result is false when
// total or downloaded is not positive, or when the current whole-number
// percentage has not advanced past lastUpdatePercent. Otherwise it is true once
// the percentage is at least lastUpdatePercent plus the step chosen for total.
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
	if total <= 0 || downloaded <= 0 {
		return false
	}

	current := int(downloaded * 100 / total)
	if current <= lastUpdatePercent {
		return false
	}

	// Start from the step of the last level and override it with the first
	// level whose threshold is above total.
	interval := progressUpdatesLevels[len(progressUpdatesLevels)-1].stepPercent
	for i := 0; i < len(progressUpdatesLevels); i++ {
		if total < progressUpdatesLevels[i].size {
			interval = progressUpdatesLevels[i].stepPercent
			break
		}
	}
	return current >= lastUpdatePercent+interval
}