Files
SaveAny-Bot/core/tasks/directlinks/util.go
copilot-swe-agent[bot] 943ad190e6 Fix empty filename when Content-Disposition header is missing in /dl command
Add filenameFromURL helper to extract filename from URL path when
Content-Disposition header is empty or not present. This fixes the issue
where direct link downloads fail due to empty filename.

Co-authored-by: krau <71133316+krau@users.noreply.github.com>
2026-01-28 08:57:05 +00:00

235 lines
6.5 KiB
Go
Raw Blame History

package directlinks
import (
"mime"
"net/url"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding/simplifiedchinese"
)
// parseFilename returns the filename advertised by a Content-Disposition
// header value, or an empty string when none can be recovered.
//
// Encodings are handled in this order:
//  1. RFC 5987 / RFC 2231 extended parameter: filename*=UTF-8''%E6%B5%8B%E8%AF%95.zip
//  2. MIME encoded-word: filename="=?UTF-8?B?5rWL6K+VLnppcA==?="
//  3. URL-encoded: filename="%E6%B5%8B%E8%AF%95.zip"
//  4. Plain ASCII filename
//
// The filename*= parameter is looked up before mime.ParseMediaType is called
// because some servers send headers containing invalid characters that make
// mime.ParseMediaType fail even though the filename*= parameter itself is
// usable. When mime.ParseMediaType fails, or yields no filename, the header is
// handed to the manual filename= parser.
func parseFilename(contentDisposition string) string {
	// The extended form wins whenever it yields a non-empty name.
	extended := parseFilenameExtended(contentDisposition)
	if extended != "" {
		return extended
	}

	// On a parse error params is nil, so the lookup simply yields "".
	_, params, parseErr := mime.ParseMediaType(contentDisposition)
	if parseErr != nil || params["filename"] == "" {
		return parseFilenameFallback(contentDisposition)
	}
	return decodeFilenameParam(params["filename"])
}
// parseFilenameExtended extracts the filename carried by an RFC 5987 /
// RFC 2231 extended parameter of the form
//
//	filename*=charset'language'percent-encoded-value
//
// for example filename*=UTF-8''%E6%B5%8B%E8%AF%95.zip.
//
// The parameter name is matched case-insensitively. The charset and language
// components are not interpreted; only the percent-encoding of the value is
// undone. An empty string is returned when the parameter is absent, is not in
// charset'language'value form, or is not validly percent-encoded.
func parseFilenameExtended(cd string) string {
	const key = "filename*="

	// Search the raw header bytes. Lower-casing the whole header first is not
	// safe: strings.ToLower rewrites every invalid UTF-8 byte (for instance a
	// raw GBK filename in an earlier parameter) as a 3-byte U+FFFD, so offsets
	// found in the lower-cased copy would not line up with cd.
	idx := -1
	for i := 0; i+len(key) <= len(cd); i++ {
		if strings.EqualFold(cd[i:i+len(key)], key) {
			idx = i
			break
		}
	}
	if idx == -1 {
		return ""
	}

	// The value runs up to the next parameter separator or the end of the header.
	value := cd[idx+len(key):]
	if end := strings.IndexByte(value, ';'); end != -1 {
		value = value[:end]
	}
	// Tolerate servers that wrap the extended value in double quotes.
	value = strings.Trim(strings.TrimSpace(value), `"`)

	// charset ' language ' value — the language is usually empty, giving "''".
	parts := strings.SplitN(value, "'", 3)
	if len(parts) != 3 {
		return ""
	}

	// PathUnescape rather than QueryUnescape: the value is percent-encoded, so
	// a literal '+' is part of the name and must not become a space.
	decoded, err := url.PathUnescape(parts[2])
	if err != nil {
		return ""
	}
	return decoded
}
// tryUrlQueryUnescape applies url.QueryUnescape to s on a best-effort basis.
//
// The decoded text is returned when s is validly escaped; otherwise s is
// returned unchanged.
func tryUrlQueryUnescape(s string) string {
	unescaped, err := url.QueryUnescape(s)
	if err != nil {
		// Malformed escape sequence: keep the input as it was.
		return s
	}
	return unescaped
}
// decodeFilenameParam decodes the value of a filename= parameter.
//
// Decoding is attempted in this order:
//  1. MIME encoded-word (=?charset?B|Q?...?=), when the value starts with "=?".
//  2. URL unescaping via tryUrlQueryUnescape (the value is kept as-is when it
//     is not validly escaped).
//  3. GBK, only when the result of step 2 is not valid UTF-8.
//
// The result of step 2 is returned when no other step produces a name.
func decodeFilenameParam(filename string) string {
	if strings.HasPrefix(filename, "=?") {
		word := filename
		// Some servers label the charset "UTF8" instead of "UTF-8". Rewrite
		// only the charset label at the very start of the word, in any letter
		// case, so an occurrence of "UTF8" inside the encoded payload is never
		// touched.
		const alias = "=?utf8?"
		if len(word) >= len(alias) && strings.EqualFold(word[:len(alias)], alias) {
			word = "=?UTF-8?" + word[len(alias):]
		}
		if decoded, err := new(mime.WordDecoder).Decode(word); err == nil {
			return decoded
		}
		// Not a decodable encoded-word: fall through to the generic handling.
	}

	decoded := tryUrlQueryUnescape(filename)

	// Bytes that are not valid UTF-8 may be a GBK-encoded name, as sent by
	// some Chinese Windows servers; try to transcode before giving up.
	if !utf8.ValidString(decoded) {
		if gbkDecoded := tryDecodeGBK(decoded); gbkDecoded != "" {
			return gbkDecoded
		}
	}
	return decoded
}
// gbkDecoder is the package-level GBK decoder used by tryDecodeGBK.
var gbkDecoder = simplifiedchinese.GBK.NewDecoder()

// tryDecodeGBK treats the bytes of s as GBK-encoded text and converts them to
// UTF-8.
//
// It returns an empty string when s is empty, when the decoder reports an
// error, or when the converted bytes are not valid UTF-8.
func tryDecodeGBK(s string) string {
	if s == "" {
		return ""
	}

	// NOTE(review): every call goes through the one shared package-level
	// decoder, not a per-call instance — confirm that is safe if this can be
	// reached from multiple goroutines.
	converted, err := gbkDecoder.Bytes([]byte(s))
	if err != nil || !utf8.Valid(converted) {
		return ""
	}
	return string(converted)
}
// parseFilenameFallback extracts the filename= parameter from a
// Content-Disposition header by hand, for headers that mime.ParseMediaType
// rejects or yields no filename for.
//
// The parameter name is matched case-insensitively. A value enclosed in
// double or single quotes may contain ';'. An unquoted value ends at the next
// ';' or at the end of the header. The extracted value is passed through
// decodeFilenameParam. An empty string is returned when the header has no
// filename= parameter.
func parseFilenameFallback(cd string) string {
	const key = "filename="

	// Search the raw header bytes. Lower-casing the whole header first is not
	// safe: strings.ToLower rewrites every invalid UTF-8 byte (for instance raw
	// GBK text) as a 3-byte U+FFFD, so offsets found in the lower-cased copy
	// would not line up with cd.
	idx := -1
	for i := 0; i+len(key) <= len(cd); i++ {
		if strings.EqualFold(cd[i:i+len(key)], key) {
			idx = i
			break
		}
	}
	if idx == -1 {
		return ""
	}

	value := strings.TrimSpace(cd[idx+len(key):])

	// Quoted value: the closing quote is the first matching quote that is
	// followed by a parameter separator or the end of the header, so a ';'
	// inside the quotes stays part of the name.
	if value != "" && (value[0] == '"' || value[0] == '\'') {
		quote := value[0]
		for i := 1; i < len(value); i++ {
			if value[i] != quote {
				continue
			}
			rest := strings.TrimSpace(value[i+1:])
			if rest == "" || rest[0] == ';' {
				return decodeFilenameParam(value[1:i])
			}
		}
	}

	// Unquoted (or malformed quoted) value: cut at the next separator.
	if end := strings.IndexByte(value, ';'); end != -1 {
		value = value[:end]
	}
	value = strings.TrimSpace(value)

	// Drop a matching pair of surrounding quotes if one is still present.
	if len(value) >= 2 {
		first, last := value[0], value[len(value)-1]
		if (first == '"' && last == '"') || (first == '\'' && last == '\'') {
			value = value[1 : len(value)-1]
		}
	}
	return decodeFilenameParam(value)
}
// filenameFromURL derives a filename from the last path segment of rawURL.
//
// The query string and fragment are not part of the URL path and therefore
// never appear in the result. An empty string is returned when rawURL cannot
// be parsed, when its path is empty or ends in "/", or when the path contains
// no "/" at all.
func filenameFromURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil {
		return ""
	}

	// u.Path is already percent-decoded by url.Parse. It must not be unescaped
	// a second time: doing so would turn a literal '+' into a space and would
	// decode names that legitimately contain '%' sequences twice.
	path := u.Path
	idx := strings.LastIndexByte(path, '/')
	if idx == -1 || idx == len(path)-1 {
		return ""
	}
	return path[idx+1:]
}
// progressUpdatesLevels maps file sizes to how coarse progress updates are.
// Entries are ordered by ascending size; the first entry whose size is
// strictly greater than the file's total size applies.
var progressUpdatesLevels = []struct {
	size        int64 // file size threshold in bytes
	stepPercent int   // update once every this many percent
}{
	{10 << 20, 100},
	{50 << 20, 50},
	{200 << 20, 20},
	{500 << 20, 10},
}

// shouldUpdateProgress reports whether a progress update is due.
//
// total and downloaded are byte counts and lastUpdatePercent is the percentage
// at which the previous update was made. An update is due once the current
// percentage has advanced by at least the step configured for the file's size
// in progressUpdatesLevels; files at or above the largest threshold use the
// last entry's step. Non-positive total or downloaded never trigger an update.
func shouldUpdateProgress(total, downloaded int64, lastUpdatePercent int) bool {
	switch {
	case total <= 0, downloaded <= 0:
		return false
	}

	percent := int(downloaded * 100 / total)
	if percent <= lastUpdatePercent {
		return false
	}

	// Start from the step of the largest tier, then narrow to the first tier
	// the file fits under.
	levels := progressUpdatesLevels
	step := levels[len(levels)-1].stepPercent
	for i := 0; i < len(levels); i++ {
		if total < levels[i].size {
			step = levels[i].stepPercent
			break
		}
	}
	return percent >= lastUpdatePercent+step
}