Files
BiliNote/backend/app/downloaders/youtube_downloader.py
huangjianwu 41f17592c2 fix(backend): 部署韧性——模型自愈/就绪门禁/全局代理/启动诊断
- whisper: model.bin 截断/损坏时删目录重下重试一次,修「Unable to
  open file model.bin」死循环;mlx 同样按 config.json 判完整性
- /generate_note 加就绪门禁:本地转写引擎模型没下好直接拦截,返回
  reason=transcriber_model_not_ready,不让任务静默卡在首次下载
- 全局代理:新增 ProxyConfigManager(JSON 配置 + HTTP_PROXY env 兜底)
  + build_openai_client,统一注入代理到 LLM/Groq 客户端;yt-dlp 与
  youtube-transcript-api 也走代理
- build_openai_client 校验 api_key 非空,空 key 给「xxx 的 API Key
  未配置」而不是天书般的 Illegal header value b'Bearer '
- universal_gpt: 模型拒绝自定义 temperature(o1/o3/gpt-5 系列)时
  就地去掉参数重试,不消耗重试预算
- connect_test 改用真实 chat completion 而非 /v1/models 探测
- main.py: lifespan 拆 [startup 1/5..5/5] 分段日志 + 异常清晰定位
- /sys_health 重构为结构化返回 {backend,ffmpeg,db,whisper_model}

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 19:01:14 +08:00

136 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import logging
from abc import ABC
from typing import Union, Optional, List
import yt_dlp
from app.downloaders.base import Downloader, DownloadQuality
from app.downloaders.youtube_subtitle import YouTubeSubtitleFetcher
from app.models.notes_model import AudioDownloadResult
from app.models.transcriber_model import TranscriptResult
from app.services.proxy_config_manager import ProxyConfigManager
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
logger = logging.getLogger(__name__)
def _apply_proxy(ydl_opts: dict) -> dict:
"""YouTube 在国内需要代理。配置了全局代理就塞进 yt-dlp opts。"""
proxy = ProxyConfigManager().get_proxy_url()
if proxy:
ydl_opts['proxy'] = proxy
logger.info(f"yt-dlp 走代理: {proxy}")
return ydl_opts
class YoutubeDownloader(Downloader, ABC):
def __init__(self):
super().__init__()
def download(
self,
video_url: str,
output_dir: Union[str, None] = None,
quality: DownloadQuality = "fast",
need_video: Optional[bool] = False,
skip_download: bool = False,
) -> AudioDownloadResult:
if output_dir is None:
output_dir = get_data_dir()
if not output_dir:
output_dir = self.cache_data
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
ydl_opts = {
'format': 'bestaudio[ext=m4a]/bestaudio/best',
'outtmpl': output_path,
'noplaylist': True,
'quiet': False,
}
if skip_download:
ydl_opts['skip_download'] = True
_apply_proxy(ydl_opts)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=not skip_download)
video_id = info.get("id")
title = info.get("title")
duration = info.get("duration", 0)
cover_url = info.get("thumbnail")
ext = info.get("ext", "m4a")
audio_path = os.path.join(output_dir, f"{video_id}.{ext}")
return AudioDownloadResult(
file_path=audio_path,
title=title,
duration=duration,
cover_url=cover_url,
platform="youtube",
video_id=video_id,
raw_info={'tags': info.get('tags')},
video_path=None,
)
def download_video(
self,
video_url: str,
output_dir: Union[str, None] = None,
) -> str:
"""
下载视频,返回视频文件路径
"""
if output_dir is None:
output_dir = get_data_dir()
video_id = extract_video_id(video_url, "youtube")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
if os.path.exists(video_path):
return video_path
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
ydl_opts = {
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
'outtmpl': output_path,
'noplaylist': True,
'quiet': False,
'merge_output_format': 'mp4', # 确保合并成 mp4
}
_apply_proxy(ydl_opts)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=True)
video_id = info.get("id")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件未找到: {video_path}")
return video_path
def download_subtitles(self, video_url: str, output_dir: str = None,
langs: List[str] = None) -> Optional[TranscriptResult]:
"""
通过 YouTube InnerTube API 直接获取字幕(优先人工字幕,其次自动生成)。
比 yt_dlp 方式更轻量,无需写临时文件到磁盘。
:param video_url: 视频链接
:param output_dir: 未使用(保留接口兼容)
:param langs: 优先语言列表
:return: TranscriptResult 或 None
"""
if langs is None:
langs = ['zh-Hans', 'zh', 'zh-CN', 'zh-TW', 'en', 'en-US', 'ja']
video_id = extract_video_id(video_url, "youtube")
fetcher = YouTubeSubtitleFetcher()
print(
f"尝试获取字幕video_id={video_id}, langs={langs}"
)
return fetcher.fetch_subtitles(video_id, langs)