mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-06-12 03:00:09 +08:00
- whisper: model.bin 截断/损坏时删目录重下重试一次,修「Unable to
open file model.bin」死循环;mlx 同样按 config.json 判完整性
- /generate_note 加就绪门禁:本地转写引擎模型没下好直接拦截,返回
reason=transcriber_model_not_ready,不让任务静默卡在首次下载
- 全局代理:新增 ProxyConfigManager(JSON 配置 + HTTP_PROXY env 兜底)
+ build_openai_client,统一注入代理到 LLM/Groq 客户端;yt-dlp 与
youtube-transcript-api 也走代理
- build_openai_client 校验 api_key 非空,空 key 给「xxx 的 API Key
未配置」而不是天书般的 Illegal header value b'Bearer '
- universal_gpt: 模型拒绝自定义 temperature(o1/o3/gpt-5 系列)时
就地去掉参数重试,不消耗重试预算
- connect_test 改用真实 chat completion 而非 /v1/models 探测
- main.py: lifespan 拆 [startup 1/5..5/5] 分段日志 + 异常清晰定位
- /sys_health 重构为结构化返回 {backend,ffmpeg,db,whisper_model}
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
136 lines
4.5 KiB
Python
136 lines
4.5 KiB
Python
import os
|
||
import logging
|
||
from abc import ABC
|
||
from typing import Union, Optional, List
|
||
|
||
import yt_dlp
|
||
|
||
from app.downloaders.base import Downloader, DownloadQuality
|
||
from app.downloaders.youtube_subtitle import YouTubeSubtitleFetcher
|
||
from app.models.notes_model import AudioDownloadResult
|
||
from app.models.transcriber_model import TranscriptResult
|
||
from app.services.proxy_config_manager import ProxyConfigManager
|
||
from app.utils.path_helper import get_data_dir
|
||
from app.utils.url_parser import extract_video_id
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def _redact_proxy_credentials(proxy_url: str) -> str:
    """Return *proxy_url* with any ``user:password@`` part replaced by ``***@``.

    Used only for logging, so that proxy credentials never end up in log
    files. A URL without an ``@`` is returned unchanged.
    """
    # Split on the LAST '@' so passwords that themselves contain '@' are
    # still fully hidden.
    head, at_sign, host_part = proxy_url.rpartition("@")
    if not at_sign:
        return proxy_url
    scheme, scheme_sep, _userinfo = head.partition("://")
    if scheme_sep:
        return f"{scheme}://***@{host_part}"
    return f"***@{host_part}"


def _apply_proxy(ydl_opts: dict) -> dict:
    """Inject the globally configured proxy (if any) into yt-dlp options.

    YouTube needs a proxy to be reachable from mainland China, so when
    ``ProxyConfigManager().get_proxy_url()`` returns a truthy value it is
    stored under the ``'proxy'`` key of *ydl_opts*.

    :param ydl_opts: yt-dlp options dict; mutated in place.
    :return: the same *ydl_opts* object, for call chaining.
    """
    proxy = ProxyConfigManager().get_proxy_url()
    if proxy:
        ydl_opts['proxy'] = proxy
        # Log a redacted form: the proxy URL may embed user:password.
        logger.info("yt-dlp 走代理: %s", _redact_proxy_credentials(str(proxy)))
    return ydl_opts
|
||
|
||
|
||
class YoutubeDownloader(Downloader, ABC):
    """Downloader for YouTube audio, video and subtitles, built on yt-dlp."""

    def __init__(self):
        super().__init__()

    def download(
        self,
        video_url: str,
        output_dir: Union[str, None] = None,
        quality: DownloadQuality = "fast",
        need_video: Optional[bool] = False,
        skip_download: bool = False,
    ) -> AudioDownloadResult:
        """Download the audio track of a YouTube video (or only its metadata).

        :param video_url: URL of the video.
        :param output_dir: target directory; defaults to ``get_data_dir()``,
            falling back to ``self.cache_data`` when that is empty.
        :param quality: not used by this implementation (kept for interface
            compatibility).
        :param need_video: not used by this implementation (kept for
            interface compatibility).
        :param skip_download: when True only metadata is extracted; the
            returned ``file_path`` is then the path the file *would* have.
        :return: AudioDownloadResult describing the audio file and metadata.
        """
        if output_dir is None:
            output_dir = get_data_dir()
        if not output_dir:
            # NOTE(review): cache_data is presumably set by the Downloader
            # base class -- confirm.
            output_dir = self.cache_data
        os.makedirs(output_dir, exist_ok=True)

        output_path = os.path.join(output_dir, "%(id)s.%(ext)s")

        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path,
            'noplaylist': True,
            'quiet': False,
        }

        if skip_download:
            ydl_opts['skip_download'] = True

        _apply_proxy(ydl_opts)
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=not skip_download)

        video_id = info.get("id")
        title = info.get("title")
        # yt-dlp may supply the key with a None value (e.g. unknown
        # duration); normalise that to 0 like a missing key.
        duration = info.get("duration") or 0
        cover_url = info.get("thumbnail")
        ext = info.get("ext", "m4a")
        audio_path = os.path.join(output_dir, f"{video_id}.{ext}")

        return AudioDownloadResult(
            file_path=audio_path,
            title=title,
            duration=duration,
            cover_url=cover_url,
            platform="youtube",
            video_id=video_id,
            raw_info={'tags': info.get('tags')},
            video_path=None,
        )

    def download_video(
        self,
        video_url: str,
        output_dir: Union[str, None] = None,
    ) -> str:
        """Download the video as mp4 and return the path of the file.

        If ``<video_id>.mp4`` already exists in *output_dir* it is returned
        without downloading again.

        :param video_url: URL of the video.
        :param output_dir: target directory; defaults to ``get_data_dir()``.
        :return: path of the mp4 file.
        :raises FileNotFoundError: if the expected mp4 file does not exist
            after the download finished.
        """
        if output_dir is None:
            output_dir = get_data_dir()
        video_id = extract_video_id(video_url, "youtube")
        video_path = os.path.join(output_dir, f"{video_id}.mp4")
        if os.path.exists(video_path):
            return video_path
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "%(id)s.%(ext)s")

        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
            'outtmpl': output_path,
            'noplaylist': True,
            'quiet': False,
            'merge_output_format': 'mp4',  # make sure streams are merged into mp4
        }

        _apply_proxy(ydl_opts)
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=True)

        # Re-derive the path from the id yt-dlp reports, since that id is
        # what the output template used for the file name.
        video_id = info.get("id")
        video_path = os.path.join(output_dir, f"{video_id}.mp4")

        if not os.path.exists(video_path):
            raise FileNotFoundError(f"视频文件未找到: {video_path}")

        return video_path

    def download_subtitles(self, video_url: str, output_dir: Optional[str] = None,
                           langs: Optional[List[str]] = None) -> Optional[TranscriptResult]:
        """Fetch subtitles for a YouTube video via ``YouTubeSubtitleFetcher``.

        Per the original author's note, the fetcher talks to the YouTube
        InnerTube API directly (manual subtitles preferred, auto-generated
        ones second), which is lighter than going through yt_dlp and writes
        no temporary files to disk.

        :param video_url: URL of the video.
        :param output_dir: unused (kept for interface compatibility).
        :param langs: preferred languages in priority order; defaults to a
            list of Chinese variants, then English, then Japanese.
        :return: TranscriptResult, or None.
        """
        if langs is None:
            langs = ['zh-Hans', 'zh', 'zh-CN', 'zh-TW', 'en', 'en-US', 'ja']

        video_id = extract_video_id(video_url, "youtube")
        fetcher = YouTubeSubtitleFetcher()
        # Go through the module logger (not print) so the message respects
        # the application's logging configuration.
        logger.info("尝试获取字幕,video_id=%s, langs=%s", video_id, langs)
        return fetcher.fetch_subtitles(video_id, langs)
|