mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-15 12:28:06 +08:00
fix(backend): 部署友好性——whisper 半成品目录与 deploy_status 硬依赖 torch
两处部署反馈来的问题:
1. WhisperTranscriber 反复抛 'Unable to open file model.bin in
model whisper-base'
· 原因:__init__ 只看目录是否存在判定模型已下载(Path(model_path).exists()),
但首次下载若中断 / 网络异常会留下空 / 半成品目录,下次启动绕过下载分支直接
进 WhisperModel 加载,于是死循环报错
· 修:判定条件换成 'model.bin' 落盘存在;目录在但 model.bin 缺失时打 warn
并触发重新下载
· routers/config.py 的 _check_whisper_model_exists 同步改用 model.bin 判定,
避免「已下载」状态在监控页误报
2. /api/deploy_status 在没装 torch 的部署上 500
ModuleNotFoundError: No module named 'torch'
· 原因:endpoint 顶部直接 import torch,仅 fast-whisper 才用得到的依赖被强制为
全局必需。轻量部署 / 用户切到 Groq / 必剪 / 快手 在线引擎时无 torch 也合理
· 修:torch 改为 try/except,未装或 cuda 检测异常时返回
{available: false, torch_installed: false};同时把 transcriber 配置 +
ffmpeg 都包在 try 里,保证整个监控 endpoint 不会被任一子项打死
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,10 +87,13 @@ _downloading: dict[str, str] = {} # model_size -> status ("downloading" | "done
|
||||
|
||||
|
||||
def _check_whisper_model_exists(model_size: str, subdir: str = "whisper") -> bool:
    """Return True only when the given whisper model is *fully* downloaded locally.

    Checking merely that the model directory exists would misclassify an
    empty / half-finished directory left behind by an interrupted download
    as "already downloaded".  We therefore require the core weight file
    ``model.bin`` to actually be on disk.

    Args:
        model_size: Whisper model size tag, e.g. ``"base"``; the on-disk
            directory is named ``whisper-<model_size>``.
        subdir: Sub-directory under the model root (defaults to "whisper").

    Returns:
        True iff ``<model_dir>/whisper-<model_size>/model.bin`` exists.
    """
    # Use pathlib end-to-end instead of mixing os.path.join with Path().
    model_path = Path(get_model_dir(subdir)) / f"whisper-{model_size}"
    # model.bin is the weight file faster-whisper actually opens; its
    # presence is the reliable "download completed" marker.
    return (model_path / "model.bin").exists()
|
||||
|
||||
|
||||
@router.get("/transcriber_models_status")
|
||||
@@ -227,30 +230,49 @@ async def sys_check():
|
||||
|
||||
@router.get("/deploy_status")
|
||||
async def deploy_status():
|
||||
"""返回部署监控所需的所有状态信息"""
|
||||
import torch
|
||||
"""返回部署监控所需的所有状态信息。
|
||||
|
||||
每一项都做容错:torch 没装 / Whisper 配置读取失败 / FFmpeg 不可用 都不应让整个
|
||||
endpoint 500 把监控页打死。
|
||||
"""
|
||||
import os
|
||||
|
||||
# CUDA 状态
|
||||
cuda_available = torch.cuda.is_available()
|
||||
cuda_info = {
|
||||
"available": cuda_available,
|
||||
"version": torch.version.cuda if cuda_available else None,
|
||||
"gpu_name": torch.cuda.get_device_name(0) if cuda_available else None,
|
||||
}
|
||||
|
||||
# Whisper 模型状态(从配置文件读取,与前端设置同步)
|
||||
transcriber_cfg = transcriber_config_manager.get_config()
|
||||
model_size = transcriber_cfg["whisper_model_size"]
|
||||
transcriber_type = transcriber_cfg["transcriber_type"]
|
||||
|
||||
|
||||
# CUDA 状态:torch 是 fast-whisper 路径才需要的依赖;轻量部署可能没装
|
||||
cuda_info = {"available": False, "version": None, "gpu_name": None, "torch_installed": False}
|
||||
try:
|
||||
import torch
|
||||
cuda_info["torch_installed"] = True
|
||||
cuda_available = torch.cuda.is_available()
|
||||
cuda_info["available"] = cuda_available
|
||||
if cuda_available:
|
||||
cuda_info["version"] = torch.version.cuda
|
||||
try:
|
||||
cuda_info["gpu_name"] = torch.cuda.get_device_name(0)
|
||||
except Exception as e:
|
||||
logger.warning(f"读取 GPU 名称失败: {e}")
|
||||
except ImportError:
|
||||
# torch 未安装:保持 available=False;插件 / web 监控页能识别
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"CUDA 状态检测失败: {e}")
|
||||
|
||||
# Whisper 配置(任何失败回落到空,监控页不应被这个打死)
|
||||
model_size = None
|
||||
transcriber_type = None
|
||||
try:
|
||||
transcriber_cfg = transcriber_config_manager.get_config()
|
||||
model_size = transcriber_cfg.get("whisper_model_size")
|
||||
transcriber_type = transcriber_cfg.get("transcriber_type")
|
||||
except Exception as e:
|
||||
logger.warning(f"读取转写器配置失败: {e}")
|
||||
|
||||
# FFmpeg 状态
|
||||
try:
|
||||
ensure_ffmpeg_or_raise()
|
||||
ffmpeg_ok = True
|
||||
except:
|
||||
except Exception:
|
||||
ffmpeg_ok = False
|
||||
|
||||
|
||||
return R.success(data={
|
||||
"backend": {"status": "running", "port": int(os.getenv("BACKEND_PORT", 8483))},
|
||||
"cuda": cuda_info,
|
||||
|
||||
@@ -50,12 +50,20 @@ class WhisperTranscriber(Transcriber):
|
||||
|
||||
model_dir = get_model_dir("whisper")
|
||||
model_path = os.path.join(model_dir, f"whisper-{model_size}")
|
||||
if not Path(model_path).exists():
|
||||
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
|
||||
# 仅看目录存在不够:一次失败的 / 中断的下载会留下空 / 半成品目录,
|
||||
# 后面 WhisperModel 加载时会以 'Unable to open file model.bin' 抛错。
|
||||
# 必须以 model.bin 这个核心权重文件落盘为准。
|
||||
model_bin = Path(model_path) / "model.bin"
|
||||
if not model_bin.exists():
|
||||
if Path(model_path).exists():
|
||||
logger.warning(
|
||||
f"检测到 {model_path} 目录存在但缺少 model.bin(可能上次下载中断),将重新拉取"
|
||||
)
|
||||
else:
|
||||
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
|
||||
repo_id = MODEL_MAP[model_size]
|
||||
model_path = snapshot_download(
|
||||
repo_id,
|
||||
|
||||
local_dir=model_path,
|
||||
)
|
||||
logger.info("模型下载完成")
|
||||
|
||||
Reference in New Issue
Block a user