fix(backend): 部署友好性——whisper 半成品目录与 deploy_status 硬依赖 torch

来自部署反馈的两处问题：

1. WhisperTranscriber 反复抛 'Unable to open file model.bin in model whisper-base'
   · 原因：__init__ 只凭目录是否存在（Path(model_path).exists()）来判定模型已下载；但首次下载若中断或遇到网络异常，会留下空目录 / 半成品目录，下次启动时就会绕过下载分支、直接进入 WhisperModel 加载，于是每次启动都重复报同一个错误
   · 修：判定条件换成 'model.bin' 已落盘；目录存在但 model.bin 缺失时打 warning 日志并触发重新下载
   · routers/config.py 的 _check_whisper_model_exists 同步改用 model.bin 判定，避免监控页误报「已下载」状态

2. /api/deploy_status 在没装 torch 的部署上返回 500：ModuleNotFoundError: No module named 'torch'
   · 原因：endpoint 顶部直接 import torch，把仅 fast-whisper 才用得到的依赖强制成了全局必需。轻量部署，或用户切到 Groq / 必剪 / 快手等在线引擎时，不装 torch 也是合理的
   · 修：import torch 改为包在 try/except 里，未安装时返回 {available: false, torch_installed: false}；torch 已安装但 cuda 检测抛异常时，只记 warning 并保持 available: false（此时 torch_installed 为 true）；同时把 transcriber 配置 + ffmpeg 都包在 try 里，保证整个监控 endpoint 不会被任一子项打死

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 12:28:06 +08:00 · 2026-05-09 13:57:34 +08:00
parent e89090bed0
commit 2bb69d1581
2 changed files with 54 additions and 24 deletions
--- a/backend/app/routers/config.py
+++ b/backend/app/routers/config.py
@@ -87,10 +87,13 @@ _downloading: dict[str, str] = {}  # model_size -> status ("downloading" | "done


 def _check_whisper_model_exists(model_size: str, subdir: str = "whisper") -> bool:
-    """检查指定 whisper 模型是否已下载到本地。"""
+    """检查指定 whisper 模型是否已完整下载到本地。
+
+    只看目录会把"上次下载中断剩下的空目录"误判为已下载；以 model.bin 落盘为准。
+    """
    model_dir = get_model_dir(subdir)
-    model_path = os.path.join(model_dir, f"whisper-{model_size}")
-    return Path(model_path).exists()
+    model_path = Path(os.path.join(model_dir, f"whisper-{model_size}"))
+    return (model_path / "model.bin").exists()


@router.get("/transcriber_models_status")
@@ -227,30 +230,49 @@ async def sys_check():

@router.get("/deploy_status")
 async def deploy_status():
-    """返回部署监控所需的所有状态信息"""
-    import torch
+    """返回部署监控所需的所有状态信息。
+
+    每一项都做容错：torch 没装 / Whisper 配置读取失败 / FFmpeg 不可用 都不应让整个
+    endpoint 500 把监控页打死。
+    """
    import os
-    
-    # CUDA 状态
-    cuda_available = torch.cuda.is_available()
-    cuda_info = {
-        "available": cuda_available,
-        "version": torch.version.cuda if cuda_available else None,
-        "gpu_name": torch.cuda.get_device_name(0) if cuda_available else None,
-    }
-    
-    # Whisper 模型状态（从配置文件读取，与前端设置同步）
-    transcriber_cfg = transcriber_config_manager.get_config()
-    model_size = transcriber_cfg["whisper_model_size"]
-    transcriber_type = transcriber_cfg["transcriber_type"]
-    
+
+    # CUDA 状态：torch 是 fast-whisper 路径才需要的依赖；轻量部署可能没装
+    cuda_info = {"available": False, "version": None, "gpu_name": None, "torch_installed": False}
+    try:
+        import torch
+        cuda_info["torch_installed"] = True
+        cuda_available = torch.cuda.is_available()
+        cuda_info["available"] = cuda_available
+        if cuda_available:
+            cuda_info["version"] = torch.version.cuda
+            try:
+                cuda_info["gpu_name"] = torch.cuda.get_device_name(0)
+            except Exception as e:
+                logger.warning(f"读取 GPU 名称失败: {e}")
+    except ImportError:
+        # torch 未安装：保持 available=False；插件 / web 监控页能识别
+        pass
+    except Exception as e:
+        logger.warning(f"CUDA 状态检测失败: {e}")
+
+    # Whisper 配置（任何失败回落到空，监控页不应被这个打死）
+    model_size = None
+    transcriber_type = None
+    try:
+        transcriber_cfg = transcriber_config_manager.get_config()
+        model_size = transcriber_cfg.get("whisper_model_size")
+        transcriber_type = transcriber_cfg.get("transcriber_type")
+    except Exception as e:
+        logger.warning(f"读取转写器配置失败: {e}")
+
    # FFmpeg 状态
    try:
        ensure_ffmpeg_or_raise()
        ffmpeg_ok = True
-    except:
+    except Exception:
        ffmpeg_ok = False
-    
+
    return R.success(data={
        "backend": {"status": "running", "port": int(os.getenv("BACKEND_PORT", 8483))},
        "cuda": cuda_info,
--- a/backend/app/transcriber/whisper.py
+++ b/backend/app/transcriber/whisper.py
@@ -50,12 +50,20 @@ class WhisperTranscriber(Transcriber):

        model_dir = get_model_dir("whisper")
        model_path = os.path.join(model_dir, f"whisper-{model_size}")
-        if not Path(model_path).exists():
-            logger.info(f"模型 whisper-{model_size} 不存在，开始下载...")
+        # 仅看目录存在不够：一次失败的 / 中断的下载会留下空 / 半成品目录，
+        # 后面 WhisperModel 加载时会以 'Unable to open file model.bin' 抛错。
+        # 必须以 model.bin 这个核心权重文件落盘为准。
+        model_bin = Path(model_path) / "model.bin"
+        if not model_bin.exists():
+            if Path(model_path).exists():
+                logger.warning(
+                    f"检测到 {model_path} 目录存在但缺少 model.bin（可能上次下载中断），将重新拉取"
+                )
+            else:
+                logger.info(f"模型 whisper-{model_size} 不存在，开始下载...")
            repo_id = MODEL_MAP[model_size]
            model_path = snapshot_download(
                repo_id,
-
                local_dir=model_path,
            )
            logger.info("模型下载完成")