Files
BiliNote/backend/tests/test_whisper_models.py
huangjianwu 3841719d5a fix(transcriber): 修复 large-v3-turbo 因仓库 404 无法下载
Systran/faster-whisper-large-v3-turbo 已从 HuggingFace 下架(API 返回
401/404,仓库不存在)。用户点击下载后,后台 snapshot_download 立即抛错
被吞掉,_downloading 置为 failed 但状态接口只回传 downloading/downloaded
两个布尔,于是表现为:无进度转圈、状态一直「未下载」、前端无错误提示。

改用社区维护的 CT2 转换版 deepdml/faster-whisper-large-v3-turbo-ct2:
HF 直链可达(200,无重定向,保证缓存目录名与存在性检测一致),含
model.bin 等全部所需文件,与 faster-whisper 的 large-v3-turbo 等价。

附回归测试,断言 large-v3-turbo 解析到存活仓库而非已失效的 Systran 仓库。

Closes #402

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 10:50:55 +08:00

145 lines
5.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Unit tests for app.transcriber.whisper_modelswhisper 模型名→标识 的映射注册表)。
直接按文件路径加载被测模块,并桩掉 app.utils.logger避免触发 app/__init__.py
(会 import faster_whisper / ctranslate2 等重依赖),使本测试无需安装转写依赖即可运行。
"""
import importlib.util
import logging
import os
import pathlib
import sys
import tempfile
import types
import unittest
# ROOT is the directory one level above the folder that contains this test file.
ROOT = pathlib.Path(__file__).resolve().parents[1]
# Location of the module under test; it is loaded by path rather than imported by name.
MODULE_PATH = ROOT.joinpath("app", "transcriber", "whisper_models.py")
def _load_module():
    """Load ``whisper_models.py`` directly from ``MODULE_PATH`` and return it.

    Before executing the file, placeholder ``app`` and ``app.utils`` packages
    and a stub ``app.utils.logger`` module are registered in ``sys.modules``
    (each only when no entry of that name exists yet). The stub's
    ``get_logger`` returns a standard-library logger, so the module can be
    executed without importing the real ``app`` package.

    Returns:
        The executed module object, created under the name
        ``whisper_models_under_test``.

    Raises:
        ImportError: if no import spec or loader can be built for
            ``MODULE_PATH``.
    """
    # An (empty) ``__path__`` attribute is what marks a module as a package.
    for pkg_name in ("app", "app.utils"):
        if pkg_name not in sys.modules:
            pkg = types.ModuleType(pkg_name)
            pkg.__path__ = []
            sys.modules[pkg_name] = pkg

    if "app.utils.logger" not in sys.modules:
        logger_mod = types.ModuleType("app.utils.logger")
        logger_mod.get_logger = lambda name=None: logging.getLogger(name or "test")
        sys.modules["app.utils.logger"] = logger_mod

    spec = importlib.util.spec_from_file_location("whisper_models_under_test", MODULE_PATH)
    # Explicit check instead of ``assert``: asserts are removed under ``python -O``,
    # which would turn a bad path into an obscure AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module under test from {MODULE_PATH}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
# Load the module under test once at import time; the test cases below reach it through ``wm``.
wm = _load_module()
class TestResolve(unittest.TestCase):
    """Tests for ``WhisperModelRegistry``: name resolution and custom-model management.

    Every test gets its own registry, configured with a ``whisper_models.json``
    path inside a fresh temporary directory.
    """

    def setUp(self):
        self.tmp = tempfile.TemporaryDirectory()
        # Register the cleanup immediately: cleanups still run when a later
        # line of setUp raises, whereas tearDown is skipped in that case.
        self.addCleanup(self.tmp.cleanup)
        self.cfg = os.path.join(self.tmp.name, "whisper_models.json")
        self.reg = wm.WhisperModelRegistry(filepath=self.cfg)

    def test_builtin_resolves_to_systran(self):
        self.assertEqual(self.reg.resolve("tiny"), "Systran/faster-whisper-tiny")

    def test_large_v3_turbo_resolves_to_live_repo(self):
        # Regression for issue #402: Systran never published a CT2 conversion
        # of turbo, so the old mapping Systran/faster-whisper-large-v3-turbo
        # returned 401/404 on HF; downloads failed silently and the status
        # stayed "not downloaded". The mapping now points at the
        # community-maintained CT2 conversion.
        self.assertEqual(
            self.reg.resolve("large-v3-turbo"),
            "deepdml/faster-whisper-large-v3-turbo-ct2",
        )
        self.assertNotEqual(
            self.reg.resolve("large-v3-turbo"),
            "Systran/faster-whisper-large-v3-turbo",
        )

    def test_passthrough_repo_id(self):
        # The caller passes an HF repo_id directly as the model size (contains "/").
        self.assertEqual(self.reg.resolve("SomeOrg/my-whisper-ct2"), "SomeOrg/my-whisper-ct2")

    def test_unknown_raises(self):
        with self.assertRaises(ValueError):
            self.reg.resolve("definitely-not-a-model")

    def test_custom_overrides_and_persists(self):
        self.reg.add_custom_model("myhf", "someorg/whisper-ct2")
        self.assertEqual(self.reg.resolve("myhf"), "someorg/whisper-ct2")
        # A new instance reading the same file confirms persistence
        # (under Docker the file is kept along with the config volume).
        reg2 = wm.WhisperModelRegistry(filepath=self.cfg)
        self.assertEqual(reg2.resolve("myhf"), "someorg/whisper-ct2")

    def test_custom_can_override_builtin_key_resolution(self):
        # Custom entries take priority over built-ins: force "tiny" onto another
        # repo (allowed at the resolve layer; the add layer rejects the collision).
        self.reg._write_custom({"tiny": "Other/tiny-ct2"})
        self.assertEqual(self.reg.resolve("tiny"), "Other/tiny-ct2")

    def test_local_path_resolution_and_detection(self):
        model_dir = os.path.join(self.tmp.name, "mymodel")
        os.makedirs(model_dir)
        self.reg.add_custom_model("local1", model_dir)
        self.assertEqual(self.reg.resolve("local1"), model_dir)
        self.assertTrue(wm.is_local_target(self.reg.resolve("local1")))

    def test_bare_existing_dir_passthrough(self):
        # Not registered, but an existing directory is passed in directly:
        # it is returned unchanged as a local path.
        model_dir = os.path.join(self.tmp.name, "bare")
        os.makedirs(model_dir)
        self.assertEqual(self.reg.resolve(model_dir), model_dir)

    def test_add_rejects_builtin_collision_and_empty(self):
        with self.assertRaises(ValueError):
            self.reg.add_custom_model("tiny", "x/y")  # collides with a built-in name
        with self.assertRaises(ValueError):
            self.reg.add_custom_model("", "x/y")
        with self.assertRaises(ValueError):
            self.reg.add_custom_model("ok", "")

    def test_remove(self):
        self.reg.add_custom_model("tmpm", "a/b")
        self.assertIn("tmpm", self.reg.get_custom_models())
        self.reg.remove_custom_model("tmpm")
        self.assertNotIn("tmpm", self.reg.get_custom_models())

    def test_visible_includes_builtin_and_custom(self):
        self.reg.add_custom_model("zzz", "a/b")
        names = self.reg.visible_model_names()
        self.assertIn("tiny", names)
        self.assertIn("large-v3", names)
        self.assertIn("zzz", names)

    def test_is_known(self):
        self.assertTrue(self.reg.is_known("base"))
        self.assertTrue(self.reg.is_known("Org/Name"))
        self.assertFalse(self.reg.is_known("nope-not-real"))
class TestHelpers(unittest.TestCase):
    """Checks for the module-level helpers ``hf_cache_dirname`` and ``is_local_target``."""

    def test_hf_cache_dirname(self):
        # "<org>/<name>" is expected to become "models--<org>--<name>".
        tiny_dirname = wm.hf_cache_dirname("Systran/faster-whisper-tiny")
        self.assertEqual(tiny_dirname, "models--Systran--faster-whisper-tiny")

        generic_dirname = wm.hf_cache_dirname("Org/Name")
        self.assertEqual(generic_dirname, "models--Org--Name")

    def test_is_local_target(self):
        # Absolute, explicitly relative and home-relative spellings count as local.
        abs_result = wm.is_local_target("/abs/path")
        self.assertTrue(abs_result)
        rel_result = wm.is_local_target("./rel")
        self.assertTrue(rel_result)
        home_result = wm.is_local_target("~/home/model")
        self.assertTrue(home_result)

        # A repo_id is not a local path, and neither is the empty string.
        repo_result = wm.is_local_target("Org/Name")
        self.assertFalse(repo_result)
        empty_result = wm.is_local_target("")
        self.assertFalse(empty_result)
# Executed as a script: hand control to unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()