fix(mlx-whisper): 修正 huggingface 仓库 ID 命名

mlx-community 上 Whisper 仓库的命名实际是 'whisper-{size}-mlx'(large-v3-turbo 例外,无 -mlx 后缀)。
之前 hardcode 拼成 'mlx-community/whisper-{size}' 在 HF 上不存在,下载会 404:

  Repository Not Found for url:
    https://huggingface.co/api/models/mlx-community/whisper-small/revision/main.

修复:
- 在 mlx_whisper_transcriber.py 加 MLX_MODEL_MAP(已用 huggingface API 核对过命名)+ resolve_mlx_repo_id() 帮助函数
- routers/config.py 的 _do_download_mlx_whisper 与 _check ... 路径生成都改用同一份映射表
- 给 transcriber_models_status 的每条 mlx 状态加 available 字段,避免后续出现不支持的 size 时静默失败

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
huangjianwu
2026-05-07 11:59:02 +08:00
parent 702b57c165
commit be5e1637fa
2 changed files with 45 additions and 9 deletions

View File

@@ -110,15 +110,19 @@ def get_transcriber_models_status():
mlx_available = platform.system() == "Darwin"
mlx_statuses = []
if mlx_available:
from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP
for size in WHISPER_MODEL_SIZES:
mlx_key = f"mlx-{size}"
model_dir = get_model_dir("mlx-whisper")
model_path = os.path.join(model_dir, f"mlx-community/whisper-{size}")
downloaded = Path(model_path).exists()
repo_id = MLX_MODEL_MAP.get(size)
# 模型在本地按 repo_id(如 mlx-community/whisper-small-mlx)落盘
model_path = os.path.join(model_dir, repo_id) if repo_id else None
downloaded = bool(model_path and Path(model_path).exists())
mlx_statuses.append({
"model_size": size,
"downloaded": downloaded,
"downloading": _downloading.get(mlx_key) == "downloading",
"available": repo_id is not None,
})
return R.success(data={
@@ -164,15 +168,22 @@ def _do_download_mlx_whisper(model_size: str):
try:
_downloading[key] = "downloading"
from huggingface_hub import snapshot_download as hf_download
from app.transcriber.mlx_whisper_transcriber import resolve_mlx_repo_id
try:
repo_id = resolve_mlx_repo_id(model_size)
except ValueError as e:
logger.error(str(e))
_downloading[key] = "failed"
return
model_dir = get_model_dir("mlx-whisper")
model_name = f"mlx-community/whisper-{model_size}"
model_path = os.path.join(model_dir, model_name)
model_path = os.path.join(model_dir, repo_id)
if Path(model_path).exists():
_downloading[key] = "done"
return
logger.info(f"开始下载 mlx-whisper 模型: {model_size}")
hf_download(model_name, local_dir=model_path, local_dir_use_symlinks=False)
logger.info(f"开始下载 mlx-whisper 模型: {model_size}{repo_id}")
hf_download(repo_id, local_dir=model_path, local_dir_use_symlinks=False)
logger.info(f"mlx-whisper 模型下载完成: {model_size}")
_downloading[key] = "done"
except Exception as e:

View File

@@ -13,6 +13,31 @@ from events import transcription_finished
logger = get_logger(__name__)
# Whisper repositories under mlx-community are not named uniformly: the regular
# sizes are published as 'whisper-{size}-mlx', while large-v3-turbo is the
# exception and carries no '-mlx' suffix. Building 'mlx-community/whisper-{size}'
# directly results in a 404.
# The names were checked against
# https://huggingface.co/api/models?author=mlx-community&search=whisper
MLX_MODEL_MAP = {
    size: f"mlx-community/whisper-{size}{suffix}"
    for size, suffix in (
        ("tiny", "-mlx"),
        ("base", "-mlx"),
        ("small", "-mlx"),
        ("medium", "-mlx"),
        ("large-v1", "-mlx"),
        ("large-v2", "-mlx"),
        ("large-v3", "-mlx"),
        # The turbo repository is the one entry without the '-mlx' suffix.
        ("large-v3-turbo", ""),
    )
}
def resolve_mlx_repo_id(model_size: str) -> str:
    """Return the Hugging Face repo id mapped to a Whisper model size.

    Args:
        model_size: A key of ``MLX_MODEL_MAP`` (for example ``"small"``).

    Returns:
        The repository id stored in ``MLX_MODEL_MAP`` for ``model_size``.

    Raises:
        ValueError: If ``model_size`` is not a key of ``MLX_MODEL_MAP``. The
            message lists the supported sizes.
    """
    if model_size not in MLX_MODEL_MAP:
        # Adjacent f-strings are concatenated verbatim, so the separator
        # between the two sentences has to be written out explicitly.
        raise ValueError(
            f"不支持的 MLX Whisper 模型大小: {model_size}; "
            f"可选: {', '.join(MLX_MODEL_MAP)}"
        )
    return MLX_MODEL_MAP[model_size]
class MLXWhisperTranscriber(Transcriber):
def __init__(
self,
@@ -21,13 +46,13 @@ class MLXWhisperTranscriber(Transcriber):
# 检查平台
if platform.system() != "Darwin":
raise RuntimeError("MLX Whisper 仅支持 Apple 平台")
# 检查环境变量
if os.environ.get("TRANSCRIBER_TYPE") != "mlx-whisper":
raise RuntimeError("必须设置环境变量 TRANSCRIBER_TYPE=mlx-whisper 才能使用 MLX Whisper")
self.model_size = model_size
self.model_name = f"mlx-community/whisper-{model_size}"
self.model_name = resolve_mlx_repo_id(model_size)
self.model_path = None
# 设置模型路径