From 51fb59e3e1f202f9749a1e3f62637ca588ea1dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E5=BB=BA=E6=AD=A6?= Date: Thu, 8 May 2025 14:42:43 +0800 Subject: [PATCH] =?UTF-8?q?feat(transcriber):=20=E4=BD=BF=E7=94=A8=20Model?= =?UTF-8?q?Scope=20=E6=9B=BF=E4=BB=A3=20Hugging=20Face=20=E4=B8=8B?= =?UTF-8?q?=E8=BD=BD=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 requirements.txt 中添加 modelscope 依赖 - 修改 whisper.py 中的模型下载逻辑,使用 ModelScope 的 snapshot_download 函数- 更新 MODEL_MAP 字典,映射不同大小的模型到对应的 ModelScope 仓库 - 调整模型路径,直接使用 ModelScope 下载的路径 --- backend/app/transcriber/whisper.py | 22 +++++++++++++++++----- backend/app/utils/path_helper.py | 4 +++- backend/requirements.txt | Bin 3894 -> 3934 bytes 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/backend/app/transcriber/whisper.py b/backend/app/transcriber/whisper.py index e06e3a6..7b438f4 100644 --- a/backend/app/transcriber/whisper.py +++ b/backend/app/transcriber/whisper.py @@ -11,13 +11,25 @@ from events import transcription_finished from pathlib import Path import os from tqdm import tqdm -from huggingface_hub import snapshot_download +from modelscope import snapshot_download + ''' Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo ''' logger=get_logger(__name__) +MODEL_MAP={ + "tiny": "pengzhendong/faster-whisper-tiny", + 'base':'pengzhendong/faster-whisper-base', + 'small':'pengzhendong/faster-whisper-small', + 'medium':'pengzhendong/faster-whisper-medium', + 'large-v1':'pengzhendong/faster-whisper-large-v1', + 'large-v2':'pengzhendong/faster-whisper-large-v2', + 'large-v3':'pengzhendong/faster-whisper-large-v3', + 'large-v3-turbo':'pengzhendong/faster-whisper-large-v3-turbo', +} + class WhisperTranscriber(Transcriber): # TODO:修改为可配置 def __init__( @@ -40,16 +52,16 @@ class WhisperTranscriber(Transcriber): model_path = os.path.join(model_dir, f"whisper-{model_size}") if not Path(model_path).exists(): logger.info(f"模型 whisper-{model_size} 不存在,开始下载...") - repo_id = f"guillaumekln/faster-whisper-{model_size}" - snapshot_download( + repo_id = MODEL_MAP[model_size] + model_path = snapshot_download( repo_id, + local_dir=model_path, - local_dir_use_symlinks=False, ) logger.info("模型下载完成") self.model = WhisperModel( - model_size, + model_size_or_path=model_path, device=self.device, compute_type=self.compute_type, cpu_threads=cpu_threads, diff --git a/backend/app/utils/path_helper.py b/backend/app/utils/path_helper.py index b7585bb..99b29da 100644 --- a/backend/app/utils/path_helper.py +++ b/backend/app/utils/path_helper.py @@ -2,11 +2,13 @@ import os PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) + def get_data_dir(): data_path = os.path.join(PROJECT_ROOT, "data") os.makedirs(data_path, exist_ok=True) return data_path + def get_model_dir(subdir: str = "whisper") -> str: base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models")) path = os.path.join(base, subdir) @@ -15,4 +17,4 @@ def get_model_dir(subdir: str = "whisper") -> str: if __name__ == '__main__': - print(get_data_dir()) \ No newline at end of file + print(get_data_dir()) diff --git a/backend/requirements.txt b/backend/requirements.txt index 41167c51d562a99f84a035bfbeba362074db93d9..e37baa0994ba2fe3ab142c16ffea2817bc00b7e5 100644 GIT binary patch delta 48 zcmdlccTaAE8NUWE0~bRsLq0