feat(transcriber): 使用 ModelScope 替代 Hugging Face 下载模型

- 在 requirements.txt 中添加 modelscope 依赖
- 修改 whisper.py 中的模型下载逻辑,使用 ModelScope 的 snapshot_download 函数
- 更新 MODEL_MAP 字典,映射不同大小的模型到对应的 ModelScope 仓库
- 调整模型路径,直接使用 ModelScope 下载的路径
This commit is contained in:
黄建武
2025-05-08 14:42:43 +08:00
parent ae92ec190a
commit 51fb59e3e1
3 changed files with 20 additions and 6 deletions

View File

@@ -11,13 +11,25 @@ from events import transcription_finished
from pathlib import Path
import os
from tqdm import tqdm
from huggingface_hub import snapshot_download
from modelscope import snapshot_download
'''
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo)
'''
logger=get_logger(__name__)
# Lookup table from a Whisper model size name to the ModelScope repository id
# handed to snapshot_download. Every repository follows the same
# "pengzhendong/faster-whisper-<size>" naming, so the table is generated from
# the list of supported sizes (insertion order is preserved).
MODEL_MAP = {
    size: f"pengzhendong/faster-whisper-{size}"
    for size in (
        "tiny",
        "base",
        "small",
        "medium",
        "large-v1",
        "large-v2",
        "large-v3",
        "large-v3-turbo",
    )
}
class WhisperTranscriber(Transcriber):
# TODO:修改为可配置
def __init__(
@@ -40,16 +52,16 @@ class WhisperTranscriber(Transcriber):
model_path = os.path.join(model_dir, f"whisper-{model_size}")
if not Path(model_path).exists():
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
repo_id = f"guillaumekln/faster-whisper-{model_size}"
snapshot_download(
repo_id = MODEL_MAP[model_size]
model_path = snapshot_download(
repo_id,
local_dir=model_path,
local_dir_use_symlinks=False,
)
logger.info("模型下载完成")
self.model = WhisperModel(
model_size,
model_size_or_path=model_path,
device=self.device,
compute_type=self.compute_type,
cpu_threads=cpu_threads,

View File

@@ -2,11 +2,13 @@ import os
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
def get_data_dir():
    """Return the path of the ``data`` directory under ``PROJECT_ROOT``.

    The directory is created on demand, so the returned path exists once
    this function returns.
    """
    target = os.path.join(PROJECT_ROOT, "data")
    # exist_ok=True keeps repeated calls from failing once the directory exists.
    os.makedirs(target, exist_ok=True)
    return target
def get_model_dir(subdir: str = "whisper") -> str:
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models"))
path = os.path.join(base, subdir)
@@ -15,4 +17,4 @@ def get_model_dir(subdir: str = "whisper") -> str:
if __name__ == '__main__':
print(get_data_dir())
print(get_data_dir())

Binary file not shown.