feat(transcriber): 使用 ModelScope 替代 Hugging Face 下载模型

- 在 requirements.txt 中添加 modelscope 依赖
- 修改 whisper.py 中的模型下载逻辑,使用 ModelScope 的 snapshot_download 函数
- 更新 MODEL_MAP 字典,映射不同大小的模型到对应的 ModelScope 仓库
- 调整模型路径,直接使用 ModelScope 下载的路径
This commit is contained in:
黄建武
2025-05-08 14:42:43 +08:00
parent ae92ec190a
commit 51fb59e3e1
3 changed files with 20 additions and 6 deletions

View File

@@ -11,13 +11,25 @@ from events import transcription_finished
from pathlib import Path from pathlib import Path
import os import os
from tqdm import tqdm from tqdm import tqdm
from huggingface_hub import snapshot_download from modelscope import snapshot_download
''' '''
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo
''' '''
logger=get_logger(__name__) logger=get_logger(__name__)
# Lookup table from a Whisper model size name to the ModelScope repository
# that is passed to snapshot_download as the repo id.
# NOTE(review): only the sizes listed here are present; any other size name
# (e.g. the ".en" or "distil-" variants) has no entry — confirm whether
# callers can pass those.
MODEL_MAP = {
    "tiny": "pengzhendong/faster-whisper-tiny",
    "base": "pengzhendong/faster-whisper-base",
    "small": "pengzhendong/faster-whisper-small",
    "medium": "pengzhendong/faster-whisper-medium",
    "large-v1": "pengzhendong/faster-whisper-large-v1",
    "large-v2": "pengzhendong/faster-whisper-large-v2",
    "large-v3": "pengzhendong/faster-whisper-large-v3",
    "large-v3-turbo": "pengzhendong/faster-whisper-large-v3-turbo",
}
class WhisperTranscriber(Transcriber): class WhisperTranscriber(Transcriber):
# TODO:修改为可配置 # TODO:修改为可配置
def __init__( def __init__(
@@ -40,16 +52,16 @@ class WhisperTranscriber(Transcriber):
model_path = os.path.join(model_dir, f"whisper-{model_size}") model_path = os.path.join(model_dir, f"whisper-{model_size}")
if not Path(model_path).exists(): if not Path(model_path).exists():
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...") logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
repo_id = f"guillaumekln/faster-whisper-{model_size}" repo_id = MODEL_MAP[model_size]
snapshot_download( model_path = snapshot_download(
repo_id, repo_id,
local_dir=model_path, local_dir=model_path,
local_dir_use_symlinks=False,
) )
logger.info("模型下载完成") logger.info("模型下载完成")
self.model = WhisperModel( self.model = WhisperModel(
model_size, model_size_or_path=model_path,
device=self.device, device=self.device,
compute_type=self.compute_type, compute_type=self.compute_type,
cpu_threads=cpu_threads, cpu_threads=cpu_threads,

View File

@@ -2,11 +2,13 @@ import os
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
def get_data_dir(): def get_data_dir():
data_path = os.path.join(PROJECT_ROOT, "data") data_path = os.path.join(PROJECT_ROOT, "data")
os.makedirs(data_path, exist_ok=True) os.makedirs(data_path, exist_ok=True)
return data_path return data_path
def get_model_dir(subdir: str = "whisper") -> str: def get_model_dir(subdir: str = "whisper") -> str:
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models")) base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models"))
path = os.path.join(base, subdir) path = os.path.join(base, subdir)
@@ -15,4 +17,4 @@ def get_model_dir(subdir: str = "whisper") -> str:
if __name__ == '__main__': if __name__ == '__main__':
print(get_data_dir()) print(get_data_dir())

Binary file not shown.