mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-06 20:42:52 +08:00
feat(transcriber): 使用 ModelScope 替代 Hugging Face 下载模型
- 在 requirements.txt 中添加 modelscope 依赖 - 修改 whisper.py 中的模型下载逻辑,使用 ModelScope 的 snapshot_download 函数 - 更新 MODEL_MAP 字典,映射不同大小的模型到对应的 ModelScope 仓库 - 调整模型路径,直接使用 ModelScope 下载的路径
This commit is contained in:
@@ -11,13 +11,25 @@ from events import transcription_finished
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import os
|
import os
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from huggingface_hub import snapshot_download
|
from modelscope import snapshot_download
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo
|
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo
|
||||||
'''
|
'''
|
||||||
logger=get_logger(__name__)
|
logger=get_logger(__name__)
|
||||||
|
|
||||||
|
MODEL_MAP={
|
||||||
|
"tiny": "pengzhendong/faster-whisper-tiny",
|
||||||
|
'base':'pengzhendong/faster-whisper-base',
|
||||||
|
'small':'pengzhendong/faster-whisper-small',
|
||||||
|
'medium':'pengzhendong/faster-whisper-medium',
|
||||||
|
'large-v1':'pengzhendong/faster-whisper-large-v1',
|
||||||
|
'large-v2':'pengzhendong/faster-whisper-large-v2',
|
||||||
|
'large-v3':'pengzhendong/faster-whisper-large-v3',
|
||||||
|
'large-v3-turbo':'pengzhendong/faster-whisper-large-v3-turbo',
|
||||||
|
}
|
||||||
|
|
||||||
class WhisperTranscriber(Transcriber):
|
class WhisperTranscriber(Transcriber):
|
||||||
# TODO:修改为可配置
|
# TODO:修改为可配置
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -40,16 +52,16 @@ class WhisperTranscriber(Transcriber):
|
|||||||
model_path = os.path.join(model_dir, f"whisper-{model_size}")
|
model_path = os.path.join(model_dir, f"whisper-{model_size}")
|
||||||
if not Path(model_path).exists():
|
if not Path(model_path).exists():
|
||||||
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
|
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
|
||||||
repo_id = f"guillaumekln/faster-whisper-{model_size}"
|
repo_id = MODEL_MAP[model_size]
|
||||||
snapshot_download(
|
model_path = snapshot_download(
|
||||||
repo_id,
|
repo_id,
|
||||||
|
|
||||||
local_dir=model_path,
|
local_dir=model_path,
|
||||||
local_dir_use_symlinks=False,
|
|
||||||
)
|
)
|
||||||
logger.info("模型下载完成")
|
logger.info("模型下载完成")
|
||||||
|
|
||||||
self.model = WhisperModel(
|
self.model = WhisperModel(
|
||||||
model_size,
|
model_size_or_path=model_path,
|
||||||
device=self.device,
|
device=self.device,
|
||||||
compute_type=self.compute_type,
|
compute_type=self.compute_type,
|
||||||
cpu_threads=cpu_threads,
|
cpu_threads=cpu_threads,
|
||||||
|
|||||||
@@ -2,11 +2,13 @@ import os
|
|||||||
|
|
||||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
|
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
|
||||||
|
|
||||||
|
|
||||||
def get_data_dir():
|
def get_data_dir():
|
||||||
data_path = os.path.join(PROJECT_ROOT, "data")
|
data_path = os.path.join(PROJECT_ROOT, "data")
|
||||||
os.makedirs(data_path, exist_ok=True)
|
os.makedirs(data_path, exist_ok=True)
|
||||||
return data_path
|
return data_path
|
||||||
|
|
||||||
|
|
||||||
def get_model_dir(subdir: str = "whisper") -> str:
|
def get_model_dir(subdir: str = "whisper") -> str:
|
||||||
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models"))
|
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../models"))
|
||||||
path = os.path.join(base, subdir)
|
path = os.path.join(base, subdir)
|
||||||
@@ -15,4 +17,4 @@ def get_model_dir(subdir: str = "whisper") -> str:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(get_data_dir())
|
print(get_data_dir())
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user