Files
BiliNote/backend/app/transcriber/whisper.py
Jefferyhcool 43b88c85fa feat(events): 实现转写完成后的文件清理功能
- 新增 events 模块,包括 handlers 和 signals 子模块
- 在 handlers 中实现 cleanup_temp_files 函数,用于清理转写临时文件
- 在 signals 中定义 transcription_finished 信号
- 修改 main.py,添加 startup_event 函数以注册事件处理器
- 更新 WhisperTranscriber 类,增加 on_finish 方法并发送转写完成信号
- 在 base.py 中添加 TranscriberBase 类的 on_finish 方法占位符
2025-04-13 23:29:33 +08:00

103 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from faster_whisper import WhisperModel
from app.decorators.timeit import timeit
from app.models.transcriber_model import TranscriptSegment, TranscriptResult
from app.transcriber.base import Transcriber
from app.utils.env_checker import is_cuda_available, is_torch_installed
from app.utils.path_helper import get_model_dir
from events import transcription_finished
'''
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo).
'''
class WhisperTranscriber(Transcriber):
    """Transcriber backed by a faster-whisper ``WhisperModel``.

    The model is loaded once in ``__init__``. ``transcript`` runs it on a
    single file and, after a successful run, calls ``on_finish`` which sends
    the ``transcription_finished`` signal.
    """

    # TODO: make these constructor defaults configurable.
    def __init__(
        self,
        model_size: str = "base",
        device: str = 'cpu',
        compute_type: str = None,
        cpu_threads: int = 1,
    ):
        """Resolve the compute device and load the model.

        Args:
            model_size: Model name forwarded to ``WhisperModel``.
            device: ``'cpu'`` or ``None`` selects the CPU. Any other value
                requests CUDA and falls back to the CPU when ``is_cuda()``
                returns False.
            compute_type: Stored on ``self.compute_type``. When falsy it
                defaults to ``"float16"`` on CUDA and ``"int8"`` on CPU.
            cpu_threads: Forwarded to ``WhisperModel``.
        """
        if device is None or device == 'cpu':
            self.device = 'cpu'
        else:
            self.device = "cuda" if self.is_cuda() else "cpu"
            # Only an explicit 'cuda' request gets the fallback notice.
            if device == 'cuda' and self.device == 'cpu':
                print('没有 cuda 使用 cpu进行计算')

        self.compute_type = compute_type or (
            "float16" if self.device == "cuda" else "int8"
        )

        model_path = get_model_dir("whisper")
        # NOTE(review): self.compute_type is computed above but is not passed
        # to WhisperModel, so the model loads with the library's default
        # compute type. Confirm whether that is intentional before wiring it.
        self.model = WhisperModel(
            model_size,
            device=self.device,
            cpu_threads=cpu_threads,
            download_root=model_path,
        )

    @staticmethod
    def is_torch_installed() -> bool:
        """Return True when ``torch`` can be imported, False otherwise."""
        try:
            import torch  # noqa: F401 -- imported only to probe availability
        except ImportError:
            return False
        return True

    @staticmethod
    def is_cuda() -> bool:
        """Return True when CUDA is available, printing the reason either way.

        An ``ImportError`` raised by the environment checks is treated as
        "no CUDA" and yields False without printing.
        """
        try:
            if is_cuda_available():
                print("✅ CUDA 可用,使用 GPU")
                return True
            # Inside a method this name resolves to the module-level helper
            # imported from app.utils.env_checker, not the static method above.
            if is_torch_installed():
                print("⚠️ 只装了 torch但没有 CUDA用 CPU")
            else:
                print("❌ 还没有安装 torch请先安装")
            return False
        except ImportError:
            return False

    @timeit
    def transcript(self, file_path: str) -> TranscriptResult:
        """Transcribe ``file_path`` and return the assembled result.

        Args:
            file_path: Path handed to ``self.model.transcribe``.

        Returns:
            A ``TranscriptResult`` on success. If transcription raises, the
            error is printed and ``None`` is returned instead of re-raising.
        """
        try:
            segments_raw, info = self.model.transcribe(file_path)
            # Keep the loop inside the try: faster-whisper documents
            # ``segments`` as a generator, so decoding errors surface while
            # iterating rather than in the transcribe() call itself.
            texts = []
            segments = []
            for seg in segments_raw:
                text = seg.text.strip()
                texts.append(text)
                segments.append(
                    TranscriptSegment(start=seg.start, end=seg.end, text=text)
                )
            result = TranscriptResult(
                language=info.language,
                # One join instead of repeated string concatenation.
                full_text=" ".join(texts).strip(),
                segments=segments,
                raw=info,
            )
        except Exception as e:
            print(f"转写失败:{e}")
            return None

        # The completion hook runs outside the transcription guard so that a
        # failing signal receiver cannot discard a finished transcript.
        try:
            self.on_finish(file_path, result)
        except Exception as e:
            print(f"转写完成回调失败:{e}")
        return result

    def on_finish(self, video_path: str, result: TranscriptResult) -> None:
        """Announce completion by sending ``transcription_finished``.

        Args:
            video_path: Placed in the payload under the ``"file_path"`` key.
            result: Accepted for the hook signature; not used here.
        """
        print("转写完成")
        # The payload dict is passed as the first positional argument of send().
        transcription_finished.send({
            "file_path": video_path,
        })