Files
BiliNote/backend/app/utils/video_reader.py
huangjianwu c105342ded fix: 性能优化、前端转写器配置、任务进度丢失及 MLX Whisper 回退问题修复
### 性能优化
- 后端任务执行从串行锁改为 ThreadPoolExecutor 并发执行(默认3线程)
- 添加 GZipMiddleware 响应压缩 + Nginx gzip 配置
- 数据库连接池参数优化(pool_size=10, max_overflow=20)
- 视频帧提取并行化(ThreadPoolExecutor)
- LLM 重试配置缓存到实例,避免每次请求读 env var
- 前端路由级代码拆分(React.lazy + Suspense)
- Vite manualChunks 拆分 markdown/markmap/vendor
- MarkdownViewer 用 React.memo + useMemo 减少不必要渲染
- NoteHistory Fuse.js 实例 useMemo 缓存
- useTaskPolling 无待处理任务时跳过轮询
- 移除 antd 依赖(NoteForm Alert、modelForm Tag),改用 shadcn/ui

### 前端转写器配置(新功能)
- 新增 TranscriberConfigManager(JSON 文件存储,替代环境变量)
- 新增 GET/POST /transcriber_config API 端点
- 新增 GET /transcriber_models_status 模型下载状态查询
- 新增 POST /transcriber_download 后台模型下载触发
- 前端转写器设置页面:引擎选择、模型大小选择、模型下载管理
- deploy_status 端点同步从配置文件读取

### Bug 修复
- 修复任务进行中切换页面后进度丢失:Home.tsx status 派生逻辑补全中间状态
- 修复 MLX Whisper 静默回退 fast-whisper:移除环境变量门控,macOS 下自动尝试导入
- MLX Whisper 不可用时抛出 RuntimeError 而非静默回退
- 前端展示 MLX Whisper 可用性状态,不可用时禁用保存

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 14:09:34 +08:00

184 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import base64
import hashlib
import os
import re
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import ffmpeg
from PIL import Image, ImageDraw, ImageFont
from app.utils.logger import get_logger
from app.utils.path_helper import get_app_dir
# Module-level logger, named after this module.
logger = get_logger(__name__)
class VideoReader:
def __init__(self,
video_path: str,
grid_size=(3, 3),
frame_interval=2,
dedupe_enabled=True,
unit_width=960,
unit_height=540,
save_quality=90,
font_path="fonts/arial.ttf",
frame_dir=None,
grid_dir=None):
self.video_path = video_path
self.grid_size = grid_size
self.frame_interval = frame_interval
self.dedupe_enabled = dedupe_enabled
self.unit_width = unit_width
self.unit_height = unit_height
self.save_quality = save_quality
self.frame_dir = frame_dir or get_app_dir("output_frames")
self.grid_dir = grid_dir or get_app_dir("grid_output")
print(f"视频路径:{video_path}",self.frame_dir,self.grid_dir)
self.font_path = font_path
@staticmethod
def _calculate_file_md5(file_path: str) -> str:
hasher = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
hasher.update(chunk)
return hasher.hexdigest()
def format_time(self, seconds: float) -> str:
mm = int(seconds // 60)
ss = int(seconds % 60)
return f"{mm:02d}_{ss:02d}"
def extract_time_from_filename(self, filename: str) -> float:
match = re.search(r"frame_(\d{2})_(\d{2})\.jpg", filename)
if match:
mm, ss = map(int, match.groups())
return mm * 60 + ss
return float('inf')
def _extract_single_frame(self, ts: int) -> str | None:
"""提取单帧,返回输出路径或 None失败时"""
time_label = self.format_time(ts)
output_path = os.path.join(self.frame_dir, f"frame_{time_label}.jpg")
cmd = ["ffmpeg", "-ss", str(ts), "-i", self.video_path, "-frames:v", "1", "-q:v", "2", "-y", output_path,
"-hide_banner", "-loglevel", "error"]
try:
subprocess.run(cmd, check=True)
return output_path
except subprocess.CalledProcessError:
return None
def extract_frames(self, max_frames=1000) -> list[str]:
try:
os.makedirs(self.frame_dir, exist_ok=True)
duration = float(ffmpeg.probe(self.video_path)["format"]["duration"])
timestamps = [i for i in range(0, int(duration), self.frame_interval)][:max_frames]
# 并行提取帧
max_workers = min(os.cpu_count() or 4, 8, len(timestamps))
frame_results: dict[int, str | None] = {}
with ThreadPoolExecutor(max_workers=max_workers) as pool:
futures = {pool.submit(self._extract_single_frame, ts): ts for ts in timestamps}
for future in as_completed(futures):
ts = futures[future]
frame_results[ts] = future.result()
# 按时间戳顺序整理结果,并进行去重
image_paths = []
last_hash = None
for ts in timestamps:
output_path = frame_results.get(ts)
if not output_path or not os.path.exists(output_path):
continue
if self.dedupe_enabled:
frame_hash = self._calculate_file_md5(output_path)
if frame_hash == last_hash:
os.remove(output_path)
continue
last_hash = frame_hash
image_paths.append(output_path)
return image_paths
except Exception as e:
logger.error(f"分割帧发生错误:{str(e)}")
raise ValueError("视频处理失败")
def group_images(self) -> list[list[str]]:
image_files = [os.path.join(self.frame_dir, f) for f in os.listdir(self.frame_dir) if
f.startswith("frame_") and f.endswith(".jpg")]
image_files.sort(key=lambda f: self.extract_time_from_filename(os.path.basename(f)))
group_size = self.grid_size[0] * self.grid_size[1]
return [image_files[i:i + group_size] for i in range(0, len(image_files), group_size)]
def concat_images(self, image_paths: list[str], name: str) -> str:
os.makedirs(self.grid_dir, exist_ok=True)
font = ImageFont.truetype(self.font_path, 48) if os.path.exists(self.font_path) else ImageFont.load_default()
images = []
for path in image_paths:
img = Image.open(path).convert("RGB").resize((self.unit_width, self.unit_height), Image.Resampling.LANCZOS)
timestamp = re.search(r"frame_(\d{2})_(\d{2})\.jpg", os.path.basename(path))
time_text = f"{timestamp.group(1)}:{timestamp.group(2)}" if timestamp else ""
draw = ImageDraw.Draw(img)
draw.text((10, 10), time_text, fill="yellow", font=font, stroke_width=1, stroke_fill="black")
images.append(img)
cols, rows = self.grid_size
grid_img = Image.new("RGB", (self.unit_width * cols, self.unit_height * rows), (255, 255, 255))
for i, img in enumerate(images):
x = (i % cols) * self.unit_width
y = (i // cols) * self.unit_height
grid_img.paste(img, (x, y))
save_path = os.path.join(self.grid_dir, f"{name}.jpg")
grid_img.save(save_path, quality=self.save_quality)
return save_path
def encode_images_to_base64(self, image_paths: list[str]) -> list[str]:
base64_images = []
for path in image_paths:
with open(path, "rb") as img_file:
encoded_string = base64.b64encode(img_file.read()).decode("utf-8")
base64_images.append(f"data:image/jpeg;base64,{encoded_string}")
return base64_images
def run(self)->list[str]:
logger.info("开始提取视频帧...")
try:
# 确保目录存在
print(self.frame_dir,self.grid_dir)
os.makedirs(self.frame_dir, exist_ok=True)
os.makedirs(self.grid_dir, exist_ok=True)
#清空帧文件夹
for file in os.listdir(self.frame_dir):
if file.startswith("frame_"):
os.remove(os.path.join(self.frame_dir, file))
print(self.frame_dir,self.grid_dir)
#清空网格文件夹
for file in os.listdir(self.grid_dir):
if file.startswith("grid_"):
os.remove(os.path.join(self.grid_dir, file))
print(self.frame_dir,self.grid_dir)
self.extract_frames()
print("2#3",self.frame_dir,self.grid_dir)
logger.info("开始拼接网格图...")
image_paths = []
groups = self.group_images()
for idx, group in enumerate(groups, start=1):
if len(group) < self.grid_size[0] * self.grid_size[1]:
logger.warning(f"⚠️ 跳过第 {idx} 组,图片不足 {self.grid_size[0] * self.grid_size[1]}")
continue
out_path = self.concat_images(group, f"grid_{idx}")
image_paths.append(out_path)
logger.info("📤 开始编码图像...")
urls = self.encode_images_to_base64(image_paths)
return urls
except Exception as e:
logger.error(f"发生错误:{str(e)}")
raise ValueError("视频处理失败")