🐞 fix: 增加出错后对已解析段落的缓存功能，再次重试时不再从头开始

解析长视频时，当附件过大时不再在调用后报错，而是将附件分批次发送；在每篇笔记开头默认增加来源地址链接，便于对模糊处进行溯源
2026-07-20 04:02:38 +08:00 · 2026-02-12 18:28:11 +08:00
parent 7b45db2f59
commit d9a7b89e7d
67 changed files with 279293 additions and 64 deletions
--- a/backend/app/utils/video_reader.py
+++ b/backend/app/utils/video_reader.py
@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import os
 import re
 import subprocess
@@ -14,6 +15,7 @@ class VideoReader:
                 video_path: str,
                 grid_size=(3, 3),
                 frame_interval=2,
+                 dedupe_enabled=True,
                 unit_width=960,
                 unit_height=540,
                 save_quality=90,
@@ -23,6 +25,7 @@ class VideoReader:
        self.video_path = video_path
        self.grid_size = grid_size
        self.frame_interval = frame_interval
+        self.dedupe_enabled = dedupe_enabled
        self.unit_width = unit_width
        self.unit_height = unit_height
        self.save_quality = save_quality
@@ -31,6 +34,14 @@ class VideoReader:
        print(f"视频路径：{video_path}",self.frame_dir,self.grid_dir)
        self.font_path = font_path

+    @staticmethod
+    def _calculate_file_md5(file_path: str) -> str:
+        hasher = hashlib.md5()
+        with open(file_path, "rb") as f:
+            for chunk in iter(lambda: f.read(8192), b""):
+                hasher.update(chunk)
+        return hasher.hexdigest()
+
    def format_time(self, seconds: float) -> str:
        mm = int(seconds // 60)
        ss = int(seconds % 60)
@@ -51,12 +62,21 @@ class VideoReader:
            timestamps = [i for i in range(0, int(duration), self.frame_interval)][:max_frames]

            image_paths = []
+            last_hash = None
            for ts in timestamps:
                time_label = self.format_time(ts)
                output_path = os.path.join(self.frame_dir, f"frame_{time_label}.jpg")
                cmd = ["ffmpeg", "-ss", str(ts), "-i", self.video_path, "-frames:v", "1", "-q:v", "2", "-y", output_path,
                       "-hide_banner", "-loglevel", "error"]
                subprocess.run(cmd, check=True)
+
+                if self.dedupe_enabled:
+                    frame_hash = self._calculate_file_md5(output_path)
+                    if frame_hash == last_hash:
+                        os.remove(output_path)
+                        continue
+                    last_hash = frame_hash
+
                image_paths.append(output_path)
            return image_paths
        except Exception as e: