mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-31 12:09:36 +08:00
🐞 fix: 增加出错之后对已解析段落的缓存功能,再次重试时不再从头开始
解析长视频时,当附件过大时不再在调用后报错,而是将附件分批次发送;在每篇笔记开头默认增加来源地址链接,便于对模糊处进行溯源。
This commit is contained in:
@@ -5,6 +5,37 @@ import re
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def prepend_source_link(markdown: str | None, source_url: str) -> str | None:
|
||||
"""
|
||||
在笔记开头添加来源链接;若首个非空行已包含来源链接,则更新该行并避免重复。
|
||||
"""
|
||||
if markdown is None:
|
||||
return None
|
||||
|
||||
source = (source_url or "").strip()
|
||||
if not source:
|
||||
return markdown
|
||||
|
||||
header = f"> 来源链接:{source}"
|
||||
lines = markdown.splitlines()
|
||||
first_non_empty_idx = None
|
||||
for idx, line in enumerate(lines):
|
||||
if line.strip():
|
||||
first_non_empty_idx = idx
|
||||
break
|
||||
|
||||
if first_non_empty_idx is not None:
|
||||
first_line = lines[first_non_empty_idx].strip()
|
||||
if first_line.startswith("> 来源链接:") or first_line.startswith("来源链接:"):
|
||||
lines[first_non_empty_idx] = header
|
||||
return "\n".join(lines)
|
||||
|
||||
if markdown.strip():
|
||||
return f"{header}\n\n{markdown}"
|
||||
return header
|
||||
|
||||
|
||||
def replace_content_markers(markdown: str, video_id: str, platform: str = 'bilibili') -> str:
|
||||
"""
|
||||
替换 *Content-04:16*、Content-04:16 或 Content-[04:16] 为超链接,跳转到对应平台视频的时间位置
|
||||
@@ -12,18 +43,20 @@ def replace_content_markers(markdown: str, video_id: str, platform: str = 'bilib
|
||||
# 匹配三种形式:*Content-04:16*、Content-04:16、Content-[04:16]
|
||||
pattern = r"(?:\*?)Content-(?:\[(\d{2}):(\d{2})\]|(\d{2}):(\d{2}))"
|
||||
|
||||
safe_video_id = video_id
|
||||
|
||||
def replacer(match):
|
||||
mm = match.group(1) or match.group(3)
|
||||
ss = match.group(2) or match.group(4)
|
||||
total_seconds = int(mm) * 60 + int(ss)
|
||||
|
||||
if platform == 'bilibili':
|
||||
video_id = video_id.replace("_p", "?p=")
|
||||
url = f"https://www.bilibili.com/video/{video_id}&t={total_seconds}"
|
||||
parsed_video_id = safe_video_id.replace("_p", "?p=")
|
||||
url = f"https://www.bilibili.com/video/{parsed_video_id}&t={total_seconds}"
|
||||
elif platform == 'youtube':
|
||||
url = f"https://www.youtube.com/watch?v={video_id}&t={total_seconds}s"
|
||||
url = f"https://www.youtube.com/watch?v={safe_video_id}&t={total_seconds}s"
|
||||
elif platform == 'douyin':
|
||||
url = f"https://www.douyin.com/video/{video_id}"
|
||||
url = f"https://www.douyin.com/video/{safe_video_id}"
|
||||
return f"[原片 @ {mm}:{ss}]({url})"
|
||||
else:
|
||||
return f"({mm}:{ss})"
|
||||
|
||||
13
backend/app/utils/screenshot_marker.py
Normal file
13
backend/app/utils/screenshot_marker.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
def extract_screenshot_timestamps(markdown: str) -> List[Tuple[str, int]]:
    """Find every screenshot marker in a note and convert it to seconds.

    Recognised forms are ``Screenshot-MM:SS`` and ``Screenshot-[MM:SS]``,
    each optionally preceded by a single ``*``.

    Args:
        markdown: The note text to scan. An empty or ``None`` value yields
            an empty list instead of raising.

    Returns:
        One ``(marker, seconds)`` tuple per match, in order of appearance.
        ``marker`` is the exact matched text (including a leading ``*`` if
        present, but never a trailing one) and ``seconds`` is
        ``MM * 60 + SS``.
    """
    if not markdown:
        return []

    # Group 1 is the whole marker; groups 2/3 hold MM/SS of the bracketed
    # form and groups 4/5 hold MM/SS of the plain form.
    # NOTE(review): minutes are limited to two digits, so a marker at or
    # beyond 100 minutes is not matched - confirm whether such markers occur.
    pattern = r"(\*?Screenshot-(?:\[(\d{2}):(\d{2})\]|(\d{2}):(\d{2})))"

    results: List[Tuple[str, int]] = []
    for match in re.finditer(pattern, markdown):
        mm = match.group(2) or match.group(4)
        ss = match.group(3) or match.group(5)
        total_seconds = int(mm) * 60 + int(ss)
        results.append((match.group(1), total_seconds))
    return results
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@@ -14,6 +15,7 @@ class VideoReader:
|
||||
video_path: str,
|
||||
grid_size=(3, 3),
|
||||
frame_interval=2,
|
||||
dedupe_enabled=True,
|
||||
unit_width=960,
|
||||
unit_height=540,
|
||||
save_quality=90,
|
||||
@@ -23,6 +25,7 @@ class VideoReader:
|
||||
self.video_path = video_path
|
||||
self.grid_size = grid_size
|
||||
self.frame_interval = frame_interval
|
||||
self.dedupe_enabled = dedupe_enabled
|
||||
self.unit_width = unit_width
|
||||
self.unit_height = unit_height
|
||||
self.save_quality = save_quality
|
||||
@@ -31,6 +34,14 @@ class VideoReader:
|
||||
print(f"视频路径:{video_path}",self.frame_dir,self.grid_dir)
|
||||
self.font_path = font_path
|
||||
|
||||
@staticmethod
|
||||
def _calculate_file_md5(file_path: str) -> str:
|
||||
hasher = hashlib.md5()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(8192), b""):
|
||||
hasher.update(chunk)
|
||||
return hasher.hexdigest()
|
||||
|
||||
def format_time(self, seconds: float) -> str:
|
||||
mm = int(seconds // 60)
|
||||
ss = int(seconds % 60)
|
||||
@@ -51,12 +62,21 @@ class VideoReader:
|
||||
timestamps = [i for i in range(0, int(duration), self.frame_interval)][:max_frames]
|
||||
|
||||
image_paths = []
|
||||
last_hash = None
|
||||
for ts in timestamps:
|
||||
time_label = self.format_time(ts)
|
||||
output_path = os.path.join(self.frame_dir, f"frame_{time_label}.jpg")
|
||||
cmd = ["ffmpeg", "-ss", str(ts), "-i", self.video_path, "-frames:v", "1", "-q:v", "2", "-y", output_path,
|
||||
"-hide_banner", "-loglevel", "error"]
|
||||
subprocess.run(cmd, check=True)
|
||||
|
||||
if self.dedupe_enabled:
|
||||
frame_hash = self._calculate_file_md5(output_path)
|
||||
if frame_hash == last_hash:
|
||||
os.remove(output_path)
|
||||
continue
|
||||
last_hash = frame_hash
|
||||
|
||||
image_paths.append(output_path)
|
||||
return image_paths
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user