🐞 fix: 增加错误之后对已解析段落的缓存功能,再次重试时不再重头开始

解析长视频时,当附件大小过大时不再调用后进行报错,而是将附件进行分批次发送

在每篇笔记开头默认增加地址来源链接,对模糊处可溯源
This commit is contained in:
CyanAutumn
2026-02-12 18:28:11 +08:00
parent 7b45db2f59
commit d9a7b89e7d
67 changed files with 279293 additions and 64 deletions

View File

@@ -1,7 +1,6 @@
import json
import logging
import os
import re
from dataclasses import asdict
from pathlib import Path
from typing import List, Optional, Tuple, Union, Any
@@ -32,7 +31,8 @@ from app.services.constant import SUPPORT_PLATFORM_MAP
from app.services.provider import ProviderService
from app.transcriber.base import Transcriber
from app.transcriber.transcriber_provider import get_transcriber, _transcribers
from app.utils.note_helper import replace_content_markers
from app.utils.note_helper import replace_content_markers, prepend_source_link
from app.utils.screenshot_marker import extract_screenshot_timestamps
from app.utils.status_code import StatusCode
from app.utils.video_helper import generate_screenshot
from app.utils.video_reader import VideoReader
@@ -182,6 +182,8 @@ class NoteGenerator:
platform=platform,
)
markdown = prepend_source_link(markdown, str(video_url))
# 5. 保存记录到数据库
self._update_status(task_id, TaskStatus.SAVING)
self._save_metadata(video_id=audio_meta.video_id, platform=platform, task_id=task_id)
@@ -353,6 +355,10 @@ class NoteGenerator:
# 判断是否需要下载视频
need_video = screenshot or video_understanding
if screenshot and not grid_size:
grid_size = [2, 2]
frame_interval = video_interval if video_interval and video_interval > 0 else 6
if need_video:
try:
logger.info("开始下载视频")
@@ -365,10 +371,10 @@ class NoteGenerator:
self.video_img_urls=VideoReader(
video_path=str(self.video_path),
grid_size=tuple(grid_size),
frame_interval=video_interval,
unit_width=1280,
unit_height=720,
save_quality=90,
frame_interval=frame_interval,
unit_width=960,
unit_height=540,
save_quality=80,
).run()
else:
logger.info("未指定 grid_size跳过缩略图生成")
@@ -540,6 +546,7 @@ class NoteGenerator:
_format=formats,
style=style,
extras=extras,
checkpoint_key=task_id,
)
try:
@@ -592,7 +599,7 @@ class NoteGenerator:
:param video_path: 本地视频文件路径
:return: 替换后的 Markdown 字符串
"""
matches: List[Tuple[str, int]] = self._extract_screenshot_timestamps(markdown)
matches: List[Tuple[str, int]] = extract_screenshot_timestamps(markdown)
for idx, (marker, ts) in enumerate(matches):
try:
img_path = generate_screenshot(str(video_path), str(IMAGE_OUTPUT_DIR), ts, idx)
@@ -615,14 +622,7 @@ class NoteGenerator:
:param markdown: 原始 Markdown 文本
:return: 标记与对应时间戳秒数的列表
"""
pattern = r"(?:\*Screenshot-(\d{2}):(\d{2})|Screenshot-\[(\d{2}):(\d{2})\])"
results: List[Tuple[str, int]] = []
for match in re.finditer(pattern, markdown):
mm = match.group(1) or match.group(3)
ss = match.group(2) or match.group(4)
total_seconds = int(mm) * 60 + int(ss)
results.append((match.group(0), total_seconds))
return results
return extract_screenshot_timestamps(markdown)
def _save_metadata(self, video_id: str, platform: str, task_id: str) -> None:
"""
@@ -636,4 +636,4 @@ class NoteGenerator:
insert_video_task(video_id=video_id, platform=platform, task_id=task_id)
logger.info(f"已保存任务记录到数据库 (video_id={video_id}, platform={platform}, task_id={task_id})")
except Exception as e:
logger.error(f"保存任务记录失败:{e}")
logger.error(f"保存任务记录失败:{e}")