feat(frontend): 新增多版本笔记功能,并做了向下兼容。

- 新增关于页面组件,介绍项目背景、功能和使用方法
- 重构笔记生成逻辑,支持多版本笔记
- 新增笔记版本选择、复制和导出功能
- 优化笔记界面布局和交互
- 调整部分组件样式,提升用户体验
This commit is contained in:
黄建武
2025-05-04 11:00:54 +08:00
parent c492f0780b
commit 97f153646f
29 changed files with 1499 additions and 407 deletions

View File

@@ -7,6 +7,7 @@ import yt_dlp
from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
from app.models.notes_model import AudioDownloadResult
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
class BilibiliDownloader(Downloader, ABC):
@@ -69,10 +70,19 @@ class BilibiliDownloader(Downloader, ABC):
"""
下载视频,返回视频文件路径
"""
if output_dir is None:
output_dir = get_data_dir()
os.makedirs(output_dir, exist_ok=True)
print("video_url",video_url)
video_id=extract_video_id(video_url, "bilibili")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
if os.path.exists(video_path):
return video_path
# 检查是否已经存在
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
ydl_opts = {

View File

@@ -249,13 +249,21 @@ class DouyinDownloader(Downloader):
)
def download_video(self, video_url: str, output_dir: Union[str, None] = None) -> str:
try:
if output_dir is None:
output_dir = get_data_dir()
if not output_dir:
output_dir = self.cache_data
os.makedirs(output_dir, exist_ok=True)
video_id = self.extract_video_id(video_url)
video_path = os.path.join(output_dir, f"{video_id}.mp4")
if os.path.exists(video_path):
return video_path
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
video_data = self.fetch_video_info(video_url)

View File

@@ -7,6 +7,7 @@ import yt_dlp
from app.downloaders.base import Downloader, DownloadQuality
from app.models.notes_model import AudioDownloadResult
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
class YoutubeDownloader(Downloader, ABC):
@@ -67,12 +68,15 @@ class YoutubeDownloader(Downloader, ABC):
"""
if output_dir is None:
output_dir = get_data_dir()
video_id = extract_video_id(video_url, "youtube")
video_path = os.path.join(output_dir, f"{video_id}.mp4")
if os.path.exists(video_path):
return video_path
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
ydl_opts = {
'format': 'worst[ext=mp4]/worst',
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
'outtmpl': output_path,
'noplaylist': True,
'quiet': False,

View File

@@ -35,11 +35,10 @@ BASE_PROMPT = '''
根据上面的分段转录内容,生成结构化的笔记,遵循以下原则:
1. **完整信息**:记录尽可能多的相关细节,确保内容全面。
2. **清晰结构**:用合适的标题级别(`##``###`)整理内容,概述每个部分的要点。主标题用`#`来标识(如果额外重要的任务有格式需求可以不遵守)
3. **去除无关内容**:省略广告、填充词、问候语和不相关的言论。
4. **保留关键细节**:保留重要事实、示例、结论和建议。(如果额外重要的任务有格式需求可以不遵守)
5. **可读布局**:必要时使用项目符号,并保持段落简短,增强可读性。(如果额外重要的任务有格式需求可以不遵守)
6. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现,适合 Markdown 渲染。
2. **去除无关内容**:省略广告、填充词、问候语和不相关的言论。
3. **保留关键细节**:保留重要事实、示例、结论和建议。(如果额外重要的任务有格式需求可以不遵守)
4. **可读布局**:必要时使用项目符号,并保持段落简短,增强可读性。(如果额外重要的任务有格式需求可以不遵守)
5. 视频中提及的数学公式必须保留,并以 LaTeX 语法形式呈现,适合 Markdown 渲染。
请始终遵循此规则。

View File

@@ -58,7 +58,7 @@ def get_format_function(format_type):
def get_style_format(style):
style_map = {
'minimal': '1. **精简信息**: 仅记录最重要的内容,简洁明了。',
'detailed': '2. **详细记录**: 包含完整的时间戳和每个部分的详细讨论。',
'detailed': '2. **详细记录**: 包含完整的内容和每个部分的详细讨论。需要尽可能多的记录视频内容,最好详细的笔记',
'academic': '3. **学术风格**: 适合学术报告,正式且结构化。',
'xiaohongshu': '''4. **小红书风格**:
### 擅长使用下面的爆款关键词:

View File

@@ -217,17 +217,25 @@ def get_task_status(task_id: str):
@router.get("/image_proxy")
async def image_proxy(request: Request, url: str):
    """Fetch a remote image and relay it to the caller.

    The upstream request is sent with a bilibili ``Referer`` and the
    caller's own ``User-Agent``. A non-200 upstream status is passed
    through unchanged; any other failure is reported as HTTP 500.

    Args:
        request: Incoming request; only its ``User-Agent`` header is read.
        url: Address of the image to fetch.

    Returns:
        A ``StreamingResponse`` carrying the upstream body, the upstream
        content type (``image/jpeg`` when absent) and a one-day
        ``Cache-Control`` header.

    Raises:
        HTTPException: With the upstream status when it is not 200,
            otherwise with status 500 and the error text as detail.
    """
    # NOTE(review): `url` is caller-supplied and fetched without validation;
    # consider restricting it to an allow-list of image hosts.
    headers = {
        "Referer": "https://www.bilibili.com/",
        "User-Agent": request.headers.get("User-Agent", ""),
    }

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(url, headers=headers)

            if resp.status_code != 200:
                raise HTTPException(status_code=resp.status_code, detail="图片获取失败")

            content_type = resp.headers.get("Content-Type", "image/jpeg")
            return StreamingResponse(
                resp.aiter_bytes(),
                media_type=content_type,
                headers={
                    "Cache-Control": "public, max-age=86400",  # cache for one day
                    "Content-Type": content_type,
                },
            )
    except HTTPException:
        # Keep the upstream status; without this clause the generic handler
        # below would catch it and rewrite every upstream error to a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

View File

@@ -47,26 +47,26 @@ def add_provider(data: ProviderRequest):
@router.get("/get_all_providers")
def get_all_providers():
try:
res = ProviderService.get_all_providers()
res = ProviderService.get_all_providers_safe()
return R.success(data=res)
except Exception as e:
return R.error(msg=e)
@router.get("/get_provider_by_id/{id}")
def get_provider_by_id(id: str):
try:
res = ProviderService.get_provider_by_id(id)
return R.success(data=res)
except Exception as e:
return R.error(msg=e)
@router.get("/get_provider_by_name/{name}")
def get_provider_by_name(name: str):
try:
res = ProviderService.get_provider_by_name(name)
return R.success(data=res)
except Exception as e:
return R.error(msg=e)
# @router.get("/get_provider_by_id/{id}")
# def get_provider_by_id(id: str):
# try:
# res = ProviderService.get_provider_by_id(id)
# return R.success(data=res)
# except Exception as e:
# return R.error(msg=e)
#
# @router.get("/get_provider_by_name/{name}")
# def get_provider_by_name(name: str):
# try:
# res = ProviderService.get_provider_by_name(name)
# return R.success(data=res)
# except Exception as e:
# return R.error(msg=e)
@router.post("/update_provider")

View File

@@ -45,7 +45,16 @@ class ModelService:
except Exception as e:
print(f"获取所有模型失败: {e}")
return []
@staticmethod
def get_all_models_safe(verbose: bool = False):
    """Return every stored model in formatted form, or ``[]`` on failure.

    Args:
        verbose: When true, print the raw model list before formatting.

    Returns:
        The result of ``ModelService._format_models`` applied to the raw
        models, or an empty list if loading or formatting raised.
    """
    try:
        models = get_all_models()
        if verbose:
            print(f"所有模型列表: {models}")
        formatted = ModelService._format_models(models)
    except Exception as e:
        # Best effort: report the problem and fall back to an empty list.
        print(f"获取所有模型失败: {e}")
        return []
    return formatted
@staticmethod
def _format_models(raw_models: list) -> list:
"""

View File

@@ -140,7 +140,6 @@ class NoteGenerator:
replacement = f"![]({image_url})"
new_markdown = new_markdown.replace(marker, replacement, 1)
return new_markdown
except Exception as e:
logger.error(f"截图生成失败:{e}")
@@ -201,16 +200,23 @@ class NoteGenerator:
# -------- 1. 下载音频 --------
try:
self.update_task_status(task_id, TaskStatus.DOWNLOADING)
# 加载音频缓存(如果存在)
audio = None
if os.path.exists(audio_cache_path):
logger.info(f"检测到已有音频缓存直接读取task_id={task_id}")
with open(audio_cache_path, "r", encoding="utf-8") as f:
audio_data = json.load(f)
audio = AudioDownloadResult(**audio_data)
else:
if 'screenshot' in _format or video_understanding:
# 需要视频的情况(截图 or 视频理解)
need_video = 'screenshot' in _format or video_understanding
if need_video:
try:
video_path = downloader.download_video(video_url)
self.video_path = video_path
logger.info(f"成功下载视频文件: {video_path}")
video_img_urls = VideoReader(
video_path=video_path,
grid_size=tuple(grid_size),
@@ -219,13 +225,17 @@ class NoteGenerator:
unit_height=720,
save_quality=90,
).run()
except Exception as e:
logger.error(f"❌ 下载视频失败task_id={task_id},错误信息:{e}")
self.update_task_status(task_id, TaskStatus.FAILED, message=f"下载音频失败:{e}")
screenshot = 'screenshot' in _format
audio: AudioDownloadResult = downloader.download(
# 没有音频缓存就下载音频(可能同时也带上视频)
if audio is None:
audio = downloader.download(
video_url=video_url,
quality=quality,
output_dir=path,
need_video=screenshot
need_video='screenshot' in _format, # 注意这里只为了截图需要
)
with open(audio_cache_path, "w", encoding="utf-8") as f:
json.dump(asdict(audio), f, ensure_ascii=False, indent=2)
@@ -266,27 +276,27 @@ class NoteGenerator:
# -------- 3. 总结内容 --------
try:
self.update_task_status(task_id, TaskStatus.SUMMARIZING)
if os.path.exists(markdown_cache_path):
logger.info(f"检测到已有总结缓存直接读取task_id={task_id}")
with open(markdown_cache_path, "r", encoding="utf-8") as f:
markdown = f.read()
else:
source = GPTSource(
title=audio.title,
segment=transcript.segments,
tags=audio.raw_info.get('tags'),
screenshot=screenshot,
video_img_urls=video_img_urls,
link=link,
_format=_format,
style=style,
extras=extras
)
# if os.path.exists(markdown_cache_path):
# logger.info(f"检测到已有总结缓存直接读取task_id={task_id}")
# with open(markdown_cache_path, "r", encoding="utf-8") as f:
# markdown = f.read()
# else:
source = GPTSource(
title=audio.title,
segment=transcript.segments,
tags=audio.raw_info.get('tags'),
screenshot=screenshot,
video_img_urls=video_img_urls,
link=link,
_format=_format,
style=style,
extras=extras
)
markdown: str = gpt.summarize(source)
with open(markdown_cache_path, "w", encoding="utf-8") as f:
f.write(markdown)
logger.info(f"GPT总结并缓存成功task_id={task_id}")
markdown: str = gpt.summarize(source)
with open(markdown_cache_path, "w", encoding="utf-8") as f:
f.write(markdown)
logger.info(f"GPT总结并缓存成功task_id={task_id}")
except Exception as e:
logger.error(f"❌ 总结内容失败task_id={task_id},错误信息:{e}")
self.update_task_status(task_id, TaskStatus.FAILED, message=f"总结内容失败:{e}")
@@ -313,10 +323,11 @@ class NoteGenerator:
# -------- 6. 完成 --------
self.update_task_status(task_id, TaskStatus.SUCCESS)
logger.info(f"✅ 笔记生成成功task_id={task_id}")
if platform != 'local':
transcription_finished.send({
"file_path": audio.file_path,
})
# TODO :改为前端一键清除缓存
# if platform != 'local':
# transcription_finished.send({
# "file_path": audio.file_path,
# })
return NoteResult(
markdown=markdown,
transcript=transcript,

View File

@@ -14,6 +14,7 @@ from app.models.model_config import ModelConfig
class ProviderService:
@staticmethod
def serialize_provider(row: tuple) -> dict:
if not row:
@@ -28,7 +29,25 @@ class ProviderService:
"enabled": row[6],
"created_at": row[7],
}
@staticmethod
def serialize_provider_safe(row: tuple) -> dict:
if not row:
return None
return {
"id": row[0],
"name": row[1],
"logo": row[2],
"type": row[3],
"api_key": ProviderService.mask_key(row[4]),
"base_url": row[5],
"enabled": row[6],
"created_at": row[7],
}
@staticmethod
def mask_key(key: str) -> str:
if not key or len(key) < 8:
return '*' * len(key)
return key[:4] + '*' * (len(key) - 8) + key[-4:]
@staticmethod
def add_provider( name: str, api_key: str, base_url: str, logo: str, type_: str, enabled: int = 1):
try:
@@ -42,7 +61,10 @@ class ProviderService:
def get_all_providers():
rows = get_all_providers()
return [ProviderService.serialize_provider(row) for row in rows] if rows else []
@staticmethod
def get_all_providers_safe():
    """Return all providers as dicts with their API keys masked.

    Returns:
        One dict per stored provider, built by
        ``serialize_provider_safe``; an empty list when there are no rows.
    """
    rows = get_all_providers()
    if not rows:
        return []
    # Use the masking serializer: the plain one would return raw API keys.
    return [ProviderService.serialize_provider_safe(row) for row in rows]
@staticmethod
def get_provider_by_name(name: str):
row = get_provider_by_name(name)