From 30da57ddab7c81b9a51ab85e216d13fe0ff60eea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=80=9D=E8=AF=BA=E7=89=B9?= <jefferyhcool@gmail.com>
Date: Sun, 27 Apr 2025 09:05:37 +0800
Subject: [PATCH] =?UTF-8?q?refactor(backend):=20=E9=87=8D=E6=9E=84?=
 =?UTF-8?q?=E5=90=8E=E7=AB=AF=E9=85=8D=E7=BD=AE=E5=B9=B6=E4=BC=98=E5=8C=96?=
 =?UTF-8?q?=20GPT=20=E6=A8=A1=E5=9E=8B=E5=8F=8A=E8=BD=AC=E5=BD=95=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 移除 .env.example 中的后端专用 AI 配置
- 在 GPT 模型中添加新字段：style、extras 和 _format
- 注释掉 MLX Whisper 转录器中的 on_finish 方法调用
- 更新 GPT 提示模板（改为中文，并将任务说明移至转录内容之后），增加时间标记要求
---
 backend/.env.example                          | 15 -------
 backend/app/gpt/prompt.py                     | 44 ++++++++++---------
 backend/app/models/gpt_model.py               |  3 ++
 .../transcriber/mlx_whisper_transcriber.py    |  2 +-
 4 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index 19e42dd..0b37d5f 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -7,21 +7,6 @@ STATIC=/static # 外部访问路径（URL 前缀）
 OUT_DIR=./static/screenshots    # 本地输出目录
 IMAGE_BASE_URL=/static/screenshots  # 图片访问 URL
 DATA_DIR=data
-
-# 后端专用
-
-# AI 相关配置
-OPENAI_API_KEY=
-OPENAI_API_BASE_URL=
-OPENAI_MODEL=
-DEEP_SEEK_API_KEY=
-DEEP_SEEK_API_BASE_URL=
-DEEP_SEEK_MODEL
-QWEN_API_KEY=
-QWEN_API_BASE_URL=
-QWEN_MODEL=
-MODEl_PROVIDER= #如果不是openai 请修改 deepseek/qwen
-
 # transcriber 相关配置
 TRANSCRIBER_TYPE=fast-whisper # fast-whisper/bcut/kuaishou
 WHISPER_MODEL_SIZE=base
\ No newline at end of file
diff --git a/backend/app/gpt/prompt.py b/backend/app/gpt/prompt.py
index 1431939..a5d0a77 100644
--- a/backend/app/gpt/prompt.py
+++ b/backend/app/gpt/prompt.py
@@ -1,40 +1,42 @@
 BASE_PROMPT = '''
-You are a professional note-taking assistant who excels at summarizing video transcripts into clear, structured, and information-rich notes.
+你是一个专业的笔记助手，擅长将视频转录内容整理成清晰、有条理且信息丰富的笔记。
 
-🎯 Language Requirement:
-- The notes must be written in **Chinese**.
-- Proper nouns, technical terms, brand names, and personal names should remain in **English** where appropriate.
+语言要求：
+- 笔记必须使用 **中文** 撰写。
+- 专有名词、技术术语、品牌名称和人名应适当保留 **英文**。
 
-📌 Video Title:
+视频标题：
 {video_title}
 
-📎 Video Tags:
+视频标签：
 {tags}
 
-📝 Your Task:
-Based on the segmented transcript below, generate structured notes in standard **Markdown format**, and follow these principles:
-
-1. **Complete information**: Record as much relevant detail as possible to ensure comprehensive coverage.
-2. **Clear structure**: Organize content with logical sectioning. Use appropriate heading levels (`##`, `###`) to summarize key points in each section.
-3. **Concise wording**: Use accurate, clear, and professional Chinese expressions.
-4. **Remove irrelevant content**: Omit advertisements, filler words, casual greetings, and off-topic remarks.
-5. **Keep critical details**: Preserve important facts, examples, conclusions, and recommendations.
-6. **Readable layout**: Use bullet points where needed, and keep paragraphs reasonably short to enhance readability.
-7. **Table of Contents**: Generate a table of contents at the top based on the `##` level headings.
 
 
-⚠️ Output Instructions:
-- Only return the final **Markdown content**.
-- Do **not** wrap the output in code blocks like ```` ```markdown ```` or ```` ``` ````.
+输出说明：
+- 仅返回最终的 **Markdown 内容**。
+- **不要**将输出包裹在代码块中（例如：```` ```markdown ````，```` ``` ````）。
 
-
-🎬 Transcript Segments (Format: Start Time - Text):
+视频分段（格式：开始时间 - 内容）：
 
 ---
 {segment_text}
 ---
+
+你的任务：
+根据上面的分段转录内容，生成结构化的笔记，遵循以下原则：
+
+1. **完整信息**：记录尽可能多的相关细节，确保内容全面。
+2. **清晰结构**：用合适的标题级别（`##`，`###`）整理内容，概述每个部分的要点。
+3. **去除无关内容**：省略广告、填充词、问候语和不相关的言论。
+4. **保留关键细节**：保留重要事实、示例、结论和建议。
+5. **可读布局**：必要时使用项目符号，并保持段落简短，增强可读性。
+
+额外重要的任务如下(每一个都必须严格完成):
+
 '''
 
+
 LINK='''
 9. **Add time markers**: THIS IS IMPORTANT For every main heading (`##`), append the starting time of that segment using the format ,start with *Content ,eg: `*Content-[mm:ss]`.
 
diff --git a/backend/app/models/gpt_model.py b/backend/app/models/gpt_model.py
index d1c4aec..2e334c5 100644
--- a/backend/app/models/gpt_model.py
+++ b/backend/app/models/gpt_model.py
@@ -11,4 +11,7 @@ class GPTSource:
     tags:str
     screenshot: Optional[bool] = False
     link: Optional[bool] = False
+    style: Optional[str] = None
+    extras: Optional[str] = None
+    _format: Optional[list] = None
 
diff --git a/backend/app/transcriber/mlx_whisper_transcriber.py b/backend/app/transcriber/mlx_whisper_transcriber.py
index b253acc..e3de4a0 100644
--- a/backend/app/transcriber/mlx_whisper_transcriber.py
+++ b/backend/app/transcriber/mlx_whisper_transcriber.py
@@ -74,7 +74,7 @@ class MLXWhisperTranscriber(Transcriber):
                 raw=result
             )
             
-            self.on_finish(file_path, transcript_result)
+            # self.on_finish(file_path, transcript_result)
             return transcript_result
             
         except Exception as e: