mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-11 18:10:06 +08:00
feat: 新增模型管理和供应商配置功能
### v1.1.0 - #### Added - 新增 AI 笔记风格选择 - 新增 AI 笔记返回格式选择 - 添加 AI 自定义笔记备注 Prompt - 添加任务失败重试 - 添加全局设置页,可在设置页进行模型设置 - #### Optimize - 优化前端样式,优化用户体验 - 增加生成中间产物,可用于失败后加快生成速度 - #### Fix - 修复视频截图视频过早删除错误
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
from fastapi import FastAPI
|
||||
from .routers import note, provider
|
||||
from .routers import note, provider,model
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
    """Build and configure the BiliNote FastAPI application."""
    application = FastAPI(title="BiliNote")

    # Every router is mounted under the shared /api prefix.
    for api_router in (note.router, provider.router, model.router):
        application.include_router(api_router, prefix="/api")

    return application
|
||||
|
||||
42
backend/app/db/builtin_providers.json
Normal file
42
backend/app/db/builtin_providers.json
Normal file
@@ -0,0 +1,42 @@
|
||||
[
|
||||
{
|
||||
"id": "openai",
|
||||
"name": "OpenAI",
|
||||
"type": "built-in",
|
||||
"logo": "OpenAI",
|
||||
"api_key": "",
|
||||
"base_url": "https://api.openai.com/v1"
|
||||
},
|
||||
{
|
||||
"id": "deepseek",
|
||||
"name": "DeepSeek",
|
||||
"type": "built-in",
|
||||
"logo": "DeepSeek",
|
||||
"api_key": "",
|
||||
"base_url": "https://api.deepseek.com"
|
||||
},
|
||||
{
|
||||
"id": "qwen",
|
||||
"name": "Qwen",
|
||||
"type": "built-in",
|
||||
"logo": "Qwen",
|
||||
"api_key": "",
|
||||
"base_url": "https://qwen.aliyun.com/api"
|
||||
},
|
||||
{
|
||||
"id": "doubao",
|
||||
"name": "豆包 (Doubao)",
|
||||
"type": "built-in",
|
||||
"logo": "Doubao",
|
||||
"api_key": "",
|
||||
"base_url": "https://open.doubao.com/api"
|
||||
},
|
||||
{
|
||||
"id": "Claude",
|
||||
"name": "Claude",
|
||||
"type": "built-in",
|
||||
"logo": "Claude",
|
||||
"api_key": "",
|
||||
"base_url": "https://"
|
||||
}
|
||||
]
|
||||
58
backend/app/db/model_dao.py
Normal file
58
backend/app/db/model_dao.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from app.db.sqlite_client import get_connection
|
||||
|
||||
def init_model_table():
    """Create the `models` table if it does not exist.

    Each row maps a model name to the provider (by id) that serves it.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                provider_id INTEGER NOT NULL,
                model_name TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
    finally:
        # Release the handle even when the DDL fails (original leaked it).
        conn.close()
|
||||
|
||||
# 插入模型
|
||||
def insert_model(provider_id: int, model_name: str):
    """Insert one (provider_id, model_name) row into the `models` table."""
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO models (provider_id, model_name)
            VALUES (?, ?)
        """, (provider_id, model_name))
        conn.commit()
    finally:
        # Close even if the INSERT raises, so the connection is not leaked.
        conn.close()
|
||||
|
||||
# 根据provider查模型
|
||||
def get_models_by_provider(provider_id: int):
    """Return [{'id', 'model_name'}, ...] for every model of *provider_id*."""
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT id, model_name FROM models
            WHERE provider_id = ?
        """, (provider_id,))
        rows = cursor.fetchall()
    finally:
        # Guarantee cleanup even when the query fails (original leaked it).
        conn.close()
    return [{"id": row[0], "model_name": row[1]} for row in rows]
|
||||
|
||||
# 删除某个模型
|
||||
def delete_model(model_id: int):
    """Delete the model row identified by *model_id* (no-op when absent)."""
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            DELETE FROM models WHERE id = ?
        """, (model_id,))
        conn.commit()
    finally:
        # Close even on failure, so the connection is not leaked.
        conn.close()
|
||||
|
||||
def get_all_models():
    """Return every model row as [{'id', 'provider_id', 'model_name'}, ...]."""
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT id, provider_id, model_name FROM models
        """)
        rows = cursor.fetchall()
    finally:
        # Guarantee cleanup even when the query fails (original leaked it).
        conn.close()
    return [{"id": row[0], "provider_id": row[1], "model_name": row[2]} for row in rows]
|
||||
@@ -1,8 +1,59 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from app.db.sqlite_client import get_connection
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
|
||||
def seed_default_providers():
    """Populate the providers table from builtin_providers.json.

    Idempotent: if any provider row already exists, seeding is skipped so
    user-managed rows are never overwritten.
    """
    conn = get_connection()
    if conn is None:
        logger.error("Failed to connect to database.")
        return

    cursor = conn.cursor()

    # Seed only an empty table.
    cursor.execute("SELECT COUNT(*) FROM providers")
    if cursor.fetchone()[0] > 0:
        logger.info("Providers already exist, skipping seed.")
        conn.close()
        return

    json_path = os.path.join(os.path.dirname(__file__), 'builtin_providers.json')
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            providers = json.load(f)
    except Exception as e:
        logger.error(f"Failed to read builtin_providers.json: {e}")
        conn.close()
        return

    insert_sql = """
        INSERT INTO providers (id, name, api_key, base_url, logo, type, enabled)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """
    try:
        for entry in providers:
            cursor.execute(insert_sql, (
                entry['id'],
                entry['name'],
                entry['api_key'],
                entry['base_url'],
                entry['logo'],
                entry['type'],
                entry.get('enabled', 1),
            ))
        conn.commit()
        logger.info("Default providers seeded successfully.")
    except Exception as e:
        logger.error(f"Failed to seed default providers: {e}")
    finally:
        conn.close()
|
||||
def init_provider_table():
|
||||
conn = get_connection()
|
||||
if conn is None:
|
||||
@@ -11,40 +62,60 @@ def init_provider_table():
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS providers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
logo TEXT NOT NULL,
|
||||
type TEXT NOT NULL, -- ✅ 新增字段
|
||||
type TEXT NOT NULL,
|
||||
api_key TEXT NOT NULL,
|
||||
base_url TEXT NOT NULL,
|
||||
enabled INTEGER DEFAULT 1,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
try:
|
||||
conn.commit()
|
||||
conn.close()
|
||||
logger.info("provider table created successfully.")
|
||||
seed_default_providers()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create provider table: {e}")
|
||||
def insert_provider(name: str, api_key: str, base_url: str, logo: str, type_: str):
|
||||
def insert_provider(id: str, name: str, api_key: str, base_url: str, logo: str, type_: str,enabled:int=1):
|
||||
conn = get_connection()
|
||||
if conn is None:
|
||||
logger.error("Failed to connect to the database.")
|
||||
return
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
INSERT INTO providers (name, api_key, base_url, logo, type)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""", (name, api_key, base_url, logo, type_))
|
||||
INSERT INTO providers (id, name, api_key, base_url, logo, type, enabled)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""", (id, name, api_key, base_url, logo, type_, enabled))
|
||||
try:
|
||||
conn.commit()
|
||||
cursor_id = cursor.lastrowid
|
||||
conn.close()
|
||||
logger.info(f"Provider inserted successfully. name: {name}, type: {type_}")
|
||||
return cursor_id
|
||||
logger.info(f"Provider inserted successfully. id: {id}, name: {name}, type: {type_}")
|
||||
return id
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to insert provider: {e}")
|
||||
return None
|
||||
|
||||
def get_enabled_providers():
    """Return every provider row whose `enabled` flag is 1.

    Returns None when the database is unreachable or the query fails;
    otherwise the raw sqlite row tuples (possibly an empty list).
    """
    conn = get_connection()
    if conn is None:
        logger.error("Failed to connect to the database.")
        return
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT * FROM providers WHERE enabled = 1")
        rows = cursor.fetchall()
        # fetchall() always returns a list, never None — the original
        # `rows is None` branch was dead code. Log emptiness instead.
        if not rows:
            logger.info("No providers found")
        else:
            logger.info(f"Providers found: {rows}")
        return rows
    except Exception as e:
        logger.error(f"Failed to get enabled providers: {e}")
    finally:
        # Close in all paths; the original leaked the connection on error.
        conn.close()
|
||||
def get_provider_by_name(name: str):
|
||||
conn = get_connection()
|
||||
if conn is None:
|
||||
@@ -70,6 +141,7 @@ def get_provider_by_id(id: int):
|
||||
return
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT * FROM providers WHERE id = ?", (id,))
|
||||
|
||||
try:
|
||||
row = cursor.fetchone()
|
||||
conn.close()
|
||||
@@ -99,23 +171,40 @@ def get_all_providers():
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get all providers: {e}")
|
||||
|
||||
def update_provider(id: int, name: str, api_key: str, base_url: str, logo: str, type_: str):
|
||||
def update_provider(id: str, **kwargs):
|
||||
conn = get_connection()
|
||||
if conn is None:
|
||||
logger.error("Failed to connect to the database.")
|
||||
return
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
UPDATE providers
|
||||
SET name = ?, api_key = ?, base_url = ?, logo = ?, type = ?
|
||||
|
||||
fields = []
|
||||
values = []
|
||||
|
||||
for key, value in kwargs.items():
|
||||
fields.append(f"{key} = ?")
|
||||
values.append(value)
|
||||
|
||||
if not fields:
|
||||
logger.warning("No fields provided for update.")
|
||||
return
|
||||
|
||||
sql = f"""
|
||||
UPDATE providers
|
||||
SET {', '.join(fields)}
|
||||
WHERE id = ?
|
||||
""", (name, api_key, base_url, logo, type_, id))
|
||||
"""
|
||||
|
||||
values.append(id) # id 最后加
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute(sql, values)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
logger.info(f"Provider updated successfully. id: {id}, type: {type_}")
|
||||
logger.info(f"Provider updated successfully. id: {id}, updated_fields: {fields}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update provider: {e}")
|
||||
|
||||
def delete_provider(id: int):
|
||||
conn = get_connection()
|
||||
if conn is None:
|
||||
|
||||
28
backend/app/enmus/task_status_enums.py
Normal file
28
backend/app/enmus/task_status_enums.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import enum
|
||||
|
||||
|
||||
class TaskStatus(str, enum.Enum):
    """Lifecycle states of a note-generation task.

    Subclasses ``str`` so members serialize directly into JSON payloads.
    """

    PENDING = "PENDING"
    PARSING = "PARSING"
    DOWNLOADING = "DOWNLOADING"
    TRANSCRIBING = "TRANSCRIBING"
    SUMMARIZING = "SUMMARIZING"
    FORMATTING = "FORMATTING"
    SAVING = "SAVING"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"

    @classmethod
    def description(cls, status):
        """Return the human-readable (Chinese) label for *status*."""
        labels = {
            cls.PENDING: "排队中",
            cls.PARSING: "解析链接",
            cls.DOWNLOADING: "下载中",
            cls.TRANSCRIBING: "转录中",
            cls.SUMMARIZING: "总结中",
            cls.FORMATTING: "格式化中",
            cls.SAVING: "保存中",
            cls.SUCCESS: "完成",
            cls.FAILED: "失败",
        }
        return labels.get(status, "未知状态")
|
||||
@@ -9,5 +9,5 @@ from app.models.model_config import ModelConfig
|
||||
class GPTFactory:
    """Builds a ready-to-use GPT wrapper from a ModelConfig."""

    @staticmethod
    def from_config(config: ModelConfig) -> GPT:
        """Create a UniversalGPT bound to the provider described by *config*."""
        # NOTE(review): downstream code calls client.chat.completions.create
        # and client.models.list, so `client` must be the SDK client instance.
        # The diff dropped the () here, which would pass the bound method
        # instead — restored the call. Confirm get_client is not a @property.
        client = OpenAICompatibleProvider(api_key=config.api_key, base_url=config.base_url).get_client()
        return UniversalGPT(client=client, model=config.model_name)
|
||||
100
backend/app/gpt/prompt_builder.py
Normal file
100
backend/app/gpt/prompt_builder.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from app.gpt.prompt import BASE_PROMPT
|
||||
|
||||
note_formats = [
|
||||
{'label': '目录', 'value': 'toc'},
|
||||
{'label': '原片跳转', 'value': 'link'},
|
||||
{'label': '原片截图', 'value': 'screenshot'},
|
||||
{'label': 'AI总结', 'value': 'summary'}
|
||||
]
|
||||
|
||||
note_styles = [
|
||||
{'label': '精简', 'value': 'minimal'},
|
||||
{'label': '详细', 'value': 'detailed'},
|
||||
{'label': '学术', 'value': 'academic'},
|
||||
{"label": '教程',"value": 'tutorial', },
|
||||
{'label': '小红书', 'value': 'xiaohongshu'},
|
||||
{'label': '生活向', 'value': 'life_journal'},
|
||||
{'label': '任务导向', 'value': 'task_oriented'},
|
||||
{'label': '商业风格', 'value': 'business'},
|
||||
{'label': '会议纪要', 'value': 'meeting_minutes'}
|
||||
]
|
||||
|
||||
|
||||
# 生成 BASE_PROMPT 函数
|
||||
def generate_base_prompt(title, segment_text, tags, _format=None, style=None, extras=None):
    """Assemble the complete note-generation prompt.

    Starts from BASE_PROMPT filled with the video metadata, then appends the
    selected output-format instructions, the note-style description, and any
    free-form extra instructions from the user.
    """
    sections = [BASE_PROMPT.format(
        video_title=title,
        segment_text=segment_text,
        tags=tags,
    )]

    # Each selected output format contributes its own instruction paragraph.
    if _format:
        sections.append("\n".join(get_format_function(f) for f in _format))

    # The note style is a single choice.
    if style:
        sections.append(get_style_format(style))

    prompt = "\n".join(sections)

    # Free-form user remarks go last.
    if extras:
        prompt += f"\n{extras}"

    return prompt
|
||||
|
||||
|
||||
# 获取格式函数
|
||||
def get_format_function(format_type):
    """Return the instruction text for *format_type* ('' when unknown)."""
    if format_type == 'toc':
        return get_toc_format()
    if format_type == 'link':
        return get_link_format()
    if format_type == 'screenshot':
        return get_screenshot_format()
    if format_type == 'summary':
        return get_summary_format()
    # Unknown selections contribute nothing to the prompt.
    return ''
|
||||
|
||||
|
||||
# 风格描述的处理
|
||||
def get_style_format(style):
    """Map a note-style key to its prompt instruction ('' when unknown)."""
    if style == 'minimal':
        return '1. **精简信息**: 仅记录最重要的内容,简洁明了。'
    if style == 'detailed':
        return '2. **详细记录**: 包含完整的时间戳和每个部分的详细讨论。'
    if style == 'academic':
        return '3. **学术风格**: 适合学术报告,正式且结构化。'
    if style == 'xiaohongshu':
        return '4. **小红书风格**: 适合社交平台分享,亲切、口语化。'
    if style == 'life_journal':
        return '5. **生活向**: 记录个人生活感悟,情感化表达。'
    if style == 'task_oriented':
        return '6. **任务导向**: 强调任务、目标,适合工作和待办事项。'
    if style == 'business':
        return '7. **商业风格**: 适合商业报告、会议纪要,正式且精准。'
    if style == 'meeting_minutes':
        return '8. **会议纪要**: 适合商业报告、会议纪要,正式且精准。'
    if style == 'tutorial':
        return "9.**教程笔记**:尽可能详细的记录教程,特别是关键点和一些重要的结论步骤"
    return ''
|
||||
|
||||
|
||||
# 格式化输出内容
|
||||
def get_toc_format():
    """Prompt fragment: ask for a `##`-heading based table of contents."""
    instruction = '''
9. **目录**: 自动生成一个基于 `##` 级标题的目录。不需要插入原片跳转
'''
    return instruction
|
||||
|
||||
|
||||
def get_link_format():
    """Prompt fragment: timestamped jump-links for each major section."""
    instruction = '''
10. **原片跳转**: 为每个主要章节添加时间戳,使用格式 `*Content-[mm:ss]`。
重要:**始终**在章节标题前加上 `*Content` 前缀,例如:`AI 的发展史 *Content-[01:23]`。一定是标题在前 插入标记在后
'''
    return instruction
|
||||
|
||||
|
||||
def get_screenshot_format():
    """Prompt fragment: screenshot markers for visually demonstrated parts."""
    instruction = '''
11. **原片截图**: 如果某个部分涉及**视觉演示**或任何能帮助理解的内容,插入截图提示:
- 格式:`*Screenshot-[mm:ss]`
至少插入 1-3张截图
'''
    return instruction
|
||||
|
||||
|
||||
def get_summary_format():
    """Prompt fragment: append an AI-generated summary under `## AI 总结`."""
    instruction = '''
12. **AI总结**: 在笔记末尾加入简短的AI生成总结,并且二级标题 就是 AI 总结 例如 ## AI 总结。
'''
    return instruction
|
||||
@@ -1,4 +1,5 @@
|
||||
from app.gpt.base import GPT
|
||||
from app.gpt.prompt_builder import generate_base_prompt
|
||||
from app.models.gpt_model import GPTSource
|
||||
from app.gpt.prompt import BASE_PROMPT, AI_SUM, SCREENSHOT, LINK
|
||||
from app.gpt.utils import fix_markdown
|
||||
@@ -28,29 +29,35 @@ class UniversalGPT(GPT):
|
||||
return [TranscriptSegment(**seg) if isinstance(seg, dict) else seg for seg in segments]
|
||||
|
||||
def create_messages(self, segments: List[TranscriptSegment],**kwargs):
|
||||
content = BASE_PROMPT.format(
|
||||
video_title=kwargs.get('title'),
|
||||
print("UniversalGPT",kwargs)
|
||||
content =generate_base_prompt(
|
||||
title=kwargs.get('title'),
|
||||
segment_text=self._build_segment_text(segments),
|
||||
tags=kwargs.get('tags')
|
||||
tags=kwargs.get('tags'),
|
||||
_format=kwargs.get('_format'),
|
||||
style=kwargs.get('style'),
|
||||
extras=kwargs.get('extras')
|
||||
)
|
||||
if self.screenshot:
|
||||
print(":需要截图")
|
||||
content += SCREENSHOT
|
||||
if self.link:
|
||||
print(":需要链接")
|
||||
content += LINK
|
||||
|
||||
print(content)
|
||||
return [{"role": "user", "content": content + AI_SUM}]
|
||||
return [{"role": "user", "content": content }]
|
||||
|
||||
def list_models(self):
|
||||
return self.client.list_models()
|
||||
return self.client.models.list()
|
||||
def summarize(self, source: GPTSource) -> str:
|
||||
self.screenshot = source.screenshot
|
||||
self.link = source.link
|
||||
source.segment = self.ensure_segments_type(source.segment)
|
||||
messages = self.create_messages(source.segment, source.title,source.tags)
|
||||
response = self.client.chat(
|
||||
|
||||
messages = self.create_messages(
|
||||
source.segment,
|
||||
title=source.title,
|
||||
tags=source.tags
|
||||
,
|
||||
_format=source._format,
|
||||
style=source.style,
|
||||
extras=source.extras
|
||||
)
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
temperature=0.7
|
||||
|
||||
36
backend/app/routers/model.py
Normal file
36
backend/app/routers/model.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.services.model import ModelService
|
||||
from app.utils.response import ResponseWrapper as R
|
||||
router = APIRouter()
|
||||
modelService = ModelService()
|
||||
class CreateModelRequest(BaseModel):
    # Request body for POST /models: which provider gets which model name.
    provider_id: str
    model_name: str


# Response body: a single model entry.
class ModelItem(BaseModel):
    id: int
    model_name: str
|
||||
@router.get("/model_list")
def model_list():
    """Return every stored model across all providers."""
    try:
        models = modelService.get_all_models(True)
        return R.success(models, msg="获取模型列表成功")
    except Exception as e:
        return R.error(e)
|
||||
|
||||
@router.get("/model_list/{provider_id}")
def model_list_by_provider(provider_id):
    """Return the models available from a single provider.

    Renamed from `model_list`: the module defined two functions with that
    name, so the second silently shadowed the first at module level (F811).
    The route path is unchanged, so API clients are unaffected.
    """
    try:
        return R.success(modelService.get_all_models_by_id(provider_id))
    except Exception as e:
        return R.error(e)
|
||||
|
||||
@router.post("/models")
def create_model(data: CreateModelRequest):
    """Register a new model name under an existing provider."""
    success = ModelService.add_new_model(data.provider_id, data.model_name)
    if not success:
        # R.error builds a response value — it must be returned, not raised:
        # `raise` on a non-exception object would itself fail with TypeError.
        return R.error("模型添加失败")
    return R.success(msg="模型添加成功")
|
||||
|
||||
@@ -10,13 +10,14 @@ from dataclasses import asdict
|
||||
|
||||
from app.db.video_task_dao import get_task_by_video
|
||||
from app.enmus.note_enums import DownloadQuality
|
||||
from app.services.note import NoteGenerator
|
||||
from app.services.note import NoteGenerator, logger
|
||||
from app.utils.response import ResponseWrapper as R
|
||||
from app.utils.url_parser import extract_video_id
|
||||
from app.validators.video_url_validator import is_supported_video_url
|
||||
from fastapi import APIRouter, Request, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
import httpx
|
||||
from app.enmus.task_status_enums import TaskStatus
|
||||
|
||||
# from app.services.downloader import download_raw_audio
|
||||
# from app.services.whisperer import transcribe_audio
|
||||
@@ -35,6 +36,12 @@ class VideoRequest(BaseModel):
|
||||
quality: DownloadQuality
|
||||
screenshot: Optional[bool] = False
|
||||
link: Optional[bool] = False
|
||||
model_name:str
|
||||
provider_id:str
|
||||
task_id: Optional[str] = None
|
||||
format:Optional[list]=[]
|
||||
style:str=None
|
||||
extras:Optional[str]
|
||||
|
||||
@validator("video_url")
|
||||
def validate_supported_url(cls, v):
|
||||
@@ -54,14 +61,24 @@ def save_note_to_file(task_id: str, note):
|
||||
json.dump(asdict(note), f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def run_note_task(task_id: str, video_url: str, platform: str, quality: DownloadQuality, link: bool = False,screenshot: bool = False):
|
||||
def run_note_task(task_id: str, video_url: str, platform: str, quality: DownloadQuality,
|
||||
link: bool = False,screenshot: bool = False,model_name:str=None,provider_id:str=None,
|
||||
_format:list=None,style:str=None,extras:str=None):
|
||||
try:
|
||||
if not model_name or not provider_id:
|
||||
raise HTTPException(status_code=400, detail="请选择模型和提供者")
|
||||
|
||||
note = NoteGenerator().generate(
|
||||
video_url=video_url,
|
||||
platform=platform,
|
||||
quality=quality,
|
||||
task_id=task_id,
|
||||
model_name=model_name,
|
||||
provider_id=provider_id,
|
||||
link=link,
|
||||
_format=_format,
|
||||
style=style,
|
||||
extras=extras,
|
||||
screenshot=screenshot
|
||||
)
|
||||
print('Note 结果',note)
|
||||
@@ -85,38 +102,91 @@ def generate_note(data: VideoRequest, background_tasks: BackgroundTasks):
|
||||
try:
|
||||
|
||||
video_id = extract_video_id(data.video_url, data.platform)
|
||||
if not video_id:
|
||||
raise HTTPException(status_code=400, detail="无法提取视频 ID")
|
||||
existing = get_task_by_video(video_id, data.platform)
|
||||
if existing:
|
||||
return R.error(
|
||||
msg='笔记已生成,请勿重复发起',
|
||||
# if not video_id:
|
||||
# raise HTTPException(status_code=400, detail="无法提取视频 ID")
|
||||
# existing = get_task_by_video(video_id, data.platform)
|
||||
# if existing:
|
||||
# return R.error(
|
||||
# msg='笔记已生成,请勿重复发起',
|
||||
#
|
||||
# )
|
||||
|
||||
)
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
if data.task_id:
|
||||
# 如果传了task_id,说明是重试!
|
||||
task_id = data.task_id
|
||||
# 更新之前的状态
|
||||
NoteGenerator.update_task_status(task_id, TaskStatus.PENDING)
|
||||
logger.info(f"重试模式,复用已有 task_id={task_id}")
|
||||
else:
|
||||
# 正常新建任务
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
background_tasks.add_task(run_note_task, task_id, data.video_url, data.platform, data.quality,data.link ,data.screenshot)
|
||||
background_tasks.add_task(run_note_task, task_id, data.video_url, data.platform, data.quality,data.link ,data.screenshot,data.model_name,data.provider_id,data.format,data.style,data.extras)
|
||||
return R.success({"task_id": task_id})
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
|
||||
@router.get("/task_status/{task_id}")
|
||||
def get_task_status(task_id: str):
|
||||
path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.json")
|
||||
if not os.path.exists(path):
|
||||
return R.success({"status": "PENDING"})
|
||||
status_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.status.json")
|
||||
result_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.json")
|
||||
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
content = json.load(f)
|
||||
# 优先读状态文件
|
||||
if os.path.exists(status_path):
|
||||
with open(status_path, "r", encoding="utf-8") as f:
|
||||
status_content = json.load(f)
|
||||
|
||||
if "error" in content:
|
||||
return R.error(content["error"], code=500)
|
||||
content['id'] = task_id
|
||||
status = status_content.get("status")
|
||||
message = status_content.get("message", "")
|
||||
|
||||
if status == TaskStatus.SUCCESS.value:
|
||||
# 成功状态的话,继续读取最终笔记内容
|
||||
if os.path.exists(result_path):
|
||||
with open(result_path, "r", encoding="utf-8") as rf:
|
||||
result_content = json.load(rf)
|
||||
return R.success({
|
||||
"status": status,
|
||||
"result": result_content,
|
||||
"message": message,
|
||||
"task_id": task_id
|
||||
})
|
||||
else:
|
||||
# 理论上不会出现,保险处理
|
||||
return R.success({
|
||||
"status": TaskStatus.PENDING.value,
|
||||
"message": "任务完成,但结果文件未找到",
|
||||
"task_id": task_id
|
||||
})
|
||||
|
||||
if status == TaskStatus.FAILED.value:
|
||||
return R.error(message or "任务失败", code=500)
|
||||
|
||||
# 处理中状态
|
||||
return R.success({
|
||||
"status": status,
|
||||
"message": message,
|
||||
"task_id": task_id
|
||||
})
|
||||
|
||||
# 没有状态文件,但有结果
|
||||
if os.path.exists(result_path):
|
||||
with open(result_path, "r", encoding="utf-8") as f:
|
||||
result_content = json.load(f)
|
||||
return R.success({
|
||||
"status": TaskStatus.SUCCESS.value,
|
||||
"result": result_content,
|
||||
"task_id": task_id
|
||||
})
|
||||
|
||||
# 什么都没有,默认PENDING
|
||||
return R.success({
|
||||
"status": "SUCCESS",
|
||||
"result": content
|
||||
"status": TaskStatus.PENDING.value,
|
||||
"message": "任务排队中",
|
||||
"task_id": task_id
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.models.model_config import ModelConfig
|
||||
from app.services.model import ModelService
|
||||
from app.utils.response import ResponseWrapper as R
|
||||
from app.services.provider import ProviderService
|
||||
|
||||
@@ -11,16 +14,21 @@ class ProviderRequest(BaseModel):
|
||||
name: str
|
||||
api_key: str
|
||||
base_url: str
|
||||
logo: str
|
||||
logo: Optional[str] = None
|
||||
type: str
|
||||
|
||||
class TestRequest(BaseModel):
|
||||
|
||||
api_key: str
|
||||
base_url:str
|
||||
class ProviderUpdateRequest(BaseModel):
|
||||
id: int
|
||||
id: str
|
||||
name: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
logo: Optional[str] = None
|
||||
type: Optional[str] = None
|
||||
enabled:Optional[int] = None
|
||||
|
||||
@router.post("/add_provider")
|
||||
def add_provider(data: ProviderRequest):
|
||||
@@ -45,7 +53,7 @@ def get_all_providers():
|
||||
return R.error(msg=e)
|
||||
|
||||
@router.get("/get_provider_by_id/{id}")
|
||||
def get_provider_by_id(id: int):
|
||||
def get_provider_by_id(id: str):
|
||||
try:
|
||||
res = ProviderService.get_provider_by_id(id)
|
||||
return R.success(data=res)
|
||||
@@ -60,23 +68,33 @@ def get_provider_by_name(name: str):
|
||||
except Exception as e:
|
||||
return R.error(msg=e)
|
||||
|
||||
@router.post("/update_provider/")
|
||||
|
||||
@router.post("/update_provider")
|
||||
def update_provider(data: ProviderUpdateRequest):
|
||||
try:
|
||||
if all(
|
||||
field is None
|
||||
for field in [data.name, data.api_key, data.base_url, data.logo, data.type]
|
||||
for field in [data.name, data.api_key, data.base_url, data.logo, data.type,data.enabled]
|
||||
):
|
||||
return R.error(msg='请至少填写一个参数')
|
||||
|
||||
ProviderService.update_provider(
|
||||
id=data.id,
|
||||
name=data.name or '',
|
||||
api_key=data.api_key or '',
|
||||
base_url=data.base_url or '',
|
||||
logo=data.logo or '',
|
||||
type_=data.type or ''
|
||||
data=dict(data)
|
||||
)
|
||||
return R.success(msg='更新模型供应商成功')
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return R.error(msg=e)
|
||||
|
||||
@router.post('/connect_test')
|
||||
def gpt_connect_test(data:TestRequest):
|
||||
try:
|
||||
|
||||
res= ModelService().connect_test(data.api_key,data.base_url)
|
||||
if not res:
|
||||
return R.error(msg='连接失败')
|
||||
return R.success(msg='连接成功')
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return R.error(msg=e)
|
||||
@@ -1,23 +1,109 @@
|
||||
from app.db.model_dao import insert_model, get_all_models
|
||||
from app.db.provider_dao import get_enabled_providers
|
||||
from app.gpt.gpt_factory import GPTFactory
|
||||
from app.gpt.provider.OpenAI_compatible_provider import OpenAICompatibleProvider
|
||||
from app.models.model_config import ModelConfig
|
||||
from app.services.provider import ProviderService
|
||||
|
||||
|
||||
class ModelService:
|
||||
|
||||
@staticmethod
def _build_model_config(provider: dict) -> ModelConfig:
    """Translate a provider row (dict) into a ModelConfig.

    model_name is left blank: the config is only used to reach the
    provider's model-listing endpoint, not to run a specific model.
    """
    display_name = provider["name"]
    return ModelConfig(
        api_key=provider["api_key"],
        base_url=provider["base_url"],
        provider=display_name,
        model_name='',
        name=display_name,
    )
|
||||
|
||||
@staticmethod
def get_model_list(provider_id: int, verbose: bool = False):
    """Fetch the live model list from one provider's API.

    Returns [] when the provider is unknown or the remote call fails.
    """
    provider = ProviderService.get_provider_by_id(provider_id)
    if not provider:
        return []

    try:
        cfg = ModelService._build_model_config(provider)
        client = GPTFactory().from_config(cfg)
        models = client.list_models()
        if verbose:
            print(f"[{provider['name']}] 模型列表: {models}")
        return models
    except Exception as e:
        # Remote/auth failures degrade to an empty list instead of raising.
        print(f"[{provider['name']}] 获取模型失败: {e}")
        return []
|
||||
|
||||
@staticmethod
def get_all_models(verbose: bool = False):
    """Return every stored model formatted for API responses ([] on error)."""
    try:
        # `get_all_models` here is the module-level DAO function, not this method.
        raw = get_all_models()
        if verbose:
            print(f"所有模型列表: {raw}")
        return ModelService._format_models(raw)
    except Exception as e:
        print(f"获取所有模型失败: {e}")
        return []
|
||||
|
||||
@staticmethod
def _format_models(raw_models: list) -> list:
    """Normalize raw model rows into response dicts.

    Each entry carries id / provider_id / model_name / created_at
    (created_at is None when the source row lacks that field).
    """
    # Comprehension replaces the original append loop (same output order).
    return [
        {
            "id": model.get("id"),
            "provider_id": model.get("provider_id"),
            "model_name": model.get("model_name"),
            "created_at": model.get("created_at", None),
        }
        for model in raw_models
    ]
|
||||
@staticmethod
def get_all_models_by_id(provider_id: str, verbose: bool = False):
    """Fetch the live model list for one provider, wrapped as {"models": ...}.

    Returns [] when the lookup or remote call fails (note: the error path
    returns a list while the success path returns a dict — preserved for
    existing callers).
    """
    try:
        provider = ProviderService.get_provider_by_id(provider_id)
        models = ModelService.get_model_list(provider["id"], verbose=verbose)
        return {"models": models}
    except Exception as e:
        print(f"[{provider_id}] 获取模型失败: {e}")
        return []
|
||||
@staticmethod
def connect_test(api_key: str, base_url: str) -> bool:
    """Probe an OpenAI-compatible endpoint; True when it is reachable."""
    try:
        return OpenAICompatibleProvider.test_connection(api_key=api_key, base_url=base_url)
    except Exception as e:
        # Any transport/auth error counts as "not connected".
        print(f"连接测试失败:{e}")
        return False
|
||||
|
||||
@staticmethod
def add_new_model(provider_id: int, model_name: str) -> bool:
    """Attach *model_name* to provider *provider_id*; True on success."""
    try:
        # The provider must already exist before a model can reference it.
        if not ProviderService.get_provider_by_id(provider_id):
            print(f"供应商ID {provider_id} 不存在,无法添加模型")
            return False

        insert_model(provider_id=provider_id, model_name=model_name)
        print(f"模型 {model_name} 已成功添加到供应商ID {provider_id}")
        return True
    except Exception as e:
        print(f"添加模型失败: {e}")
        return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(ModelService.get_model_list(1))
|
||||
# 单个 Provider 测试
|
||||
print(ModelService.get_model_list(1, verbose=True))
|
||||
|
||||
# 所有 Provider 模型测试
|
||||
# print(ModelService.get_all_models(verbose=True))
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import json
|
||||
from dataclasses import asdict
|
||||
|
||||
from app.enmus.task_status_enums import TaskStatus
|
||||
import os
|
||||
from typing import Union
|
||||
from typing import Union, Optional
|
||||
|
||||
from pydantic import HttpUrl
|
||||
|
||||
@@ -10,13 +14,17 @@ from app.downloaders.douyin_downloader import DouyinDownloader
|
||||
from app.downloaders.youtube_downloader import YoutubeDownloader
|
||||
from app.gpt.base import GPT
|
||||
from app.gpt.deepseek_gpt import DeepSeekGPT
|
||||
from app.gpt.gpt_factory import GPTFactory
|
||||
from app.gpt.openai_gpt import OpenaiGPT
|
||||
from app.gpt.qwen_gpt import QwenGPT
|
||||
from app.models.gpt_model import GPTSource
|
||||
from app.models.model_config import ModelConfig
|
||||
from app.models.notes_model import NoteResult
|
||||
from app.models.notes_model import AudioDownloadResult
|
||||
from app.enmus.note_enums import DownloadQuality
|
||||
from app.models.transcriber_model import TranscriptResult
|
||||
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
|
||||
|
||||
from app.services.provider import ProviderService
|
||||
from app.transcriber.base import Transcriber
|
||||
from app.transcriber.transcriber_provider import get_transcriber,_transcribers
|
||||
from app.transcriber.whisper import WhisperTranscriber
|
||||
@@ -29,6 +37,8 @@ from app.utils.video_helper import generate_screenshot
|
||||
# from app.services.gpt import summarize_text
|
||||
from dotenv import load_dotenv
|
||||
from app.utils.logger import get_logger
|
||||
from events import transcription_finished
|
||||
|
||||
logger = get_logger(__name__)
|
||||
load_dotenv()
|
||||
BACKEND_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
|
||||
@@ -37,7 +47,7 @@ output_dir = os.getenv('OUT_DIR')
|
||||
image_base_url = os.getenv('IMAGE_BASE_URL')
|
||||
logger.info("starting up")
|
||||
|
||||
|
||||
NOTE_OUTPUT_DIR = "note_results"
|
||||
|
||||
class NoteGenerator:
|
||||
def __init__(self):
|
||||
@@ -45,26 +55,39 @@ class NoteGenerator:
|
||||
self.device: Union[str, None] = None
|
||||
self.transcriber_type = os.getenv('TRANSCRIBER_TYPE','fast-whisper')
|
||||
self.transcriber = self.get_transcriber()
|
||||
# TODO 需要更换为可调节
|
||||
|
||||
self.provider = os.getenv('MODEl_PROVIDER','openai')
|
||||
self.video_path = None
|
||||
logger.info("初始化NoteGenerator")
|
||||
|
||||
import logging
|
||||
|
||||
def get_gpt(self) -> GPT:
|
||||
if self.provider == 'openai':
|
||||
logger.info("使用OpenAI")
|
||||
return OpenaiGPT()
|
||||
elif self.provider == 'deepSeek':
|
||||
logger.info("使用DeepSeek")
|
||||
return DeepSeekGPT()
|
||||
elif self.provider == 'qwen':
|
||||
logger.info("使用Qwen")
|
||||
return QwenGPT()
|
||||
else:
|
||||
logger.warning("不支持的AI提供商")
|
||||
raise ValueError(f"不支持的AI提供商:{self.provider}")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@staticmethod
|
||||
def update_task_status(task_id: str, status: Union[str, TaskStatus], message: Optional[str] = None):
|
||||
os.makedirs(NOTE_OUTPUT_DIR, exist_ok=True)
|
||||
path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}.status.json")
|
||||
content = {"status": status.value if isinstance(status, TaskStatus) else status}
|
||||
if message:
|
||||
content["message"] = message
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(content, f, ensure_ascii=False, indent=2)
|
||||
|
||||
def get_gpt(self, model_name: str = None, provider_id: str = None) -> GPT:
|
||||
provider = ProviderService.get_provider_by_id(provider_id)
|
||||
if not provider:
|
||||
logger.error(f"[get_gpt] 未找到对应的模型供应商: provider_id={provider_id}")
|
||||
raise ValueError(f"未找到对应的模型供应商: provider_id={provider_id}")
|
||||
|
||||
gpt = GPTFactory().from_config(
|
||||
ModelConfig(
|
||||
api_key=provider.get('api_key'),
|
||||
base_url=provider.get('base_url'),
|
||||
model_name=model_name,
|
||||
provider=provider.get('type'),
|
||||
name=provider.get('name')
|
||||
)
|
||||
)
|
||||
return gpt
|
||||
|
||||
def get_downloader(self, platform: str) -> Downloader:
|
||||
if platform == "bilibili":
|
||||
@@ -98,7 +121,7 @@ class NoteGenerator:
|
||||
insert_video_task(video_id=video_id, platform=platform, task_id=task_id)
|
||||
|
||||
def insert_screenshots_into_markdown(self, markdown: str, video_path: str, image_base_url: str,
|
||||
output_dir: str) -> str:
|
||||
output_dir: str,_format:list) -> str:
|
||||
"""
|
||||
扫描 markdown 中的 *Screenshot-xx:xx,生成截图并插入 markdown 图片
|
||||
:param markdown:
|
||||
@@ -145,62 +168,143 @@ class NoteGenerator:
|
||||
|
||||
def generate(
|
||||
self,
|
||||
|
||||
video_url: Union[str, HttpUrl],
|
||||
platform: str,
|
||||
quality: DownloadQuality = DownloadQuality.medium,
|
||||
task_id: Union[str, None] = None,
|
||||
model_name: str = None,
|
||||
provider_id: str = None,
|
||||
link: bool = False,
|
||||
screenshot: bool = False,
|
||||
_format: list = None,
|
||||
style: str = None,
|
||||
extras: str = None,
|
||||
path: Union[str, None] = None
|
||||
|
||||
) -> NoteResult:
|
||||
logger.info(f"开始解析并生成笔记")
|
||||
# 1. 选择下载器
|
||||
downloader = self.get_downloader(platform)
|
||||
gpt = self.get_gpt()
|
||||
logger.info(f'使用{downloader.__class__.__name__}下载器\n'
|
||||
f'使用{gpt.__class__.__name__}GPT\n'
|
||||
f'视频地址:{video_url}')
|
||||
if screenshot:
|
||||
try:
|
||||
logger.info(f"🎯 开始解析并生成笔记,task_id={task_id}")
|
||||
self.update_task_status(task_id, TaskStatus.PARSING)
|
||||
_path=''
|
||||
downloader = self.get_downloader(platform)
|
||||
gpt = self.get_gpt(model_name=model_name, provider_id=provider_id)
|
||||
|
||||
video_path = downloader.download_video(video_url)
|
||||
self.video_path = video_path
|
||||
print(video_path)
|
||||
audio_cache_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}_audio.json")
|
||||
transcript_cache_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}_transcript.json")
|
||||
markdown_cache_path = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}_markdown.md")
|
||||
|
||||
# 2. 下载音频
|
||||
audio: AudioDownloadResult = downloader.download(
|
||||
video_url=video_url,
|
||||
quality=quality,
|
||||
output_dir=path,
|
||||
need_video=screenshot
|
||||
# -------- 1. 下载音频 --------
|
||||
try:
|
||||
self.update_task_status(task_id, TaskStatus.DOWNLOADING)
|
||||
if os.path.exists(audio_cache_path):
|
||||
logger.info(f"检测到已有音频缓存,直接读取,task_id={task_id}")
|
||||
with open(audio_cache_path, "r", encoding="utf-8") as f:
|
||||
audio_data = json.load(f)
|
||||
audio = AudioDownloadResult(**audio_data)
|
||||
else:
|
||||
if 'screenshot' in _format:
|
||||
video_path = downloader.download_video(video_url)
|
||||
self.video_path = video_path
|
||||
logger.info(f"成功下载视频文件: {video_path}")
|
||||
screenshot= 'screenshot' in _format
|
||||
audio: AudioDownloadResult = downloader.download(
|
||||
video_url=video_url,
|
||||
quality=quality,
|
||||
output_dir=path,
|
||||
need_video=screenshot
|
||||
)
|
||||
_path=audio.raw_info.get('path')
|
||||
with open(audio_cache_path, "w", encoding="utf-8") as f:
|
||||
json.dump(audio.__dict__, f, ensure_ascii=False, indent=2)
|
||||
logger.info(f"音频下载并缓存成功,task_id={task_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 下载音频失败,task_id={task_id},错误信息:{e}")
|
||||
self.update_task_status(task_id, TaskStatus.FAILED, message=f"下载音频失败:{e}")
|
||||
raise e
|
||||
|
||||
# -------- 2. 转写文字 --------
|
||||
try:
|
||||
self.update_task_status(task_id, TaskStatus.TRANSCRIBING)
|
||||
if os.path.exists(transcript_cache_path):
|
||||
logger.info(f"检测到已有转写缓存,直接读取,task_id={task_id}")
|
||||
with open(transcript_cache_path, "r", encoding="utf-8") as f:
|
||||
transcript_data = json.load(f)
|
||||
transcript = TranscriptResult(
|
||||
language=transcript_data["language"],
|
||||
full_text=transcript_data["full_text"],
|
||||
segments=[TranscriptSegment(**seg) for seg in transcript_data["segments"]]
|
||||
)
|
||||
else:
|
||||
transcript: TranscriptResult = self.transcriber.transcript(file_path=audio.file_path)
|
||||
with open(transcript_cache_path, "w", encoding="utf-8") as f:
|
||||
json.dump(asdict(transcript), f, ensure_ascii=False, indent=2)
|
||||
logger.info(f"文字转写并缓存成功,task_id={task_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 转写文字失败,task_id={task_id},错误信息:{e}")
|
||||
self.update_task_status(task_id, TaskStatus.FAILED, message=f"转写文字失败:{e}")
|
||||
raise e
|
||||
|
||||
# -------- 3. 总结内容 --------
|
||||
try:
|
||||
self.update_task_status(task_id, TaskStatus.SUMMARIZING)
|
||||
if os.path.exists(markdown_cache_path):
|
||||
logger.info(f"检测到已有总结缓存,直接读取,task_id={task_id}")
|
||||
with open(markdown_cache_path, "r", encoding="utf-8") as f:
|
||||
markdown = f.read()
|
||||
else:
|
||||
source = GPTSource(
|
||||
title=audio.title,
|
||||
segment=transcript.segments,
|
||||
tags=audio.raw_info.get('tags'),
|
||||
screenshot=screenshot,
|
||||
link=link,
|
||||
_format=_format,
|
||||
style=style,
|
||||
extras=extras
|
||||
)
|
||||
|
||||
markdown: str = gpt.summarize(source)
|
||||
with open(markdown_cache_path, "w", encoding="utf-8") as f:
|
||||
f.write(markdown)
|
||||
logger.info(f"GPT总结并缓存成功,task_id={task_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 总结内容失败,task_id={task_id},错误信息:{e}")
|
||||
self.update_task_status(task_id, TaskStatus.FAILED, message=f"总结内容失败:{e}")
|
||||
raise e
|
||||
|
||||
# -------- 4. 插入截图 --------
|
||||
if _format and 'screenshot' in _format:
|
||||
try:
|
||||
markdown = self.insert_screenshots_into_markdown(markdown, self.video_path, image_base_url, output_dir,_format)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ 插入截图失败,跳过处理,task_id={task_id},错误信息:{e}")
|
||||
if _format and 'link' in _format:
|
||||
try:
|
||||
markdown = replace_content_markers(markdown, video_id=audio.video_id,platform=platform)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ 插入链接失败,跳过处理,task_id={task_id},错误信息:{e}")
|
||||
# 注意:截图失败不终止整体流程
|
||||
|
||||
# -------- 5. 保存数据库记录 --------
|
||||
self.update_task_status(task_id, TaskStatus.SAVING)
|
||||
self.save_meta(video_id=audio.video_id, platform=platform, task_id=task_id)
|
||||
|
||||
# -------- 6. 完成 --------
|
||||
self.update_task_status(task_id, TaskStatus.SUCCESS)
|
||||
logger.info(f"✅ 笔记生成成功,task_id={task_id}")
|
||||
transcription_finished.send({
|
||||
"file_path": audio.file_path,
|
||||
})
|
||||
return NoteResult(
|
||||
markdown=markdown,
|
||||
transcript=transcript,
|
||||
audio_meta=audio
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 笔记生成流程异常终止,task_id={task_id},错误信息:{e}")
|
||||
self.update_task_status(task_id, TaskStatus.FAILED, message=str(e))
|
||||
raise f'❌ 笔记生成流程异常终止,task_id={task_id},错误信息:{e}'
|
||||
|
||||
)
|
||||
logger.info(f"下载音频成功,文件路径:{audio.file_path}")
|
||||
# 3. Whisper 转写
|
||||
transcript: TranscriptResult = self.transcriber.transcript(file_path=audio.file_path)
|
||||
logger.info(f"Whisper 转写成功,转写结果:{transcript.full_text}")
|
||||
# 4. GPT 总结
|
||||
source = GPTSource(
|
||||
title=audio.title,
|
||||
segment=transcript.segments,
|
||||
tags=audio.raw_info.get('tags'),
|
||||
screenshot=screenshot,
|
||||
link=link
|
||||
)
|
||||
logger.info(f"GPT 总结完成,总结结果:{source}")
|
||||
markdown: str = gpt.summarize(source)
|
||||
print("markdown结果", markdown)
|
||||
|
||||
markdown = replace_content_markers(markdown=markdown, video_id=audio.video_id, platform=platform)
|
||||
if self.video_path:
|
||||
markdown = self.insert_screenshots_into_markdown(markdown, self.video_path, image_base_url, output_dir)
|
||||
self.save_meta(video_id=audio.video_id, platform=platform, task_id=task_id)
|
||||
# 5. 返回结构体
|
||||
return NoteResult(
|
||||
markdown=markdown,
|
||||
transcript=transcript,
|
||||
audio_meta=audio
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from kombu import uuid
|
||||
|
||||
from app.db.provider_dao import (
|
||||
insert_provider,
|
||||
init_provider_table,
|
||||
@@ -5,50 +7,65 @@ from app.db.provider_dao import (
|
||||
get_provider_by_name,
|
||||
get_provider_by_id,
|
||||
update_provider,
|
||||
delete_provider,
|
||||
delete_provider, get_enabled_providers,
|
||||
)
|
||||
from app.gpt.gpt_factory import GPTFactory
|
||||
from app.models.model_config import ModelConfig
|
||||
|
||||
|
||||
class ProviderService:
    """CRUD helpers around the provider DAO, returning plain dicts."""

    @staticmethod
    def serialize_provider(row: tuple) -> dict:
        """Convert a provider DAO row tuple into a dict.

        Returns None for a falsy row (no match / empty tuple).
        """
        if not row:
            return None
        return {
            "id": row[0],
            "name": row[1],
            "logo": row[2],
            "type": row[3],
            "api_key": row[4],
            "base_url": row[5],
            "enabled": row[6],
            "created_at": row[7],
        }

    @staticmethod
    def add_provider(name: str, api_key: str, base_url: str, logo: str, type_: str, enabled: int = 1):
        """Create a provider row with a generated UUID id.

        NOTE(review): the `logo` argument is deliberately overridden with
        'custom' below — presumably all user-added providers get the custom
        logo; confirm this is intended.
        """
        try:
            id = uuid().lower()
            logo = 'custom'
            return insert_provider(id, name, api_key, base_url, logo, type_, enabled)
        except Exception as e:
            # NOTE(review): the failure is swallowed and None is returned;
            # callers cannot distinguish "failed" from "created nothing".
            print('创建模式失败', e)

    @staticmethod
    def get_all_providers():
        """Return every provider as a list of dicts (empty list when none)."""
        rows = get_all_providers()
        return [ProviderService.serialize_provider(row) for row in rows] if rows else []

    @staticmethod
    def get_provider_by_name(name: str):
        """Look up a single provider by display name; None when missing."""
        row = get_provider_by_name(name)
        return ProviderService.serialize_provider(row)

    @staticmethod
    def get_provider_by_id(id: str):
        """Look up a single provider by its UUID string id; None when missing."""
        row = get_provider_by_id(id)
        return ProviderService.serialize_provider(row)

    @staticmethod
    def update_provider(id: str, data: dict):
        """Partially update a provider; None values and the 'id' key are ignored."""
        try:
            # Drop empty fields so the DAO only touches supplied columns.
            filtered_data = {k: v for k, v in data.items() if v is not None and k != 'id'}
            print('更新模型供应商', filtered_data)
            return update_provider(id, **filtered_data)
        except Exception as e:
            # NOTE(review): swallowed failure, same caveat as add_provider.
            print('更新模型供应商失败:', e)

    @staticmethod
    def delete_provider(id: str):
        """Delete the provider row with the given id."""
        return delete_provider(id)
|
||||
|
||||
251
backend/app/transcriber/bcut.py
Normal file
251
backend/app/transcriber/bcut.py
Normal file
@@ -0,0 +1,251 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional, List, Dict, Union
|
||||
|
||||
import requests
|
||||
|
||||
from app.decorators.timeit import timeit
|
||||
from app.models.transcriber_model import TranscriptSegment, TranscriptResult
|
||||
from app.transcriber.base import Transcriber
|
||||
from app.utils.logger import get_logger
|
||||
from events import transcription_finished
|
||||
|
||||
__version__ = "0.0.3"
|
||||
|
||||
API_BASE_URL = "https://member.bilibili.com/x/bcut/rubick-interface"
|
||||
|
||||
# 申请上传
|
||||
API_REQ_UPLOAD = API_BASE_URL + "/resource/create"
|
||||
|
||||
# 提交上传
|
||||
API_COMMIT_UPLOAD = API_BASE_URL + "/resource/create/complete"
|
||||
|
||||
# 创建任务
|
||||
API_CREATE_TASK = API_BASE_URL + "/task"
|
||||
|
||||
# 查询结果
|
||||
API_QUERY_RESULT = API_BASE_URL + "/task/result"
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class BcutTranscriber(Transcriber):
    """必剪 (Bilibili BCut) speech-recognition backend.

    Uploads an audio file in chunks to the BCut resource API, creates a
    recognition task, and polls for the finished transcript.
    """

    headers = {
        'User-Agent': 'Bilibili/1.0.0 (https://www.bilibili.com)',
        'Content-Type': 'application/json'
    }

    def __init__(self):
        self.session = requests.Session()

        # Upload bookkeeping, filled in by _upload().
        self.__in_boss_key: Optional[str] = None
        self.__resource_id: Optional[str] = None
        self.__upload_id: Optional[str] = None
        self.__upload_urls: List[str] = []
        self.__per_size: Optional[int] = None
        self.__clips: Optional[int] = None

        # One ETag per uploaded chunk.
        # Fix: the original initialized __etags twice; keep the typed one.
        self.__etags: List[str] = []
        self.__download_url: Optional[str] = None
        self.task_id: Optional[str] = None

    def _load_file(self, file_path: str) -> bytes:
        """Read the whole file as bytes."""
        with open(file_path, 'rb') as f:
            return f.read()

    def _upload(self, file_path: str) -> None:
        """Request an upload slot, push all chunks, then commit the upload."""
        file_binary = self._load_file(file_path)
        if not file_binary:
            raise ValueError("无法读取文件数据")

        payload = json.dumps({
            "type": 2,
            "name": "audio.mp3",
            "size": len(file_binary),
            "ResourceFileType": "mp3",
            "model_id": "8",
        })

        resp = self.session.post(
            API_REQ_UPLOAD,
            data=payload,
            headers=self.headers
        )
        resp.raise_for_status()
        resp = resp.json()
        resp_data = resp["data"]

        self.__in_boss_key = resp_data["in_boss_key"]
        self.__resource_id = resp_data["resource_id"]
        self.__upload_id = resp_data["upload_id"]
        self.__upload_urls = resp_data["upload_urls"]
        self.__per_size = resp_data["per_size"]
        self.__clips = len(resp_data["upload_urls"])

        logger.info(
            f"申请上传成功, 总计大小{resp_data['size'] // 1024}KB, {self.__clips}分片, 分片大小{resp_data['per_size'] // 1024}KB: {self.__in_boss_key}"
        )
        self.__upload_part(file_binary)
        self.__commit_upload()

    def __upload_part(self, file_binary: bytes) -> None:
        """PUT each chunk to its presigned URL and collect its ETag."""
        for clip in range(self.__clips):
            start_range = clip * self.__per_size
            end_range = min((clip + 1) * self.__per_size, len(file_binary))
            logger.info(f"开始上传分片{clip}: {start_range}-{end_range}")
            resp = self.session.put(
                self.__upload_urls[clip],
                data=file_binary[start_range:end_range],
                headers={'Content-Type': 'application/octet-stream'}
            )
            resp.raise_for_status()
            etag = resp.headers.get("Etag", "").strip('"')
            self.__etags.append(etag)
            logger.info(f"分片{clip}上传成功: {etag}")

    def __commit_upload(self) -> None:
        """Finalize the multipart upload and record the download URL."""
        data = json.dumps({
            "InBossKey": self.__in_boss_key,
            "ResourceId": self.__resource_id,
            "Etags": ",".join(self.__etags),
            "UploadId": self.__upload_id,
            "model_id": "8",
        })
        resp = self.session.post(
            API_COMMIT_UPLOAD,
            data=data,
            headers=self.headers
        )
        resp.raise_for_status()
        resp = resp.json()
        # Fix: debug `print('Bili', resp)` replaced with logger.debug so the
        # output obeys the logging configuration.
        logger.debug("Bcut commit response: %s", resp)
        if resp.get("code") != 0:
            error_msg = f"上传提交失败: {resp.get('message', '未知错误')}"
            logger.error(error_msg)
            raise Exception(error_msg)

        self.__download_url = resp["data"]["download_url"]
        logger.info(f"提交成功,下载链接: {self.__download_url}")

    def _create_task(self) -> str:
        """Create the ASR task for the uploaded resource; returns its id."""
        resp = self.session.post(
            API_CREATE_TASK, json={"resource": self.__download_url, "model_id": "8"}, headers=self.headers
        )
        resp.raise_for_status()
        resp = resp.json()
        if resp.get("code") != 0:
            error_msg = f"创建任务失败: {resp.get('message', '未知错误')}"
            logger.error(error_msg)
            raise Exception(error_msg)

        self.task_id = resp["data"]["task_id"]
        logger.info(f"任务已创建: {self.task_id}")
        return self.task_id

    def _query_result(self) -> dict:
        """Poll the task-result endpoint once and return its data payload."""
        resp = self.session.get(
            API_QUERY_RESULT,
            # NOTE(review): other endpoints send model_id "8"; confirm that 7
            # is intended here.
            params={"model_id": 7, "task_id": self.task_id},
            headers=self.headers
        )
        resp.raise_for_status()
        resp = resp.json()
        if resp.get("code") != 0:
            error_msg = f"查询结果失败: {resp.get('message', '未知错误')}"
            logger.error(error_msg)
            raise Exception(error_msg)

        return resp["data"]

    @timeit
    def transcript(self, file_path: str) -> TranscriptResult:
        """Upload, create the task, poll until done, map to TranscriptResult.

        Raises:
            Exception: When the remote task fails (state 3) or never reaches
                the completed state (4) within the polling budget.
        """
        try:
            logger.info(f"开始处理文件: {file_path}")

            # Upload the file.
            logger.info("正在上传文件...")
            self._upload(file_path)

            # Create the recognition task.
            logger.info("提交转录任务...")
            self._create_task()

            # Poll for completion: state 4 == done, state 3 == failed.
            logger.info("等待转录结果...")
            task_resp = None
            max_retries = 500
            for i in range(max_retries):
                task_resp = self._query_result()

                if task_resp["state"] == 4:
                    break
                elif task_resp["state"] == 3:
                    error_msg = f"B站ASR任务失败,状态码: {task_resp['state']}"
                    logger.error(error_msg)
                    raise Exception(error_msg)

                # Log progress periodically.
                if i % 10 == 0:
                    logger.info(f"转录进行中... {i}/{max_retries}")

                time.sleep(1)

            if not task_resp or task_resp["state"] != 4:
                error_msg = f"B站ASR任务未能完成,状态: {task_resp.get('state') if task_resp else 'Unknown'}"
                logger.error(error_msg)
                raise Exception(error_msg)

            # Parse the finished result.
            logger.info("转录成功,处理结果...")
            result_json = json.loads(task_resp["result"])

            segments = []
            full_text = ""

            for u in result_json.get("utterances", []):
                text = u.get("transcript", "").strip()
                # The API reports timestamps in milliseconds; convert to seconds.
                start_time = float(u.get("start_time", 0)) / 1000.0
                end_time = float(u.get("end_time", 0)) / 1000.0

                full_text += text + " "
                segments.append(TranscriptSegment(
                    start=start_time,
                    end=end_time,
                    text=text
                ))

            result = TranscriptResult(
                language=result_json.get("language", "zh"),
                full_text=full_text.strip(),
                segments=segments,
                raw=result_json
            )

            # Completion event intentionally disabled in the original.
            # self.on_finish(file_path, result)

            return result

        except Exception as e:
            logger.error(f"B站ASR处理失败: {str(e)}")
            raise

    def on_finish(self, video_path: str, result: TranscriptResult) -> None:
        """Completion callback: emit the transcription_finished signal."""
        logger.info(f"B站ASR转写完成: {video_path}")
        transcription_finished.send({
            "file_path": video_path,
        })
|
||||
115
backend/app/transcriber/kuaishou.py
Normal file
115
backend/app/transcriber/kuaishou.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import requests
|
||||
import logging
|
||||
import os
|
||||
from typing import Union, List, Dict, Optional
|
||||
|
||||
from app.decorators.timeit import timeit
|
||||
from app.models.transcriber_model import TranscriptSegment, TranscriptResult
|
||||
from app.transcriber.base import Transcriber
|
||||
from app.utils.logger import get_logger
|
||||
from events import transcription_finished
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class KuaishouTranscriber(Transcriber):
    """Kuaishou (快手) subtitle-generation API client."""

    API_URL = "https://ai.kuaishou.com/api/effects/subtitle_generate"

    def __init__(self):
        # Stateless client; nothing to set up.
        pass

    def _load_file(self, file_path: str) -> bytes:
        """Read the whole file as bytes."""
        with open(file_path, 'rb') as f:
            return f.read()

    def _submit(self, file_path: str) -> dict:
        """Upload the audio file and return the parsed JSON response.

        Raises:
            requests.exceptions.RequestException: On network failures.
            Exception: When the API reports a non-zero code or no data.
        """
        try:
            file_binary = self._load_file(file_path)

            payload = {
                "typeId": "1"
            }

            # Use the local file name as the upload file name.
            file_name = os.path.basename(file_path)
            files = [('file', (file_name, file_binary, 'audio/mpeg'))]

            logger.info(f"开始向快手API提交请求,文件: {file_name}")
            response = requests.post(self.API_URL, data=payload, files=files, timeout=300)
            response.raise_for_status()

            result = response.json()
            # Fix: stray debug `print('result', result)` replaced with a lazy
            # logger.debug call so the raw payload obeys log configuration.
            logger.debug("快手API原始返回: %s", result)
            # Reject responses that carry an error code or no data section.
            if "data" not in result or result.get("code", 0) != 0:
                error_msg = f"快手API返回错误: {result.get('message', '未知错误')}"
                logger.error(error_msg)
                raise Exception(error_msg)

            return result

        except requests.exceptions.RequestException as e:
            error_msg = f"快手ASR请求网络错误: {str(e)}"
            logger.error(error_msg)
            raise
        except Exception as e:
            error_msg = f"快手ASR请求处理错误: {str(e)}"
            logger.error(error_msg)
            raise

    @timeit
    def transcript(self, file_path: str) -> TranscriptResult:
        """Run recognition and map the API response to a TranscriptResult."""
        try:
            logger.info(f"开始处理文件: {file_path}")

            # Submit the request and wait for the response.
            logger.info("向快手API提交识别请求...")
            result_data = self._submit(file_path)

            logger.info("请求成功,处理结果...")

            segments = []
            full_text = ""

            # Parse the text segments returned by the API.
            texts = result_data.get('data', {}).get('text', [])
            for u in texts:
                text = u.get('text', '').strip()
                start_time = float(u.get('start_time', 0))
                end_time = float(u.get('end_time', 0))

                full_text += text + " "
                segments.append(TranscriptSegment(
                    start=start_time,
                    end=end_time,
                    text=text
                ))

            result = TranscriptResult(
                # The API does not report a language; default to Chinese.
                language="zh",
                full_text=full_text.strip(),
                segments=segments,
                raw=result_data
            )

            # Completion event intentionally disabled in the original.
            # self.on_finish(file_path, result)

            return result

        except Exception as e:
            logger.error(f"快手ASR处理失败: {str(e)}")
            raise

    def on_finish(self, video_path: str, result: TranscriptResult) -> None:
        """Completion callback: emit the transcription_finished signal."""
        logger.info(f"快手ASR转写完成: {video_path}")
        transcription_finished.send({
            "file_path": video_path,
        })
|
||||
@@ -31,15 +31,15 @@ _transcribers = {
|
||||
|
||||
def get_whisper_transcriber(model_size="base", device="cuda"):
    """Return the cached Whisper transcriber, creating it on first use.

    Fix: the original checked `_transcribers['fast-whisper']` but stored and
    returned `_transcribers['whisper']`, so the 'fast-whisper' slot stayed
    None and the expensive model was rebuilt on every call. The cache key is
    now 'whisper' throughout; `.get()` tolerates a missing key.
    """
    if _transcribers.get('whisper') is None:
        logger.info(f'创建 Whisper 转录器实例,参数:{model_size}, {device}')
        try:
            _transcribers['whisper'] = WhisperTranscriber(model_size=model_size, device=device)
            logger.info('Whisper 转录器创建成功')
        except Exception as e:
            logger.error(f"Whisper 转录器创建失败: {e}")
            raise
    return _transcribers['whisper']
|
||||
|
||||
def get_bcut_transcriber():
|
||||
"""获取 Bcut 转录器实例"""
|
||||
|
||||
@@ -4,14 +4,19 @@ from app.decorators.timeit import timeit
|
||||
from app.models.transcriber_model import TranscriptSegment, TranscriptResult
|
||||
from app.transcriber.base import Transcriber
|
||||
from app.utils.env_checker import is_cuda_available, is_torch_installed
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.path_helper import get_model_dir
|
||||
|
||||
from events import transcription_finished
|
||||
from pathlib import Path
|
||||
import os
|
||||
from tqdm import tqdm
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
'''
|
||||
Size of the model to use (tiny, tiny.en, base, base.en, small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo
|
||||
'''
|
||||
|
||||
logger=get_logger(__name__)
|
||||
|
||||
class WhisperTranscriber(Transcriber):
|
||||
# TODO:修改为可配置
|
||||
@@ -31,15 +36,25 @@ class WhisperTranscriber(Transcriber):
|
||||
|
||||
self.compute_type = compute_type or ("float16" if self.device == "cuda" else "int8")
|
||||
|
||||
model_path = get_model_dir("whisper")
|
||||
model_dir = get_model_dir("whisper")
|
||||
model_path = os.path.join(model_dir, f"whisper-{model_size}")
|
||||
if not Path(model_path).exists():
|
||||
logger.info(f"模型 whisper-{model_size} 不存在,开始下载...")
|
||||
repo_id = f"guillaumekln/faster-whisper-{model_size}"
|
||||
snapshot_download(
|
||||
repo_id,
|
||||
local_dir=model_path,
|
||||
local_dir_use_symlinks=False,
|
||||
)
|
||||
logger.info("模型下载完成")
|
||||
|
||||
self.model = WhisperModel(
|
||||
model_size,
|
||||
device=self.device,
|
||||
# compute_type="int8", # 或 "float16"
|
||||
compute_type=self.compute_type,
|
||||
cpu_threads=cpu_threads,
|
||||
download_root=model_path
|
||||
download_root=model_dir
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_torch_installed() -> bool:
|
||||
try:
|
||||
@@ -88,7 +103,7 @@ class WhisperTranscriber(Transcriber):
|
||||
segments=segments,
|
||||
raw=info
|
||||
)
|
||||
self.on_finish(file_path, result)
|
||||
# self.on_finish(file_path, result)
|
||||
return result
|
||||
except Exception as e:
|
||||
print(f"转写失败:{e}")
|
||||
|
||||
@@ -4,6 +4,7 @@ import uvicorn
|
||||
from starlette.staticfiles import StaticFiles
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from app.db.model_dao import init_model_table
|
||||
from app.db.provider_dao import init_provider_table
|
||||
from app.utils.logger import get_logger
|
||||
from app import create_app
|
||||
@@ -39,6 +40,7 @@ async def startup_event():
|
||||
get_transcriber(transcriber_type=os.getenv("TRANSCRIBER_TYPE","fast-whisper"))
|
||||
init_video_task_table()
|
||||
init_provider_table()
|
||||
init_model_table()
|
||||
|
||||
if __name__ == "__main__":
|
||||
port = int(os.getenv("BACKEND_PORT", 8000))
|
||||
|
||||
Reference in New Issue
Block a user