fix: 性能优化、前端转写器配置、任务进度丢失及 MLX Whisper 回退问题修复

### 性能优化
- 后端任务执行从串行锁改为 ThreadPoolExecutor 并发执行(默认3线程)
- 添加 GZipMiddleware 响应压缩 + Nginx gzip 配置
- 数据库连接池参数优化(pool_size=10, max_overflow=20)
- 视频帧提取并行化(ThreadPoolExecutor)
- LLM 重试配置缓存到实例,避免每次请求读 env var
- 前端路由级代码拆分(React.lazy + Suspense)
- Vite manualChunks 拆分 markdown/markmap/vendor
- MarkdownViewer 用 React.memo + useMemo 减少不必要渲染
- NoteHistory Fuse.js 实例 useMemo 缓存
- useTaskPolling 无待处理任务时跳过轮询
- 移除 antd 依赖(NoteForm Alert、modelForm Tag),改用 shadcn/ui

### 前端转写器配置(新功能)
- 新增 TranscriberConfigManager(JSON 文件存储,替代环境变量)
- 新增 GET/POST /transcriber_config API 端点
- 新增 GET /transcriber_models_status 模型下载状态查询
- 新增 POST /transcriber_download 后台模型下载触发
- 前端转写器设置页面:引擎选择、模型大小选择、模型下载管理
- deploy_status 端点同步从配置文件读取

### Bug 修复
- 修复任务进行中切换页面后进度丢失:Home.tsx status 派生逻辑补全中间状态
- 修复 MLX Whisper 静默回退 fast-whisper:移除环境变量门控,macOS 下自动尝试导入
- MLX Whisper 不可用时抛出 RuntimeError 而非静默回退
- 前端展示 MLX Whisper 可用性状态,不可用时禁用保存

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
huangjianwu
2026-03-23 14:09:34 +08:00
parent 1cd8c33983
commit c105342ded
24 changed files with 1016 additions and 356 deletions

View File

@@ -1,21 +1,23 @@
import './App.css'
import { HomePage } from './pages/HomePage/Home.tsx'
import { lazy, Suspense, useEffect } from 'react'
import { BrowserRouter, Navigate, Routes, Route } from 'react-router-dom'
import { useTaskPolling } from '@/hooks/useTaskPolling.ts'
import SettingPage from './pages/SettingPage/index.tsx'
import { BrowserRouter, Navigate, Routes } from 'react-router-dom'
import { Route } from 'react-router-dom'
import Index from '@/pages/Index.tsx'
import NotFoundPage from '@/pages/NotFoundPage'
import Model from '@/pages/SettingPage/Model.tsx'
import ProviderForm from '@/components/Form/modelForm/Form.tsx'
import AboutPage from '@/pages/SettingPage/about.tsx'
import Monitor from '@/pages/SettingPage/Monitor.tsx'
import Downloader from '@/pages/SettingPage/Downloader.tsx'
import DownloaderForm from '@/components/Form/DownloaderForm/Form.tsx'
import { useEffect } from 'react'
import { systemCheck } from '@/services/system.ts'
import { useCheckBackend } from '@/hooks/useCheckBackend.ts'
import { systemCheck } from '@/services/system.ts'
import BackendInitDialog from '@/components/BackendInitDialog'
import Index from '@/pages/Index.tsx'
import { HomePage } from './pages/HomePage/Home.tsx'
// 非首屏页面使用 React.lazy 按需加载
const SettingPage = lazy(() => import('./pages/SettingPage/index.tsx'))
const Model = lazy(() => import('@/pages/SettingPage/Model.tsx'))
const ProviderForm = lazy(() => import('@/components/Form/modelForm/Form.tsx'))
const AboutPage = lazy(() => import('@/pages/SettingPage/about.tsx'))
const Monitor = lazy(() => import('@/pages/SettingPage/Monitor.tsx'))
const Downloader = lazy(() => import('@/pages/SettingPage/Downloader.tsx'))
const DownloaderForm = lazy(() => import('@/components/Form/DownloaderForm/Form.tsx'))
const TranscriberPage = lazy(() => import('@/pages/SettingPage/transcriber.tsx'))
const NotFoundPage = lazy(() => import('@/pages/NotFoundPage'))
function App() {
useTaskPolling(3000) // 每 3 秒轮询一次
@@ -41,6 +43,7 @@ function App() {
return (
<>
<BrowserRouter>
<Suspense fallback={<div className="flex h-screen items-center justify-center"></div>}>
<Routes>
<Route path="/" element={<Index />}>
<Route index element={<HomePage />} />
@@ -53,6 +56,7 @@ function App() {
<Route path="download" element={<Downloader />}>
<Route path=":id" element={<DownloaderForm />} />
</Route>
<Route path="transcriber" element={<TranscriberPage />} />
<Route path="monitor" element={<Monitor />}></Route>
<Route path="about" element={<AboutPage />}></Route>
<Route path="*" element={<NotFoundPage />} />
@@ -60,6 +64,7 @@ function App() {
<Route path="*" element={<NotFoundPage />} />
</Route>
</Routes>
</Suspense>
</BrowserRouter>
</>
)

View File

@@ -27,7 +27,7 @@ import {
import { ModelSelector } from '@/components/Form/modelForm/ModelSelector.tsx'
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert.tsx'
import { Tags } from 'lucide-react'
import { Tag } from 'antd'
import { X } from 'lucide-react'
import { useModelStore } from '@/store/modelStore'
// ✅ Provider表单schema
@@ -312,12 +312,12 @@ const ProviderForm = ({ isCreate = false }: { isCreate?: boolean }) => {
{
models && models.map(model => {
return (
<>
<Tag onClose={()=>{
handelDelete(model.id)
}} key={model.id} closable color={'blue'}>
<span key={model.id} className="inline-flex items-center gap-1 rounded-md bg-blue-100 px-2 py-0.5 text-sm text-blue-700">
{model.model_name}
</Tag></>
<button type="button" onClick={() => handelDelete(model.id)} className="hover:text-blue-900">
<X className="h-3 w-3" />
</button>
</span>
)
})

View File

@@ -22,9 +22,11 @@ export const useTaskPolling = (interval = 3000) => {
task => task.status != 'SUCCESS' && task.status != 'FAILED'
)
// 无活跃任务时跳过轮询
if (pendingTasks.length === 0) return
for (const task of pendingTasks) {
try {
console.log('🔄 正在轮询任务:', task.id)
const res = await get_task_status(task.id)
const { status } = res
@@ -47,9 +49,7 @@ export const useTaskPolling = (interval = 3000) => {
}
} catch (e) {
console.error('❌ 任务轮询失败:', e)
// toast.error(`生成失败 ${e.message || e}`)
updateTaskContent(task.id, { status: 'FAILED' })
// removeTask(task.id)
}
}
}, interval)

View File

@@ -18,14 +18,15 @@ export const HomePage: FC = () => {
useEffect(() => {
if (!currentTask) {
setStatus('idle')
} else if (currentTask.status === 'PENDING') {
setStatus('loading')
} else if (currentTask.status === 'SUCCESS') {
setStatus('success')
} else if (currentTask.status === 'FAILED') {
setStatus('failed')
} else {
// PENDING、PARSING、DOWNLOADING、TRANSCRIBING、SUMMARIZING 等所有进行中状态
setStatus('loading')
}
}, [currentTask])
}, [currentTask, currentTask?.status])
// useEffect( () => {
// get_task_status('d4e87938-c066-48a0-bbd5-9bec40d53354').then(res=>{

View File

@@ -1,4 +1,4 @@
import { useState, useEffect, useRef } from 'react'
import { useState, useEffect, useRef, useMemo, memo, FC } from 'react'
import ReactMarkdown from 'react-markdown'
import { Button } from '@/components/ui/button.tsx'
import { Copy, Download, ArrowRight, Play, ExternalLink } from 'lucide-react'
@@ -16,7 +16,6 @@ import remarkMath from 'remark-math'
import rehypeKatex from 'rehype-katex'
import 'katex/dist/katex.min.css'
import 'github-markdown-css/github-markdown-light.css'
import { FC } from 'react'
import { ScrollArea } from '@/components/ui/scroll-area.tsx'
import { useTaskStore } from '@/store/taskStore'
import { noteStyles } from '@/constant/note.ts'
@@ -45,7 +44,228 @@ const steps = [
{ label: '保存完成', key: 'SUCCESS' },
]
const MarkdownViewer: FC<MarkdownViewerProps> = ({ status }) => {
const remarkPlugins = [gfm, remarkMath]
const rehypePlugins = [rehypeKatex]
/**
 * Build the ReactMarkdown `components` map. `baseURL` is used to prefix
 * relative image paths so they resolve against the backend host.
 * Defined as a factory (memoized with useMemo at the call site) so the
 * component functions are not recreated on every render.
 */
function createMarkdownComponents(baseURL: string) {
  return {
    // Headings: consistent vertical rhythm + scroll margin for anchor links.
    h1: ({ children, ...props }: any) => (
      <h1
        className="text-primary my-6 scroll-m-20 text-3xl font-extrabold tracking-tight lg:text-4xl"
        {...props}
      >
        {children}
      </h1>
    ),
    h2: ({ children, ...props }: any) => (
      <h2
        className="text-primary mt-10 mb-4 scroll-m-20 border-b pb-2 text-2xl font-semibold tracking-tight first:mt-0"
        {...props}
      >
        {children}
      </h2>
    ),
    h3: ({ children, ...props }: any) => (
      <h3
        className="text-primary mt-8 mb-4 scroll-m-20 text-xl font-semibold tracking-tight"
        {...props}
      >
        {children}
      </h3>
    ),
    h4: ({ children, ...props }: any) => (
      <h4
        className="text-primary mt-6 mb-2 scroll-m-20 text-lg font-semibold tracking-tight"
        {...props}
      >
        {children}
      </h4>
    ),
    p: ({ children, ...props }: any) => (
      <p className="leading-7 [&:not(:first-child)]:mt-6" {...props}>
        {children}
      </p>
    ),
    // Links: "原片 @ mm:ss" links jump back into the source video and are
    // rendered as pill buttons; everything else gets default link styling
    // with an external-link icon for http(s) targets.
    a: ({ href, children, ...props }: any) => {
      // Guard with Array.isArray: react-markdown may pass a bare string
      // (or undefined) as children, and children[0] would then be a single
      // character (or throw). A plain string can never equal the full
      // "原片 @ …" prefix, so this guard preserves the original outcome.
      const isOriginLink =
        Array.isArray(children) &&
        typeof children[0] === 'string' &&
        (children[0] as string).startsWith('原片 @')
      if (isOriginLink) {
        const timeMatch = (children[0] as string).match(/原片 @ (\d{2}:\d{2})/)
        const timeText = timeMatch ? timeMatch[1] : '原片'
        return (
          <span className="origin-link my-2 inline-flex">
            <a
              href={href}
              target="_blank"
              rel="noopener noreferrer"
              className="inline-flex items-center gap-1.5 rounded-full bg-blue-50 px-3 py-1 text-sm font-medium text-blue-700 transition-colors hover:bg-blue-100"
              {...props}
            >
              <Play className="h-3.5 w-3.5" />
              <span>{timeText}</span>
            </a>
          </span>
        )
      }
      return (
        <a
          href={href}
          target="_blank"
          rel="noopener noreferrer"
          className="text-primary hover:text-primary/80 inline-flex items-center gap-0.5 font-medium underline underline-offset-4"
          {...props}
        >
          {children}
          {href?.startsWith('http') && (
            <ExternalLink className="ml-0.5 inline-block h-3 w-3" />
          )}
        </a>
      )
    },
    // Images: prefix root-relative paths with the API base URL so they
    // resolve against the backend, and wrap in a zoomable container.
    img: ({ node, ...props }: any) => {
      // Guard the type: react-markdown can pass an undefined src, and the
      // unguarded startsWith call would throw. Pass the corrected src via
      // an explicit prop instead of mutating the props object.
      let src = props.src
      if (typeof src === 'string' && src.startsWith('/')) {
        src = baseURL + src
      }
      return (
        <div className="my-8 flex justify-center">
          <Zoom>
            <img
              {...props}
              src={src}
              className="max-w-full cursor-zoom-in rounded-lg object-cover shadow-md transition-all hover:shadow-lg"
              style={{ maxHeight: '500px' }}
            />
          </Zoom>
        </div>
      )
    },
    strong: ({ children, ...props }: any) => (
      <strong className="text-primary font-bold" {...props}>
        {children}
      </strong>
    ),
    // List items: LLM output sometimes emits "**Heading**" as a lone list
    // item; render those as bold pseudo-headings instead of bullets.
    li: ({ children, ...props }: any) => {
      const rawText = String(children)
      const isFakeHeading = /^(\*\*.+\*\*)$/.test(rawText.trim())
      if (isFakeHeading) {
        return (
          <div className="text-primary my-4 text-lg font-bold">{children}</div>
        )
      }
      return (
        <li className="my-1" {...props}>
          {children}
        </li>
      )
    },
    ul: ({ children, ...props }: any) => (
      <ul className="my-6 ml-6 list-disc [&>li]:mt-2" {...props}>
        {children}
      </ul>
    ),
    ol: ({ children, ...props }: any) => (
      <ol className="my-6 ml-6 list-decimal [&>li]:mt-2" {...props}>
        {children}
      </ol>
    ),
    blockquote: ({ children, ...props }: any) => (
      <blockquote
        className="border-primary/20 text-muted-foreground mt-6 border-l-4 pl-4 italic"
        {...props}
      >
        {children}
      </blockquote>
    ),
    // Code: fenced blocks with a recognized language get syntax highlighting
    // plus a copy-to-clipboard button; everything else renders inline.
    code: ({ inline, className, children, ...props }: any) => {
      const match = /language-(\w+)/.exec(className || '')
      const codeContent = String(children).replace(/\n$/, '')
      if (!inline && match) {
        return (
          <div className="group bg-muted relative my-6 overflow-hidden rounded-lg border shadow-sm">
            <div className="bg-muted text-muted-foreground flex items-center justify-between px-4 py-1.5 text-sm font-medium">
              <div>{match[1].toUpperCase()}</div>
              <button
                onClick={() => {
                  navigator.clipboard.writeText(codeContent)
                  toast.success('代码已复制')
                }}
                className="bg-background/80 hover:bg-background flex items-center gap-1 rounded-md px-2 py-1 text-xs font-medium transition-colors"
              >
                <Copy className="h-3.5 w-3.5" />
              </button>
            </div>
            <SyntaxHighlighter
              style={codeStyle}
              language={match[1]}
              PreTag="div"
              className="!bg-muted !m-0 !p-0"
              customStyle={{
                margin: 0,
                padding: '1rem',
                background: 'transparent',
                fontSize: '0.9rem',
              }}
              {...props}
            >
              {codeContent}
            </SyntaxHighlighter>
          </div>
        )
      }
      return (
        <code
          className="bg-muted relative rounded px-[0.3rem] py-[0.2rem] font-mono text-sm"
          {...props}
        >
          {children}
        </code>
      )
    },
    // Tables: horizontal scroll wrapper keeps wide tables from overflowing.
    table: ({ children, ...props }: any) => (
      <div className="my-6 w-full overflow-y-auto">
        <table className="w-full border-collapse text-sm" {...props}>
          {children}
        </table>
      </div>
    ),
    th: ({ children, ...props }: any) => (
      <th
        className="border-muted-foreground/20 border px-4 py-2 text-left font-medium [&[align=center]]:text-center [&[align=right]]:text-right"
        {...props}
      >
        {children}
      </th>
    ),
    td: ({ children, ...props }: any) => (
      <td
        className="border-muted-foreground/20 border px-4 py-2 text-left [&[align=center]]:text-center [&[align=right]]:text-right"
        {...props}
      >
        {children}
      </td>
    ),
    hr: ({ ...props }: any) => (
      <hr className="border-muted-foreground/20 my-8" {...props} />
    ),
  }
}
const MarkdownViewer: FC<MarkdownViewerProps> = memo(({ status }) => {
const [copied, setCopied] = useState(false)
const [currentVerId, setCurrentVerId] = useState<string>('')
const [selectedContent, setSelectedContent] = useState<string>('')
@@ -62,6 +282,10 @@ const MarkdownViewer: FC<MarkdownViewerProps> = ({ status }) => {
const [showTranscribe, setShowTranscribe] = useState(false)
const [viewMode, setViewMode] = useState<'map' | 'preview'>('preview')
const svgRef = useRef<SVGSVGElement>(null)
// 缓存 ReactMarkdown components仅在 baseURL 变化时重建
const markdownComponents = useMemo(() => createMarkdownComponents(baseURL), [baseURL])
// 多版本内容处理
useEffect(() => {
if (!currentTask) return
@@ -160,7 +384,7 @@ const MarkdownViewer: FC<MarkdownViewerProps> = ({ status }) => {
<div className="flex h-screen w-full flex-col items-center justify-center space-y-3 text-neutral-500">
<Idle />
<div className="text-center">
<p className="text-lg font-bold"></p>
<p className="text-lg font-bold">"生成笔记"</p>
<p className="mt-2 text-xs text-neutral-500">YouTube </p>
</div>
</div>
@@ -220,248 +444,9 @@ const MarkdownViewer: FC<MarkdownViewerProps> = ({ status }) => {
<ScrollArea className="w-full">
<div className={'markdown-body w-full px-2'}>
<ReactMarkdown
remarkPlugins={[gfm, remarkMath]}
rehypePlugins={[rehypeKatex]}
components={{
// Headings with improved styling and anchor links
h1: ({ children, ...props }) => (
<h1
className="text-primary my-6 scroll-m-20 text-3xl font-extrabold tracking-tight lg:text-4xl"
{...props}
>
{children}
</h1>
),
h2: ({ children, ...props }) => (
<h2
className="text-primary mt-10 mb-4 scroll-m-20 border-b pb-2 text-2xl font-semibold tracking-tight first:mt-0"
{...props}
>
{children}
</h2>
),
h3: ({ children, ...props }) => (
<h3
className="text-primary mt-8 mb-4 scroll-m-20 text-xl font-semibold tracking-tight"
{...props}
>
{children}
</h3>
),
h4: ({ children, ...props }) => (
<h4
className="text-primary mt-6 mb-2 scroll-m-20 text-lg font-semibold tracking-tight"
{...props}
>
{children}
</h4>
),
// Paragraphs with better line height
p: ({ children, ...props }) => (
<p className="leading-7 [&:not(:first-child)]:mt-6" {...props}>
{children}
</p>
),
// Enhanced links with special handling for "原片" links
a: ({ href, children, ...props }) => {
const isOriginLink =
typeof children[0] === 'string' &&
(children[0] as string).startsWith('原片 @')
if (isOriginLink) {
const timeMatch = (children[0] as string).match(/原片 @ (\d{2}:\d{2})/)
const timeText = timeMatch ? timeMatch[1] : '原片'
return (
<span className="origin-link my-2 inline-flex">
<a
href={href}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 rounded-full bg-blue-50 px-3 py-1 text-sm font-medium text-blue-700 transition-colors hover:bg-blue-100"
{...props}
>
<Play className="h-3.5 w-3.5" />
<span>{timeText}</span>
</a>
</span>
)
}
// Default link styling with external indicator
return (
<a
href={href}
target="_blank"
rel="noopener noreferrer"
className="text-primary hover:text-primary/80 inline-flex items-center gap-0.5 font-medium underline underline-offset-4"
{...props}
>
{children}
{href?.startsWith('http') && (
<ExternalLink className="ml-0.5 inline-block h-3 w-3" />
)}
</a>
)
},
// Enhanced image with zoom capability
img: ({ node, ...props }) =>{
// Fix the URL by removing the 'undefined' prefix if it exists
let src = props.src
if (src.startsWith('/')) {
src = baseURL + src
}
props.src = src
return(
<div className="my-8 flex justify-center">
<Zoom>
<img
{...props}
className="max-w-full cursor-zoom-in rounded-lg object-cover shadow-md transition-all hover:shadow-lg"
style={{ maxHeight: '500px' }}
/>
</Zoom>
</div>
)},
// Better strong/bold text
strong: ({ children, ...props }) => (
<strong className="text-primary font-bold" {...props}>
{children}
</strong>
),
// Enhanced list items with support for "fake headings"
li: ({ children, ...props }) => {
const rawText = String(children)
const isFakeHeading = /^(\*\*.+\*\*)$/.test(rawText.trim())
if (isFakeHeading) {
return (
<div className="text-primary my-4 text-lg font-bold">{children}</div>
)
}
return (
<li className="my-1" {...props}>
{children}
</li>
)
},
// Enhanced unordered lists
ul: ({ children, ...props }) => (
<ul className="my-6 ml-6 list-disc [&>li]:mt-2" {...props}>
{children}
</ul>
),
// Enhanced ordered lists
ol: ({ children, ...props }) => (
<ol className="my-6 ml-6 list-decimal [&>li]:mt-2" {...props}>
{children}
</ol>
),
// Enhanced blockquotes
blockquote: ({ children, ...props }) => (
<blockquote
className="border-primary/20 text-muted-foreground mt-6 border-l-4 pl-4 italic"
{...props}
>
{children}
</blockquote>
),
// Enhanced code blocks with syntax highlighting and copy button
code: ({ inline, className, children, ...props }) => {
const match = /language-(\w+)/.exec(className || '')
const codeContent = String(children).replace(/\n$/, '')
if (!inline && match) {
return (
<div className="group bg-muted relative my-6 overflow-hidden rounded-lg border shadow-sm">
<div className="bg-muted text-muted-foreground flex items-center justify-between px-4 py-1.5 text-sm font-medium">
<div>{match[1].toUpperCase()}</div>
<button
onClick={() => {
navigator.clipboard.writeText(codeContent)
toast.success('代码已复制')
}}
className="bg-background/80 hover:bg-background flex items-center gap-1 rounded-md px-2 py-1 text-xs font-medium transition-colors"
>
<Copy className="h-3.5 w-3.5" />
</button>
</div>
<SyntaxHighlighter
style={codeStyle}
language={match[1]}
PreTag="div"
className="!bg-muted !m-0 !p-0"
customStyle={{
margin: 0,
padding: '1rem',
background: 'transparent',
fontSize: '0.9rem',
}}
{...props}
>
{codeContent}
</SyntaxHighlighter>
</div>
)
}
// Inline code styling
return (
<code
className="bg-muted relative rounded px-[0.3rem] py-[0.2rem] font-mono text-sm"
{...props}
>
{children}
</code>
)
},
// Enhanced tables
table: ({ children, ...props }) => (
<div className="my-6 w-full overflow-y-auto">
<table className="w-full border-collapse text-sm" {...props}>
{children}
</table>
</div>
),
// Table headers
th: ({ children, ...props }) => (
<th
className="border-muted-foreground/20 border px-4 py-2 text-left font-medium [&[align=center]]:text-center [&[align=right]]:text-right"
{...props}
>
{children}
</th>
),
// Table cells
td: ({ children, ...props }) => (
<td
className="border-muted-foreground/20 border px-4 py-2 text-left [&[align=center]]:text-center [&[align=right]]:text-right"
{...props}
>
{children}
</td>
),
// Horizontal rule
hr: ({ ...props }) => (
<hr className="border-muted-foreground/20 my-8" {...props} />
),
}}
remarkPlugins={remarkPlugins}
rehypePlugins={rehypePlugins}
components={markdownComponents}
>
{selectedContent}
</ReactMarkdown>
@@ -488,6 +473,8 @@ const MarkdownViewer: FC<MarkdownViewerProps> = ({ status }) => {
)}
</div>
)
}
})
MarkdownViewer.displayName = 'MarkdownViewer'
export default MarkdownViewer

View File

@@ -13,7 +13,7 @@ import { zodResolver } from '@hookform/resolvers/zod'
import { z } from 'zod'
import { Info, Loader2, Plus } from 'lucide-react'
import { message, Alert } from 'antd'
import { Alert, AlertDescription } from '@/components/ui/alert.tsx'
import { generateNote } from '@/services/note.ts'
import { uploadFile } from '@/services/upload.ts'
import { useTaskStore } from '@/store/taskStore'
@@ -513,17 +513,11 @@ const NoteForm = () => {
)}
/>
</div>
<Alert
closable
type="error"
message={
<div>
<strong></strong>
<p>使</p>
</div>
}
className="text-sm"
/>
<Alert variant="warning" className="text-sm">
<AlertDescription>
<strong></strong>使
</AlertDescription>
</Alert>
</div>
{/* 笔记格式 */}

View File

@@ -14,7 +14,7 @@ import {
TooltipTrigger,
} from '@/components/ui/tooltip.tsx'
import LazyImage from "@/components/LazyImage.tsx";
import {FC, useState ,useEffect } from 'react'
import {FC, useState, useEffect, useMemo} from 'react'
interface NoteHistoryProps {
onSelect: (taskId: string) => void
@@ -28,10 +28,10 @@ const NoteHistory: FC<NoteHistoryProps> = ({ onSelect, selectedId }) => {
const baseURL = (String(import.meta.env.VITE_API_BASE_URL || 'api')).replace(/\/$/, '')
const [rawSearch, setRawSearch] = useState('')
const [search, setSearch] = useState('')
const fuse = new Fuse(tasks, {
const fuse = useMemo(() => new Fuse(tasks, {
keys: ['audioMeta.title'],
threshold: 0.4 // 匹配精度(越低越严格)
})
}), [tasks])
useEffect(() => {
const timer = setTimeout(() => {
if (rawSearch === '') return

View File

@@ -1,5 +1,6 @@
import {
BotMessageSquare,
Captions,
HardDriveDownload,
Info,
Activity,
@@ -14,14 +15,12 @@ const Menu = () => {
icon: <BotMessageSquare />,
path: '/settings/model',
},
// TODO :下一版本升级优化
// {
// id: ' transcriber',
// name: '音频转译配置',
// icon: <Captions />,
// path: '/settings/transcriber',
// },
// //下载配置
{
id: 'transcriber',
name: '音频转写配置',
icon: <Captions />,
path: '/settings/transcriber',
},
{
id: 'download',
name: '下载配置',

View File

@@ -1,8 +1,255 @@
const Transcriber = () => {
import { useState, useEffect, useCallback } from 'react'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Badge } from '@/components/ui/badge'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import { Alert, AlertDescription } from '@/components/ui/alert'
import { AudioLines, AlertTriangle, CheckCircle2, Download, Loader2, Save, XCircle } from 'lucide-react'
import { toast } from 'react-hot-toast'
import {
getTranscriberConfig,
updateTranscriberConfig,
getModelsStatus,
downloadModel,
TranscriberConfig,
ModelStatus,
} from '@/services/transcriber'
const isWhisperType = (type: string) =>
type === 'fast-whisper' || type === 'mlx-whisper'
export default function Transcriber() {
const [config, setConfig] = useState<TranscriberConfig | null>(null)
const [loading, setLoading] = useState(true)
const [saving, setSaving] = useState(false)
const [selectedType, setSelectedType] = useState('')
const [selectedModelSize, setSelectedModelSize] = useState('')
const [modelStatuses, setModelStatuses] = useState<ModelStatus[]>([])
const [mlxModelStatuses, setMlxModelStatuses] = useState<ModelStatus[]>([])
const [mlxAvailable, setMlxAvailable] = useState(false)
const fetchModelsStatus = useCallback(async () => {
try {
const data = await getModelsStatus()
setModelStatuses(data.whisper)
setMlxModelStatuses(data.mlx_whisper)
setMlxAvailable(data.mlx_available)
} catch {
// 静默失败,不阻塞主流程
}
}, [])
useEffect(() => {
const load = async () => {
try {
const data = await getTranscriberConfig()
setConfig(data)
setSelectedType(data.transcriber_type)
setSelectedModelSize(data.whisper_model_size)
} catch {
toast.error('获取转写器配置失败')
} finally {
setLoading(false)
}
}
load()
fetchModelsStatus()
}, [fetchModelsStatus])
// 有下载中的模型时自动轮询状态
useEffect(() => {
const hasDownloading =
modelStatuses.some(m => m.downloading) || mlxModelStatuses.some(m => m.downloading)
if (!hasDownloading) return
const timer = setInterval(fetchModelsStatus, 3000)
return () => clearInterval(timer)
}, [modelStatuses, mlxModelStatuses, fetchModelsStatus])
const handleSave = async () => {
setSaving(true)
try {
const payload: { transcriber_type: string; whisper_model_size?: string } = {
transcriber_type: selectedType,
}
if (isWhisperType(selectedType)) {
payload.whisper_model_size = selectedModelSize
}
await updateTranscriberConfig(payload)
toast.success('转写器配置已保存')
} catch {
toast.error('保存失败')
} finally {
setSaving(false)
}
}
const handleDownload = async (modelSize: string, transcriberType: string) => {
try {
await downloadModel({ model_size: modelSize, transcriber_type: transcriberType })
toast.success(`模型 ${modelSize} 开始下载`)
// 立即刷新状态
setTimeout(fetchModelsStatus, 1000)
} catch {
toast.error('下载请求失败')
}
}
if (loading) {
return (
<div className="flex h-screen w-full flex-col items-center justify-center">
<h1 className="text-center text-4xl font-bold">Transcriber is under development</h1>
<div className="flex h-64 items-center justify-center">
<Loader2 className="h-6 w-6 animate-spin text-neutral-400" />
</div>
)
}
if (!config) {
return <div className="p-6 text-center text-neutral-500"></div>
}
const currentModels = selectedType === 'mlx-whisper' ? mlxModelStatuses : modelStatuses
return (
<div className="space-y-6 p-6">
<div>
<h2 className="text-2xl font-semibold"></h2>
<p className="mt-1 text-sm text-neutral-500">
使
</p>
</div>
{/* 转写引擎选择 */}
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2 text-lg">
<AudioLines className="h-5 w-5" />
</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="space-y-2">
<label className="text-sm font-medium"></label>
<Select value={selectedType} onValueChange={setSelectedType}>
<SelectTrigger className="w-full max-w-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
{config.available_types.map(t => (
<SelectItem key={t.value} value={t.value}>
{t.label}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
{isWhisperType(selectedType) && (
<div className="space-y-2">
<label className="text-sm font-medium">Whisper </label>
<Select value={selectedModelSize} onValueChange={setSelectedModelSize}>
<SelectTrigger className="w-full max-w-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
{config.whisper_model_sizes.map(size => {
const status = currentModels.find(m => m.model_size === size)
return (
<SelectItem key={size} value={size}>
<span className="flex items-center gap-2">
{size}
{status?.downloaded && (
<CheckCircle2 className="h-3 w-3 text-green-500" />
)}
</span>
</SelectItem>
)
})}
</SelectContent>
</Select>
<p className="text-xs text-neutral-400">
</p>
</div>
)}
{selectedType === 'mlx-whisper' && !config.mlx_whisper_available && (
<Alert variant="warning" className="text-sm">
<AlertTriangle className="h-4 w-4" />
<AlertDescription>
MLX Whisper macOS {' '}
<code className="rounded bg-neutral-100 px-1">pip install mlx_whisper</code>
</AlertDescription>
</Alert>
)}
<Button onClick={handleSave} disabled={saving || (selectedType === 'mlx-whisper' && !config.mlx_whisper_available)} className="mt-2">
{saving ? (
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
) : (
<Save className="mr-2 h-4 w-4" />
)}
</Button>
</CardContent>
</Card>
{/* Whisper 模型管理 */}
{isWhisperType(selectedType) && currentModels.length > 0 && (
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2 text-lg">
<Download className="h-5 w-5" />
<span className="text-sm font-normal text-neutral-400">
{selectedType === 'mlx-whisper' ? 'MLX Whisper' : 'Faster Whisper'}
</span>
</CardTitle>
</CardHeader>
<CardContent>
<div className="space-y-3">
{currentModels.map(model => (
<div
key={model.model_size}
className="flex items-center justify-between rounded-md border px-4 py-3"
>
<div className="flex items-center gap-3">
<span className="font-medium">{model.model_size}</span>
{model.downloaded ? (
<Badge variant="default" className="bg-green-500 hover:bg-green-600">
</Badge>
) : model.downloading ? (
<Badge variant="secondary" className="flex items-center gap-1">
<Loader2 className="h-3 w-3 animate-spin" />
</Badge>
) : (
<Badge variant="outline"></Badge>
)}
</div>
{!model.downloaded && !model.downloading && (
<Button
size="sm"
variant="outline"
onClick={() => handleDownload(model.model_size, selectedType)}
>
<Download className="mr-1 h-4 w-4" />
</Button>
)}
</div>
))}
</div>
</CardContent>
</Card>
)}
</div>
)
}
export default Transcriber

View File

@@ -0,0 +1,43 @@
import request from '@/utils/request'

/**
 * Transcriber configuration payload returned by GET /transcriber_config.
 */
export interface TranscriberConfig {
  // Currently selected engine, e.g. 'fast-whisper' or 'mlx-whisper'.
  transcriber_type: string
  // Selected Whisper model size (only meaningful for whisper-type engines).
  whisper_model_size: string
  // Engines the backend can offer, as value/label pairs for the selector UI.
  available_types: { value: string; label: string }[]
  // Model sizes the backend supports for Whisper engines.
  whisper_model_sizes: string[]
  // Whether MLX Whisper is importable on the backend (macOS-only engine).
  mlx_whisper_available: boolean
}

/**
 * Download state of a single Whisper model, as reported by the backend.
 */
export interface ModelStatus {
  model_size: string
  downloaded: boolean
  downloading: boolean
}

/**
 * Response of GET /transcriber_models_status: per-engine model lists plus
 * MLX availability.
 */
export interface ModelsStatusResponse {
  whisper: ModelStatus[]
  mlx_whisper: ModelStatus[]
  mlx_available: boolean
}
/** Fetch the current transcriber configuration from the backend. */
export const getTranscriberConfig = async (): Promise<TranscriberConfig> => {
  const config: TranscriberConfig = await request.get('/transcriber_config')
  return config
}
/**
 * Persist transcriber settings. `whisper_model_size` is only relevant for
 * whisper-type engines and may be omitted otherwise.
 */
export const updateTranscriberConfig = async (data: {
  transcriber_type: string
  whisper_model_size?: string
}) => {
  const response = await request.post('/transcriber_config', data)
  return response
}
/** Query download status for all Whisper / MLX Whisper models. */
export const getModelsStatus = async (): Promise<ModelsStatusResponse> => {
  const statuses: ModelsStatusResponse = await request.get('/transcriber_models_status')
  return statuses
}
/**
 * Trigger a background model download on the backend.
 * `transcriber_type` selects which engine's model cache to download into.
 */
export const downloadModel = async (data: {
  model_size: string
  transcriber_type?: string
}) => {
  const response = await request.post('/transcriber_download', data)
  return response
}

View File

@@ -18,6 +18,17 @@ export default defineConfig(({ mode }) => {
'@': path.resolve(__dirname, './src'),
},
},
build: {
rollupOptions: {
output: {
manualChunks: {
markdown: ['react-markdown', 'react-syntax-highlighter', 'remark-gfm', 'remark-math', 'rehype-katex'],
markmap: ['markmap-lib', 'markmap-view', 'markmap-toolbar', 'markmap-common'],
vendor: ['react', 'react-dom', 'react-router-dom'],
},
},
},
},
server: {
host: '0.0.0.0',
port: port,

74
CLAUDE.md Normal file
View File

@@ -0,0 +1,74 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
BiliNote is an AI video note generation tool. It extracts content from video links (Bilibili, YouTube, Douyin, Kuaishou, local files) and generates structured Markdown notes using LLM models. Full-stack app with a FastAPI backend, React frontend, and optional Tauri desktop packaging.
## Development Commands
### Backend (Python 3.11 + FastAPI)
```bash
cd backend
pip install -r requirements.txt
python main.py # Starts on 0.0.0.0:8483
```
### Frontend (React 19 + Vite + TypeScript)
```bash
cd BillNote_frontend
pnpm install
pnpm dev # Dev server on port 3015, proxies /api to backend
pnpm build # Production build
pnpm lint # ESLint
```
### Docker
```bash
docker-compose up # Web stack (backend + frontend + nginx)
docker-compose -f docker-compose.gpu.yml up # GPU variant
```
### Desktop (Tauri)
```bash
cd backend && ./build.sh # Build PyInstaller backend binary
cd BillNote_frontend && pnpm tauri build
```
## Architecture
**Backend** (`backend/`) — FastAPI app, entry point `main.py`:
- `app/routers/` — API routes: `note.py` (generation), `provider.py`, `model.py`, `config.py`
- `app/services/` — Business logic: `note.py` (NoteGenerator orchestrates the full pipeline), `task_serial_executor.py` (task queue)
- `app/downloaders/` — Platform adapters (bilibili, youtube, douyin, kuaishou, local) with shared `base.py` interface
- `app/transcriber/` — Speech-to-text engines (fast-whisper, groq, bcut, kuaishou, mlx-whisper) with factory in `transcriber_provider.py`
- `app/gpt/` — LLM integration with factory pattern (`gpt_factory.py`), prompt templates (`prompt.py`, `prompt_builder.py`), and `request_chunker.py` for long transcripts
- `app/db/` — SQLite + SQLAlchemy: DAO pattern (`provider_dao.py`, `model_dao.py`, `video_task_dao.py`), models in `models/`
- `app/utils/` — `response.py` (ResponseWrapper for consistent JSON), `video_helper.py` (screenshots via FFmpeg), `export.py` (PDF/DOCX)
- `events/` (root level) — Blinker signal system for post-processing (e.g., temp file cleanup after transcription)
**Frontend** (`BillNote_frontend/src/`) — React 19 + Vite + Tailwind + shadcn/ui:
- `pages/HomePage/` — Main note generation UI: `NoteForm.tsx` (input), `MarkdownViewer.tsx` (preview), `MarkmapComponent.tsx` (mind map)
- `pages/SettingPage/` — LLM provider management, system monitoring, transcriber config
- `store/` — Zustand stores: `taskStore`, `modelStore`, `configStore`, `providerStore`
- `services/` — Axios API clients matching backend routes
- `hooks/useTaskPolling.ts` — Polls task status every 3 seconds
- `components/ui/` — shadcn/ui (Radix-based) components
- Path alias: `@` → `./src`
**Core Workflow**: User submits URL → task queued → download video → extract audio (FFmpeg) → transcribe (Whisper/Groq/etc) → generate notes (LLM) → frontend polls for completion → display Markdown + mind map.
## Key Configuration
- **Ports**: Backend 8483, Frontend dev 3015, Docker maps 3015→80
- **Environment**: Root `.env` (copy from `.env.example`). LLM API keys are configured through the UI, not env vars.
- **Database**: SQLite at `backend/app/db/bili_note.db`, auto-initialized on first run
- **FFmpeg**: Required system dependency for video/audio processing
- **Vite proxy**: Dev server proxies `/api` and `/static` to backend (configured in `vite.config.ts`, reads env from parent dir)
## Code Style
- **Frontend**: ESLint + Prettier (2 spaces, single quotes, 100 char width, Tailwind plugin). TypeScript strict mode.
- **Backend**: Python with type hints. No configured linter. Uses Pydantic models for validation.
- **Note**: The frontend directory is named `BillNote_frontend` (not "Bili").

View File

@@ -13,10 +13,19 @@ engine_args = {}
if DATABASE_URL.startswith("sqlite"):
engine_args["connect_args"] = {"check_same_thread": False}
_pool_args = {}
if not DATABASE_URL.startswith("sqlite"):
_pool_args = {
"pool_size": int(os.getenv("DB_POOL_SIZE", "10")),
"max_overflow": int(os.getenv("DB_MAX_OVERFLOW", "20")),
"pool_pre_ping": True,
}
engine = create_engine(
DATABASE_URL,
echo=os.getenv("SQLALCHEMY_ECHO", "false").lower() == "true",
**engine_args
**engine_args,
**_pool_args,
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

View File

@@ -26,6 +26,9 @@ class UniversalGPT(GPT):
self.max_request_bytes = int(os.getenv("OPENAI_MAX_REQUEST_BYTES", str(45 * 1024 * 1024)))
self.checkpoint_dir = Path(os.getenv("NOTE_OUTPUT_DIR", "note_results"))
self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
# 初始化时缓存重试配置,避免每次请求重复读取环境变量
self._max_retry_attempts = max(1, int(os.getenv("OPENAI_RETRY_ATTEMPTS", "3")))
self._retry_base_backoff = float(os.getenv("OPENAI_RETRY_BACKOFF_SECONDS", "1.5"))
def _format_time(self, seconds: float) -> str:
return str(timedelta(seconds=int(seconds)))[2:]
@@ -176,11 +179,8 @@ class UniversalGPT(GPT):
return status in {408, 409, 429, 500, 502, 503, 504, 524}
def _chat_completion_create(self, messages: list):
max_attempts = max(1, int(os.getenv("OPENAI_RETRY_ATTEMPTS", "3")))
base_backoff = float(os.getenv("OPENAI_RETRY_BACKOFF_SECONDS", "1.5"))
last_exc = None
for attempt in range(max_attempts):
for attempt in range(self._max_retry_attempts):
try:
return self.client.chat.completions.create(
model=self.model,
@@ -189,9 +189,9 @@ class UniversalGPT(GPT):
)
except Exception as exc:
last_exc = exc
if attempt == max_attempts - 1 or not self._is_retryable_error(exc):
if attempt == self._max_retry_attempts - 1 or not self._is_retryable_error(exc):
raise
sleep_seconds = base_backoff * (2 ** attempt)
sleep_seconds = self._retry_base_backoff * (2 ** attempt)
time.sleep(sleep_seconds)
if last_exc is not None:

View File

@@ -1,13 +1,23 @@
from fastapi import APIRouter, HTTPException
import os
import platform
from pathlib import Path
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel
from typing import Optional
from app.utils.response import ResponseWrapper as R
from app.utils.logger import get_logger
from app.utils.path_helper import get_model_dir
from app.services.cookie_manager import CookieConfigManager
from app.services.transcriber_config_manager import TranscriberConfigManager
from ffmpeg_helper import ensure_ffmpeg_or_raise
logger = get_logger(__name__)
router = APIRouter()
cookie_manager = CookieConfigManager()
transcriber_config_manager = TranscriberConfigManager()
class CookieUpdateRequest(BaseModel):
@@ -32,6 +42,165 @@ def update_cookie(data: CookieUpdateRequest):
)
class TranscriberConfigRequest(BaseModel):
    # Request payload for POST /transcriber_config.
    # Engine identifier; expected to be one of the "value" entries in AVAILABLE_TRANSCRIBER_TYPES.
    transcriber_type: str
    # Whisper model size (e.g. "base", "large-v3"); None leaves the stored size unchanged.
    whisper_model_size: Optional[str] = None
# Engine options surfaced to the frontend settings page.
# NOTE(review): labels normalized to consistently use full-width parentheses —
# the source appeared garbled ("Faster Whisper本地" vs "必剪(在线)"); confirm
# the intended display strings.
AVAILABLE_TRANSCRIBER_TYPES = [
    {"value": "fast-whisper", "label": "Faster Whisper(本地)"},
    {"value": "bcut", "label": "必剪(在线)"},
    {"value": "kuaishou", "label": "快手(在线)"},
    {"value": "groq", "label": "Groq(在线)"},
    {"value": "mlx-whisper", "label": "MLX Whisper(仅macOS)"},
]

# Whisper model sizes accepted by both the config endpoint and the download endpoint.
WHISPER_MODEL_SIZES = ["tiny", "base", "small", "medium", "large-v3", "large-v3-turbo"]
@router.get("/transcriber_config")
def get_transcriber_config():
    """Return the persisted transcriber config plus the option lists the UI needs.

    Also reports whether MLX Whisper could actually be imported, so the
    frontend can disable that choice when unavailable.
    """
    from app.transcriber.transcriber_provider import MLX_WHISPER_AVAILABLE

    payload = dict(transcriber_config_manager.get_config())
    payload["available_types"] = AVAILABLE_TRANSCRIBER_TYPES
    payload["whisper_model_sizes"] = WHISPER_MODEL_SIZES
    payload["mlx_whisper_available"] = MLX_WHISPER_AVAILABLE
    return R.success(data=payload)
@router.post("/transcriber_config")
def update_transcriber_config(data: TranscriberConfigRequest):
    """Validate and persist the transcriber settings.

    Previously any string was persisted unchecked; a typo'd engine name would
    only surface later as a runtime failure during note generation. Now invalid
    values are rejected up front, mirroring the validation in
    /transcriber_download.
    """
    valid_types = {t["value"] for t in AVAILABLE_TRANSCRIBER_TYPES}
    if data.transcriber_type not in valid_types:
        return R.error(msg=f"不支持的转写器类型: {data.transcriber_type}")
    if data.whisper_model_size is not None and data.whisper_model_size not in WHISPER_MODEL_SIZES:
        return R.error(msg=f"不支持的模型大小: {data.whisper_model_size}")
    config = transcriber_config_manager.update_config(
        transcriber_type=data.transcriber_type,
        whisper_model_size=data.whisper_model_size,
    )
    return R.success(data=config)
# ---- Whisper model download status & download trigger ----

# In-flight/background download state per model.
# key: model_size (or "mlx-<size>") -> "downloading" | "done" | "failed"
_downloading: dict[str, str] = {}


def _check_whisper_model_exists(model_size: str, subdir: str = "whisper") -> bool:
    """Return True when the given whisper model directory already exists locally."""
    return (Path(get_model_dir(subdir)) / f"whisper-{model_size}").exists()
@router.get("/transcriber_models_status")
def get_transcriber_models_status():
    """Report the local download state of every supported whisper model size.

    The mlx_whisper list is only populated on macOS; elsewhere it is empty and
    mlx_available is False.
    """
    whisper_statuses = [
        {
            "model_size": size,
            "downloaded": _check_whisper_model_exists(size, "whisper"),
            "downloading": _downloading.get(size) == "downloading",
        }
        for size in WHISPER_MODEL_SIZES
    ]

    # mlx-whisper models are only meaningful on Apple hardware.
    mlx_available = platform.system() == "Darwin"
    mlx_statuses = []
    if mlx_available:
        for size in WHISPER_MODEL_SIZES:
            candidate = os.path.join(get_model_dir("mlx-whisper"), f"mlx-community/whisper-{size}")
            mlx_statuses.append({
                "model_size": size,
                "downloaded": Path(candidate).exists(),
                "downloading": _downloading.get(f"mlx-{size}") == "downloading",
            })

    return R.success(data={
        "whisper": whisper_statuses,
        "mlx_whisper": mlx_statuses,
        "mlx_available": mlx_available,
    })
class ModelDownloadRequest(BaseModel):
    # Request payload for POST /transcriber_download.
    # One of WHISPER_MODEL_SIZES; validated by the endpoint before scheduling.
    model_size: str
    transcriber_type: str = "fast-whisper"  # "fast-whisper" or "mlx-whisper"
def _do_download_whisper(model_size: str):
    """Download a faster-whisper model in the background.

    Progress is reported via the module-level ``_downloading`` map so the
    /transcriber_models_status endpoint can surface it to the frontend.

    Fix: the imports now live inside the try-block and the "downloading"
    status is recorded first. Previously a missing ``modelscope`` package
    raised before any status was set, so the failure was never reported and
    the UI showed the model as idle.
    """
    try:
        _downloading[model_size] = "downloading"
        # Imported lazily so an absent dependency is recorded as "failed"
        # instead of escaping the background task unreported.
        from app.transcriber.whisper import MODEL_MAP
        from modelscope import snapshot_download

        model_dir = get_model_dir("whisper")
        model_path = os.path.join(model_dir, f"whisper-{model_size}")
        if Path(model_path).exists():
            # Already present locally — nothing to do.
            _downloading[model_size] = "done"
            return
        repo_id = MODEL_MAP.get(model_size)
        if not repo_id:
            # No known repo for this size; surface as a failed download.
            logger.error(f"whisper 模型无对应仓库: {model_size}")
            _downloading[model_size] = "failed"
            return
        logger.info(f"开始下载 whisper 模型: {model_size}")
        snapshot_download(repo_id, local_dir=model_path)
        logger.info(f"whisper 模型下载完成: {model_size}")
        _downloading[model_size] = "done"
    except Exception as e:
        logger.error(f"whisper 模型下载失败: {model_size}, {e}")
        _downloading[model_size] = "failed"
def _do_download_mlx_whisper(model_size: str):
    """Download an mlx-whisper model in the background.

    State is tracked in the module-level ``_downloading`` map under the
    "mlx-<size>" key so it does not collide with faster-whisper entries.
    """
    status_key = f"mlx-{model_size}"
    try:
        _downloading[status_key] = "downloading"
        from huggingface_hub import snapshot_download as hf_download

        repo = f"mlx-community/whisper-{model_size}"
        target = os.path.join(get_model_dir("mlx-whisper"), repo)
        if Path(target).exists():
            # Already present locally — nothing to do.
            _downloading[status_key] = "done"
            return
        logger.info(f"开始下载 mlx-whisper 模型: {model_size}")
        hf_download(repo, local_dir=target, local_dir_use_symlinks=False)
        logger.info(f"mlx-whisper 模型下载完成: {model_size}")
        _downloading[status_key] = "done"
    except Exception as e:
        logger.error(f"mlx-whisper 模型下载失败: {model_size}, {e}")
        _downloading[status_key] = "failed"
@router.post("/transcriber_download")
def download_transcriber_model(data: ModelDownloadRequest, background_tasks: BackgroundTasks):
    """Trigger a background download of the requested whisper model.

    Validates the engine and model size, de-duplicates in-flight downloads,
    and schedules the actual work as a FastAPI background task.

    Fix: an unknown ``transcriber_type`` is now rejected explicitly instead
    of silently falling through to the fast-whisper branch.
    """
    if data.model_size not in WHISPER_MODEL_SIZES:
        return R.error(msg=f"不支持的模型大小: {data.model_size}")
    if data.transcriber_type not in ("fast-whisper", "mlx-whisper"):
        return R.error(msg=f"不支持的转写器类型: {data.transcriber_type}")
    if data.transcriber_type == "mlx-whisper":
        if platform.system() != "Darwin":
            return R.error(msg="MLX Whisper 仅支持 macOS")
        key = f"mlx-{data.model_size}"
        if _downloading.get(key) == "downloading":
            return R.success(msg="模型正在下载中")
        background_tasks.add_task(_do_download_mlx_whisper, data.model_size)
    else:
        if _downloading.get(data.model_size) == "downloading":
            return R.success(msg="模型正在下载中")
        background_tasks.add_task(_do_download_whisper, data.model_size)
    return R.success(msg="模型下载已开始")
@router.get("/sys_health")
async def sys_health():
try:
@@ -59,9 +228,10 @@ async def deploy_status():
"gpu_name": torch.cuda.get_device_name(0) if cuda_available else None,
}
# Whisper 模型状态
model_size = os.getenv("WHISPER_MODEL_SIZE", "base")
transcriber_type = os.getenv("TRANSCRIBER_TYPE", "fast-whisper")
# Whisper 模型状态(从配置文件读取,与前端设置同步)
transcriber_cfg = transcriber_config_manager.get_config()
model_size = transcriber_cfg["whisper_model_size"]
transcriber_type = transcriber_cfg["transcriber_type"]
# FFmpeg 状态
try:

View File

@@ -101,7 +101,7 @@ def run_note_task(task_id: str, video_url: str, platform: str, quality: Download
grid_size=grid_size,
)
logger.info(f"任务进入行队列,等待执行 (task_id={task_id})")
logger.info(f"任务进入行队列 (task_id={task_id})")
note = task_serial_executor.run(_execute_note_task)
logger.info(f"Note generated: {task_id}")
if not note or not note.markdown:

View File

@@ -66,9 +66,11 @@ class NoteGenerator:
"""
def __init__(self):
self.model_size: str = "base"
from app.services.transcriber_config_manager import TranscriberConfigManager
config_manager = TranscriberConfigManager()
self.model_size: str = config_manager.get_whisper_model_size()
self.device: Optional[str] = None
self.transcriber_type: str = os.getenv("TRANSCRIBER_TYPE", "fast-whisper")
self.transcriber_type: str = config_manager.get_transcriber_type()
self.transcriber: Transcriber = self._init_transcriber()
self.video_path: Optional[Path] = None
self.video_img_urls=[]

View File

@@ -1,14 +1,23 @@
import threading
import os
from concurrent.futures import ThreadPoolExecutor, Future
from typing import Any, Callable
class SerialTaskExecutor:
def __init__(self):
self._lock = threading.Lock()
class ConcurrentTaskExecutor:
"""使用线程池并发执行任务,替代原来的串行锁。"""
def __init__(self, max_workers: int | None = None):
self._max_workers = max_workers or int(os.getenv("TASK_MAX_WORKERS", "3"))
self._pool = ThreadPoolExecutor(max_workers=self._max_workers)
def run(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
with self._lock:
return fn(*args, **kwargs)
future: Future = self._pool.submit(fn, *args, **kwargs)
return future.result()
def shutdown(self, wait: bool = True):
self._pool.shutdown(wait=wait)
task_serial_executor = SerialTaskExecutor()
# 保持向后兼容的导出名
SerialTaskExecutor = ConcurrentTaskExecutor
task_serial_executor = ConcurrentTaskExecutor()

View File

@@ -0,0 +1,58 @@
import json
import os
from pathlib import Path
from typing import Optional, Dict, Any
class TranscriberConfigManager:
    """Manage transcriber settings persisted in a JSON file.

    The JSON file is the source of truth so the frontend can change the
    engine/model at runtime; environment variables only supply fallback
    defaults when a key is absent from the file.
    """

    def __init__(self, filepath: str = "config/transcriber.json"):
        # NOTE(review): the default path is relative to the process CWD, so
        # instances created from different working directories would read
        # different files — confirm all callers run from the backend root.
        self.path = Path(filepath)
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def _read(self) -> Dict[str, Any]:
        """Return the raw stored config; a missing or corrupt file yields {}."""
        if not self.path.exists():
            return {}
        try:
            with self.path.open("r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            # Corrupt JSON falls back to defaults rather than crashing note generation.
            return {}

    def _write(self, data: Dict[str, Any]):
        """Persist the config atomically (temp file + rename).

        Fix: the original wrote the file in place, so a crash or a concurrent
        writer could leave a truncated/corrupt config. ``os.replace`` is atomic
        on both POSIX and Windows, so readers always see a complete file.
        """
        tmp_path = self.path.with_suffix(self.path.suffix + ".tmp")
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.path)

    def get_config(self) -> Dict[str, Any]:
        """Return the effective config, falling back to env-var defaults."""
        data = self._read()
        return {
            "transcriber_type": data.get(
                "transcriber_type",
                os.getenv("TRANSCRIBER_TYPE", "fast-whisper"),
            ),
            "whisper_model_size": data.get(
                "whisper_model_size",
                os.getenv("WHISPER_MODEL_SIZE", "medium"),
            ),
        }

    def update_config(
        self,
        transcriber_type: str,
        whisper_model_size: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Update and persist the config; returns the new effective config.

        Passing whisper_model_size=None keeps the previously stored size.
        """
        data = self._read()
        data["transcriber_type"] = transcriber_type
        if whisper_model_size is not None:
            data["whisper_model_size"] = whisper_model_size
        self._write(data)
        return self.get_config()

    def get_transcriber_type(self) -> str:
        """Convenience accessor for the configured engine type."""
        return self.get_config()["transcriber_type"]

    def get_whisper_model_size(self) -> str:
        """Convenience accessor for the configured whisper model size."""
        return self.get_config()["whisper_model_size"]

View File

@@ -17,15 +17,15 @@ class TranscriberType(str, Enum):
KUAISHOU = "kuaishou"
GROQ = "groq"
# 在 Apple 平台启用 MLX Whisper
# 在 Apple 平台尝试导入 MLX Whisper(不再依赖环境变量,支持前端动态切换)
MLX_WHISPER_AVAILABLE = False
if platform.system() == "Darwin" and os.environ.get("TRANSCRIBER_TYPE") == "mlx-whisper":
if platform.system() == "Darwin":
try:
from app.transcriber.mlx_whisper_transcriber import MLXWhisperTranscriber
MLX_WHISPER_AVAILABLE = True
logger.info("MLX Whisper 可用,已导入")
except ImportError:
logger.warning("MLX Whisper 导入失败,可能未安装或平台不支持")
logger.warning("MLX Whisper 导入失败,可能未安装 mlx_whisper")
logger.info('初始化转录服务提供器')
@@ -97,8 +97,10 @@ def get_transcriber(transcriber_type="fast-whisper", model_size="base", device="
elif transcriber_enum == TranscriberType.MLX_WHISPER:
if not MLX_WHISPER_AVAILABLE:
logger.warning("MLX Whisper 不可用,回退到 fast-whisper")
return get_whisper_transcriber(whisper_model_size, device=device)
raise RuntimeError(
"MLX Whisper 不可用:需要 macOS 平台并安装 mlx_whisper 包 (pip install mlx_whisper)。"
"请在「音频转写配置」页面切换到其他转写引擎。"
)
return get_mlx_whisper_transcriber(whisper_model_size)
elif transcriber_enum == TranscriberType.BCUT:

View File

@@ -3,6 +3,7 @@ import hashlib
import os
import re
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import ffmpeg
from PIL import Image, ImageDraw, ImageFont
@@ -54,6 +55,18 @@ class VideoReader:
return mm * 60 + ss
return float('inf')
def _extract_single_frame(self, ts: int) -> str | None:
    """Extract one video frame at second ``ts``; return its path, or None if ffmpeg fails."""
    label = self.format_time(ts)
    out_path = os.path.join(self.frame_dir, f"frame_{label}.jpg")
    command = [
        "ffmpeg",
        "-ss", str(ts),
        "-i", self.video_path,
        "-frames:v", "1",
        "-q:v", "2",
        "-y", out_path,
        "-hide_banner",
        "-loglevel", "error",
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        # Caller (extract_frames) skips None entries, so a single bad
        # timestamp does not abort the whole extraction.
        return None
    return out_path
def extract_frames(self, max_frames=1000) -> list[str]:
try:
@@ -61,14 +74,22 @@ class VideoReader:
duration = float(ffmpeg.probe(self.video_path)["format"]["duration"])
timestamps = [i for i in range(0, int(duration), self.frame_interval)][:max_frames]
# 并行提取帧
max_workers = min(os.cpu_count() or 4, 8, len(timestamps))
frame_results: dict[int, str | None] = {}
with ThreadPoolExecutor(max_workers=max_workers) as pool:
futures = {pool.submit(self._extract_single_frame, ts): ts for ts in timestamps}
for future in as_completed(futures):
ts = futures[future]
frame_results[ts] = future.result()
# 按时间戳顺序整理结果,并进行去重
image_paths = []
last_hash = None
for ts in timestamps:
time_label = self.format_time(ts)
output_path = os.path.join(self.frame_dir, f"frame_{time_label}.jpg")
cmd = ["ffmpeg", "-ss", str(ts), "-i", self.video_path, "-frames:v", "1", "-q:v", "2", "-y", output_path,
"-hide_banner", "-loglevel", "error"]
subprocess.run(cmd, check=True)
output_path = frame_results.get(ts)
if not output_path or not os.path.exists(output_path):
continue
if self.dedupe_enabled:
frame_hash = self._calculate_file_md5(output_path)

View File

@@ -4,6 +4,7 @@ from contextlib import asynccontextmanager
import uvicorn
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware
from starlette.staticfiles import StaticFiles
from dotenv import load_dotenv
@@ -14,7 +15,7 @@ from app.exceptions.exception_handlers import register_exception_handlers
# from app.db.provider_dao import init_provider_table
from app.utils.logger import get_logger
from app import create_app
from app.transcriber.transcriber_provider import get_transcriber
from app.services.transcriber_config_manager import TranscriberConfigManager
from events import register_handler
from ffmpeg_helper import ensure_ffmpeg_or_raise
@@ -40,7 +41,10 @@ if not os.path.exists(out_dir):
async def lifespan(app: FastAPI):
register_handler()
init_db()
get_transcriber(transcriber_type=os.getenv("TRANSCRIBER_TYPE", "fast-whisper"))
# 转写器不再在启动时强制初始化,而是在首次生成笔记时按需创建
# 如果配置了不可用的类型(如 mlx-whisper 未安装),会在使用时报错而非静默回退
_cfg = TranscriberConfigManager().get_config()
logger.info(f"当前转写器配置: type={_cfg['transcriber_type']}, model_size={_cfg['whisper_model_size']}")
seed_default_providers()
yield
@@ -58,6 +62,7 @@ app.add_middleware(
allow_methods=["*"],
allow_headers=["*"],
)
app.add_middleware(GZipMiddleware, minimum_size=1000)
register_exception_handlers(app)
app.mount(static_path, StaticFiles(directory=static_dir), name="static")
app.mount("/uploads", StaticFiles(directory=uploads_dir), name="uploads")

View File

@@ -14,6 +14,14 @@ services:
- ./backend:/app
expose:
- "${BACKEND_PORT}" # 不再对外暴露,用于 nginx 内部通信
restart: on-failure:3
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:${BACKEND_PORT}/api/sys_health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 15s
mem_limit: 4g
frontend:
container_name: bilinote-frontend
@@ -24,6 +32,8 @@ services:
- .env
expose:
- "80" # 不暴露给宿主机,只供 nginx 访问
restart: on-failure:3
mem_limit: 512m
nginx:
container_name: bilinote-nginx
@@ -33,5 +43,9 @@ services:
volumes:
- ./nginx/default.conf:/etc/nginx/conf.d/default.conf
depends_on:
- backend
- frontend
backend:
condition: service_healthy
frontend:
condition: service_started
restart: on-failure:3
mem_limit: 256m

View File

@@ -1,6 +1,14 @@
server {
listen 80;
client_max_body_size 10G;
# gzip 压缩
gzip on;
gzip_vary on;
gzip_min_length 1024;
gzip_proxied any;
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
# 所有非 /api 请求全部代理给 frontend 容器
location / {
proxy_pass http://frontend:80;
@@ -17,6 +25,7 @@ server {
proxy_pass http://backend:8483/static/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
expires 7d;
add_header Cache-Control "public, immutable";
}
}