mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-06-24 01:03:42 +08:00
Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d67e5a76a | ||
|
|
e21e329d42 | ||
|
|
55946b82ec | ||
|
|
5380f4c412 | ||
|
|
4ef3631712 | ||
|
|
4a87c5b93b | ||
|
|
3841719d5a | ||
|
|
f072996db5 | ||
|
|
9d424aef59 | ||
|
|
48568e52c6 | ||
|
|
7baaefd76e | ||
|
|
3002e311ac | ||
|
|
ad57bc5489 | ||
|
|
a7d8995f3a | ||
|
|
67486c4d66 | ||
|
|
54714faa73 | ||
|
|
f6ba347718 | ||
|
|
67253a1bf2 | ||
|
|
f79dc612fb | ||
|
|
ab9ca6a026 | ||
|
|
2ba409880e | ||
|
|
16a0dd4aec | ||
|
|
39d051cc36 | ||
|
|
71c931be55 | ||
|
|
f5bfb43619 | ||
|
|
24b4d4c57c | ||
|
|
7d4573f84e |
44
.github/workflows/claude-code-review.yml
vendored
Normal file
44
.github/workflows/claude-code-review.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Claude Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, ready_for_review, reopened]
|
||||
# Optional: Only run on specific file changes
|
||||
# paths:
|
||||
# - "src/**/*.ts"
|
||||
# - "src/**/*.tsx"
|
||||
# - "src/**/*.js"
|
||||
# - "src/**/*.jsx"
|
||||
|
||||
jobs:
|
||||
claude-review:
|
||||
# Optional: Filter by PR author
|
||||
# if: |
|
||||
# github.event.pull_request.user.login == 'external-contributor' ||
|
||||
# github.event.pull_request.user.login == 'new-developer' ||
|
||||
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code Review
|
||||
id: claude-review
|
||||
uses: anthropics/claude-code-action@v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
|
||||
plugins: 'code-review@claude-code-plugins'
|
||||
prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://code.claude.com/docs/en/cli-reference for available options
|
||||
|
||||
50
.github/workflows/claude.yml
vendored
Normal file
50
.github/workflows/claude.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Claude Code
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_review_comment:
|
||||
types: [created]
|
||||
issues:
|
||||
types: [opened, assigned]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
jobs:
|
||||
claude:
|
||||
if: |
|
||||
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
||||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
actions: read # Required for Claude to read CI results on PRs
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code
|
||||
id: claude
|
||||
uses: anthropics/claude-code-action@v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
|
||||
# This is an optional setting that allows Claude to read CI results on PRs
|
||||
additional_permissions: |
|
||||
actions: read
|
||||
|
||||
# Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
|
||||
# prompt: 'Update the pull request description to include a summary of changes.'
|
||||
|
||||
# Optional: Add claude_args to customize behavior and configuration
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://code.claude.com/docs/en/cli-reference for available options
|
||||
# claude_args: '--allowed-tools Bash(gh pr *)'
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"$schema": "../node_modules/@tauri-apps/cli/config.schema.json",
|
||||
"productName": "BiliNote",
|
||||
"version": "2.4.0",
|
||||
"version": "2.4.4",
|
||||
"identifier": "com.jefferyhuang.bilinote",
|
||||
"build": {
|
||||
"frontendDist": "../dist",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import { useState, useEffect, useCallback, useRef } from 'react'
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
@@ -40,6 +40,9 @@ export default function Transcriber() {
|
||||
const [newModelName, setNewModelName] = useState('')
|
||||
const [newModelTarget, setNewModelTarget] = useState('')
|
||||
const [addingModel, setAddingModel] = useState(false)
|
||||
// 已提示过的下载失败 key(whisper 用 model_size,mlx 用 mlx-{size})。
|
||||
// null 表示尚未首次加载——首次加载只建立基线、不对历史失败弹窗。
|
||||
const prevFailedRef = useRef<Set<string> | null>(null)
|
||||
|
||||
// 重新拉取配置(不重置用户当前的选择),用于增删自定义模型后刷新下拉与列表
|
||||
const reloadConfig = useCallback(async () => {
|
||||
@@ -56,6 +59,23 @@ export default function Transcriber() {
|
||||
setModelStatuses(data.whisper)
|
||||
setMlxModelStatuses(data.mlx_whisper)
|
||||
setMlxAvailable(data.mlx_available)
|
||||
|
||||
// 下载失败主动提示:只对「本次新出现的失败」弹一次,避免轮询期间反复弹窗
|
||||
const failedNow = new Map<string, ModelStatus>()
|
||||
data.whisper.forEach(m => m.failed && failedNow.set(m.model_size, m))
|
||||
data.mlx_whisper.forEach(m => m.failed && failedNow.set(`mlx-${m.model_size}`, m))
|
||||
if (prevFailedRef.current === null) {
|
||||
// 首次加载:建立基线,不对进入页面前就已失败的项弹窗(仍会在列表里红字展示)
|
||||
prevFailedRef.current = new Set(failedNow.keys())
|
||||
} else {
|
||||
failedNow.forEach((m, key) => {
|
||||
if (!prevFailedRef.current!.has(key)) {
|
||||
const detail = m.error ? `:${m.error.slice(0, 120)}` : ''
|
||||
toast.error(`模型 ${m.model_size} 下载失败${detail}`, { duration: 6000 })
|
||||
}
|
||||
})
|
||||
prevFailedRef.current = new Set(failedNow.keys())
|
||||
}
|
||||
} catch {
|
||||
// 静默失败,不阻塞主流程
|
||||
}
|
||||
@@ -290,32 +310,44 @@ export default function Transcriber() {
|
||||
{currentModels.map(model => (
|
||||
<div
|
||||
key={model.model_size}
|
||||
className="flex items-center justify-between rounded-md border px-4 py-3"
|
||||
className="rounded-md border px-4 py-3"
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="font-medium">{model.model_size}</span>
|
||||
{model.downloaded ? (
|
||||
<Badge variant="default" className="bg-green-500 hover:bg-green-600">
|
||||
已下载
|
||||
</Badge>
|
||||
) : model.downloading ? (
|
||||
<Badge variant="secondary" className="flex items-center gap-1">
|
||||
<Loader2 className="h-3 w-3 animate-spin" />
|
||||
下载中
|
||||
</Badge>
|
||||
) : (
|
||||
<Badge variant="outline">未下载</Badge>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="font-medium">{model.model_size}</span>
|
||||
{model.downloaded ? (
|
||||
<Badge variant="default" className="bg-green-500 hover:bg-green-600">
|
||||
已下载
|
||||
</Badge>
|
||||
) : model.downloading ? (
|
||||
<Badge variant="secondary" className="flex items-center gap-1">
|
||||
<Loader2 className="h-3 w-3 animate-spin" />
|
||||
下载中
|
||||
</Badge>
|
||||
) : model.failed ? (
|
||||
<Badge variant="destructive" className="flex items-center gap-1" title={model.error}>
|
||||
<XCircle className="h-3 w-3" />
|
||||
下载失败
|
||||
</Badge>
|
||||
) : (
|
||||
<Badge variant="outline">未下载</Badge>
|
||||
)}
|
||||
</div>
|
||||
{!model.downloaded && !model.downloading && (
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
onClick={() => handleDownload(model.model_size, selectedType)}
|
||||
>
|
||||
<Download className="mr-1 h-4 w-4" />
|
||||
{model.failed ? '重试' : '下载'}
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
{!model.downloaded && !model.downloading && (
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
onClick={() => handleDownload(model.model_size, selectedType)}
|
||||
>
|
||||
<Download className="mr-1 h-4 w-4" />
|
||||
下载
|
||||
</Button>
|
||||
{model.failed && model.error && (
|
||||
<p className="mt-2 break-all text-xs text-red-500" title={model.error}>
|
||||
{model.error}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
@@ -368,10 +400,18 @@ export default function Transcriber() {
|
||||
{status?.downloading && (
|
||||
<Loader2 className="h-3.5 w-3.5 animate-spin text-neutral-400" />
|
||||
)}
|
||||
{status?.failed && (
|
||||
<XCircle className="h-3.5 w-3.5 text-red-500" />
|
||||
)}
|
||||
</div>
|
||||
<div className="truncate text-xs text-neutral-400" title={target}>
|
||||
{target}
|
||||
</div>
|
||||
{status?.failed && status?.error && (
|
||||
<div className="truncate text-xs text-red-500" title={status.error}>
|
||||
{status.error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
|
||||
@@ -16,6 +16,10 @@ export interface ModelStatus {
|
||||
model_size: string
|
||||
downloaded: boolean
|
||||
downloading: boolean
|
||||
/** 后台下载失败(仓库 404、网络中断、本地路径缺 model.bin 等)。后端从此字段透传 */
|
||||
failed?: boolean
|
||||
/** 下载失败时的原因(仅 failed 时存在),用于前端提示 */
|
||||
error?: string
|
||||
}
|
||||
|
||||
export interface ModelsStatusResponse {
|
||||
|
||||
30
CHANGELOG.md
30
CHANGELOG.md
@@ -2,6 +2,36 @@
|
||||
|
||||
本项目所有重要变更记录于此。格式参考 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.1.0/),遵循 [语义化版本](https://semver.org/lang/zh-CN/)。
|
||||
|
||||
## [2.4.4] - 2026-06-23
|
||||
|
||||
### Security
|
||||
|
||||
- **升级 Starlette 0.46.1 → 0.47.2 修复 CVE-2025-54121**([GHSA-2c2j-9gv5-cj73](https://github.com/advisories/GHSA-2c2j-9gv5-cj73),#411):旧版在解析 multipart 表单中的大文件时,`SpooledTemporaryFile` 从内存溢写到磁盘的 rollover 在事件循环线程内**同步执行**,攻击者可借大文件上传阻塞事件循环造成拒绝服务(DoS)。新版把该写入移到线程池。FastAPI 同步升级 0.115.12 → 0.116.2(其 starlette 约束由 `<0.47.0` 放宽,以容纳修复版本),与现有 pydantic / anyio / python-multipart 均兼容。
|
||||
|
||||
## [2.4.3] - 2026-06-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Whisper `large-v3-turbo` 模型无法下载**(#402):内置映射指向的 `Systran/faster-whisper-large-v3-turbo` 仓库已从 HuggingFace 下架(返回 401/404),点击下载会静默失败、状态一直显示「未下载」。改用社区维护的 CT2 转换版 `deepdml/faster-whisper-large-v3-turbo-ct2`(直链可达、含 `model.bin`,与 faster-whisper 的 `large-v3-turbo` 等价)。
|
||||
- **模型下载失败时前端无任何提示**(#402 衍生):`/transcriber_models_status` 此前只回传 `downloading`/`downloaded`,后台下载失败状态被丢弃。现新增 `model_download_state` 统一维护下载状态与失败原因,状态接口新增 `failed` 字段并透传 `error`;前端模型列表展示「下载失败」徽标 + 错误详情,按钮变为「重试」,并对新出现的失败弹出提示。
|
||||
|
||||
## [2.4.2] - 2026-06-17
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Docker 部署打开显示 nginx 欢迎页**:`nginx/default.conf` 被 docker-compose(多容器)与 `Dockerfile.complete`(单镜像)共用,但两种模式对 `location /` 的需求相反(多容器需反代独立的 frontend 容器,单镜像需直接服务本地静态文件),导致其中一种部署方式总会回退到 nginx 默认欢迎页。现拆分为两份配置:`nginx/default.conf`(compose,反代 frontend 容器)与新增的 `nginx/standalone.conf`(单镜像,静态前端 + 本地 backend 代理);`Dockerfile.complete` 改用后者并删除 Debian 默认站点,两种部署方式均恢复正常。
|
||||
|
||||
## [2.4.1] - 2026-06-17
|
||||
|
||||
### Added
|
||||
|
||||
- **YouTube Shorts 链接支持**:后端 URL 校验与 video id 提取支持 `youtube.com/shorts/<id>` 形态,Shorts 链接可正常提交生成笔记(#381)。
|
||||
|
||||
### Fixed
|
||||
|
||||
- **B 站 412(wbi/playurl 风控)**:B 站 `x/player/wbi/playurl` 网关新增 `dm_img_list`/`dm_img_str`/`dm_cover_img_str`/`dm_img_inter` + `web_location` 风控校验,缺失即返回 HTTP 412。多数视频网页内嵌 playinfo、yt-dlp 不调此 API;而网页不内嵌 playinfo、必须走 API 的视频(如 BV1X9L16oEgB)会撞上风控,刷新 cookie 无效、yt-dlp(含最新版)上游尚未适配。现于 wbi 签名前注入哑值 dm_img 风控参数(形态对齐 yt-dlp 自身 arc/search 用法)恢复 200(#410)。
|
||||
- **B 站分 P 视频字幕取错集**:分 P 视频提交 `?p=N` 时,字幕优先链路未透传 p 参数,始终取第 1 集 cid,导致笔记内容与实际下载的 p=N 音频不一致。现从 `data.pages[N-1]` 取对应分 P 的 cid(#409)。
|
||||
|
||||
## [2.4.0] - 2026-06-07
|
||||
|
||||
### Added
|
||||
|
||||
@@ -90,9 +90,11 @@ WORKDIR /app/backend
|
||||
# 复制前端静态文件到 nginx
|
||||
COPY --from=frontend-builder /tmp/frontend/dist /usr/share/nginx/html
|
||||
|
||||
# 配置 nginx
|
||||
# 配置 nginx(单镜像版:前端静态文件 + 本地 backend 代理,见 nginx/standalone.conf)
|
||||
RUN rm -rf /etc/nginx/conf.d/default.conf
|
||||
COPY ./nginx/default.conf /etc/nginx/conf.d/default.conf
|
||||
# 删除默认 nginx site,防止 default_server 劫持 80 端口
|
||||
RUN rm -f /etc/nginx/sites-enabled/default
|
||||
COPY ./nginx/standalone.conf /etc/nginx/conf.d/default.conf
|
||||
|
||||
# 创建 supervisor 配置
|
||||
# 关键点:supervisord 默认 *不* 把自己的环境变量传给子进程。
|
||||
@@ -127,9 +129,7 @@ priority=20
|
||||
environment=BACKEND_PORT="%(ENV_BACKEND_PORT)s",BACKEND_HOST="%(ENV_BACKEND_HOST)s",TRANSCRIBER_TYPE="%(ENV_TRANSCRIBER_TYPE)s",WHISPER_MODEL_SIZE="%(ENV_WHISPER_MODEL_SIZE)s",FFMPEG_BIN_PATH="%(ENV_FFMPEG_BIN_PATH)s",HF_ENDPOINT="%(ENV_HF_ENDPOINT)s",STATIC="%(ENV_STATIC)s",OUT_DIR="%(ENV_OUT_DIR)s",DATA_DIR="%(ENV_DATA_DIR)s",NOTE_OUTPUT_DIR="%(ENV_NOTE_OUTPUT_DIR)s",DATABASE_URL="%(ENV_DATABASE_URL)s",IMAGE_BASE_URL="%(ENV_IMAGE_BASE_URL)s",ENV="%(ENV_ENV)s",GROQ_TRANSCRIBER_MODEL="%(ENV_GROQ_TRANSCRIBER_MODEL)s"
|
||||
EOF
|
||||
|
||||
# 修改 nginx 配置以使用本地 backend
|
||||
RUN sed -i 's/proxy_pass http:\/\/backend:8483/proxy_pass http:\/\/127.0.0.1:8483/g' /etc/nginx/conf.d/default.conf && \
|
||||
sed -i 's/proxy_pass http:\/\/frontend:80/proxy_pass http:\/\/127.0.0.1:8080/g' /etc/nginx/conf.d/default.conf
|
||||
# nginx/standalone.conf 已直接写好本地 backend(127.0.0.1:8483)与前端静态服务,无需再 sed 改写。
|
||||
|
||||
# 启动 supervisor
|
||||
# 推荐启动方式(覆盖默认 env):
|
||||
|
||||
13
README.md
13
README.md
@@ -3,7 +3,7 @@
|
||||
<p align="center">
|
||||
<img src="./doc/icon.svg" alt="BiliNote Banner" width="50" height="50" />
|
||||
</p>
|
||||
<h1 align="center" > BiliNote v2.4.0</h1>
|
||||
<h1 align="center" > BiliNote v2.4.4</h1>
|
||||
</div>
|
||||
|
||||
<p align="center"><i>AI 视频笔记生成工具 让 AI 为你的视频做笔记</i></p>
|
||||
@@ -386,18 +386,11 @@ docker-compose -f docker-compose.gpu.yml up --build -d
|
||||
|
||||
### Contact and Join-联系和加入社区
|
||||
|
||||
扫码加入 BiliNote 交流微信群(共 5 个群,任选一个即可;二维码会定期更新,如已失效请到 [Issues](https://github.com/JefferyHcool/BiliNote/issues) 反馈):
|
||||
扫描下方公众号二维码,关注后回复 **「交流群」** 即可获取最新的微信交流群二维码(群码会自动更新,避免过期失效):
|
||||
|
||||
<table align="center">
|
||||
<tr>
|
||||
<td align="center"><img src="./doc/wechat-group-1.png" alt="BiliNote 交流群 1" width="200" /><br/>交流群 1</td>
|
||||
<td align="center"><img src="./doc/wechat-group-2.png" alt="BiliNote 交流群 2" width="200" /><br/>交流群 2</td>
|
||||
<td align="center"><img src="./doc/wechat-group-3.png" alt="BiliNote 交流群 3" width="200" /><br/>交流群 3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center"><img src="./doc/wechat-group-4.png" alt="BiliNote 交流群 4" width="200" /><br/>交流群 4</td>
|
||||
<td align="center"><img src="./doc/wechat-group-5.png" alt="BiliNote 交流群 5" width="200" /><br/>交流群 5</td>
|
||||
<td></td>
|
||||
<td align="center"><img src="./doc/wechat-gzh.png" alt="BiliNote 公众号" width="200" /><br/>BiliNote 公众号</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
71
backend/app/downloaders/bilibili_dm_patch.py
Normal file
71
backend/app/downloaders/bilibili_dm_patch.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Patch yt-dlp's Bilibili extractor to inject the dm_img_* / web_location
|
||||
risk-control parameters required by Bilibili's wbi/playurl gateway.
|
||||
|
||||
Background
|
||||
----------
|
||||
Around 2026-06 Bilibili's ``x/player/wbi/playurl`` gateway began rejecting
|
||||
requests that omit the browser fingerprint params
|
||||
``dm_img_list`` / ``dm_img_str`` / ``dm_cover_img_str`` / ``dm_img_inter`` +
|
||||
``web_location`` with **HTTP 412**. Current yt-dlp (incl. the latest release)
|
||||
does not send these for the playurl endpoint, so any video whose web page does
|
||||
*not* inline ``playinfo`` — forcing yt-dlp onto the API path — fails with 412.
|
||||
Refreshing cookies does not help; the params themselves are missing.
|
||||
|
||||
We inject dummy-but-well-formed values *before* wbi signing. The value shapes
|
||||
deliberately mirror yt-dlp's own usage of the same fields for the
|
||||
``x/space/wbi/arc/search`` endpoint (``BiliBiliSpaceIE``), which is the only
|
||||
place upstream currently sends them.
|
||||
"""
|
||||
import base64
|
||||
import logging
|
||||
import random
|
||||
import string
|
||||
|
||||
logger = logging.getLogger(__name__)


def build_dm_img_params() -> dict:
    """Return dummy ``dm_img_*`` / ``web_location`` params the gateway expects.

    ``dm_img_str`` and ``dm_cover_img_str`` are freshly randomised on every
    call: a random-length string of printable characters is base64-encoded and
    the last two characters of the encoding are dropped. The remaining values
    are fixed literals.

    Returns:
        A new dict with the keys ``web_location``, ``dm_img_list``,
        ``dm_img_str``, ``dm_cover_img_str`` and ``dm_img_inter``.
    """
    return {
        'web_location': 1550101,
        'dm_img_list': '[]',
        'dm_img_str': base64.b64encode(
            ''.join(random.choices(string.printable, k=random.randint(16, 64))).encode()
        )[:-2].decode(),
        'dm_cover_img_str': base64.b64encode(
            ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode()
        )[:-2].decode(),
        'dm_img_inter': '{"ds":[],"wh":[6093,6631,31],"of":[430,760,380]}',
    }


def apply_bilibili_dm_img_patch() -> bool:
    """
    Monkey-patch ``BilibiliBaseIE._download_playinfo`` to inject dm_img params.

    Idempotent and defensive: returns ``True`` if the patch is in place (whether
    applied now or previously), ``False`` if yt-dlp's internals could not be
    patched (logged, never raised — the caller stays functional). Both a failed
    import of the extractor module and a missing ``_download_playinfo``
    attribute are reported this way.
    """
    try:
        from yt_dlp.extractor.bilibili import BilibiliBaseIE
    except Exception as e:  # yt-dlp missing or module layout changed upstream
        logger.warning("Bilibili dm_img patch skipped, cannot import extractor: %s", e)
        return False

    # Look the method up defensively: a plain attribute access would raise
    # AttributeError if upstream renamed or removed it, breaking the
    # "never raised" contract for callers that apply the patch at import time.
    original = getattr(BilibiliBaseIE, '_download_playinfo', None)
    if not callable(original):
        logger.warning(
            "Bilibili dm_img patch skipped, BilibiliBaseIE._download_playinfo not found"
        )
        return False

    # The marker attribute set below makes repeated calls a no-op.
    if getattr(original, '_bili_dm_patched', False):
        return True

    def _patched_download_playinfo(self, bvid, cid, headers=None, query=None):
        # dm_* are merged into the query that the original method signs via
        # _sign_wbi; caller-supplied query params (e.g. try_look/qn) take
        # precedence over the injected dummies.
        merged_query = {**build_dm_img_params(), **(query or {})}
        return original(self, bvid, cid, headers=headers, query=merged_query)

    _patched_download_playinfo._bili_dm_patched = True
    BilibiliBaseIE._download_playinfo = _patched_download_playinfo
    logger.info("Applied Bilibili wbi/playurl dm_img patch to yt-dlp BilibiliBaseIE")
    return True
|
||||
@@ -8,6 +8,7 @@ from typing import Union, Optional, List
|
||||
import yt_dlp
|
||||
|
||||
from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
|
||||
from app.downloaders.bilibili_dm_patch import apply_bilibili_dm_img_patch
|
||||
from app.downloaders.bilibili_subtitle import BilibiliSubtitleFetcher
|
||||
from app.models.notes_model import AudioDownloadResult
|
||||
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
|
||||
@@ -17,6 +18,11 @@ from app.services.cookie_manager import CookieConfigManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Inject the dm_img_* / web_location risk-control params Bilibili's wbi/playurl
|
||||
# gateway now requires; without them the API path returns HTTP 412. See
|
||||
# app/downloaders/bilibili_dm_patch.py for details.
|
||||
apply_bilibili_dm_img_patch()
|
||||
|
||||
|
||||
class BilibiliDownloader(Downloader, ABC):
|
||||
def __init__(self):
|
||||
|
||||
@@ -3,12 +3,13 @@
|
||||
|
||||
流程:
|
||||
1. 从 URL 提 BV id(已有 utils.url_parser.extract_video_id)
|
||||
2. GET /x/web-interface/view?bvid=BVxxx → 拿 cid
|
||||
3. GET /x/player/wbi/v2?bvid=...&cid=... → 返回 data.subtitle.subtitles[]
|
||||
2. 从 URL 提 p 参数(分 P 序号,已有 utils.url_parser.extract_bilibili_p_number)
|
||||
3. GET /x/web-interface/view?bvid=BVxxx&p=N → 拿第 N 集的 cid
|
||||
4. GET /x/player/wbi/v2?bvid=...&cid=... → 返回 data.subtitle.subtitles[]
|
||||
每条带 subtitle_url(B 站后端已经签好 auth_key 的完整地址)
|
||||
4. 按优先级(人工 zh-CN > AI zh-CN > 任意 zh > 任意非空)选一条
|
||||
5. fetch subtitle_url → JSON {body:[{from,to,content,...}]}
|
||||
6. 解析为 TranscriptResult
|
||||
5. 按优先级(人工 zh-CN > AI zh-CN > 任意 zh > 任意非空)选一条
|
||||
6. fetch subtitle_url → JSON {body:[{from,to,content,...}]}
|
||||
7. 解析为 TranscriptResult
|
||||
|
||||
AI 字幕需要登录态 cookie(SESSDATA);通过 CookieConfigManager 注入。
|
||||
"""
|
||||
@@ -20,7 +21,7 @@ import requests
|
||||
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
|
||||
from app.services.cookie_manager import CookieConfigManager
|
||||
from app.utils.logger import get_logger
|
||||
from app.utils.url_parser import extract_video_id
|
||||
from app.utils.url_parser import extract_video_id, extract_bilibili_p_number, resolve_bilibili_short_url
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -45,10 +46,13 @@ class BilibiliSubtitleFetcher:
|
||||
h["Cookie"] = self._cookie
|
||||
return h
|
||||
|
||||
def _get_cid(self, bvid: str) -> Optional[int]:
|
||||
def _get_cid(self, bvid: str, p: Optional[int] = None) -> Optional[int]:
|
||||
url = "https://api.bilibili.com/x/web-interface/view"
|
||||
params = {"bvid": bvid}
|
||||
if p is not None and p >= 1:
|
||||
params["p"] = p
|
||||
try:
|
||||
resp = requests.get(url, params={"bvid": bvid}, headers=self._headers(), timeout=10)
|
||||
resp = requests.get(url, params=params, headers=self._headers(), timeout=10)
|
||||
data = resp.json()
|
||||
except Exception as e:
|
||||
logger.warning(f"获取 cid 失败: {e}")
|
||||
@@ -56,6 +60,19 @@ class BilibiliSubtitleFetcher:
|
||||
if data.get("code") != 0:
|
||||
logger.warning(f"view API 返回错误: code={data.get('code')}, msg={data.get('message')}")
|
||||
return None
|
||||
# 分 P 视频:data.pages[N-1] 对应第 N 集
|
||||
pages = data.get("data", {}).get("pages", [])
|
||||
if pages:
|
||||
if p is not None and 1 <= p <= len(pages):
|
||||
cid = pages[p - 1].get("cid")
|
||||
logger.info(f"分 P 视频: bvid={bvid} p={p} 共 {len(pages)} 集, 取第 {p} 集 cid={cid}")
|
||||
return int(cid) if cid else None
|
||||
else:
|
||||
# 没有 p 参数或 p 超出范围,取第 1 集
|
||||
cid = pages[0].get("cid")
|
||||
logger.info(f"非分 P 或 p 无效: bvid={bvid} 取第 1 集 cid={cid}")
|
||||
return int(cid) if cid else None
|
||||
# 单集视频
|
||||
cid = data.get("data", {}).get("cid")
|
||||
return int(cid) if cid else None
|
||||
|
||||
@@ -109,14 +126,21 @@ class BilibiliSubtitleFetcher:
|
||||
return None
|
||||
|
||||
def fetch_subtitles(self, video_url: str) -> Optional[TranscriptResult]:
|
||||
# 统一 resolve 短链,避免 extract_video_id 和 extract_bilibili_p_number 各 resolve 一次
|
||||
if "b23.tv" in video_url:
|
||||
video_url = resolve_bilibili_short_url(video_url) or video_url
|
||||
|
||||
bvid = extract_video_id(video_url, "bilibili")
|
||||
if not bvid:
|
||||
logger.info("无法从 URL 提取 BV id")
|
||||
return None
|
||||
|
||||
cid = self._get_cid(bvid)
|
||||
# 提取分 P 序号
|
||||
p = extract_bilibili_p_number(video_url)
|
||||
|
||||
cid = self._get_cid(bvid, p)
|
||||
if not cid:
|
||||
logger.info(f"{bvid} 没有取到 cid")
|
||||
logger.info(f"{bvid} (p={p}) 没有取到 cid")
|
||||
return None
|
||||
|
||||
subtitles = self._list_subtitles(bvid, cid)
|
||||
@@ -149,7 +173,7 @@ class BilibiliSubtitleFetcher:
|
||||
return None
|
||||
|
||||
full_text = " ".join(s.text for s in segments)
|
||||
logger.info(f"B站直拉字幕成功: {bvid} lan={lan} 共 {len(segments)} 段")
|
||||
logger.info(f"B站直拉字幕成功: {bvid} p={p} lan={lan} 共 {len(segments)} 段")
|
||||
return TranscriptResult(
|
||||
language=lan,
|
||||
full_text=full_text,
|
||||
@@ -158,6 +182,7 @@ class BilibiliSubtitleFetcher:
|
||||
"source": "bilibili_player_api",
|
||||
"bvid": bvid,
|
||||
"cid": cid,
|
||||
"p": p,
|
||||
"lan": lan,
|
||||
"ai_type": track.get("ai_type"),
|
||||
},
|
||||
|
||||
@@ -11,6 +11,7 @@ from app.utils.path_helper import get_model_dir
|
||||
|
||||
from app.services.cookie_manager import CookieConfigManager
|
||||
from app.services.transcriber_config_manager import TranscriberConfigManager
|
||||
from app.transcriber import model_download_state as dl_state
|
||||
from ffmpeg_helper import ensure_ffmpeg_or_raise
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -148,9 +149,9 @@ def update_proxy_config(data: ProxyConfigRequest):
|
||||
|
||||
|
||||
# ---- Whisper 模型下载状态 & 下载触发 ----
|
||||
|
||||
# 用于跟踪正在进行的下载任务
|
||||
_downloading: dict[str, str] = {} # model_size -> status ("downloading" | "done" | "failed")
|
||||
# 下载状态(downloading / done / failed + 失败原因)统一交给 model_download_state 维护,
|
||||
# 「触发下载」与「查询状态」共享同一份进程内内存态。失败原因会随状态接口透传给前端,
|
||||
# 修复 issue #402 衍生问题:原先只回传 downloading/downloaded,下载失败时前端无任何提示。
|
||||
|
||||
|
||||
def _check_whisper_model_exists(model_size: str, subdir: str = "whisper") -> bool:
|
||||
@@ -212,12 +213,7 @@ def get_transcriber_models_status():
|
||||
statuses = []
|
||||
for size in get_registry().visible_model_names():
|
||||
downloaded = _check_whisper_model_exists(size, "whisper")
|
||||
download_status = _downloading.get(size)
|
||||
statuses.append({
|
||||
"model_size": size,
|
||||
"downloaded": downloaded,
|
||||
"downloading": download_status == "downloading",
|
||||
})
|
||||
statuses.append(dl_state.status_row(size, downloaded))
|
||||
|
||||
# 也检查 mlx-whisper(仅 macOS)
|
||||
mlx_available = platform.system() == "Darwin"
|
||||
@@ -225,16 +221,12 @@ def get_transcriber_models_status():
|
||||
if mlx_available:
|
||||
from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP
|
||||
for size in WHISPER_MODEL_SIZES:
|
||||
mlx_key = f"mlx-{size}"
|
||||
repo_id = MLX_MODEL_MAP.get(size)
|
||||
# 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致
|
||||
downloaded = _check_mlx_whisper_model_exists(size)
|
||||
mlx_statuses.append({
|
||||
"model_size": size,
|
||||
"downloaded": downloaded,
|
||||
"downloading": _downloading.get(mlx_key) == "downloading",
|
||||
"available": repo_id is not None,
|
||||
})
|
||||
row = dl_state.status_row(size, downloaded, key=f"mlx-{size}")
|
||||
row["available"] = repo_id is not None
|
||||
mlx_statuses.append(row)
|
||||
|
||||
return R.success(data={
|
||||
"whisper": statuses,
|
||||
@@ -260,21 +252,24 @@ def _do_download_whisper(model_size: str):
|
||||
from app.transcriber.whisper_models import resolve_whisper_model, is_local_target
|
||||
|
||||
try:
|
||||
_downloading[model_size] = "downloading"
|
||||
dl_state.mark_downloading(model_size)
|
||||
model_dir = get_model_dir("whisper")
|
||||
|
||||
# 已经下好就不重复下
|
||||
if _check_whisper_model_exists(model_size, "whisper"):
|
||||
_downloading[model_size] = "done"
|
||||
dl_state.mark_done(model_size)
|
||||
return
|
||||
|
||||
target = resolve_whisper_model(model_size)
|
||||
if is_local_target(target):
|
||||
# 本地模型不下载,只校验 model.bin 是否就位
|
||||
ok = (Path(target) / "model.bin").exists()
|
||||
_downloading[model_size] = "done" if ok else "failed"
|
||||
if not ok:
|
||||
logger.warning(f"本地模型 {model_size} 路径 {target} 下没有 model.bin,无法使用")
|
||||
if ok:
|
||||
dl_state.mark_done(model_size)
|
||||
else:
|
||||
msg = f"本地模型路径 {target} 下没有 model.bin,无法使用"
|
||||
logger.warning(f"本地模型 {model_size}:{msg}")
|
||||
dl_state.mark_failed(model_size, msg)
|
||||
return
|
||||
|
||||
logger.info(f"开始下载 whisper 模型: {model_size} ← {target}")
|
||||
@@ -292,17 +287,17 @@ def _do_download_whisper(model_size: str):
|
||||
],
|
||||
)
|
||||
logger.info(f"whisper 模型下载完成: {model_size}")
|
||||
_downloading[model_size] = "done"
|
||||
dl_state.mark_done(model_size)
|
||||
except Exception as e:
|
||||
logger.error(f"whisper 模型下载失败: {model_size}, {e}")
|
||||
_downloading[model_size] = "failed"
|
||||
dl_state.mark_failed(model_size, str(e))
|
||||
|
||||
|
||||
def _do_download_mlx_whisper(model_size: str):
|
||||
"""后台下载 mlx-whisper 模型。"""
|
||||
key = f"mlx-{model_size}"
|
||||
try:
|
||||
_downloading[key] = "downloading"
|
||||
dl_state.mark_downloading(key)
|
||||
from huggingface_hub import snapshot_download as hf_download
|
||||
from app.transcriber.mlx_whisper_transcriber import resolve_mlx_repo_id
|
||||
|
||||
@@ -310,22 +305,22 @@ def _do_download_mlx_whisper(model_size: str):
|
||||
repo_id = resolve_mlx_repo_id(model_size)
|
||||
except ValueError as e:
|
||||
logger.error(str(e))
|
||||
_downloading[key] = "failed"
|
||||
dl_state.mark_failed(key, str(e))
|
||||
return
|
||||
|
||||
model_dir = get_model_dir("mlx-whisper")
|
||||
model_path = os.path.join(model_dir, repo_id)
|
||||
# 用 config.json 判定而非目录存在:半成品目录不能算「已下载」
|
||||
if (Path(model_path) / "config.json").exists():
|
||||
_downloading[key] = "done"
|
||||
dl_state.mark_done(key)
|
||||
return
|
||||
logger.info(f"开始下载 mlx-whisper 模型: {model_size} ← {repo_id}")
|
||||
hf_download(repo_id, local_dir=model_path, local_dir_use_symlinks=False)
|
||||
logger.info(f"mlx-whisper 模型下载完成: {model_size}")
|
||||
_downloading[key] = "done"
|
||||
dl_state.mark_done(key)
|
||||
except Exception as e:
|
||||
logger.error(f"mlx-whisper 模型下载失败: {model_size}, {e}")
|
||||
_downloading[key] = "failed"
|
||||
dl_state.mark_failed(key, str(e))
|
||||
|
||||
|
||||
@router.post("/transcriber_download")
|
||||
@@ -338,7 +333,7 @@ def download_transcriber_model(data: ModelDownloadRequest, background_tasks: Bac
|
||||
if platform.system() != "Darwin":
|
||||
return R.error(msg="MLX Whisper 仅支持 macOS")
|
||||
key = f"mlx-{data.model_size}"
|
||||
if _downloading.get(key) == "downloading":
|
||||
if dl_state.is_downloading(key):
|
||||
return R.success(msg="模型正在下载中")
|
||||
background_tasks.add_task(_do_download_mlx_whisper, data.model_size)
|
||||
else:
|
||||
@@ -346,7 +341,7 @@ def download_transcriber_model(data: ModelDownloadRequest, background_tasks: Bac
|
||||
from app.transcriber.whisper_models import get_registry
|
||||
if not get_registry().is_known(data.model_size):
|
||||
return R.error(msg=f"不支持的模型: {data.model_size}(请先在自定义模型中登记)")
|
||||
if _downloading.get(data.model_size) == "downloading":
|
||||
if dl_state.is_downloading(data.model_size):
|
||||
return R.success(msg="模型正在下载中")
|
||||
background_tasks.add_task(_do_download_whisper, data.model_size)
|
||||
|
||||
|
||||
75
backend/app/transcriber/model_download_state.py
Normal file
75
backend/app/transcriber/model_download_state.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""whisper / mlx 模型后台下载状态跟踪(含失败原因)。
|
||||
|
||||
routers.config 的「触发下载」与「查询状态」共享这份进程内内存态:
|
||||
- key:fast-whisper 直接用 model_size;mlx 用 "mlx-{size}" 前缀(与历史一致)
|
||||
- 状态:downloading / done / failed;failed 时另存最近一次错误原因
|
||||
|
||||
为什么抽成独立的轻量模块(仅依赖 logger):
|
||||
1) 把原先散落在 config.py 多处的字符串状态赋值收敛到一处,避免拼写漂移;
|
||||
2) 失败原因能透传到 /transcriber_models_status → 前端,修复「下载失败前端无任何
|
||||
提示、状态一直显示未下载」(issue #402 的衍生问题:原先状态接口只回传
|
||||
downloading/downloaded 两个布尔,failed 态被直接丢弃);
|
||||
3) 不引入 faster_whisper / ctranslate2 等重依赖,可被单测隔离加载。
|
||||
"""
|
||||
from typing import Dict, Optional
|
||||
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
DOWNLOADING = "downloading"
DONE = "done"
FAILED = "failed"

# key -> status string; key -> reason for the most recent failure
# (only meaningful while the status is FAILED).
_status: Dict[str, str] = {}
_errors: Dict[str, str] = {}


def mark_downloading(key: str) -> None:
    """Record that a download for ``key`` has started."""
    _status[key] = DOWNLOADING
    _errors.pop(key, None)  # a new attempt starts: drop the previous failure reason


def mark_done(key: str) -> None:
    """Record that the download for ``key`` finished successfully."""
    _status[key] = DONE
    _errors.pop(key, None)


def mark_failed(key: str, error: str = "") -> None:
    """Record that the download for ``key`` failed.

    Args:
        key: Model key.
        error: Human-readable failure reason. When empty, no reason is stored
            and any reason left over from an earlier failure is discarded, so
            the stored error always belongs to the most recent failure.
    """
    _status[key] = FAILED
    if error:
        _errors[key] = error
    else:
        # Without this, a reason from a previous failure would be reported
        # as if it described the current one.
        _errors.pop(key, None)


def get_status(key: str) -> Optional[str]:
    """Return the recorded status string for ``key``, or ``None`` if unknown."""
    return _status.get(key)


def is_downloading(key: str) -> bool:
    """Return ``True`` while ``key`` is recorded as downloading."""
    return _status.get(key) == DOWNLOADING


def get_error(key: str) -> Optional[str]:
    """Return the stored failure reason for ``key``, or ``None`` if there is none."""
    return _errors.get(key)


def status_row(name: str, downloaded: bool, key: Optional[str] = None) -> dict:
    """Build the status row for one model: downloaded / downloading / failed (+error).

    Args:
        name: Value placed in the row's ``model_size`` field.
        downloaded: Whether the model is already present, as determined by the caller.
        key: Key used to look up the tracked state; defaults to ``name``
            (mlx models pass ``"mlx-{size}"``).

    Returns:
        A dict with ``model_size``, ``downloaded``, ``downloading`` and
        ``failed``. ``error`` is added only when the row is failed and a reason
        is stored. A model with ``downloaded=True`` never reports
        ``failed``/``error``, so a stale failure does not linger after a later
        successful download.
    """
    k = key if key is not None else name
    st = _status.get(k)
    row: dict = {
        "model_size": name,
        "downloaded": downloaded,
        "downloading": st == DOWNLOADING,
        "failed": (not downloaded) and st == FAILED,
    }
    if row["failed"]:
        err = _errors.get(k)
        if err:
            row["error"] = err
    return row
|
||||
@@ -6,7 +6,8 @@
|
||||
检测三处,用户想用命名不符合该约定的模型(比如社区微调版、或自己下到本地的模型)就接不上。
|
||||
|
||||
本模块把映射**显式化 + 可配置**(对齐 mlx_whisper_transcriber.MLX_MODEL_MAP 的模式):
|
||||
- 内置:size → Systran/faster-whisper-{size}
|
||||
- 内置:size → faster-whisper 兼容的 CT2 repo_id(多数为 Systran/faster-whisper-{size};
|
||||
turbo 用社区维护版,见 BUILTIN_WHISPER_MODELS)
|
||||
- 自定义:用户在 config/whisper_models.json 登记 {名称: "<repo_id 或本地路径>"}
|
||||
(JSON 持久化;Docker 下随 config 卷持久化)
|
||||
|
||||
@@ -22,7 +23,8 @@ from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 内置模型:size → faster-whisper 兼容的 HF repo_id(CTranslate2 转换版,Systran 官方维护)。
|
||||
# 内置模型:size → faster-whisper 兼容的 HF repo_id(CTranslate2 转换版)。
|
||||
# 多数档位用 Systran 官方维护的转换版;turbo 例外见下。
|
||||
BUILTIN_WHISPER_MODELS: Dict[str, str] = {
|
||||
"tiny": "Systran/faster-whisper-tiny",
|
||||
"base": "Systran/faster-whisper-base",
|
||||
@@ -31,7 +33,10 @@ BUILTIN_WHISPER_MODELS: Dict[str, str] = {
|
||||
"large-v1": "Systran/faster-whisper-large-v1",
|
||||
"large-v2": "Systran/faster-whisper-large-v2",
|
||||
"large-v3": "Systran/faster-whisper-large-v3",
|
||||
"large-v3-turbo": "Systran/faster-whisper-large-v3-turbo",
|
||||
# issue #402:Systran 没有 turbo 的 CT2 转换版(Systran/faster-whisper-large-v3-turbo
|
||||
# 在 HF 上 401/404),点下载会静默失败、状态一直「未下载」。改用社区维护的 CT2 转换版
|
||||
# (deepdml,直链可达、含 model.bin,与 faster-whisper 的 large-v3-turbo 等价)。
|
||||
"large-v3-turbo": "deepdml/faster-whisper-large-v3-turbo-ct2",
|
||||
}
|
||||
|
||||
# 前端下拉默认展示的内置档位(保持与历史 WHISPER_MODEL_SIZES 一致,不把 8 个全列出来)
|
||||
|
||||
@@ -23,8 +23,8 @@ def extract_video_id(url: str, platform: str) -> Optional[str]:
|
||||
return f"BV{match.group(1)}" if match else None
|
||||
|
||||
elif platform == "youtube":
|
||||
# 匹配 v=xxxxx 或 youtu.be/xxxxx,ID 长度通常为 11
|
||||
match = re.search(r"(?:v=|youtu\.be/)([0-9A-Za-z_-]{11})", url)
|
||||
# 匹配 v=xxxxx、youtu.be/xxxxx 或 shorts/xxxxx,ID 长度通常为 11
|
||||
match = re.search(r"(?:v=|youtu\.be/|shorts/)([0-9A-Za-z_-]{11})", url)
|
||||
return match.group(1) if match else None
|
||||
|
||||
elif platform == "douyin":
|
||||
@@ -48,3 +48,36 @@ def resolve_bilibili_short_url(short_url: str) -> Optional[str]:
|
||||
except requests.RequestException as e:
|
||||
print(f"Error resolving short URL: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def extract_bilibili_p_number(url: str) -> Optional[int]:
    """
    Extract the part index (the ``p`` value) from a Bilibili multi-part video URL.

    Supported shapes:
    - https://www.bilibili.com/video/BVxxx/?p=36
    - https://www.bilibili.com/video/BVxxx?p=5
    - https://b23.tv/xxxxx?p=10
    - https://www.bilibili.com/video/BVxxx/pN (path-suffix form)

    For b23.tv links the resolved URL is inspected first; if it yields no part
    index, the short link itself is inspected, so a ``p`` written on the short
    link is not lost when the resolved URL does not carry it.

    :param url: Bilibili video link
    :return: part index (1-based), or None when the URL names no part
    """
    candidates = [url]
    if "b23.tv" in url:
        resolved = resolve_bilibili_short_url(url)
        if resolved and resolved != url:
            # The resolved URL takes precedence, as before.
            candidates.insert(0, resolved)

    patterns = (
        r'[?&]p=(\d+)',             # ?p=NNN or &p=NNN
        r'/p(\d+)(?:/?$|\?|&)',     # /pN path suffix (less common)
    )
    for candidate in candidates:
        for pattern in patterns:
            match = re.search(pattern, candidate)
            if match:
                p = int(match.group(1))
                if p >= 1:  # part indices start at 1; p=0 is not a valid part
                    return p

    return None
|
||||
|
||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
||||
|
||||
SUPPORTED_PLATFORMS = {
|
||||
"bilibili": r"(https?://)?(www\.)?bilibili\.com/video/[a-zA-Z0-9]+",
|
||||
"youtube": r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w\-]+",
|
||||
"youtube": r"(https?://)?(www\.)?(youtube\.com/(watch\?v=|shorts/)|youtu\.be/)[\w\-]+",
|
||||
"douyin": "douyin",
|
||||
"kuaishou": "kuaishou"
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ distro==1.9.0
|
||||
dnspython==2.7.0
|
||||
email_validator==2.2.0
|
||||
exceptiongroup==1.2.2
|
||||
fastapi==0.115.12
|
||||
fastapi==0.116.2
|
||||
fastapi-cli==0.0.7
|
||||
faster-whisper==1.1.1
|
||||
ffmpeg-python==0.2.0
|
||||
@@ -102,7 +102,7 @@ shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
soupsieve==2.7
|
||||
starlette==0.46.1
|
||||
starlette==0.47.2
|
||||
sympy==1.13.1
|
||||
SQLAlchemy==2.0.41
|
||||
tenacity==9.1.2
|
||||
|
||||
94
backend/tests/test_bilibili_dm_patch.py
Normal file
94
backend/tests/test_bilibili_dm_patch.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
TDD coverage for the Bilibili wbi/playurl dm_img risk-control patch.
|
||||
|
||||
Background: around 2026-06, Bilibili's `x/player/wbi/playurl` gateway began
|
||||
rejecting requests that omit the browser fingerprint params
|
||||
(dm_img_list / dm_img_str / dm_cover_img_str / dm_img_inter + web_location)
|
||||
with HTTP 412. yt-dlp (incl. latest) does not yet send these for playurl, so
|
||||
videos whose web page does not inline playinfo (forcing the API call) fail.
|
||||
|
||||
These tests verify our yt-dlp monkey-patch injects those params *before* wbi
|
||||
signing, and that caller-supplied query params still win.
|
||||
"""
|
||||
import importlib.util
|
||||
import pathlib
|
||||
import unittest
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
MODULE_PATH = ROOT / "app" / "downloaders" / "bilibili_dm_patch.py"
|
||||
spec = importlib.util.spec_from_file_location("bilibili_dm_patch", MODULE_PATH)
|
||||
if spec is None or spec.loader is None:
|
||||
raise ImportError("bilibili_dm_patch module spec not found")
|
||||
bilibili_dm_patch = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(bilibili_dm_patch)
|
||||
|
||||
REQUIRED_KEYS = {
|
||||
"web_location",
|
||||
"dm_img_list",
|
||||
"dm_img_str",
|
||||
"dm_cover_img_str",
|
||||
"dm_img_inter",
|
||||
}
|
||||
|
||||
|
||||
class BuildDmImgParamsTest(unittest.TestCase):
    """Checks on the dict returned by ``build_dm_img_params()``."""

    def test_contains_all_required_risk_control_keys(self):
        # Every key named in REQUIRED_KEYS must be present.
        built = bilibili_dm_patch.build_dm_img_params()
        has_all_keys = REQUIRED_KEYS.issubset(built.keys())
        self.assertTrue(has_all_keys)

    def test_web_location_is_expected_sentinel(self):
        built = bilibili_dm_patch.build_dm_img_params()
        self.assertEqual(built["web_location"], 1550101)
|
||||
|
||||
|
||||
class ApplyPatchTest(unittest.TestCase):
    """Checks that the yt-dlp monkey-patch applies cleanly and injects dm_* params."""

    def setUp(self):
        # Skip the whole class when yt-dlp cannot be imported in this environment.
        try:
            import yt_dlp.extractor.bilibili  # noqa: F401
        except Exception as exc:  # pragma: no cover - env without yt-dlp
            self.skipTest(f"yt-dlp not importable: {exc}")

    def test_patch_is_idempotent(self):
        from yt_dlp.extractor.bilibili import BilibiliBaseIE

        # Applying twice must report success both times and leave the same
        # method object installed after the second call.
        self.assertTrue(bilibili_dm_patch.apply_bilibili_dm_img_patch())
        installed_after_first = BilibiliBaseIE._download_playinfo
        self.assertTrue(bilibili_dm_patch.apply_bilibili_dm_img_patch())
        self.assertIs(BilibiliBaseIE._download_playinfo, installed_after_first)

    def test_dm_params_reach_wbi_signing_with_caller_query_preserved(self):
        from yt_dlp import YoutubeDL
        from yt_dlp.extractor.bilibili import BilibiliBaseIE

        bilibili_dm_patch.apply_bilibili_dm_img_patch()

        captured = {}

        def fake_sign_wbi(params, video_id):
            # Record exactly what is handed to wbi signing; the dm_* params
            # must already be in place at this point.
            captured.update(params)
            return params

        # Stand-in for the HTTP call so the test never touches the network.
        fake_download_json = lambda url, video_id, **kwargs: {"data": {"ok": True}}  # noqa: E731

        extractor = BilibiliBaseIE(YoutubeDL({"quiet": True}))
        extractor._sign_wbi = fake_sign_wbi
        extractor._download_json = fake_download_json

        extractor._download_playinfo(
            "BV1X9L16oEgB", 4242, headers={}, query={"qn": 64}
        )

        all_present = REQUIRED_KEYS.issubset(captured.keys())
        self.assertTrue(all_present, f"missing dm_* keys, got: {sorted(captured)}")
        self.assertEqual(captured["web_location"], 1550101)
        # The caller-supplied query must survive the merge.
        self.assertEqual(captured["qn"], 64)
        # The original method still contributes its own base params.
        self.assertEqual(captured["bvid"], "BV1X9L16oEgB")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
113
backend/tests/test_model_download_state.py
Normal file
113
backend/tests/test_model_download_state.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Unit tests for app.transcriber.model_download_state(模型下载状态 + 失败原因跟踪)。
|
||||
|
||||
与 test_whisper_models 一样按文件路径隔离加载,并桩掉 app.utils.logger,
|
||||
避免触发 app/__init__.py(会 import faster_whisper 等重依赖)。
|
||||
"""
|
||||
import importlib.util
|
||||
import logging
|
||||
import pathlib
|
||||
import sys
|
||||
import types
|
||||
import unittest
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
MODULE_PATH = ROOT / "app" / "transcriber" / "model_download_state.py"
|
||||
|
||||
|
||||
def _load_module():
    """Load model_download_state.py by file path with ``app.utils.logger`` stubbed.

    Stub modules are registered in ``sys.modules`` only for the duration of the
    load, and only for names that are not already present. They are removed
    afterwards so that other tests running in the same process do not see an
    empty fake ``app`` package. The loaded module keeps working because it
    obtained its logger while the stub was still registered.

    :return: the freshly executed module object
    """
    stubs = {}
    for pkg_name in ("app", "app.utils"):
        if pkg_name not in sys.modules:
            pkg = types.ModuleType(pkg_name)
            pkg.__path__ = []  # mark as a package with nothing importable under it
            stubs[pkg_name] = pkg
    if "app.utils.logger" not in sys.modules:
        logger_mod = types.ModuleType("app.utils.logger")
        logger_mod.get_logger = lambda name=None: logging.getLogger(name or "test")
        stubs["app.utils.logger"] = logger_mod

    sys.modules.update(stubs)
    try:
        spec = importlib.util.spec_from_file_location("model_download_state_under_test", MODULE_PATH)
        assert spec and spec.loader
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        return mod
    finally:
        # Remove only the entries this call inserted, and only if nobody
        # replaced them in the meantime.
        for stub_name, stub in stubs.items():
            if sys.modules.get(stub_name) is stub:
                del sys.modules[stub_name]
|
||||
|
||||
|
||||
ds = _load_module()
|
||||
|
||||
|
||||
class TestDownloadState(unittest.TestCase):
    """Behavioral tests for the module-level download-state tracker ``ds``."""

    def setUp(self):
        # The tracker is a module-level singleton: reset it before every test.
        for store in (ds._status, ds._errors):
            store.clear()

    def test_unknown_key_defaults(self):
        state = ds.status_row("tiny", downloaded=False)
        expected = {
            "model_size": "tiny",
            "downloaded": False,
            "downloading": False,
            "failed": False,
        }
        self.assertEqual(state, expected)
        self.assertNotIn("error", state)
        self.assertFalse(ds.is_downloading("tiny"))

    def test_downloading(self):
        ds.mark_downloading("tiny")
        self.assertTrue(ds.is_downloading("tiny"))
        state = ds.status_row("tiny", downloaded=False)
        self.assertTrue(state["downloading"])
        self.assertFalse(state["failed"])

    def test_failed_surfaces_error(self):
        reason = "401 Repository Not Found"
        ds.mark_failed("tiny", reason)
        state = ds.status_row("tiny", downloaded=False)
        self.assertTrue(state["failed"])
        self.assertFalse(state["downloading"])
        self.assertEqual(state["error"], "401 Repository Not Found")
        self.assertEqual(ds.get_error("tiny"), "401 Repository Not Found")

    def test_failed_without_message_has_no_error_field(self):
        ds.mark_failed("tiny")
        state = ds.status_row("tiny", downloaded=False)
        self.assertTrue(state["failed"])
        self.assertNotIn("error", state)

    def test_downloaded_overrides_failed(self):
        # Failed first, then present on disk: downloaded=True must suppress
        # both the failed flag and the error field.
        ds.mark_failed("tiny", "boom")
        state = ds.status_row("tiny", downloaded=True)
        self.assertFalse(state["failed"])
        self.assertTrue(state["downloaded"])
        self.assertNotIn("error", state)

    def test_mark_done_clears_error(self):
        ds.mark_failed("tiny", "boom")
        ds.mark_done("tiny")
        self.assertIsNone(ds.get_error("tiny"))
        state = ds.status_row("tiny", downloaded=True)
        self.assertFalse(state["failed"])

    def test_redownload_clears_previous_error(self):
        ds.mark_failed("tiny", "boom")
        ds.mark_downloading("tiny")  # start the download again
        self.assertIsNone(ds.get_error("tiny"))
        state = ds.status_row("tiny", downloaded=False)
        self.assertTrue(state["downloading"])
        self.assertFalse(state["failed"])
        self.assertNotIn("error", state)

    def test_mlx_key_is_independent(self):
        # mlx entries live under the "mlx-{size}" key and must not interact
        # with the plain-size key of the same name.
        ds.mark_failed("mlx-tiny", "mlx boom")
        ds.mark_downloading("tiny")
        plain_state = ds.status_row("tiny", downloaded=False)
        mlx_state = ds.status_row("tiny", downloaded=False, key="mlx-tiny")
        self.assertTrue(plain_state["downloading"])
        self.assertFalse(plain_state["failed"])
        self.assertTrue(mlx_state["failed"])
        self.assertEqual(mlx_state["error"], "mlx boom")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
50
backend/tests/test_video_url_support.py
Normal file
50
backend/tests/test_video_url_support.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import importlib.util
|
||||
import pathlib
|
||||
import unittest
|
||||
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
def _load_module(name, relative_path):
    """Execute the file at ``ROOT / relative_path`` as a module named ``name``.

    :param name: module name given to the import spec
    :param relative_path: path of the source file, relative to ``ROOT``
    :return: the executed module object
    :raises ImportError: when no spec or loader can be created for the file
    """
    target = ROOT / relative_path
    module_spec = importlib.util.spec_from_file_location(name, target)
    spec_usable = module_spec is not None and module_spec.loader is not None
    if not spec_usable:
        raise ImportError(f"{name} module spec not found")

    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
|
||||
url_parser = _load_module("url_parser", pathlib.Path("app") / "utils" / "url_parser.py")
|
||||
video_url_validator = _load_module(
|
||||
"video_url_validator",
|
||||
pathlib.Path("app") / "validators" / "video_url_validator.py",
|
||||
)
|
||||
|
||||
|
||||
class TestVideoUrlSupport(unittest.TestCase):
    """URL-shape coverage for the YouTube ID extractor and the URL validator."""

    def test_extract_youtube_video_id_from_supported_url_shapes(self):
        expected_id = "dQw4w9WgXcQ"

        # watch?v=, youtu.be/ and shorts/ must all yield the same ID.
        prefixes = (
            "https://www.youtube.com/watch?v=",
            "https://youtu.be/",
            "https://www.youtube.com/shorts/",
        )

        for prefix in prefixes:
            url = prefix + expected_id
            with self.subTest(url=url):
                extracted = url_parser.extract_video_id(url, "youtube")
                self.assertEqual(extracted, expected_id)

    def test_accepts_youtube_shorts_url(self):
        shorts_url = "https://www.youtube.com/shorts/dQw4w9WgXcQ"
        accepted = video_url_validator.is_supported_video_url(shorts_url)
        self.assertTrue(accepted)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -50,7 +50,19 @@ class TestResolve(unittest.TestCase):
|
||||
|
||||
def test_builtin_resolves_to_systran(self):
|
||||
self.assertEqual(self.reg.resolve("tiny"), "Systran/faster-whisper-tiny")
|
||||
self.assertEqual(self.reg.resolve("large-v3-turbo"), "Systran/faster-whisper-large-v3-turbo")
|
||||
|
||||
def test_large_v3_turbo_resolves_to_live_repo(self):
|
||||
# 回归 issue #402:Systran 从未发布 turbo 的 CT2 转换版,
|
||||
# 原映射 Systran/faster-whisper-large-v3-turbo 在 HF 上 401/404,
|
||||
# 导致下载静默失败、状态一直「未下载」。改用社区维护的 CT2 转换版。
|
||||
self.assertEqual(
|
||||
self.reg.resolve("large-v3-turbo"),
|
||||
"deepdml/faster-whisper-large-v3-turbo-ct2",
|
||||
)
|
||||
self.assertNotEqual(
|
||||
self.reg.resolve("large-v3-turbo"),
|
||||
"Systran/faster-whisper-large-v3-turbo",
|
||||
)
|
||||
|
||||
def test_passthrough_repo_id(self):
|
||||
# 用户直接把 HF repo_id 当 model_size 传进来(含 "/")
|
||||
|
||||
@@ -2,19 +2,18 @@ server {
|
||||
listen 80;
|
||||
client_max_body_size 10G;
|
||||
|
||||
# gzip 压缩
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_min_length 1024;
|
||||
gzip_proxied any;
|
||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
|
||||
|
||||
# 所有非 /api 请求全部代理给 frontend 容器
|
||||
# 多容器(docker-compose)部署:前端在独立的 frontend 容器,代理过去。
|
||||
# 单镜像(Dockerfile.complete)部署请勿用本文件,改用 nginx/standalone.conf。
|
||||
location / {
|
||||
proxy_pass http://frontend:80;
|
||||
}
|
||||
|
||||
# 所有 /api 请求代理给 backend 容器
|
||||
location /api/ {
|
||||
proxy_pass http://backend:8483;
|
||||
proxy_set_header Host $host;
|
||||
|
||||
41
nginx/standalone.conf
Normal file
41
nginx/standalone.conf
Normal file
@@ -0,0 +1,41 @@
|
||||
# 单镜像(Dockerfile.complete / 一体化部署)专用 nginx 配置。
|
||||
#
|
||||
# 与 nginx/default.conf(docker-compose 多容器版)的关键区别:
|
||||
# - 前端不再由独立的 frontend 容器提供,构建产物已直接 COPY 到本镜像的
|
||||
# /usr/share/nginx/html,所以 location / 走【静态文件】而非反代 frontend;
|
||||
# - backend 与 nginx 同处一个容器,所以 /api、/static 代理到 127.0.0.1:8483。
|
||||
#
|
||||
# 注意:请勿把本文件的 location / 改成代理 frontend,否则单镜像里没有 frontend
|
||||
# 服务,会回退到 nginx 默认欢迎页。多容器(compose)请改 nginx/default.conf。
|
||||
server {
|
||||
listen 80;
|
||||
client_max_body_size 10G;
|
||||
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_min_length 1024;
|
||||
gzip_proxied any;
|
||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
|
||||
|
||||
# 前端静态文件由本容器直接服务(构建产物已 COPY 到此目录)
|
||||
location / {
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# backend 与 nginx 同容器,代理到本地
|
||||
location /api/ {
|
||||
proxy_pass http://127.0.0.1:8483;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
|
||||
location /static/ {
|
||||
proxy_pass http://127.0.0.1:8483/static/;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
expires 7d;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user