mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-06-26 02:01:38 +08:00
feat(extension+backend): 插件直接在浏览器里抓 B 站字幕,跳过后端 download_subtitles
之前 B 站字幕优先逻辑放在后端的 BilibiliSubtitleFetcher,需要后端通过 CookieConfigManager
管理 SESSDATA cookie 才能拿 AI 字幕。这次改为:插件在用户浏览器里直接抓字幕,
天然带着用户当前登录态的 cookie;后端只负责把传过来的字幕当作转写缓存。
extension:
- 新增 logic/bilibili-subtitle.ts,调 /x/web-interface/view → /x/player/wbi/v2 → 字幕 URL JSON
· service worker fetch 走 credentials:'include',借 manifest host_permissions:'*://*/*'
自动带 .bilibili.com 域 cookie,并绕过 CORS
· 优先级:人工中文 > AI 中文 > 任意非空
- popup start() 与 background startTask() 在 platform === 'bilibili' 时先调一次抓取,
结果作为 prefetched_transcript 字段塞到 /api/generate_note payload
- types.ts GenerateRequest 增加 prefetched_transcript 字段
backend:
- VideoRequest 增加可选 prefetched_transcript: dict
- generate_note endpoint 收到时调 _persist_prefetched_transcript() 写到
NOTE_OUTPUT_DIR/<task_id>_transcript.json;NoteGenerator 的 cache-hit 逻辑天然命中,
跳过 downloader.download_subtitles 和音频转写,直接走 GPT 总结
- 字幕清洗:去掉空 segment、必要时合成 full_text、language 默认 'zh'
效果:B 站登录用户的视频,从用户点击到 GPT 拿到全文,省掉一次后端 → B 站 API 的来回,
也彻底告别了 backend 那侧的 cookie 配置心智负担。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@ import { onMessage } from 'webext-bridge/background'
|
||||
import type { Settings, TaskRecord } from '~/logic/types'
|
||||
import { DEFAULT_SETTINGS, MAX_TASKS, SETTINGS_KEY, TASKS_KEY } from '~/logic/constants'
|
||||
import { detectPlatform } from '~/logic/platform'
|
||||
import { fetchBilibiliSubtitle } from '~/logic/bilibili-subtitle'
|
||||
|
||||
// only on dev mode
|
||||
if (import.meta.hot) {
|
||||
@@ -65,6 +66,10 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e
|
||||
return { ok: false, error: '请先在设置页选择供应商与模型' }
|
||||
|
||||
const backend = settings.backendUrl.replace(/\/$/, '')
|
||||
|
||||
// B 站:先在浏览器里抓字幕(带本地登录态 cookie),随提交带过去
|
||||
const prefetched = platform === 'bilibili' ? await fetchBilibiliSubtitle(url) : null
|
||||
|
||||
try {
|
||||
const res = await fetch(`${backend}/api/generate_note`, {
|
||||
method: 'POST',
|
||||
@@ -82,6 +87,7 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e
|
||||
...(settings.screenshot ? ['screenshot'] : []),
|
||||
...(settings.link ? ['link'] : []),
|
||||
],
|
||||
prefetched_transcript: prefetched ?? undefined,
|
||||
}),
|
||||
})
|
||||
if (!res.ok)
|
||||
|
||||
125
BillNote_extension/src/logic/bilibili-subtitle.ts
Normal file
125
BillNote_extension/src/logic/bilibili-subtitle.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
// 在浏览器里直接调 B 站 player API 抓字幕。
|
||||
// 因为 manifest host_permissions: '*://*/*' 覆盖 api.bilibili.com,service worker 里的
|
||||
// fetch 会自动带 .bilibili.com 域下的用户 cookie,并且绕过 CORS——AI 字幕需要登录态,
|
||||
// 这等于用用户当前浏览器的登录身份代替了 backend 那边的 SESSDATA 配置。
|
||||
//
|
||||
// 与 backend/app/downloaders/bilibili_subtitle.py 的 BilibiliSubtitleFetcher 行为对齐。
|
||||
|
||||
/** Desktop Chrome User-Agent string that `jsonGet` places in its request headers. */
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
|
||||
|
||||
/**
 * Transcript assembled in the browser by `fetchBilibiliSubtitle` and attached to the
 * note-generation request as `prefetched_transcript`.
 */
export interface PrefetchedTranscript {
  /** Language tag of the chosen subtitle track. */
  language: string
  /** All segment texts joined into one string. */
  full_text: string
  /** Non-empty subtitle lines with their start/end times. */
  segments: { start: number, end: number, text: string }[]
  /** Fixed marker identifying the extension as the producer. */
  source: 'bilibili_extension'
}
|
||||
|
||||
/** One subtitle track as listed by the player API response read in `listSubtitles`. */
interface SubtitleEntry {
  /** Language tag; `pickSubtitle` treats values starting with `zh` (or `ai-zh`) as Chinese. */
  lan?: string
  /** Truthy values are treated by `pickSubtitle` as AI-generated tracks. */
  ai_type?: number
  /** Location of the subtitle JSON; may be protocol-relative (`//host/path`). */
  subtitle_url?: string
}
|
||||
|
||||
/**
 * Pull the first `BV…` identifier out of a URL.
 *
 * @param url Any string that may contain a BV id.
 * @returns `BV` plus the run of ASCII letters/digits that follows it, or `null` when
 *          no such run exists.
 */
function extractBvid(url: string): string | null {
  const hit = /BV[0-9A-Za-z]+/.exec(url)
  if (hit === null)
    return null
  // The whole match is exactly "BV" + the alphanumeric run.
  return hit[0]
}
|
||||
|
||||
/**
 * GET `url` and parse the response body as JSON.
 *
 * The request is sent with `credentials: 'include'` and with the module's UA string and
 * a bilibili Referer in its header map. Every failure mode resolves to `null`: a
 * response whose `ok` flag is false, or any error thrown while fetching/parsing (the
 * latter is also logged via `console.warn`).
 *
 * @param url Absolute URL to request.
 * @returns The parsed body cast to `T` (not validated), or `null` on failure.
 */
async function jsonGet<T>(url: string): Promise<T | null> {
  const init: RequestInit = {
    credentials: 'include',
    headers: { 'User-Agent': UA, 'Referer': 'https://www.bilibili.com' },
  }
  try {
    const response = await fetch(url, init)
    return response.ok ? (await response.json() as T) : null
  }
  catch (err) {
    console.warn('[bilinote] B 站 API 请求失败:', url, err)
    return null
  }
}
|
||||
|
||||
/**
 * Look up the `cid` reported at the top level of the video-info response for `bvid`.
 *
 * @param bvid BV identifier of the video.
 * @returns The cid, or `null` when the request fails, the API `code` is non-zero, or
 *          the response carries no cid.
 */
async function getCid(bvid: string): Promise<number | null> {
  type ViewResponse = { code: number, data?: { cid?: number } }

  const endpoint = `https://api.bilibili.com/x/web-interface/view?bvid=${bvid}`
  const payload = await jsonGet<ViewResponse>(endpoint)
  if (payload && payload.code === 0)
    return payload.data?.cid ?? null
  return null
}
|
||||
|
||||
/**
 * List the subtitle tracks the player API reports for a given video part.
 *
 * @param bvid BV identifier of the video.
 * @param cid  cid of the part whose subtitles are wanted.
 * @returns The tracks found under `data.subtitle.subtitles`; an empty array when the
 *          request fails, the API `code` is non-zero, or no list is present.
 */
async function listSubtitles(bvid: string, cid: number): Promise<SubtitleEntry[]> {
  type PlayerResponse = {
    code: number
    data?: { subtitle?: { subtitles?: SubtitleEntry[] } }
  }

  const endpoint = `https://api.bilibili.com/x/player/wbi/v2?bvid=${bvid}&cid=${cid}`
  const payload = await jsonGet<PlayerResponse>(endpoint)
  if (payload && payload.code === 0)
    return payload.data?.subtitle?.subtitles ?? []
  return []
}
|
||||
|
||||
/**
 * Choose the subtitle track to download.
 *
 * Only tracks that carry a non-empty `subtitle_url` are considered, because a track
 * without a URL cannot be fetched; picking one would make the caller give up even when
 * another listed track is downloadable.
 *
 * Priority among the downloadable tracks:
 *   1. Chinese track that is not AI-generated (`ai_type` falsy)
 *   2. any Chinese track (including AI-generated)
 *   3. the first downloadable track of any language
 *
 * @param subtitles Tracks as returned by the player API.
 * @returns The chosen track, or `null` when no track has a usable URL.
 */
function pickSubtitle(subtitles: SubtitleEntry[]): SubtitleEntry | null {
  const usable = subtitles.filter(s => Boolean(s.subtitle_url))
  if (!usable.length)
    return null

  const isZh = (s: SubtitleEntry): boolean => {
    const lan = (s.lan || '').toLowerCase()
    return lan.startsWith('zh') || lan === 'ai-zh'
  }

  return (
    usable.find(s => isZh(s) && !s.ai_type)
    ?? usable.find(s => isZh(s))
    ?? usable[0]
    ?? null
  )
}
|
||||
|
||||
/**
 * Turn a protocol-relative URL (`//host/path`) into an `https:` URL.
 * Any other input is returned unchanged.
 */
function normalizeUrl(url: string): string {
  if (url.startsWith('//'))
    return `https:${url}`
  return url
}
|
||||
|
||||
/**
 * Shape of the JSON document downloaded from a track's `subtitle_url`, limited to the
 * fields `fetchBilibiliSubtitle` reads.
 */
interface SubtitleBody {
  /** Subtitle lines; `from`/`to` become a segment's start/end, `content` its text. */
  body?: { from?: number, to?: number, content?: string }[]
}
|
||||
|
||||
/** Read the 1-based part number from a `?p=N` / `&p=N` query; defaults to 1. */
function extractPageNumber(videoUrl: string): number {
  const m = /[?&]p=(\d+)/.exec(videoUrl)
  const page = Number.parseInt(m?.[1] ?? '1', 10)
  return page >= 1 ? page : 1
}

/**
 * Resolve the cid of a specific part of a video.
 *
 * Part 1 (or no part) uses `getCid`. For later parts the `pages` list of the video-info
 * response is searched for the matching `page`; if that part is not listed, the
 * top-level cid is used so behaviour degrades to the single-part case.
 */
async function getCidForPage(bvid: string, page: number): Promise<number | null> {
  if (page <= 1)
    return getCid(bvid)

  const data = await jsonGet<{
    code: number
    data?: { cid?: number, pages?: Array<{ cid?: number, page?: number }> }
  }>(`https://api.bilibili.com/x/web-interface/view?bvid=${bvid}`)
  if (!data || data.code !== 0)
    return null

  const hit = data.data?.pages?.find(p => p.page === page)
  return hit?.cid ?? data.data?.cid ?? null
}

/**
 * Fetch a Bilibili video's subtitles from inside the browser and convert them into the
 * transcript shape sent to the backend.
 *
 * Steps: extract the BV id from the URL, resolve the cid of the part named by the URL's
 * `p` parameter (first part when absent), list the subtitle tracks, pick one with
 * `pickSubtitle`, download its JSON, and keep every line with non-blank text.
 *
 * @param videoUrl URL of the video page; may carry `?p=N` for multi-part videos.
 * @returns The transcript, or `null` when any step yields nothing usable (no BV id,
 *          no cid, no track with a URL, or no non-empty lines).
 */
export async function fetchBilibiliSubtitle(videoUrl: string): Promise<PrefetchedTranscript | null> {
  const bvid = extractBvid(videoUrl)
  if (!bvid)
    return null

  // Multi-part videos have one cid per part; use the part the URL points at so the
  // transcript matches the video the user is actually looking at.
  const cid = await getCidForPage(bvid, extractPageNumber(videoUrl))
  if (!cid)
    return null

  const subtitles = await listSubtitles(bvid, cid)
  const track = pickSubtitle(subtitles)
  if (!track?.subtitle_url) {
    console.info(`[bilinote] B 站 ${bvid} 没找到可用字幕轨(可能未登录或视频无字幕)`)
    return null
  }

  const sub = await jsonGet<SubtitleBody>(normalizeUrl(track.subtitle_url))
  const body = sub?.body || []

  const segments: PrefetchedTranscript['segments'] = []
  for (const item of body) {
    const text = (item.content || '').trim()
    if (!text)
      continue
    segments.push({
      start: Number(item.from || 0),
      end: Number(item.to || 0),
      text,
    })
  }
  if (!segments.length)
    return null

  return {
    language: track.lan || 'zh',
    full_text: segments.map(s => s.text).join(' '),
    segments,
    source: 'bilibili_extension',
  }
}
|
||||
@@ -40,6 +40,13 @@ export interface GenerateRequest {
|
||||
format?: string[]
|
||||
style?: string
|
||||
extras?: string
|
||||
// 客户端在浏览器里直接抓到的字幕,跳过后端的 download_subtitles + 音频转写
|
||||
prefetched_transcript?: {
|
||||
language: string
|
||||
full_text: string
|
||||
segments: Array<{ start: number, end: number, text: string }>
|
||||
source?: string
|
||||
}
|
||||
}
|
||||
|
||||
export interface NoteResult {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { computed, onMounted, onUnmounted, ref } from 'vue'
|
||||
import { detectPlatform } from '~/logic/platform'
|
||||
import { settings, settingsReady, tasks, tasksReady, upsertTask } from '~/logic/storage'
|
||||
import { generateNote, getTaskStatus, resolveImageUrl } from '~/logic/api'
|
||||
import { fetchBilibiliSubtitle } from '~/logic/bilibili-subtitle'
|
||||
import type { TaskRecord } from '~/logic/types'
|
||||
|
||||
const tabUrl = ref<string>('')
|
||||
@@ -64,6 +65,8 @@ async function start() {
|
||||
}
|
||||
submitting.value = true
|
||||
try {
|
||||
// B 站:在用户浏览器里直接抓字幕(带本地登录态 cookie),跳过后端的 download_subtitles 与音频转写
|
||||
const prefetched = platform.value === 'bilibili' ? await fetchBilibiliSubtitle(tabUrl.value) : null
|
||||
const { task_id } = await generateNote({
|
||||
video_url: tabUrl.value,
|
||||
platform: platform.value!,
|
||||
@@ -77,6 +80,7 @@ async function start() {
|
||||
...(settings.value.screenshot ? ['screenshot'] : []),
|
||||
...(settings.value.link ? ['link'] : []),
|
||||
],
|
||||
prefetched_transcript: prefetched ?? undefined,
|
||||
})
|
||||
activeTaskId.value = task_id
|
||||
upsertTask({
|
||||
|
||||
@@ -50,6 +50,10 @@ class VideoRequest(BaseModel):
|
||||
video_understanding: Optional[bool] = False
|
||||
video_interval: Optional[int] = 0
|
||||
grid_size: Optional[list] = []
|
||||
# 客户端(如浏览器插件)已经在用户浏览器里抓到字幕,直接传给后端复用,
|
||||
# 跳过 download_subtitles 和音频转写。形如:
|
||||
# {"language": "zh", "full_text": "...", "segments": [{"start","end","text"}, ...]}
|
||||
prefetched_transcript: Optional[dict] = None
|
||||
|
||||
@field_validator("video_url")
|
||||
def validate_supported_url(cls, v):
|
||||
@@ -74,6 +78,40 @@ def save_note_to_file(task_id: str, note):
|
||||
json.dump(asdict(note), f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def _persist_prefetched_transcript(task_id: str, transcript: dict) -> None:
    """Write a client-prefetched transcript to the transcript cache file for a task.

    The file is ``NOTE_OUTPUT_DIR/<task_id>_transcript.json``. Per the original note on
    this function, NoteGenerator.generate reads that file first and, on a hit, skips
    download_subtitles and audio transcription.

    The payload comes straight from the client, so it is sanitised rather than trusted:
    entries that are not dicts or have no non-blank string ``text`` are dropped, and
    ``start``/``end`` values that cannot be converted to float fall back to 0.0. This
    keeps one malformed segment from discarding an otherwise usable transcript.

    Args:
        task_id: Task identifier used to build the cache file name.
        transcript: Dict with ``language`` (optional), ``full_text`` (optional, rebuilt
            from the segments when missing or blank) and
            ``segments`` (list of ``{start, end, text}``).

    Raises:
        ValueError: If no usable segment remains after cleaning.
    """

    def _to_float(value) -> float:
        # Client may send null or a non-numeric value; treat it as 0.0.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    raw_segments = transcript.get("segments")
    if not isinstance(raw_segments, list):
        raw_segments = []

    cleaned_segments = []
    for s in raw_segments:
        if not isinstance(s, dict):
            continue
        raw_text = s.get("text")
        text = raw_text.strip() if isinstance(raw_text, str) else ""
        if not text:
            continue
        cleaned_segments.append({
            "start": _to_float(s.get("start", 0)),
            "end": _to_float(s.get("end", 0)),
            "text": text,
        })
    if not cleaned_segments:
        raise ValueError("prefetched_transcript 没有可用的 segments")

    raw_full_text = transcript.get("full_text")
    if isinstance(raw_full_text, str) and raw_full_text.strip():
        full_text = raw_full_text
    else:
        # Synthesise the full text from the cleaned segments when the client omitted it.
        full_text = " ".join(s["text"] for s in cleaned_segments)

    raw_language = transcript.get("language")
    language = raw_language if isinstance(raw_language, str) and raw_language.strip() else "zh"

    payload = {
        "language": language,
        "full_text": full_text,
        "segments": cleaned_segments,
    }

    os.makedirs(NOTE_OUTPUT_DIR, exist_ok=True)
    target = os.path.join(NOTE_OUTPUT_DIR, f"{task_id}_transcript.json")
    with open(target, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    logger.info(f"已写入客户端预取字幕缓存: {target} ({len(cleaned_segments)} 段)")
|
||||
|
||||
|
||||
def run_note_task(task_id: str, video_url: str, platform: str, quality: DownloadQuality,
|
||||
link: bool = False, screenshot: bool = False, model_name: str = None, provider_id: str = None,
|
||||
_format: list = None, style: str = None, extras: str = None, video_understanding: bool = False,
|
||||
@@ -163,6 +201,13 @@ def generate_note(data: VideoRequest, background_tasks: BackgroundTasks):
|
||||
# 统一先写入 PENDING,表示已进入队列等待串行执行
|
||||
NoteGenerator()._update_status(task_id, TaskStatus.PENDING)
|
||||
|
||||
# 客户端已经抓好字幕的话,写到转写缓存文件,NoteGenerator 的 cache-hit 逻辑会直接用上
|
||||
if data.prefetched_transcript:
|
||||
try:
|
||||
_persist_prefetched_transcript(task_id, data.prefetched_transcript)
|
||||
except Exception as e:
|
||||
logger.warning(f"写入预取字幕失败 (task_id={task_id}): {e}")
|
||||
|
||||
background_tasks.add_task(run_note_task, task_id, data.video_url, data.platform, data.quality, data.link,
|
||||
data.screenshot, data.model_name, data.provider_id, data.format, data.style,
|
||||
data.extras, data.video_understanding, data.video_interval, data.grid_size)
|
||||
|
||||
Reference in New Issue
Block a user