Files
BiliNote/backend/app/utils/url_parser.py
wmsdsb137 2ba409880e fix(bilibili): 修正分P视频字幕优先链路未传p参数导致取错集
问题:B站分P视频(如62集课程),提交?p=36链接时,
字幕优先链路通过x/web-interface/view API拿cid时未传p参数,
默认取第1集cid,导致生成的是第1集的笔记。
同时yt-dlp正确下载了p36音频,但被跳过。

修复:
- url_parser新增extract_bilibili_p_number()提取URL中的p参数
- bilibili_subtitle的_get_cid()接收p参数,从data.pages[p-1]取对应分P的cid
- fetch_subtitles()调用extract_bilibili_p_number()透传p
2026-06-16 20:54:36 +08:00

82 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from typing import Optional, Tuple
import requests
def extract_video_id(url: str, platform: str) -> Optional[str]:
"""
从视频链接中提取视频 ID
:param url: 视频链接
:param platform: 平台名bilibili / youtube / douyin
:return: 提取到的视频 ID 或 None
"""
if platform == "bilibili":
# 如果是短链接,则解析真实链接
if "b23.tv" in url:
resolved_url = resolve_bilibili_short_url(url)
if resolved_url:
url = resolved_url
# 匹配 BV号如 BV1vc411b7Wa
match = re.search(r"BV([0-9A-Za-z]+)", url)
return f"BV{match.group(1)}" if match else None
elif platform == "youtube":
# 匹配 v=xxxxx 或 youtu.be/xxxxxID 长度通常为 11
match = re.search(r"(?:v=|youtu\.be/)([0-9A-Za-z_-]{11})", url)
return match.group(1) if match else None
elif platform == "douyin":
# 匹配 douyin.com/video/1234567890123456789
match = re.search(r"/video/(\d+)", url)
return match.group(1) if match else None
return None
def resolve_bilibili_short_url(short_url: str) -> Optional[str]:
"""
解析哔哩哔哩短链接以获取真实视频链接
:param short_url: Bilibili短链接"https://b23.tv/xxxxxx"
:return: 真实的视频链接或None
"""
try:
response = requests.head(short_url, allow_redirects=True)
return response.url
except requests.RequestException as e:
print(f"Error resolving short URL: {e}")
return None
def extract_bilibili_p_number(url: str) -> Optional[int]:
"""
从 B 站分 P 视频 URL 中提取 p 参数(分 P 序号)。
支持格式:
- https://www.bilibili.com/video/BVxxx/?p=36
- https://www.bilibili.com/video/BVxxx?p=5
- https://b23.tv/xxxxx?p=10
- https://www.bilibili.com/video/BVxxx/pN (尾缀形式)
:param url: B 站视频链接
:return: 分 P 序号(从 1 开始),非分 P 视频返回 None
"""
if "b23.tv" in url:
url = resolve_bilibili_short_url(url) or url
# 匹配 ?p=NNN 或 &p=NNN
match = re.search(r'[?&]p=(\d+)', url)
if match:
p = int(match.group(1))
if p >= 1:
return p
# 匹配 /pN 尾缀形式(较少见)
match = re.search(r'/p(\d+)(?:/?$|\?|&)', url)
if match:
return int(match.group(1))
return None