diff --git a/backend/app/downloaders/bilibili_dm_patch.py b/backend/app/downloaders/bilibili_dm_patch.py
new file mode 100644
index 0000000..cc4d2d6
--- /dev/null
+++ b/backend/app/downloaders/bilibili_dm_patch.py
@@ -0,0 +1,81 @@
+"""
+Patch yt-dlp's Bilibili extractor to inject the dm_img_* / web_location
+risk-control parameters required by Bilibili's wbi/playurl gateway.
+
+Background
+----------
+Around 2026-06 Bilibili's ``x/player/wbi/playurl`` gateway began rejecting
+requests that omit the browser fingerprint params
+``dm_img_list`` / ``dm_img_str`` / ``dm_cover_img_str`` / ``dm_img_inter`` +
+``web_location`` with **HTTP 412**. Current yt-dlp (incl. the latest release)
+does not send these for the playurl endpoint, so any video whose web page does
+*not* inline ``playinfo`` — forcing yt-dlp onto the API path — fails with 412.
+Refreshing cookies does not help; the params themselves are missing.
+
+We inject dummy-but-well-formed values *before* wbi signing. The value shapes
+deliberately mirror yt-dlp's own usage of the same fields for the
+``x/space/wbi/arc/search`` endpoint (``BiliBiliSpaceIE``), which is the only
+place upstream currently sends them.
+"""
+import base64
+import functools
+import logging
+import random
+import string
+
+logger = logging.getLogger(__name__)
+
+
+def build_dm_img_params() -> dict:
+    """Return dummy ``dm_img_*`` / ``web_location`` params the gateway expects."""
+    return {
+        'web_location': 1550101,
+        'dm_img_list': '[]',
+        'dm_img_str': base64.b64encode(
+            ''.join(random.choices(string.printable, k=random.randint(16, 64))).encode()
+        )[:-2].decode(),
+        'dm_cover_img_str': base64.b64encode(
+            ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode()
+        )[:-2].decode(),
+        'dm_img_inter': '{"ds":[],"wh":[6093,6631,31],"of":[430,760,380]}',
+    }
+
+
+def apply_bilibili_dm_img_patch() -> bool:
+    """
+    Monkey-patch ``BilibiliBaseIE._download_playinfo`` to inject dm_img params.
+
+    Idempotent and defensive: returns ``True`` if the patch is in place (whether
+    applied now or previously), ``False`` if yt-dlp's internals could not be
+    patched (logged, never raised — the caller stays functional).
+    """
+    try:
+        from yt_dlp.extractor.bilibili import BilibiliBaseIE
+    except Exception as e:  # yt-dlp missing or module layout changed upstream
+        logger.warning("Bilibili dm_img patch skipped, cannot import extractor: %s", e)
+        return False
+
+    # ``_download_playinfo`` is a private upstream method and may be renamed or
+    # removed. Look it up defensively so that case is logged and reported as
+    # ``False`` instead of raising AttributeError from this function.
+    original = getattr(BilibiliBaseIE, '_download_playinfo', None)
+    if original is None:
+        logger.warning(
+            "Bilibili dm_img patch skipped, BilibiliBaseIE._download_playinfo not found"
+        )
+        return False
+    if getattr(original, '_bili_dm_patched', False):
+        return True
+
+    @functools.wraps(original)
+    def _patched_download_playinfo(self, bvid, cid, headers=None, query=None):
+        # dm_* are merged into the query that the original method signs via
+        # _sign_wbi; caller-supplied query params (e.g. try_look/qn) take
+        # precedence over the injected dummies.
+        merged_query = {**build_dm_img_params(), **(query or {})}
+        return original(self, bvid, cid, headers=headers, query=merged_query)
+
+    _patched_download_playinfo._bili_dm_patched = True
+    BilibiliBaseIE._download_playinfo = _patched_download_playinfo
+    logger.info("Applied Bilibili wbi/playurl dm_img patch to yt-dlp BilibiliBaseIE")
+    return True
diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py
index 0a94849..95f8e5f 100644
--- a/backend/app/downloaders/bilibili_downloader.py
+++ b/backend/app/downloaders/bilibili_downloader.py
@@ -8,6 +8,7 @@ from typing import Union, Optional, List
 import yt_dlp
 
 from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
+from app.downloaders.bilibili_dm_patch import apply_bilibili_dm_img_patch
 from app.downloaders.bilibili_subtitle import BilibiliSubtitleFetcher
 from app.models.notes_model import AudioDownloadResult
 from app.models.transcriber_model import TranscriptResult, TranscriptSegment
@@ -17,6 +18,11 @@ from app.services.cookie_manager import CookieConfigManager
 
 logger = logging.getLogger(__name__)
 
+# Inject the dm_img_* / web_location risk-control params Bilibili's wbi/playurl
+# gateway now requires; without them the API path returns HTTP 412. See
+# app/downloaders/bilibili_dm_patch.py for details.
+apply_bilibili_dm_img_patch()
+
 
 class BilibiliDownloader(Downloader, ABC):
     def __init__(self):
diff --git a/backend/tests/test_bilibili_dm_patch.py b/backend/tests/test_bilibili_dm_patch.py
new file mode 100644
index 0000000..3731aa1
--- /dev/null
+++ b/backend/tests/test_bilibili_dm_patch.py
@@ -0,0 +1,94 @@
+"""
+TDD coverage for the Bilibili wbi/playurl dm_img risk-control patch.
+
+Background: around 2026-06, Bilibili's `x/player/wbi/playurl` gateway began
+rejecting requests that omit the browser fingerprint params
+(dm_img_list / dm_img_str / dm_cover_img_str / dm_img_inter + web_location)
+with HTTP 412. yt-dlp (incl. latest) does not yet send these for playurl, so
+videos whose web page does not inline playinfo (forcing the API call) fail.
+
+These tests verify our yt-dlp monkey-patch injects those params *before* wbi
+signing, and that caller-supplied query params still win.
+"""
+import importlib.util
+import pathlib
+import unittest
+
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+MODULE_PATH = ROOT / "app" / "downloaders" / "bilibili_dm_patch.py"
+spec = importlib.util.spec_from_file_location("bilibili_dm_patch", MODULE_PATH)
+if spec is None or spec.loader is None:
+    raise ImportError("bilibili_dm_patch module spec not found")
+bilibili_dm_patch = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(bilibili_dm_patch)
+
+REQUIRED_KEYS = {
+    "web_location",
+    "dm_img_list",
+    "dm_img_str",
+    "dm_cover_img_str",
+    "dm_img_inter",
+}
+
+
+class BuildDmImgParamsTest(unittest.TestCase):
+    def test_contains_all_required_risk_control_keys(self):
+        params = bilibili_dm_patch.build_dm_img_params()
+        self.assertTrue(REQUIRED_KEYS.issubset(params.keys()))
+
+    def test_web_location_is_expected_sentinel(self):
+        self.assertEqual(bilibili_dm_patch.build_dm_img_params()["web_location"], 1550101)
+
+
+class ApplyPatchTest(unittest.TestCase):
+    def setUp(self):
+        try:
+            import yt_dlp.extractor.bilibili  # noqa: F401
+        except Exception as exc:  # pragma: no cover - env without yt-dlp
+            self.skipTest(f"yt-dlp not importable: {exc}")
+
+    def test_patch_is_idempotent(self):
+        from yt_dlp.extractor.bilibili import BilibiliBaseIE
+
+        self.assertTrue(bilibili_dm_patch.apply_bilibili_dm_img_patch())
+        first = BilibiliBaseIE._download_playinfo
+        self.assertTrue(bilibili_dm_patch.apply_bilibili_dm_img_patch())
+        self.assertIs(BilibiliBaseIE._download_playinfo, first)
+
+    def test_dm_params_reach_wbi_signing_with_caller_query_preserved(self):
+        from yt_dlp import YoutubeDL
+        from yt_dlp.extractor.bilibili import BilibiliBaseIE
+
+        bilibili_dm_patch.apply_bilibili_dm_img_patch()
+
+        captured = {}
+
+        def fake_sign_wbi(params, video_id):
+            # Capture the exact params handed to wbi signing (just before the
+            # HTTP request). dm_* must already be present here, pre-signature.
+            captured.update(params)
+            return params
+
+        def fake_download_json(url, video_id, **kwargs):
+            # Avoid any network; the real playurl call would 412 without dm_*.
+            return {"data": {"ok": True}}
+
+        ie = BilibiliBaseIE(YoutubeDL({"quiet": True}))
+        ie._sign_wbi = fake_sign_wbi
+        ie._download_json = fake_download_json
+
+        ie._download_playinfo("BV1X9L16oEgB", 4242, headers={}, query={"qn": 64})
+
+        self.assertTrue(
+            REQUIRED_KEYS.issubset(captured.keys()),
+            f"missing dm_* keys, got: {sorted(captured)}",
+        )
+        self.assertEqual(captured["web_location"], 1550101)
+        # caller-supplied query must survive the merge
+        self.assertEqual(captured["qn"], 64)
+        # the original method still builds its base params
+        self.assertEqual(captured["bvid"], "BV1X9L16oEgB")
+
+
+if __name__ == "__main__":
+    unittest.main()