Detect login/permission pages in SiteSpider.parse.

When the fetched HTML is a login or permission notice, parse() sets
is_error, logs a warning and returns [] before the torrent-row selector
is applied. A test covers the subtitle-search path.

diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py
index 341f15a9..62dfa934 100644
--- a/app/modules/indexer/spider/__init__.py
+++ b/app/modules/indexer/spider/__init__.py
@@ -902,6 +902,25 @@ class SiteSpider:
             return None
         return str(value)
 
+    @staticmethod
+    def __is_login_or_permission_page(html_doc: Any) -> bool:
+        """
+        Return True when the parsed page looks like a login or permission-denied notice.
+        """
+        title = (html_doc("title").text() or "").strip()
+        page_text = " ".join((html_doc.text() or "").split())[:1000]  # whitespace-collapsed; only the first 1000 chars are scanned
+        if title == "登录" or ":: 登录" in title:  # "登录" = "login"
+            return True
+        return any(
+            marker in page_text
+            for marker in (
+                "未登录",  # "not logged in"
+                "登录 / 注册",  # "log in / register"
+                "必须在登录后才能访问",  # "must be logged in to access"
+                "你需要启用cookies才能登录",  # "you need to enable cookies to log in"
+            )
+        )
+
     def parse(self, html_text: str) -> List[dict]:
         """
         解析整个页面
@@ -938,6 +957,10 @@ class SiteSpider:
         try:
             # 解析站点文本对象
             html_doc = PyQuery(html_text)
+            if self.__is_login_or_permission_page(html_doc):  # checked before any row selector is applied
+                self.is_error = True
+                logger.warn(f"错误:{self.indexername} 返回登录或权限提示页")
+                return []
             # 种子筛选器
             torrents_selector = self.list.get('selector', '')
             rows = html_doc(torrents_selector)
diff --git a/tests/test_subtitle_search.py b/tests/test_subtitle_search.py
index c40a52f7..d66c7f0a 100644
--- a/tests/test_subtitle_search.py
+++ b/tests/test_subtitle_search.py
@@ -366,6 +366,23 @@ def test_subtitle_site_spider_skips_empty_rows(monkeypatch):
     assert result[0]["title"] == "The.Capture.S01"
 
 
+def test_subtitle_site_spider_marks_login_page_as_error(monkeypatch):
+    """
+    A login page must flag the spider as errored instead of being read as no subtitles found.
+    """
+    monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
+    html = """
+    1PTBA.COM :: 登录 - Powered by NexusPHP
+    未登录! 错误: 该页面必须在登录后才能访问 你需要启用cookies才能登录
+    """
+    spider = SiteSpider(_audiences_indexer(), keyword="The.Capture", search_type="subtitles")
+
+    result = spider.parse(html)
+
+    assert result == []
+    assert spider.is_error
+
+
 def test_subtitle_site_spider_uses_direct_nexus_row(monkeypatch):
     """
     Python 字幕解析应只使用 NexusPHP 内层字幕行,避免外层布局行字段错位。