fix: flag subtitle login pages

This commit is contained in:
jxxghp
2026-06-10 01:28:16 +08:00
parent 4241461ba7
commit 5d5e37792e
2 changed files with 40 additions and 0 deletions

View File

@@ -902,6 +902,25 @@ class SiteSpider:
return None
return str(value)
@staticmethod
def __is_login_or_permission_page(html_doc: Any) -> bool:
    """
    Tell whether the parsed response is a login / permission notice page.

    :param html_doc: parsed document object; it is called with the "title"
        selector and asked for its text via ``.text()``
    :return: True when the title or the leading page text carries one of
        the known login / permission markers, otherwise False
    """
    login_markers = (
        "未登录",
        "登录 / 注册",
        "必须在登录后才能访问",
        "你需要启用cookies才能登录",
    )
    # A missing title is treated as an empty string
    page_title = (html_doc("title").text() or "").strip()
    # Collapse every whitespace run into one space and inspect only the
    # first 1000 characters of the page text
    leading_text = " ".join((html_doc.text() or "").split())[:1000]
    # The title alone is enough to recognise a login page
    if page_title == "登录":
        return True
    if ":: 登录" in page_title:
        return True
    # Otherwise look for any marker phrase in the leading text
    for marker in login_markers:
        if marker in leading_text:
            return True
    return False
def parse(self, html_text: str) -> List[dict]:
"""
解析整个页面
@@ -938,6 +957,10 @@ class SiteSpider:
try:
# 解析站点文本对象
html_doc = PyQuery(html_text)
if self.__is_login_or_permission_page(html_doc):
self.is_error = True
logger.warn(f"错误:{self.indexername} 返回登录或权限提示页")
return []
# 种子筛选器
torrents_selector = self.list.get('selector', '')
rows = html_doc(torrents_selector)

View File

@@ -366,6 +366,23 @@ def test_subtitle_site_spider_skips_empty_rows(monkeypatch):
assert result[0]["title"] == "The.Capture.S01"
def test_subtitle_site_spider_marks_login_page_as_error(monkeypatch):
    """
    When the Python subtitle parser receives a login page it should flag the
    site as errored instead of reporting "no subtitles found".
    """

    def _no_rust_result(**_kwargs):
        # Stub for the accelerated parser that always yields None
        return None

    monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", _no_rust_result)

    # NexusPHP-style login notice page
    login_page = """
<html><head><title>1PTBA.COM :: 登录 - Powered by NexusPHP</title></head>
<body>未登录! 错误: 该页面必须在登录后才能访问 你需要启用cookies才能登录</body></html>
"""
    site_spider = SiteSpider(
        _audiences_indexer(),
        keyword="The.Capture",
        search_type="subtitles",
    )
    parsed = site_spider.parse(login_page)

    # No rows are returned and the spider records the failure
    assert parsed == []
    assert site_spider.is_error
def test_subtitle_site_spider_uses_direct_nexus_row(monkeypatch):
"""
Python 字幕解析应只使用 NexusPHP 内层字幕行,避免外层布局行字段错位。