fix: detect login pages before Rust parsing

This commit is contained in:
jxxghp
2026-06-10 01:31:39 +08:00
parent 5d5e37792e
commit 63a63d2ec6
2 changed files with 16 additions and 6 deletions

View File

@@ -929,6 +929,21 @@ class SiteSpider:
self.is_error = True
return []
try:
status_doc = PyQuery(html_text)
if self.__is_login_or_permission_page(status_doc):
self.is_error = True
logger.warn(f"错误:{self.indexername} 返回登录或权限提示页")
return []
except Exception as err:
self.is_error = True
logger.warn(f"错误:{self.indexername} {str(err)}")
return []
finally:
if 'status_doc' in locals():
status_doc.clear()
del status_doc
if self.search_type == "subtitles":
rust_subtitles = rust_accel.parse_indexer_subtitles(
html_text=html_text,
@@ -957,10 +972,6 @@ class SiteSpider:
try:
# 解析站点文本对象
html_doc = PyQuery(html_text)
if self.__is_login_or_permission_page(html_doc):
self.is_error = True
logger.warn(f"错误:{self.indexername} 返回登录或权限提示页")
return []
# 种子筛选器
torrents_selector = self.list.get('selector', '')
rows = html_doc(torrents_selector)

View File

@@ -366,11 +366,10 @@ def test_subtitle_site_spider_skips_empty_rows(monkeypatch):
assert result[0]["title"] == "The.Capture.S01"
def test_subtitle_site_spider_marks_login_page_as_error(monkeypatch):
def test_subtitle_site_spider_marks_login_page_as_error():
"""
Python 字幕解析遇到登录页时应标记站点错误,避免误判为无字幕。
"""
monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
html = """
<html><head><title>1PTBA.COM :: 登录 - Powered by NexusPHP</title></head>
<body>未登录! 错误: 该页面必须在登录后才能访问 你需要启用cookies才能登录</body></html>