mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-14 04:00:51 +08:00
fix: flag subtitle login pages
This commit is contained in:
@@ -902,6 +902,25 @@ class SiteSpider:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
@staticmethod
def __is_login_or_permission_page(html_doc: Any) -> bool:
    """
    Tell whether the fetched document is a login page or a permission notice.

    The text of the ``title`` element is checked first; after that, known
    notice phrases are searched for in the first 1000 characters of the
    whitespace-normalised page text.

    :param html_doc: parsed document; it is called with a selector and asked
        for ``.text()`` (the visible caller passes a PyQuery object)
    :return: True when the title or the leading page text matches a marker
    """
    markers = (
        "未登录",
        "登录 / 注册",
        "必须在登录后才能访问",
        "你需要启用cookies才能登录",
    )
    raw_title = html_doc("title").text() or ""
    raw_body = html_doc.text() or ""
    heading = raw_title.strip()
    # Collapse whitespace runs and only inspect the head of the page.
    snippet = " ".join(raw_body.split())[:1000]
    if heading == "登录" or ":: 登录" in heading:
        return True
    for phrase in markers:
        if phrase in snippet:
            return True
    return False
|
||||
|
||||
def parse(self, html_text: str) -> List[dict]:
|
||||
"""
|
||||
解析整个页面
|
||||
@@ -938,6 +957,10 @@ class SiteSpider:
|
||||
try:
|
||||
# 解析站点文本对象
|
||||
html_doc = PyQuery(html_text)
|
||||
if self.__is_login_or_permission_page(html_doc):
|
||||
self.is_error = True
|
||||
logger.warn(f"错误:{self.indexername} 返回登录或权限提示页")
|
||||
return []
|
||||
# 种子筛选器
|
||||
torrents_selector = self.list.get('selector', '')
|
||||
rows = html_doc(torrents_selector)
|
||||
|
||||
@@ -366,6 +366,23 @@ def test_subtitle_site_spider_skips_empty_rows(monkeypatch):
|
||||
assert result[0]["title"] == "The.Capture.S01"
|
||||
|
||||
|
||||
def test_subtitle_site_spider_marks_login_page_as_error(monkeypatch):
    """
    When Python subtitle parsing receives a login page, the site should be
    flagged as errored rather than being mistaken for "no subtitles".
    """

    def _no_rust_result(**_kwargs):
        # Stub for rust_accel.parse_indexer_subtitles that always yields None;
        # presumably this makes the spider use its Python parser (confirm
        # against SiteSpider.parse).
        return None

    monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", _no_rust_result)
    login_page = """
    <html><head><title>1PTBA.COM :: 登录 - Powered by NexusPHP</title></head>
    <body>未登录! 错误: 该页面必须在登录后才能访问 你需要启用cookies才能登录</body></html>
    """
    spider = SiteSpider(
        _audiences_indexer(),
        keyword="The.Capture",
        search_type="subtitles",
    )

    parsed = spider.parse(login_page)

    # No rows are returned and the spider records the failure.
    assert parsed == []
    assert spider.is_error
|
||||
|
||||
|
||||
def test_subtitle_site_spider_uses_direct_nexus_row(monkeypatch):
|
||||
"""
|
||||
Python 字幕解析应只使用 NexusPHP 内层字幕行,避免外层布局行字段错位。
|
||||
|
||||
Reference in New Issue
Block a user