mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-20 15:10:22 +08:00
379 lines
13 KiB
Python
379 lines
13 KiB
Python
from urllib.parse import parse_qs, urlparse
|
||
from unittest.mock import patch
|
||
|
||
from app.modules.indexer.spider import SiteSpider
|
||
from app.modules.indexer.spider.haidan import HaiDanSpider
|
||
from app.schemas.types import MediaType
|
||
|
||
|
||
def _build_indexer(**kwargs):
|
||
"""
|
||
构造 SiteSpider 生成搜索 URL 所需的最小站点配置。
|
||
"""
|
||
indexer = {
|
||
"id": "test",
|
||
"name": "测试站点",
|
||
"domain": "https://example.com/",
|
||
"search": {
|
||
"paths": [{"path": "torrents.php"}],
|
||
"params": {"search": "{keyword}"},
|
||
},
|
||
"torrents": {"list": {}, "fields": {}},
|
||
}
|
||
indexer.update(kwargs)
|
||
return indexer
|
||
|
||
|
||
def _get_search_url(indexer: dict, keyword: str | list[str], mtype: MediaType = None) -> str:
|
||
"""
|
||
调用 SiteSpider 私有 URL 构造逻辑,避免真实请求站点。
|
||
"""
|
||
spider = SiteSpider(indexer=indexer, keyword=keyword, mtype=mtype)
|
||
return spider._SiteSpider__get_search_url()
|
||
|
||
|
||
def _get_haidan_params(keyword: str | None, mtype: MediaType = None) -> dict:
|
||
"""
|
||
调用 HaiDanSpider 私有参数构造逻辑,避免真实请求站点。
|
||
"""
|
||
spider = HaiDanSpider(indexer={"domain": "https://www.haidan.video/", "name": "海胆"})
|
||
params = parse_qs(spider._HaiDanSpider__get_params(keyword, mtype), keep_blank_values=True)
|
||
return {key: values[0] for key, values in params.items()}
|
||
|
||
|
||
def test_eastgame_imdb_search_uses_imdb_area():
|
||
"""
|
||
TLF 支持 IMDb ID 搜索时应使用站点配置的 IMDb 搜索区域。
|
||
"""
|
||
indexer = _build_indexer(
|
||
id="eastgame",
|
||
domain="https://pt.eastgame.org/",
|
||
search={
|
||
"paths": [{"path": "torrents.php"}],
|
||
"params": {
|
||
"search_area": 4,
|
||
"search": "{keyword}",
|
||
},
|
||
},
|
||
)
|
||
|
||
parsed_url = urlparse(_get_search_url(indexer, "tt16311594"))
|
||
query = parse_qs(parsed_url.query)
|
||
|
||
assert parsed_url.geturl().startswith("https://pt.eastgame.org/torrents.php?")
|
||
assert query["search"] == ["tt16311594"]
|
||
assert query["search_area"] == ["4"]
|
||
|
||
|
||
def test_eastgame_title_search_keeps_title_area():
|
||
"""
|
||
TLF 普通标题搜索不应误用 IMDb 搜索区域。
|
||
"""
|
||
indexer = _build_indexer(
|
||
id="eastgame",
|
||
domain="https://pt.eastgame.org/",
|
||
search={
|
||
"paths": [{"path": "torrents.php"}],
|
||
"params": {
|
||
"search_area": 4,
|
||
"search": "{keyword}",
|
||
},
|
||
},
|
||
)
|
||
|
||
query = parse_qs(urlparse(_get_search_url(indexer, "普通标题")).query)
|
||
|
||
assert query["search"] == ["普通标题"]
|
||
assert query["search_area"] == ["0"]
|
||
|
||
|
||
def test_eastgame_batch_search_keeps_title_area():
|
||
"""
|
||
TLF 批量搜索不是单个 IMDb ID,不能触发 IMDb 搜索区域。
|
||
"""
|
||
indexer = _build_indexer(
|
||
id="eastgame",
|
||
domain="https://pt.eastgame.org/",
|
||
search={
|
||
"paths": [{"path": "torrents.php"}],
|
||
"params": {
|
||
"search_area": 4,
|
||
"search": "{keyword}",
|
||
},
|
||
},
|
||
)
|
||
|
||
query = parse_qs(urlparse(_get_search_url(indexer, ["tt1234567", "tt7654321"])).query)
|
||
|
||
assert query["search"] == ["tt1234567 tt7654321"]
|
||
assert query["search_mode"] == ["1"]
|
||
assert query["search_area"] == ["0"]
|
||
|
||
|
||
def test_ttg_imdb_search_formats_keyword_and_keeps_existing_query():
|
||
"""
|
||
TTG 的 IMDb 搜索需要 tt 前缀转换,并且路径自带查询参数不能生成双问号。
|
||
"""
|
||
indexer = _build_indexer(
|
||
id="ttg",
|
||
domain="https://totheglory.im/",
|
||
search={
|
||
"paths": [{"path": "browse.php?c=M"}],
|
||
"params": {
|
||
"search_field": "{keyword}",
|
||
"c": "M",
|
||
},
|
||
"imdbid_format": "imdb{imdbid_num}",
|
||
},
|
||
category={
|
||
"field": "search_field",
|
||
"delimiter": " 分类:",
|
||
"movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
|
||
},
|
||
)
|
||
|
||
search_url = _get_search_url(indexer, "tt0049406", MediaType.MOVIE)
|
||
query = parse_qs(urlparse(search_url).query)
|
||
|
||
assert search_url.count("?") == 1
|
||
assert query["c"] == ["M"]
|
||
assert query["search_field"] == ["imdb0049406 分类:电影DVDRip"]
|
||
|
||
|
||
def test_ttg_title_search_does_not_format_keyword():
|
||
"""
|
||
TTG 普通标题搜索不能被 IMDb ID 格式化规则影响。
|
||
"""
|
||
indexer = _build_indexer(
|
||
id="ttg",
|
||
domain="https://totheglory.im/",
|
||
search={
|
||
"paths": [{"path": "browse.php?c=M"}],
|
||
"params": {
|
||
"search_field": "{keyword}",
|
||
"c": "M",
|
||
},
|
||
"imdbid_format": "imdb{imdbid_num}",
|
||
},
|
||
category={
|
||
"field": "search_field",
|
||
"delimiter": " 分类:",
|
||
"movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
|
||
},
|
||
)
|
||
|
||
query = parse_qs(urlparse(_get_search_url(indexer, "The Movie", MediaType.MOVIE)).query)
|
||
|
||
assert query["search_field"] == ["The Movie 分类:电影DVDRip"]
|
||
|
||
|
||
def test_haidan_empty_keyword_uses_blank_search_value():
|
||
"""
|
||
海胆空关键词浏览不能把 Python None 编码进 search 参数。
|
||
"""
|
||
params = _get_haidan_params(None)
|
||
|
||
assert params["search"] == ""
|
||
assert params["search_area"] == "0"
|
||
|
||
|
||
def test_python_spider_remove_does_not_pollute_other_fields():
|
||
"""
|
||
Python fallback 解析带 remove 的字段时不能影响同一行后续字段选择。
|
||
"""
|
||
indexer = _build_indexer(
|
||
torrents={
|
||
"list": {"selector": "table.torrents > tr"},
|
||
"fields": {
|
||
"title": {"selector": "a.title"},
|
||
"description": {
|
||
"selector": "td.desc",
|
||
"remove": "span.noise",
|
||
},
|
||
"imdbid": {"selector": "span.noise"},
|
||
},
|
||
},
|
||
)
|
||
html = """
|
||
<table class="torrents">
|
||
<tr>
|
||
<td><a class="title">Movie.Title</a></td>
|
||
<td class="desc">Main description <span class="noise">tt1234567</span></td>
|
||
</tr>
|
||
</table>
|
||
"""
|
||
|
||
with patch("app.modules.indexer.spider.rust_accel.parse_indexer_torrents", return_value=None):
|
||
result = SiteSpider(indexer).parse(html)
|
||
|
||
assert result == [{
|
||
"title": "Movie.Title",
|
||
"description": "Main description",
|
||
"imdbid": "tt1234567",
|
||
}]
|
||
|
||
|
||
def test_python_spider_parses_nexus_php_occurrence_time_cell():
|
||
"""
|
||
Python 兜底解析应兼容 NexusPHP 发生时间模式下没有 span 的时间单元格。
|
||
"""
|
||
indexer = _build_indexer(
|
||
torrents={
|
||
"list": {"selector": 'table.torrents > tr:has("table.torrentname")'},
|
||
"fields": {
|
||
"title": {"selector": 'a[href*="details.php?id="]'},
|
||
"date_elapsed": {"selector": "td:nth-child(4) > span", "optional": True},
|
||
"date_added": {
|
||
"selector": "td:nth-child(4) > span",
|
||
"attribute": "title",
|
||
"optional": True,
|
||
},
|
||
"date": {
|
||
"text": "{% if fields['date_elapsed'] or fields['date_added'] %}"
|
||
"{{ fields['date_elapsed'] if fields['date_elapsed'] else fields['date_added'] }}"
|
||
"{% else %}now{% endif %}",
|
||
"filters": [{"name": "dateparse", "args": "%Y-%m-%d %H:%M:%S"}],
|
||
},
|
||
},
|
||
},
|
||
)
|
||
html = """
|
||
<table class="torrents">
|
||
<tr>
|
||
<td></td>
|
||
<td><table class="torrentname"><tr><td><a href="details.php?id=1">Movie.Title</a></td></tr></table></td>
|
||
<td></td>
|
||
<td class="rowfollow nowrap">2025-05-01<br/>12:13:14</td>
|
||
</tr>
|
||
</table>
|
||
"""
|
||
|
||
with patch("app.modules.indexer.spider.rust_accel.parse_indexer_torrents", return_value=None):
|
||
result = SiteSpider(indexer).parse(html)
|
||
|
||
assert result[0]["pubdate"] == "2025-05-01 12:13:14"
|
||
|
||
|
||
def test_python_spider_does_not_use_relative_date_as_pubdate():
|
||
"""
|
||
Python 兜底解析不能把相对时间写入 pubdate。
|
||
"""
|
||
indexer = _build_indexer(
|
||
torrents={
|
||
"list": {"selector": "table.torrents > tr"},
|
||
"fields": {
|
||
"title": {"selector": "a.title"},
|
||
"date_elapsed": {"selector": "span.elapsed"},
|
||
"date": {
|
||
"text": "{% if fields['date_elapsed'] or fields['date_added'] %}"
|
||
"{{ fields['date_elapsed'] if fields['date_elapsed'] else fields['date_added'] }}"
|
||
"{% else %}now{% endif %}",
|
||
"filters": [{"name": "dateparse", "args": "%Y-%m-%d %H:%M:%S"}],
|
||
},
|
||
},
|
||
},
|
||
)
|
||
html = """
|
||
<table class="torrents">
|
||
<tr><td><a class="title">Movie.Title</a><span class="elapsed">1小时</span></td></tr>
|
||
</table>
|
||
"""
|
||
|
||
with patch("app.modules.indexer.spider.rust_accel.parse_indexer_torrents", return_value=None):
|
||
result = SiteSpider(indexer).parse(html)
|
||
|
||
assert "pubdate" not in result[0] or result[0]["pubdate"] is None
|
||
|
||
|
||
def test_python_spider_does_not_use_invalid_date_as_pubdate():
|
||
"""
|
||
Python 兜底解析不能把列错位的无效日期写入 pubdate。
|
||
"""
|
||
indexer = _build_indexer(
|
||
torrents={
|
||
"list": {"selector": "table.torrents > tr"},
|
||
"fields": {
|
||
"title": {"selector": "a.title"},
|
||
"date_added": {"selector": "td:nth-child(4) > span", "attribute": "title"},
|
||
"date_elapsed": {"selector": "td:nth-child(4) > span"},
|
||
"date": {
|
||
"text": "{% if fields['date_elapsed'] or fields['date_added'] %}"
|
||
"{{ fields['date_elapsed'] if fields['date_elapsed'] else fields['date_added'] }}"
|
||
"{% else %}now{% endif %}",
|
||
"filters": [{"name": "dateparse", "args": "%Y-%m-%d %H:%M:%S"}],
|
||
},
|
||
},
|
||
},
|
||
)
|
||
html = """
|
||
<table class="torrents">
|
||
<tr>
|
||
<td><a class="title">Movie.Title</a></td>
|
||
<td></td>
|
||
<td></td>
|
||
<td>0</td>
|
||
</tr>
|
||
</table>
|
||
"""
|
||
|
||
with patch("app.modules.indexer.spider.rust_accel.parse_indexer_torrents", return_value=None):
|
||
result = SiteSpider(indexer).parse(html)
|
||
|
||
assert "pubdate" not in result[0] or result[0]["pubdate"] is None
|
||
|
||
|
||
def test_nexus_php_subtitle_table_parse_extracts_common_fields():
|
||
"""
|
||
NexusPHP 字幕表格应解析出下载链接、语言、标题、时间、大小、点击、上传者等字段。
|
||
"""
|
||
indexer = _build_indexer(
|
||
subtitles={
|
||
"search": {
|
||
"paths": [{"path": "subtitles.php?search={keyword}&lang_id=0"}],
|
||
},
|
||
"list": {"selector": "table tr:has(td.rowfollow)"},
|
||
"fields": {
|
||
"language": {"selector": "td:nth-child(1) img", "attribute": "title"},
|
||
"language_icon": {"selector": "td:nth-child(1) img", "attribute": "src"},
|
||
"title": {"selector": "td:nth-child(2) a"},
|
||
"download": {"selector": "td:nth-child(2) a", "attribute": "href"},
|
||
"date_added": {"selector": "td:nth-child(3) span", "attribute": "title"},
|
||
"date_elapsed": {"selector": "td:nth-child(3) span"},
|
||
"size": {"selector": "td:nth-child(4)"},
|
||
"grabs": {"selector": "td:nth-child(5)"},
|
||
"uploader": {"selector": "td:nth-child(6)"},
|
||
"report": {"selector": "td:nth-child(7) a", "attribute": "href"},
|
||
},
|
||
},
|
||
)
|
||
html = """
|
||
<table width="940" border="1" cellspacing="0" cellpadding="5">
|
||
<tbody><tr><td class="colhead">语言</td><td width="100%" class="colhead" align="center">标题</td></tr>
|
||
<tr><td class="rowfollow" align="center" valign="middle"><img border="0" src="pic/flag/japan.gif" alt="日本語" title="日本語"></td>
|
||
<td class="rowfollow" align="left"><a href="downloadsubs.php?torrentid=514068&subid=2179">739437-second-to-last-love-s03-2025-1080p-fod-web-dl-aac20-h264-magicstar-japanese-subtitle</a></td>
|
||
<td class="rowfollow" align="center"><nobr><span title="2026-03-17 19:48:55">2月23天</span></nobr></td>
|
||
<td class="rowfollow" align="center">233.19 KB</td>
|
||
<td class="rowfollow" align="center">0</td>
|
||
<td class="rowfollow" align="center"><i>匿名</i></td>
|
||
<td class="rowfollow" align="center"><a href="report.php?subtitle=2179"><img class="f_report" src="pic/trans.gif" alt="Report" title="举报该字幕"></a></td>
|
||
</tr>
|
||
</tbody></table>
|
||
"""
|
||
|
||
result = SiteSpider(indexer, keyword="love", search_type="subtitles").parse(html)
|
||
|
||
assert result == [{
|
||
"title": "739437-second-to-last-love-s03-2025-1080p-fod-web-dl-aac20-h264-magicstar-japanese-subtitle",
|
||
"enclosure": "https://example.com/downloadsubs.php?torrentid=514068&subid=2179",
|
||
"size": 238787,
|
||
"pubdate": "2026-03-17 19:48:55",
|
||
"date_elapsed": "2月23天",
|
||
"grabs": 0,
|
||
"language_icon": "https://example.com/pic/flag/japan.gif",
|
||
"report_url": "https://example.com/report.php?subtitle=2179",
|
||
"language": "日本語",
|
||
"uploader": "匿名",
|
||
"torrent_id": "514068",
|
||
"subtitle_id": "2179",
|
||
}]
|