import pytest
from app.api.endpoints.search import _parse_media_type
from app.chain.search import SearchChain
from app.core.context import MediaInfo, SubtitleInfo
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import MediaType
from app.utils import rust_accel
AUDIENCES_SUBTITLE_HTML = """
"""
HHANCLUB_SUBTITLE_HTML = """
"""
PTTIME_SUBTITLE_HTML = """
"""
def _audiences_indexer():
"""
构造 audiences 字幕解析配置。
"""
return {
"id": 1,
"name": "观众",
"domain": "https://audiences.me/",
"subtitles": {
"list": {"selector": "tr:has(td.rowfollow)"},
"fields": {
"language": {"selector": "td:nth-child(1) img", "attribute": "title"},
"language_icon": {"selector": "td:nth-child(1) img", "attribute": "src"},
"title": {"selector": "td:nth-child(2) a"},
"download": {"selector": "td:nth-child(2) a", "attribute": "href"},
"date_added": {"selector": "td:nth-child(3) span", "attribute": "title"},
"date_elapsed": {"selector": "td:nth-child(3) span"},
"size": {"selector": "td:nth-child(4)"},
"grabs": {"selector": "td:nth-child(5)"},
"uploader": {"selector": "td:nth-child(6)"},
"report": {"selector": "td:nth-child(7) a", "attribute": "href"},
},
},
}
def _hhanclub_indexer():
"""
构造 hhanclub 字幕解析配置。
"""
return {
"id": 2,
"name": "憨憨",
"domain": "https://hhanclub.net/",
"subtitles": {
"list": {"selector": "#subtitles-table > div"},
"fields": {
"language": {
"selector": "div:nth-child(1) img",
"attribute": "title",
"default_value": "简体中文",
},
"language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"},
"title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'},
"download": {
"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]',
"attribute": "href",
},
"date_added": {"selector": "div:nth-child(4) span", "attribute": "title"},
"date_elapsed": {"selector": "div:nth-child(4) span"},
"size": {"selector": "div:nth-child(3)"},
"grabs": {"default_value": 0},
"uploader": {"selector": 'div:nth-child(2) a[href*="userdetails.php"]'},
"report": {"selector": 'div:nth-child(5) a[href*="report.php"]', "attribute": "href"},
},
},
}
def _pttime_indexer():
"""
构造 PT时间 字幕解析配置。
"""
return {
"id": 3,
"name": "PT时间",
"domain": "https://www.pttime.org/",
"subtitles": {
"list": {"selector": "table tr:has(td.rowfollow)"},
"fields": {
"language": {"selector": "td:nth-child(1)"},
"language_icon": {
"selector": "td:nth-child(1) img",
"attribute": "src",
"optional": True,
},
"title": {"selector": "td:nth-child(2) a"},
"download": {"selector": "td:nth-child(2) a", "attribute": "href"},
"date_added": {"selector": "td:nth-child(4)", "optional": True},
"date_elapsed": {"selector": "td:nth-child(4)", "optional": True},
"size": {"selector": "td:nth-child(5)"},
"grabs": {"selector": "td:nth-child(6)"},
"uploader": {"selector": "td:nth-child(7)"},
"report": {"selector": "td:nth-child(8) a", "attribute": "href"},
},
},
}
def test_search_media_type_parser_accepts_agent_values():
"""
搜索入口应兼容前端使用的 movie/tv 媒体类型值。
"""
assert _parse_media_type("movie") == MediaType.MOVIE
assert _parse_media_type("tv") == MediaType.TV
assert _parse_media_type("电影") == MediaType.MOVIE
assert _parse_media_type("电视剧") == MediaType.TV
def test_exact_subtitle_match_keeps_same_tv_episode(monkeypatch):
"""
精确字幕搜索应识别字幕名称,并只保留同一剧集的字幕结果。
"""
chain = object.__new__(SearchChain)
def fail_filter(*_args, **_kwargs):
"""
字幕精确搜索不能调用资源过滤规则。
"""
pytest.fail("字幕精确搜索不应调用过滤规则")
monkeypatch.setattr(chain, "filter_torrents", fail_filter)
mediainfo = MediaInfo(
type=MediaType.TV,
title="Example Show",
original_title="Example Show",
en_title="Example Show",
year="2024",
season=1,
names=["Example Show"],
season_years={1: "2024"},
)
subtitles = [
SubtitleInfo(site_name="SiteA", title="Example Show S01E03 1080p WEB-DL CHS", subtitle_id="1"),
SubtitleInfo(site_name="SiteA", title="Example Show S01E04 1080p WEB-DL CHS", subtitle_id="2"),
SubtitleInfo(site_name="SiteA", title="Example Show S02E03 1080p WEB-DL CHS", subtitle_id="3"),
SubtitleInfo(site_name="SiteA", title="Other Show S01E03 1080p WEB-DL CHS", subtitle_id="4"),
]
result = chain._SearchChain__parse_subtitle_result(
subtitles=subtitles,
mediainfo=mediainfo,
season_episodes={1: [3]},
episode=3,
)
assert [item.subtitle_id for item in result] == ["1"]
def test_exact_subtitle_match_uses_file_name_candidate():
"""
精确字幕搜索应同时识别字幕标题和下载文件名。
"""
chain = object.__new__(SearchChain)
mediainfo = MediaInfo(
type=MediaType.TV,
title="Example Show",
original_title="Example Show",
en_title="Example Show",
year="2024",
season=1,
names=["Example Show"],
season_years={1: "2024"},
)
subtitles = [
SubtitleInfo(
site_name="SiteA",
title="Example Show subtitle package",
file_name="Example.Show.S01E03.1080p.WEB-DL.CHS.srt",
subtitle_id="1",
),
SubtitleInfo(
site_name="SiteA",
title="Example Show subtitle package",
file_name="Example.Show.S01E04.1080p.WEB-DL.CHS.srt",
subtitle_id="2",
),
]
result = chain._SearchChain__parse_subtitle_result(
subtitles=subtitles,
mediainfo=mediainfo,
season_episodes={1: [3]},
episode=3,
)
assert [item.subtitle_id for item in result] == ["1"]
def test_subtitle_search_params_keep_episode():
"""
精确字幕搜索缓存参数时应保留集数,便于前端刷新后继续按同一集搜索。
"""
params = SearchChain._normalize_search_params(
{
"keyword": "tmdb:123",
"type": MediaType.TV,
"season": 1,
"episode": 3,
"sites": "1,2",
"result_type": "subtitle",
}
)
assert params["episode"] == "3"
assert params["result_type"] == "subtitle"
def test_subtitle_info_serializes_title_season_episode():
"""
字幕结果序列化应返回从字幕名称中识别出的季集信息。
"""
subtitle = SubtitleInfo(
site_name="观众",
title="The.Capture.S02E05.2022.1080p.iP.WEB-DL.x264.AAC-ADWeb",
)
result = subtitle.to_dict()
assert result["season_episode"] == "S02 E05"
assert result["meta_info"]["season_episode"] == "S02 E05"
assert result["episode_list"] == [5]
@pytest.mark.parametrize(
("indexer", "html", "expected"),
[
(
_audiences_indexer(),
AUDIENCES_SUBTITLE_HTML,
{
"title": "The.Capture.S02E05.2022.1080p.iP.WEB-DL.x264.AAC-ADWeb",
"language": "English",
"language_icon": "https://audiences.me/pic/flag/uk.gif",
"enclosure": "https://audiences.me/downloadsubs.php?torrentid=61964&subid=394",
"pubdate": "2022-09-18 19:33:11",
"date_elapsed": "3年9月",
"size": 99011,
"grabs": 1,
"uploader": "匿名",
"report_url": "https://audiences.me/report.php?subtitle=394",
"torrent_id": "61964",
"subtitle_id": "394",
},
),
(
_hhanclub_indexer(),
HHANCLUB_SUBTITLE_HTML,
{
"title": "The.Capture.S01.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTb[chs&eng]",
"language": "简体中文",
"language_icon": "https://hhanclub.net/pic/flag/china.gif",
"enclosure": "https://hhanclub.net/downloadsubs.php?torrentid=1435&subid=1733",
"pubdate": "2026-03-25 23:26:37",
"date_elapsed": "2月15天",
"size": 184801,
"grabs": 0,
"uploader": "qfsong",
"report_url": "https://hhanclub.net/report.php?subtitle=1733",
"torrent_id": "1435",
"subtitle_id": "1733",
},
),
(
_pttime_indexer(),
PTTIME_SUBTITLE_HTML,
{
"title": "The.Capture.S02.1080p.iP.WEBRip.AAC2.0.x264-PlayWEB.zip",
"language": "简体中文",
"enclosure": "https://www.pttime.org/downloadsubs.php?torrentid=33968&subid=1242",
"pubdate": "2022-09-25 13:36:44",
"date_elapsed": "2022-09-25 13:36:44",
"size": 253952,
"grabs": 27,
"uploader": "匿名",
"report_url": "https://www.pttime.org/report.php?subtitle=1242",
"torrent_id": "33968",
"subtitle_id": "1242",
},
),
],
)
def test_subtitle_site_spider_parses_standard_fields(monkeypatch, indexer, html, expected):
"""
Python 字幕解析应把站点配置字段归一化为前端需要的标准字段。
"""
monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
result = SiteSpider(indexer, keyword="The.Capture", search_type="subtitles").parse(html)
assert len(result) == 1
for field, value in expected.items():
assert result[0][field] == value
def test_subtitle_site_spider_skips_empty_rows(monkeypatch):
"""
Python 字幕解析应丢弃没有标题或下载链接的表格杂项行。
"""
monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
html = """
"""
result = SiteSpider(_audiences_indexer(), keyword="The.Capture", search_type="subtitles").parse(html)
assert len(result) == 1
assert result[0]["title"] == "The.Capture.S01"
def test_subtitle_site_spider_uses_direct_nexus_row(monkeypatch):
"""
Python 字幕解析应只使用 NexusPHP 内层字幕行,避免外层布局行字段错位。
"""
monkeypatch.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
html = """
"""
result = SiteSpider(_audiences_indexer(), keyword="The.Capture", search_type="subtitles").parse(html)
assert [item["title"] for item in result] == ["The.Capture.S01", "The.Capture.S02"]
assert result[0]["language"] == "添加时间"
assert result[0]["language_icon"] == "data:image/svg+xml;base64,xxx"
assert result[1]["language"] == "English"
def test_exact_subtitle_match_uses_torrent_helper_for_media_names():
"""
精确字幕搜索应复用资源匹配逻辑识别字幕标题中的媒体名称。
"""
chain = object.__new__(SearchChain)
mediainfo = MediaInfo(
type=MediaType.TV,
title="真相捕捉",
original_title="The Capture",
en_title="The Capture",
year="2019",
season=1,
names=["The.Capture", "The Capture"],
season_years={1: "2019"},
)
subtitles = [
SubtitleInfo(
site_name="憨憨",
title="The.Capture.S01.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTb[chs&eng]",
subtitle_id="1733",
),
SubtitleInfo(
site_name="观众",
title="Other.Show.S01.1080p.WEB-DL.CHS",
subtitle_id="404",
),
]
result = chain._SearchChain__parse_subtitle_result(
subtitles=subtitles,
mediainfo=mediainfo,
season_episodes={1: []},
)
assert [item.subtitle_id for item in result] == ["1733"]
def test_exact_subtitle_match_rejects_partial_name_match():
"""
精确字幕搜索应避免媒体名称只在中间出现时误判。
"""
chain = object.__new__(SearchChain)
mediainfo = MediaInfo(
type=MediaType.TV,
title="真相捕捉",
original_title="The Capture",
en_title="The Capture",
year="2019",
season=1,
names=["The.Capture", "The Capture"],
season_years={1: "2019"},
)
subtitles = [
SubtitleInfo(
site_name="观众",
title="Not.The.Capture.S01.1080p.WEB-DL.CHS",
subtitle_id="404",
),
]
result = chain._SearchChain__parse_subtitle_result(
subtitles=subtitles,
mediainfo=mediainfo,
season_episodes={1: []},
)
assert result == []