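# NOTE(review): stray text "Movie & Show" - looks like extraction residue from the RSS fixture title, not module code.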

import os
import time
from datetime import datetime, timezone
from types import SimpleNamespace

import pytest

from app.helper import rss as rss_module
from app.helper.rss import RssHelper
from app.core import metainfo as metainfo_module
from app.core.config import settings
from app.core.meta.customization import CustomizationMatcher
from app.core.meta.releasegroup import ReleaseGroupsMatcher
from app.db.systemconfig_oper import SystemConfigOper
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import MediaType, SystemConfigKey
from app.utils import rust_accel

# NOTE(review): the XML/HTML fixture literals in this module contain only text
# and no markup (e.g. a lone "]]>" in the first RSS fixture, and selectors such
# as "table.torrents > tr" that have no elements to match). The tags look to
# have been stripped before this file reached review. The literals are kept
# verbatim here; restore the original fixtures before relying on these tests.

# Skip the whole module when the Rust extension reports itself unavailable.
pytestmark = pytest.mark.skipif(
    not rust_accel.is_available(),
    reason="moviepilot_rust 扩展未安装",
)


def test_rust_filter_rule_parser_matches_boolean_semantics():
    """
    The Rust filter-rule parser should keep the boolean expression structure
    produced by pyparsing.
    """
    result = rust_accel.parse_filter_rule("HDR & !BLU")

    assert result == [["HDR", "and", ["not", "BLU"]]]


def test_rust_filter_rule_parser_handles_parentheses_and_or():
    """
    The Rust filter-rule parser should keep the precedence semantics of
    parentheses, AND and OR.
    """
    result = rust_accel.parse_filter_rule("CNSUB & (4K | 1080P) & !BLU")

    assert result == [[["CNSUB", "and", ["4K", "or", "1080P"]], "and", ["not", "BLU"]]]


def test_rust_rss_parser_extracts_rss_and_atom_items():
    """
    Rust RSS parsing should cover RSS items, Atom entries, namespaces and
    date fields.
    """
    # NOTE(review): fixture markup appears stripped (see module note); the
    # enclosure URL and size asserted below are not present in this text.
    xml = """ Movie & Show bold]]> https://example.com/details/1 Tue, 19 May 2026 08:30:00 GMT 豆瓣用户 Atom Title

Atom Summary

2026-05-19T09:30:00Z """

    result = rust_accel.parse_rss_items(xml, max_items=100)

    assert len(result) == 2

    # First entry: the RSS <item> flavour.
    assert result[0]["title"] == "Movie & Show"
    assert result[0]["description"] == "Desc bold"
    assert result[0]["link"] == "https://example.com/details/1"
    assert result[0]["enclosure"] == "https://example.com/download/1.torrent"
    assert result[0]["size"] == 123456
    assert result[0]["nickname"] == "豆瓣用户"
    assert int(result[0]["pubdate"].timestamp()) == int(datetime(2026, 5, 19, 8, 30, tzinfo=timezone.utc).timestamp())

    # Second entry: the Atom <entry> flavour; link doubles as enclosure.
    assert result[1]["title"] == "Atom Title"
    assert result[1]["description"] == "Atom Summary"
    assert result[1]["link"] == "https://example.com/atom/2"
    assert result[1]["enclosure"] == "https://example.com/atom/2"
    assert int(result[1]["pubdate"].timestamp()) == int(datetime(2026, 5, 19, 9, 30, tzinfo=timezone.utc).timestamp())


def test_rust_rss_parser_skips_incomplete_items():
    """
    Rust RSS parsing should keep the original behaviour and skip entries that
    have no title or no link.
    """
    # NOTE(review): fixture markup appears stripped (see module note).
    xml = """ https://example.com/a No Link OKhttps://example.com/ok """

    result = rust_accel.parse_rss_items(xml, max_items=100)

    assert result == [{
        "title": "OK",
        "enclosure": "https://example.com/ok",
        "size": 0,
        "description": "",
        "link": "https://example.com/ok",
        "pubdate": "",
    }]


def test_rss_helper_parse_uses_rust_parser(monkeypatch):
    """
    RssHelper.parse should use the Rust parse result directly once the
    request and encoding handling are done.
    """
    # NOTE(review): fixture markup appears stripped (see module note).
    xml = """ Helper Title Helper Description https://example.com/details/3 2026-05-19T10:30:00Z """

    class FakeRequestUtils:
        """
        Test stand-in for RequestUtils that avoids real network requests.
        """

        def __init__(self, **_kwargs):
            """
            Accept and ignore constructor keyword arguments so the class is
            compatible with the way RssHelper instantiates it.
            """

        def get_res(self, _url):
            """
            Return a minimal response object carrying content/text/status_code.
            """
            return SimpleNamespace(
                status_code=200,
                content=xml.encode("utf-8"),
                text=xml,
                apparent_encoding="utf-8",
                encoding="utf-8",
            )

    monkeypatch.setattr(rss_module, "RequestUtils", FakeRequestUtils)

    result = RssHelper().parse("https://example.com/rss")

    assert len(result) == 1
    assert result[0]["title"] == "Helper Title"
    assert result[0]["enclosure"] == "https://example.com/details/3"
    assert int(result[0]["pubdate"].timestamp()) == int(datetime(2026, 5, 19, 10, 30, tzinfo=timezone.utc).timestamp())


def _metainfo_options(custom_words=None):
    """
    Build the options dict passed to the Rust MetaInfo entry points in these
    tests, intended to stay consistent with the production entry point.

    :param custom_words: optional list of custom recognition words; an empty
        list is used when omitted or falsy.
    :return: dict with the keys custom_words, media_exts, release_groups,
        customization and streaming_platforms.
    """
    systemconfig = SystemConfigOper()

    custom_release_groups = systemconfig.get(SystemConfigKey.CustomReleaseGroups)
    if isinstance(custom_release_groups, list):
        # Drop empty/falsy entries before joining them into the pattern.
        custom_release_groups = list(filter(None, custom_release_groups))

    # Reads the name-mangled private attribute of ReleaseGroupsMatcher directly.
    release_groups = ReleaseGroupsMatcher()._ReleaseGroupsMatcher__release_groups
    if custom_release_groups:
        release_groups = f"{release_groups}|{'|'.join(custom_release_groups)}"

    customization = CustomizationMatcher._normalize_customization(
        systemconfig.get(SystemConfigKey.Customization)
    )

    return {
        "custom_words": custom_words or [],
        "media_exts": settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT,
        "release_groups": release_groups,
        "customization": customization,
        "streaming_platforms": metainfo_module._rust_parse_options()["streaming_platforms"],
    }


def test_rust_metainfo_parser_handles_video_from_entry():
    """
    The Rust MetaInfo entry point should fully recognise an ordinary
    film/TV release title.
    """
    result = rust_accel.parse_metainfo(
        "The Long Season 2017 2160p WEB-DL H265 120FPS AAC-XXX",
        options=_metainfo_options(),
    )

    assert result["kind"] == "video"
    assert result["type"] == "未知"
    assert result["en_name"] == "The Long Season"
    assert result["year"] == "2017"
    assert result["resource_type"] == "WEB-DL"
    assert result["resource_pix"] == "2160p"
    assert result["video_encode"] == "H265"
    assert result["audio_encode"] == "AAC"
    assert result["fps"] == 120


def test_rust_metainfo_parser_handles_anime_from_entry():
    """
    The Rust MetaInfo entry point should fully recognise an anime title.
    """
    result = rust_accel.parse_metainfo(
        "[ANi] OVERLORD 第四季 - 04 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4",
        options=_metainfo_options(),
    )

    assert result["kind"] == "anime"
    assert result["type"] == "电视剧"
    assert result["en_name"] == "Overlord"
    assert result["begin_season"] == 4
    assert result["begin_episode"] == 4
    assert result["resource_pix"] == "1080p"
    assert result["video_encode"] == "AVC"
    assert result["audio_encode"] == "AAC"


def test_rust_metainfo_parser_handles_episode_group():
    """
    The Rust MetaInfo entry point should recognise the ``g`` episode-group
    parameter inside an explicit media tag.
    """
    group_id = "5ad0ec240e0a26303f00d84d"

    result = rust_accel.parse_metainfo(
        f"物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}} 01",
        options=_metainfo_options(),
    )

    assert result["tmdbid"] == 46195
    assert result["type"] == MediaType.TV.value
    assert result["episode_group"] == group_id
    assert result["begin_season"] == 1


def test_rust_metainfo_path_parser_merges_parent_title():
    """
    The Rust MetaInfoPath entry point should merge the parent directory title
    inside Rust.
    """
    result = rust_accel.parse_metainfo_path(
        "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK/简英双语特效.mp4",
        options=_metainfo_options(),
    )

    assert result["kind"] == "video"
    assert result["en_name"] == "Marty Supreme"
    assert result["year"] == "2025"
    assert result["original_name"] == "Marty Supreme"
    assert result["resource_pix"] == "2160p"


def test_metainfo_public_entry_uses_rust(monkeypatch):
    """
    The public MetaInfo entry point should call the Rust parser rather than
    going straight into the legacy Python parsing logic.
    """
    calls = []
    original_parse = metainfo_module.rust_accel.parse_metainfo

    def wrapped_parse(*args, **kwargs):
        """
        Record the call to the Rust entry point and pass the result through.
        """
        calls.append(args[0])
        return original_parse(*args, **kwargs)

    monkeypatch.setattr(metainfo_module.rust_accel, "parse_metainfo", wrapped_parse)

    meta = metainfo_module.MetaInfo("旧名第03集", custom_words=["旧名 => 新名 && 第 <> 集 >> EP+1"])

    # The Rust entry point receives the raw title exactly once.
    assert calls == ["旧名第03集"]
    assert meta.name == "新名"
    assert meta.episode == "E04"
    assert meta.apply_words == ["旧名 => 新名 && 第 <> 集 >> EP+1"]


def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links():
    """
    Rust indexer parsing should cover the Jinja templates, PyQuery selectors
    and filters used by an ordinary site configuration.
    """
    # NOTE(review): fixture markup appears stripped (see module note); several
    # asserted values (e.g. the ids, imdbid and grabs) are not in this text.
    html = """

Default.Title DL IMDb Main description removelink FREE

1 hour ago

1.5 GB

1,234

5/7

"""
    indexer = {
        "id": "unit",
        "name": "Unit",
        "domain": "https://example.com/",
        "search": {"paths": [{"path": "torrents.php"}]},
        "category": {
            "movie": [{"id": "401"}],
            "tv": [{"id": "402"}],
        },
        "torrents": {
            "list": {"selector": 'table.torrents > tr:has("table.torrentname")'},
            "fields": {
                "title_default": {"selector": 'a[href*="details.php?id="]'},
                "title_optional": {
                    "selector": 'a[title][href*="details.php?id="]',
                    "attribute": "title",
                },
                "title": {
                    "text": "{% if fields['title_optional'] %}{{ fields['title_optional'] }}{% else %}"
                            "{{ fields['title_default'] }}{% endif %}"
                },
                "details": {"selector": 'a[href*="details.php?id="]', "attribute": "href"},
                "download": {"selector": 'a[href*="download.php?id="]', "attribute": "href"},
                "imdbid": {
                    "selector": 'a[href*="imdb.com/title/tt"]',
                    "attribute": "href",
                    "filters": [{"name": "re_search", "args": ["tt\\d+", 0]}],
                },
                "date_elapsed": {"selector": "td:nth-child(4) > span"},
                "date_added": {"selector": "td:nth-child(4) > span", "attribute": "title"},
                "date": {
                    "text": "{% if fields['date_elapsed'] or fields['date_added'] %}"
                            "{{ fields['date_added'] if fields['date_added'] else fields['date_elapsed'] }}"
                            "{% else %}now{% endif %}",
                    "filters": [{"name": "dateparse", "args": "%Y-%m-%d %H:%M:%S"}],
                },
                "size": {"selector": "td:nth-child(5)"},
                "seeders": {"selector": "td:nth-child(6)"},
                "leechers": {"selector": "td:nth-child(7)"},
                "grabs": {"selector": "td:nth-child(8)"},
                "downloadvolumefactor": {"case": {"img.free": 0, "*": 1}},
                "uploadvolumefactor": {"case": {"*": 1}},
                "description": {
                    "selector": "font.subtitle",
                    "remove": "span,a",
                },
                "labels": {"selector": "span.label"},
                "hr": {"selector": "img.hitandrun"},
                "category": {
                    "selector": 'a[href*="?cat="]',
                    "attribute": "href",
                    "filters": [{"name": "querystring", "args": "cat"}],
                },
            },
        },
    }

    result = SiteSpider(indexer, mtype=MediaType.TV).parse(html)

    assert result == [{
        "page_url": "https://example.com/details.php?id=100",
        "enclosure": "https://example.com/download.php?id=100",
        "downloadvolumefactor": 1.0,
        "uploadvolumefactor": 1.0,
        "pubdate": "2025-05-01 12:13:14",
        "title": "Optional.Title",
        "description": "Main description",
        "imdbid": "tt1234567",
        "size": 1610612736,
        "peers": 5,
        "seeders": 1234,
        "grabs": 9,
        "date_elapsed": "1 hour ago",
        "labels": ["FREE"],
        "hit_and_run": True,
        "category": "电视剧",
    }]


def test_rust_indexer_subtitle_parser_dispatches_to_extension(monkeypatch):
    """
    The Rust subtitle-parsing entry point should pass the site configuration
    through to the extension function unchanged.
    """
    calls = []
    expected = [{"title": "Green Snake"}]

    def fake_parse_indexer_subtitles_fast(html_text, domain, list_config, fields, result_num):
        """
        Record the arguments the subtitle-parsing extension entry is called with.
        """
        calls.append((html_text, domain, list_config, fields, result_num))
        return expected

    fake_extension = SimpleNamespace(
        is_available=lambda: True,
        parse_indexer_subtitles_fast=fake_parse_indexer_subtitles_fast,
    )
    monkeypatch.setattr(rust_accel, "_moviepilot_rust", fake_extension)

    fields = {
        "language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"},
        "title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'},
    }
    list_config = {"selector": "#subtitles-table > div"}
    # NOTE(review): only two newlines survive of this fixture; the markup
    # appears stripped (see module note). The same value is used for the call
    # and for the expected recorded arguments.
    html_text = "\n\n"

    result = rust_accel.parse_indexer_subtitles(
        html_text=html_text,
        domain="https://hhanclub.net/",
        list_config=list_config,
        fields=fields,
        result_num=100,
    )

    assert result == expected
    assert calls == [(html_text, "https://hhanclub.net/", list_config, fields, 100)]


@pytest.mark.skipif(
    os.environ.get("MP_RUST_PERF_TEST") != "1",
    reason="性能测试仅在显式开启 MP_RUST_PERF_TEST=1 时运行",
)
def test_rust_subtitle_parser_is_several_times_faster_than_python(monkeypatch):
    """
    On the production SiteSpider path, Rust subtitle parsing should be
    markedly faster than the Python fallback parser.

    Only runs when the environment variable MP_RUST_PERF_TEST is "1".
    """
    if not hasattr(rust_accel._moviepilot_rust, "parse_indexer_subtitles_fast"):
        pytest.skip("当前 Rust 扩展未包含字幕解析入口")

    def subtitle_row(index: int) -> str:
        """
        Build one subtitle card row in the newer hhanclub layout; many rows
        are generated so the timing comparison is more stable.
        """
        # NOTE(review): row markup appears stripped (see module note).
        return f"""

Example Show S01E03 1080p WEB-DL CHS {index} tester{index}

111.99 KB

1月18天

"""

    rows = "".join(subtitle_row(index) for index in range(600))
    # NOTE(review): the wrapping container markup appears stripped as well.
    html = f"\n\n{rows}\n\n"

    indexer = {
        "id": "hhanclub",
        "name": "憨憨",
        "domain": "https://hhanclub.net/",
        "public": False,
        "subtitles": {
            "list": {"selector": "#subtitles-table > div"},
            "fields": {
                "language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"},
                "title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'},
                "download": {
                    "selector": 'div:nth-child(2) a[href*="downloadsubs.php"]',
                    "attribute": "href",
                },
                "size": {"selector": "div:nth-child(3)"},
                "date_added": {"selector": "div:nth-child(4) span", "attribute": "title"},
                "date_elapsed": {"selector": "div:nth-child(4) span"},
                # "defualt_value" is the key spelling used throughout this file.
                "grabs": {"defualt_value": 0},
                "uploader": {"selector": 'div:nth-child(2) a[href*="userdetails.php"]'},
                "report": {"selector": 'div:nth-child(5) a[href*="report.php"]', "attribute": "href"},
            },
            "result_num": 600,
        },
    }

    def best_time(parse_func):
        """
        Run ``parse_func`` five times and return (shortest elapsed seconds,
        last result), reducing the effect of occasional scheduling jitter on
        the speed-ratio check.
        """
        elapsed_times = []
        result = None
        for _ in range(5):
            start = time.perf_counter()
            result = parse_func()
            elapsed_times.append(time.perf_counter() - start)
        return min(elapsed_times), result

    def parse_with_python():
        """
        Force-disable Rust subtitle parsing and measure the Python fallback path.
        """
        # The patch is scoped to this call so the Rust run is unaffected.
        with monkeypatch.context() as patch_context:
            patch_context.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
            return SiteSpider(indexer, keyword="Example Show", search_type="subtitles").parse(html)

    def parse_with_rust():
        """
        Use the Rust subtitle-parsing path as configured for production.
        """
        return SiteSpider(indexer, keyword="Example Show", search_type="subtitles").parse(html)

    monkeypatch.setattr(settings, "RUST_ACCEL", True)

    python_time, python_result = best_time(parse_with_python)
    rust_time, rust_result = best_time(parse_with_rust)

    assert len(rust_result) == len(python_result) == 600
    assert rust_result[0] == python_result[0]
    assert rust_time * 3 <= python_time, (
        f"Rust 字幕解析未达到 3 倍性能要求：python={python_time:.6f}s, rust={rust_time:.6f}s"
    )


def test_rust_indexer_parser_handles_default_values_and_template_arithmetic():
    """
    Rust indexer parsing should support ``defualt_value``, the Jinja ``int``
    filter and arithmetic expressions in templates.
    """
    # NOTE(review): fixture markup appears stripped (see module note).
    html = """

Default.Title

"""
    fields = {
        "title_default": {"selector": 'a[href*="details.php?id="]'},
        "missing_days": {"defualt_value": "2", "selector": "span.missing"},
        "title": {"text": "{{ fields['title_default'] }} {{ (fields['missing_days']|int)*86400 }}"},
    }

    result = rust_accel.parse_indexer_torrents(
        html_text=html,
        domain="https://example.com/",
        list_config={"selector": "table.torrents > tr"},
        fields=fields,
        category=None,
        result_num=100,
    )

    # 2 * 86400 == 172800
    assert result == [{"title": "Default.Title 172800"}]


def test_rust_indexer_parser_handles_lstrip_and_english_elapsed_date():
    """
    Rust indexer parsing should cover the ``lstrip`` and
    ``date_en_elapsed_parse`` filters used by the IPT configuration.
    """
    # NOTE(review): fixture markup appears stripped (see module note).
    html = """

Title download

Uploaded | 2 hours ago

"""
    fields = {
        "title": {"selector": 'a[href*="/t/"]'},
        "download": {
            "selector": 'a[href*="/download.php/"]',
            "attribute": "href",
            "filters": [{"name": "lstrip", "args": ["/"]}],
        },
        "date": {
            "selector": "td:nth-child(2) > div",
            "filters": [
                {"name": "split", "args": ["|", 1]},
                {"name": "date_en_elapsed_parse"},
            ],
        },
    }

    result = rust_accel.parse_indexer_torrents(
        html_text=html,
        domain="https://iptorrents.com/",
        list_config={"selector": 'table[id="torrents"] tr'},
        fields=fields,
        category=None,
        result_num=100,
    )

    assert len(result) == 1
    assert result[0]["title"] == "Title"
    assert result[0]["enclosure"] == "https://iptorrents.com/download.php/123"
    # The elapsed-time text is relative, so only check that a date was produced.
    assert result[0]["pubdate"]


def test_rust_indexer_parser_prefers_date_added_when_date_template_returns_elapsed_text():
    """
    When the ``date`` template yields a relative time, Rust indexer parsing
    should use the absolute timestamp from ``date_added`` instead.
    """
    # NOTE(review): fixture markup appears stripped (see module note); the
    # asserted timestamp is not present in this text.
    html = """

1 hour ago

"""
    fields = {
        "date_elapsed": {"selector": "span"},
        "date_added": {"selector": "span", "attribute": "title"},
        "date": {
            "text": "{% if fields['date_elapsed'] or fields['date_added'] %}"
                    "{{ fields['date_elapsed'] if fields['date_elapsed'] else fields['date_added'] }}"
                    "{% else %}now{% endif %}",
            "filters": [{"name": "dateparse", "args": "%Y-%m-%d %H:%M:%S"}],
        },
    }

    result = rust_accel.parse_indexer_torrents(
        html_text=html,
        domain="https://example.com/",
        list_config={"selector": "table.torrents > tr"},
        fields=fields,
        category=None,
        result_num=100,
    )

    assert result[0]["pubdate"] == "2025-06-02 03:04:05"