feat: add Rust acceleration for core parsing

This commit is contained in:
jxxghp
2026-05-22 19:57:26 +08:00
parent 7daeb17d85
commit bd4d493f34
28 changed files with 5012 additions and 77 deletions

View File

@@ -68,6 +68,8 @@ class LocalSetupConfigDirTests(unittest.TestCase):
venv_pip = venv_dir / "bin" / "pip"
with patch.object(module, "ensure_supported_python"), \
patch.object(module, "ensure_rust_accel_ready") as rust_ready, \
patch.object(module, "install_rust_accel") as install_rust, \
patch.object(
module,
"configure_venv_pip_compat",
@@ -86,8 +88,75 @@ class LocalSetupConfigDirTests(unittest.TestCase):
run_mock.assert_any_call(
[str(venv_pip), "install", "-r", str(module.ROOT / "requirements.txt")]
)
rust_ready.assert_called_once_with()
install_rust.assert_called_once_with(venv_python)
install_browser.assert_called_once_with(venv_python)
def test_install_rust_accel_runs_maturin_develop(self):
"""
验证本地 CLI 安装会通过 maturin 将 Rust 扩展安装进虚拟环境。
"""
module = load_local_setup_module()
with tempfile.TemporaryDirectory() as temp_dir:
manifest = Path(temp_dir) / "Cargo.toml"
manifest.write_text("[package]\nname = \"moviepilot_rust\"\n")
venv_python = Path(temp_dir) / "venv" / "bin" / "python"
with patch.object(module, "RUST_ACCEL_MANIFEST", manifest), \
patch.object(module, "_rust_accel_should_skip", return_value=False), \
patch.object(module, "ensure_rust_accel_ready"), \
patch.object(module, "_cargo_env_path", return_value="/cargo/bin:/bin"), \
patch.object(module, "run") as run_mock:
module.install_rust_accel(venv_python)
run_mock.assert_called_once()
command = run_mock.call_args.args[0]
self.assertEqual(
command,
[
str(venv_python),
"-m",
"maturin",
"develop",
"--release",
"--manifest-path",
str(manifest),
],
)
self.assertEqual(run_mock.call_args.kwargs["env"]["PATH"], "/cargo/bin:/bin")
def test_ensure_rust_accel_ready_requires_cargo(self):
"""
验证 Rust 扩展源码存在时CLI 安装会检查 cargo 是否可用。
"""
module = load_local_setup_module()
with tempfile.TemporaryDirectory() as temp_dir:
manifest = Path(temp_dir) / "Cargo.toml"
manifest.write_text("[package]\nname = \"moviepilot_rust\"\n")
with patch.object(module, "RUST_ACCEL_MANIFEST", manifest), \
patch.object(module, "_rust_accel_should_skip", return_value=False), \
patch.object(module, "_find_cargo", return_value=None):
with self.assertRaisesRegex(RuntimeError, "cargo"):
module.ensure_rust_accel_ready()
def test_ensure_rust_accel_ready_allows_skip(self):
"""
验证显式跳过 Rust 扩展时,不再要求本机存在 cargo。
"""
module = load_local_setup_module()
with tempfile.TemporaryDirectory() as temp_dir:
manifest = Path(temp_dir) / "Cargo.toml"
manifest.write_text("[package]\nname = \"moviepilot_rust\"\n")
with patch.object(module, "RUST_ACCEL_MANIFEST", manifest), \
patch.object(module, "_rust_accel_should_skip", return_value=True), \
patch.object(module, "_find_cargo", return_value=None):
module.ensure_rust_accel_ready()
if __name__ == "__main__":
unittest.main()

174
tests/test_rust_accel.py Normal file
View File

@@ -0,0 +1,174 @@
import pytest
from app.core.context import TorrentInfo
from app.modules.filter import FilterModule
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import MediaType
from app.utils import rust_accel
pytestmark = pytest.mark.skipif(
not rust_accel.is_available(),
reason="moviepilot_rust 扩展未安装",
)
def test_rust_metainfo_fast_path_extracts_emby_override():
"""
Rust 内嵌媒体标签识别应保持 Emby tmdbid 标签优先级。
"""
title, metainfo = rust_accel.find_metainfo("Movie {[tmdbid=111;type=movies]} [tmdbid=222]")
assert title == "Movie"
assert metainfo["tmdbid"] == "222"
assert metainfo["type"] == MediaType.MOVIE
def test_rust_video_title_fast_path_extracts_common_resource_fields():
"""
Rust 影视标题预解析应能提取常见资源字段。
"""
result = rust_accel.parse_video_title(
"The 355 2022 BluRay 1080p DTS-HD MA5.1 X265.10bit 60FPS"
)
assert result["year"] == "2022"
assert result["resource_pix"] == "1080p"
assert result["resource_type"] == "BluRay"
assert result["video_encode"] == "x265 10bit"
assert result["video_bit"] == "10bit"
assert result["fps"] == 60
def test_rust_filter_fast_path_matches_priority_semantics():
"""
Rust 批量过滤应保持优先级和布尔表达式语义。
"""
module = FilterModule()
module.rule_set = {
"HDR": {"include": "HDR"},
"DV": {"include": "DOVI"},
"BLU": {"include": "BluRay"},
}
torrents = [
TorrentInfo(title="Movie HDR WEB-DL", description=""),
TorrentInfo(title="Movie DOVI", description=""),
TorrentInfo(title="Movie HDR BluRay", description=""),
]
result = module._FilterModule__filter_torrents_by_rust( # noqa: SLF001
groups=[type("RuleGroup", (), {"rule_string": "HDR & !BLU > DV"})()],
torrent_list=torrents,
mediainfo=None,
)
assert result == torrents[:2]
assert result[0].pri_order == 100
assert result[1].pri_order == 99
def test_rust_indexer_search_url_keeps_existing_query_and_category():
"""
Rust URL 生成应保留路径原有查询参数并应用分类参数。
"""
spider = SiteSpider(
indexer={
"id": "ttg",
"name": "TTG",
"domain": "https://totheglory.im/",
"search": {
"paths": [{"path": "browse.php?c=M"}],
"params": {"search_field": "{keyword}", "c": "M"},
"imdbid_format": "imdb{imdbid_num}",
},
"category": {
"field": "search_field",
"delimiter": " 分类:",
"movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
},
"torrents": {"list": {}, "fields": {}},
},
keyword="tt0049406",
mtype=MediaType.MOVIE,
)
search_url = spider._SiteSpider__get_search_url() # noqa: SLF001
assert search_url.count("?") == 1
assert "c=M" in search_url
assert "search_field=imdb0049406" in search_url
def test_rust_filesize_parser_matches_site_units():
"""
Rust 文件大小解析应覆盖站点解析器常见单位。
"""
assert rust_accel.parse_filesize("1.5 GB") == 1610612736
assert rust_accel.parse_filesize("2 TiB") == 2199023255552
assert rust_accel.parse_filesize("42") == 42
def test_rust_indexer_page_parser_handles_common_fields():
"""
Rust 普通 indexer 页面解析应批量提取列表行核心字段。
"""
spider = SiteSpider(
indexer={
"id": "demo",
"name": "Demo",
"domain": "https://example.org/",
"search": {"paths": [{"path": "torrents.php"}]},
"category": {
"movie": [{"id": "401"}],
"tv": [{"id": "402"}],
},
"torrents": {
"list": {"selector": "tr.torrent"},
"fields": {
"title": {"selector": "a.title"},
"description": {"selector": ".desc"},
"details": {"selector": "a.title", "attribute": "href"},
"download": {"selector": "a.dl", "attribute": "href"},
"size": {"selector": ".size"},
"seeders": {"selector": ".seeders"},
"leechers": {"selector": ".leechers"},
"grabs": {"selector": ".grabs"},
"downloadvolumefactor": {"case": {".free": 0}},
"uploadvolumefactor": {"selector": ".up"},
"labels": {"selector": ".label"},
"hr": {"selector": ".hr"},
"category": {"selector": ".cat"},
},
},
},
)
html = """
<table>
<tr class="torrent">
<td><a class="title" href="/details/1">Movie 2024 1080p</a><span class="desc">BluRay</span></td>
<td><a class="dl" href="/download/1">DL</a></td>
<td class="size">1.5 GB</td><td class="seeders">1,234</td><td class="leechers">5/10</td>
<td class="grabs">42</td><td class="free">Free</td><td class="up">2x</td>
<td><span class="label">DIY</span><span class="label">HDR</span></td>
<td class="hr">H&R</td><td class="cat">401</td>
</tr>
</table>
"""
torrents = spider.parse(html)
assert torrents == [{
"title": "Movie 2024 1080p",
"description": "BluRay",
"page_url": "https://example.org/details/1",
"enclosure": "https://example.org/download/1",
"size": 1610612736,
"seeders": 1234,
"peers": 5,
"grabs": 42,
"downloadvolumefactor": 0,
"uploadvolumefactor": 2,
"labels": ["DIY", "HDR"],
"hit_and_run": True,
"category": MediaType.MOVIE.value,
}]