feat: accelerate metainfo parsing with rust

This commit is contained in:
jxxghp
2026-05-23 17:45:39 +08:00
parent ad38f51d6b
commit ed0f8c471b
8 changed files with 3634 additions and 0 deletions

View File

@@ -5,7 +5,13 @@ import pytest
from app.helper import rss as rss_module
from app.helper.rss import RssHelper
from app.core import metainfo as metainfo_module
from app.core.config import settings
from app.core.meta.customization import CustomizationMatcher
from app.core.meta.releasegroup import ReleaseGroupsMatcher
from app.db.systemconfig_oper import SystemConfigOper
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import SystemConfigKey
from app.schemas.types import MediaType
from app.utils import rust_accel
@@ -155,6 +161,107 @@ def test_rss_helper_parse_uses_rust_parser(monkeypatch):
assert int(result[0]["pubdate"].timestamp()) == int(datetime(2026, 5, 19, 10, 30, tzinfo=timezone.utc).timestamp())
def _metainfo_options(custom_words=None):
    """
    Build the options mapping handed to the Rust MetaInfo parser in tests.

    The values are assembled from the system configuration and settings so
    that they are intended to match what the production entry point passes.

    :param custom_words: optional list of custom recognition words; a falsy
        value is replaced by an empty list
    :return: dict with the keys ``custom_words``, ``media_exts``,
        ``release_groups`` and ``customization``
    """
    config_store = SystemConfigOper()

    # User-defined release groups: when stored as a list, drop falsy entries.
    extra_groups = config_store.get(SystemConfigKey.CustomReleaseGroups)
    if isinstance(extra_groups, list):
        extra_groups = [group for group in extra_groups if group]

    # The built-in value is read from the matcher's name-mangled private
    # attribute; user-defined groups are appended as "|"-separated alternatives.
    groups = ReleaseGroupsMatcher()._ReleaseGroupsMatcher__release_groups
    if extra_groups:
        groups = f"{groups}|{'|'.join(extra_groups)}"

    raw_customization = config_store.get(SystemConfigKey.Customization)
    normalized = CustomizationMatcher._normalize_customization(raw_customization)

    options = {}
    options["custom_words"] = custom_words or []
    options["media_exts"] = (
        settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
    )
    options["release_groups"] = groups
    options["customization"] = normalized
    return options
def test_rust_metainfo_parser_handles_video_from_entry():
    """
    The Rust MetaInfo entry point should fully recognise an ordinary
    movie/TV release title.
    """
    title = "The Long Season 2017 2160p WEB-DL H265 120FPS AAC-XXX"
    parsed = rust_accel.parse_metainfo(title, options=_metainfo_options())

    # Checked in order; the first mismatching field fails the test.
    expectations = (
        ("kind", "video"),
        ("type", "未知"),
        ("en_name", "The Long Season"),
        ("year", "2017"),
        ("resource_type", "WEB-DL"),
        ("resource_pix", "2160p"),
        ("video_encode", "H265"),
        ("audio_encode", "AAC"),
        ("fps", 120),
    )
    for field, wanted in expectations:
        assert parsed[field] == wanted
def test_rust_metainfo_parser_handles_anime_from_entry():
    """
    The Rust MetaInfo entry point should fully recognise an anime release title.
    """
    title = "[ANi] OVERLORD 第四季 - 04 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4"
    parsed = rust_accel.parse_metainfo(title, options=_metainfo_options())

    # Checked in order; the first mismatching field fails the test.
    expectations = (
        ("kind", "anime"),
        ("type", "电视剧"),
        ("en_name", "Overlord"),
        ("begin_season", 4),
        ("begin_episode", 4),
        ("resource_pix", "1080p"),
        ("video_encode", "AVC"),
        ("audio_encode", "AAC"),
    )
    for field, wanted in expectations:
        assert parsed[field] == wanted
def test_rust_metainfo_path_parser_merges_parent_title():
    """
    The Rust MetaInfoPath entry point should merge the parent directory's
    title information on the Rust side.
    """
    # The file name carries no title; the expected fields come from the parent folder.
    media_path = (
        "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK/简英双语特效.mp4"
    )
    parsed = rust_accel.parse_metainfo_path(media_path, options=_metainfo_options())

    # Checked in order; the first mismatching field fails the test.
    expectations = (
        ("kind", "video"),
        ("en_name", "Marty Supreme"),
        ("year", "2025"),
        ("original_name", "Marty Supreme"),
        ("resource_pix", "2160p"),
    )
    for field, wanted in expectations:
        assert parsed[field] == wanted
def test_metainfo_public_entry_uses_rust(monkeypatch):
    """
    The public MetaInfo entry point should call the Rust parser instead of
    going straight into the legacy Python parsing logic.
    """
    title = "旧名 第03集"
    custom_rule = "旧名 => 新名 && 第 <> 集 >> EP+1"

    seen_titles = []
    real_parse = metainfo_module.rust_accel.parse_metainfo

    def recording_parse(*args, **kwargs):
        """
        Record the first positional argument of each call, then delegate to
        the real Rust entry point and pass its result through.
        """
        seen_titles.append(args[0])
        return real_parse(*args, **kwargs)

    monkeypatch.setattr(metainfo_module.rust_accel, "parse_metainfo", recording_parse)

    meta = metainfo_module.MetaInfo(title, custom_words=[custom_rule])

    # Exactly one Rust call, made with the original (unreplaced) title.
    assert seen_titles == [title]
    # The custom rule renames the title and offsets the episode number by one.
    assert meta.name == "新名"
    assert meta.episode == "E04"
    assert meta.apply_words == [custom_rule]
def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links():
"""
Rust indexer 解析应覆盖普通站点配置的 Jinja、PyQuery selector 和过滤器。