refactor: slim rust acceleration surface

This commit is contained in:
jxxghp
2026-05-23 09:17:32 +08:00
parent fcbea077b7
commit 7cbfeb2377
15 changed files with 61 additions and 4737 deletions

View File

@@ -11,7 +11,6 @@ from app.schemas.types import MediaType
from app.utils.string import StringUtils
from app.utils.tokens import Tokens
from app.core.meta.streamingplatform import StreamingPlatforms
from app.utils import rust_accel
class MetaVideo(MetaBase):
@@ -104,60 +103,58 @@ class MetaVideo(MetaBase):
# 把年月日去掉
title = re.sub(r'\d{4}[\s._-]\d{1,2}[\s._-]\d{1,2}', "", title)
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
rust_parse = rust_accel.parse_video_title(title, isfile=isfile, media_exts=media_exts)
if not self.__apply_rust_parse(rust_parse):
# 拆分tokens
tokens = Tokens(title)
# 实例化StreamingPlatforms对象
streaming_platforms = StreamingPlatforms()
# 解析名称、年份、季、集、资源类型、分辨率等
# 拆分tokens
tokens = Tokens(title)
# 实例化StreamingPlatforms对象
streaming_platforms = StreamingPlatforms()
# 解析名称、年份、季、集、资源类型、分辨率等
token = tokens.get_next()
while token:
self._index += 1 # 更新当前处理的token索引
# Part
self.__init_part(token, tokens)
# 标题
if self._continue_flag:
self.__init_name(token, media_exts)
# 年份
if self._continue_flag:
self.__init_year(token)
# 分辨率
if self._continue_flag:
self.__init_resource_pix(token)
# 季
if self._continue_flag:
self.__init_season(token)
# 集
if self._continue_flag:
self.__init_episode(token)
# 资源类型
if self._continue_flag:
self.__init_resource_type(token)
# 流媒体平台
if self._continue_flag:
self.__init_web_source(token, tokens, streaming_platforms)
# 视频编码
if self._continue_flag:
self.__init_video_encode(token)
# 视频位深
if self._continue_flag:
self.__init_video_bit(token)
# 音频编码
if self._continue_flag:
self.__init_audio_encode(token)
# 帧率
if self._continue_flag:
self.__init_fps(token)
# 取下一个,直到没有为止
token = tokens.get_next()
while token:
self._index += 1 # 更新当前处理的token索引
# Part
self.__init_part(token, tokens)
# 标题
if self._continue_flag:
self.__init_name(token, media_exts)
# 年份
if self._continue_flag:
self.__init_year(token)
# 分辨率
if self._continue_flag:
self.__init_resource_pix(token)
# 季
if self._continue_flag:
self.__init_season(token)
# 集
if self._continue_flag:
self.__init_episode(token)
# 资源类型
if self._continue_flag:
self.__init_resource_type(token)
# 流媒体平台
if self._continue_flag:
self.__init_web_source(token, tokens, streaming_platforms)
# 视频编码
if self._continue_flag:
self.__init_video_encode(token)
# 视频位深
if self._continue_flag:
self.__init_video_bit(token)
# 音频编码
if self._continue_flag:
self.__init_audio_encode(token)
# 帧率
if self._continue_flag:
self.__init_fps(token)
# 取下一个,直到没有为止
token = tokens.get_next()
self._continue_flag = True
# 合成质量
if self._effect:
self._effect.reverse()
self.resource_effect = " ".join(self._effect)
if self._source:
self.resource_type = self._source.strip()
self._continue_flag = True
# 合成质量
if self._effect:
self._effect.reverse()
self.resource_effect = " ".join(self._effect)
if self._source:
self.resource_type = self._source.strip()
# 提取原盘DIY
if self.resource_type and "BluRay" in self.resource_type:
if (self.subtitle and re.findall(r'D[Ii]Y', self.subtitle)) \
@@ -188,62 +185,6 @@ class MetaVideo(MetaBase):
if not self.video_bit:
self.video_bit = self.extract_video_bit(self.video_encode)
def __apply_rust_parse(self, rust_parse: Optional[dict]) -> bool:
    """
    Adopt the Rust recognition result as the primary parse outcome.

    :param rust_parse: mapping produced by the Rust title parser, or None
    :return: True when the result was applied (the caller then skips the
             Python token loop); False when the result is missing or is
             not flagged as "complete"
    """
    if not (rust_parse and rust_parse.get("complete")):
        return False

    # Names and part are taken verbatim, even when the Rust side left them empty.
    self.cn_name = rust_parse.get("cn_name")
    self.en_name = rust_parse.get("en_name")
    parsed_year = rust_parse.get("year")
    if parsed_year:
        self.year = str(parsed_year)
    self.part = rust_parse.get("part")

    # Season / episode / resource fields only fill slots that are still empty.
    self.__merge_rust_parse(rust_parse)

    # An explicit type reported by Rust is applied last, after the merge.
    kind = rust_parse.get("type")
    if kind == "movie":
        self.type = MediaType.MOVIE
    elif kind == "tv":
        self.type = MediaType.TV
    return True
def __merge_rust_parse(self, rust_parse: Optional[dict]) -> None:
    """
    Merge the Rust pre-parse result into this object, filling only the
    fields that have no value yet.

    Fields merged with ``none_only=True`` are filled when the current value
    is None and the incoming value is not None; all other fields are filled
    when the current value is falsy and the incoming value is truthy.
    Filling ``begin_season`` or ``begin_episode`` also sets the media type
    to TV.

    :param rust_parse: mapping produced by the Rust title parser, or None
    """
    if not rust_parse:
        return

    def adopt(field, cast=None, none_only=False, marks_tv=False):
        # Copy one field from the Rust result when the local slot is empty.
        current = getattr(self, field)
        incoming = rust_parse.get(field)
        if none_only:
            should_fill = current is None and incoming is not None
        else:
            should_fill = not current and bool(incoming)
        if not should_fill:
            return
        setattr(self, field, incoming if cast is None else cast(incoming))
        if marks_tv:
            self.type = MediaType.TV

    adopt("year", cast=str)
    # Season information
    adopt("begin_season", cast=int, none_only=True, marks_tv=True)
    adopt("end_season", cast=int, none_only=True)
    adopt("total_season", cast=int)
    # Episode information
    adopt("begin_episode", cast=int, none_only=True, marks_tv=True)
    adopt("end_episode", cast=int, none_only=True)
    adopt("total_episode", cast=int)
    # Resource attributes are copied as-is
    for plain_field in ("resource_pix", "resource_type", "resource_effect",
                        "video_encode", "video_bit", "audio_encode"):
        adopt(plain_field)
    adopt("fps", cast=int, none_only=True)
@staticmethod
def __get_title_from_description(description: str) -> Optional[str]:
"""

View File

@@ -12,7 +12,6 @@ from app.core.meta.infopath import (
from app.core.meta.words import WordsMatcher
from app.log import logger
from app.schemas.types import MediaType
from app.utils import rust_accel
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
@@ -169,9 +168,6 @@ def is_anime(name: str) -> bool:
:param name: 名称
:return: 是否动漫
"""
rust_result = rust_accel.is_anime(name)
if rust_result is not None:
return rust_result
if not name:
return False
if _ANIME_BRACKET_RE.search(name):
@@ -189,9 +185,6 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
"""
从标题中提取媒体信息
"""
rust_result = rust_accel.find_metainfo(title)
if rust_result is not None:
return rust_result
metainfo = _empty_metainfo()
if not title:
return title, metainfo

View File

@@ -9,7 +9,6 @@ from lxml import etree
from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.log import logger
from app.utils import rust_accel
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
@@ -228,32 +227,6 @@ class RssHelper:
},
}
@staticmethod
def __format_rust_items(items: List[dict]) -> List[dict]:
    """
    Convert items returned by the Rust RSS parser into the dict layout
    returned by the Python XPath-based parser.

    :param items: list of item mappings from the Rust parser
    :return: list of dicts with title / enclosure / size / description /
             link / pubdate keys, plus nickname when the item carries one
    """

    def to_local_time(raw):
        # No raw date -> empty string; otherwise whatever StringUtils parses
        # (possibly None), converted to the local timezone when parsed.
        if not raw:
            return ""
        parsed = StringUtils.get_time(raw)
        if parsed is None:
            return parsed
        return parsed.astimezone(tz=None)

    formatted = []
    for entry in items:
        record = {
            'title': entry.get("title") or "",
            'enclosure': entry.get("enclosure") or "",
            'size': entry.get("size") or 0,
            'description': entry.get("description") or "",
            'link': entry.get("link") or "",
            'pubdate': to_local_time(entry.get("pubdate_raw")),
        }
        nickname = entry.get("nickname")
        if nickname:
            record['nickname'] = nickname
        formatted.append(record)
    return formatted
def parse(self, url, proxy: bool = False,
timeout: Optional[int] = 15, headers: dict = None, ua: str = None) -> Union[List[dict], None, bool]:
"""
@@ -325,12 +298,6 @@ class RssHelper:
logger.error("RSS内容不是有效的XML格式")
return False
rust_items = rust_accel.parse_rss_items(ret_xml, self.MAX_RSS_ITEMS)
if rust_items is not None:
if len(rust_items) >= self.MAX_RSS_ITEMS:
logger.warning(f"RSS条目过多仅处理前{self.MAX_RSS_ITEMS}")
return self.__format_rust_items(rust_items)
# 使用lxml.etree解析XML
parser = None
try:

View File

@@ -11,7 +11,6 @@ from app.modules import _ModuleBase
from app.modules.filter.RuleParser import RuleParser
from app.modules.filter.builtin_rules import BUILTIN_RULE_SET
from app.schemas.types import ModuleType, OtherModulesType, SystemConfigKey
from app.utils import rust_accel
from app.utils.string import StringUtils
@@ -139,9 +138,6 @@ class FilterModule(_ModuleBase):
# 查询规则表详情
groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups)
if groups:
rust_filtered = self.__filter_torrents_by_rust(groups, torrent_list, mediainfo)
if rust_filtered is not None:
return rust_filtered
for group in groups:
# 过滤种子
torrent_list = self.__filter_torrents(
@@ -154,46 +150,6 @@ class FilterModule(_ModuleBase):
)
return torrent_list
def __filter_torrents_by_rust(self, groups: list, torrent_list: List[TorrentInfo],
                              mediainfo: MediaInfo) -> Optional[List[TorrentInfo]]:
    """
    Filter torrents in one batch through the Rust accelerator.

    :param groups: rule groups; each group's ``rule_string`` is sent to Rust
    :param torrent_list: candidate torrents
    :param mediainfo: media information passed along as a plain payload
    :return: the surviving torrents with ``pri_order`` updated, an empty
             list when there is nothing to filter, or None when the Rust
             call yields None so the caller can fall back to Python
    """
    if not torrent_list:
        return []

    payloads = [self.__build_rust_torrent_payload(candidate) for candidate in torrent_list]

    # Prefer the object's own serializer; otherwise snapshot its attributes.
    if not mediainfo:
        media_payload = None
    elif hasattr(mediainfo, "to_dict"):
        media_payload = mediainfo.to_dict()
    else:
        media_payload = vars(mediainfo).copy()

    outcome = rust_accel.filter_torrents(
        rule_set=self.rule_set,
        rule_strings=[group.rule_string for group in groups],
        torrents=payloads,
        media_info=media_payload,
    )
    if outcome is None:
        return None

    # Each result pair is (position in torrent_list, priority order).
    kept = []
    for position, priority in outcome:
        picked = torrent_list[int(position)]
        picked.pri_order = int(priority)
        kept.append(picked)
    return kept
@staticmethod
def __build_rust_torrent_payload(torrent: TorrentInfo) -> dict:
    """
    Build the plain-data payload handed to the Rust filter, so the Rust
    side does not depend on Python business objects.

    :param torrent: torrent to serialize
    :return: dict of the torrent's fields plus ``pub_minutes`` and
             ``episode_count``
    """
    if hasattr(torrent, "to_dict"):
        data = torrent.to_dict()
    else:
        data = vars(torrent).copy()
    data["pub_minutes"] = torrent.pub_minutes()

    # The episode count is only recognized from the title when a size is present.
    episode_count = 1
    if data.get("size"):
        recognized = MetaInfo(title=torrent.title, subtitle=torrent.description)
        episode_count = recognized.total_episode or 1
    data["episode_count"] = episode_count
    return data
def __filter_torrents(self, rule_string: str, rule_name: str,
torrent_list: List[TorrentInfo],
mediainfo: MediaInfo,

View File

@@ -12,7 +12,6 @@ from pyquery import PyQuery
from app.core.config import settings
from app.log import logger
from app.schemas.types import MediaType
from app.utils import rust_accel
from app.utils.http import RequestUtils, AsyncRequestUtils
from app.utils.string import StringUtils
from app.utils.url import UrlUtils
@@ -96,19 +95,6 @@ class SiteSpider:
"""
获取搜索URL
"""
rust_url = rust_accel.build_indexer_search_url({
"search": self.search,
"batch": self.batch,
"browse": self.browse,
"category": self.category,
"domain": self.domain,
"keyword": self.keyword,
"mtype": self.mtype.value if self.mtype else None,
"cat": self.cat,
"page": self.page,
})
if rust_url:
return rust_url
# 种子搜索相对路径
paths = self.search.get('paths', [])
torrentspath = ""
@@ -753,16 +739,6 @@ class SiteSpider:
# 清空旧结果
self.torrents_info_array = []
rust_torrents = rust_accel.parse_indexer_torrents(
html_text=html_text,
domain=self.domain,
list_config=self.list,
fields=self.fields,
category=self.category,
result_num=int(self.result_num),
)
if rust_torrents is not None:
return rust_torrents
html_doc = None
try:
# 解析站点文本对象

View File

@@ -1,7 +1,6 @@
from typing import Any, Dict, List, Optional, Tuple
from typing import Optional
from app.log import logger
from app.schemas.types import MediaType
try:
import moviepilot_rust as _moviepilot_rust
@@ -26,64 +25,6 @@ def import_error() -> Optional[Exception]:
return _import_error
def is_anime(name: str) -> Optional[bool]:
    """
    Ask the Rust fast path whether a title looks like an anime release.

    :param name: title to inspect; a falsy value is sent as an empty string
    :return: the Rust verdict as a bool, or None when the extension is not
             loaded or the call failed (the caller falls back to Python)
    """
    verdict = None
    if _moviepilot_rust:
        try:
            verdict = bool(_moviepilot_rust.is_anime_fast(name or ""))
        except BaseException as err:
            # May re-raise; otherwise the failure is logged and None is returned.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 动漫识别失败,回退 Python{err}")
    return verdict
def find_metainfo(title: str) -> Optional[Tuple[str, Dict[str, Any]]]:
    """
    Extract embedded media tags from a title through the Rust fast path.

    :param title: title to scan; a falsy value is sent as an empty string
    :return: (title reported by Rust, metainfo dict), or None when the
             extension is not loaded or the call failed
    """
    if not _moviepilot_rust:
        return None
    try:
        raw = _moviepilot_rust.find_metainfo_fast(title or "")
    except BaseException as err:
        # May re-raise; otherwise the failure is logged and None is returned.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 内嵌媒体标签识别失败,回退 Python{err}")
        return None

    wanted_keys = (
        "tmdbid", "doubanid", "type",
        "begin_season", "end_season", "total_season",
        "begin_episode", "end_episode", "total_episode",
    )
    metainfo = {key: raw.get(key) for key in wanted_keys}
    # The type arrives as a string and is mapped onto the MediaType enum.
    metainfo["type"] = _coerce_media_type(metainfo["type"])
    return raw.get("title"), metainfo
def parse_video_title(
        title: str,
        isfile: bool = False,
        media_exts: Optional[List[str]] = None,
) -> Optional[Dict[str, Any]]:
    """
    Run the main video-title recognition flow in Rust.

    :param title: title to parse; a falsy value is sent as an empty string
    :param isfile: forwarded unchanged to the Rust parser
    :param media_exts: media extensions; None is sent as an empty list
    :return: whatever the Rust parser returns, or None when the extension
             is not loaded or the call failed
    """
    parsed = None
    if _moviepilot_rust:
        try:
            parsed = _moviepilot_rust.parse_video_title_fast(title or "", isfile, media_exts or [])
        except BaseException as err:
            # May re-raise; otherwise the failure is logged and None is returned.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 影视标题主识别失败,回退 Python{err}")
            parsed = None
    return parsed
def parse_filter_rule(expression: str) -> Optional[list]:
"""
使用 Rust 解析过滤规则表达式,不可用时返回 None。
@@ -98,92 +39,6 @@ def parse_filter_rule(expression: str) -> Optional[list]:
return None
def filter_torrents(
        rule_set: Dict[str, dict],
        rule_strings: List[str],
        torrents: List[dict],
        media_info: Optional[dict] = None,
) -> Optional[list]:
    """
    Run torrent filtering as one batch in Rust.

    All arguments are forwarded unchanged to the extension.

    :param rule_set: rule definitions keyed by rule name
    :param rule_strings: rule expressions, one per rule group
    :param torrents: plain-dict torrent payloads
    :param media_info: optional plain-dict media payload
    :return: whatever the Rust filter returns, or None when the extension
             is not loaded or the call failed
    """
    filtered = None
    if _moviepilot_rust:
        try:
            filtered = _moviepilot_rust.filter_torrents_fast(rule_set, rule_strings, torrents, media_info)
        except BaseException as err:
            # May re-raise; otherwise the failure is logged and None is returned.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 种子过滤失败,回退 Python{err}")
            filtered = None
    return filtered
def build_indexer_search_url(config: dict) -> Optional[str]:
    """
    Build the search URL for a regular indexer configuration in Rust.

    :param config: indexer/search settings, forwarded unchanged
    :return: whatever the Rust builder returns, or None when the extension
             is not loaded or the call failed
    """
    url = None
    if _moviepilot_rust:
        try:
            url = _moviepilot_rust.build_indexer_search_url_fast(config)
        except BaseException as err:
            # May re-raise; otherwise the failure is logged and None is returned.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 站点搜索 URL 生成失败,回退 Python{err}")
            url = None
    return url
def parse_indexer_torrents(
        html_text: str,
        domain: str,
        list_config: dict,
        fields: dict,
        category: Optional[dict],
        result_num: int,
) -> Optional[List[dict]]:
    """
    Parse a regular indexer result page as one batch in Rust.

    Falsy ``html_text`` / ``domain`` are sent as empty strings, falsy
    ``list_config`` / ``fields`` as empty dicts, and ``result_num`` as an
    int (0 when falsy); ``category`` is forwarded unchanged.

    :return: whatever the Rust parser returns, or None when the extension
             is not loaded or the call (including argument coercion) failed
    """
    if not _moviepilot_rust:
        return None
    try:
        # Argument coercion stays inside the try so a bad result_num also
        # takes the fallback path.
        call_args = (
            html_text or "",
            domain or "",
            list_config or {},
            fields or {},
            category,
            int(result_num or 0),
        )
        torrents = _moviepilot_rust.parse_indexer_torrents_fast(*call_args)
    except BaseException as err:
        # May re-raise; otherwise the failure is logged and None is returned.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 站点页面解析失败,回退 Python{err}")
        return None
    return torrents
def parse_rss_items(xml_text: str, max_items: int) -> Optional[List[dict]]:
    """
    Parse RSS/Atom entries as one batch in Rust.

    :param xml_text: feed body; a falsy value is sent as an empty string
    :param max_items: item cap, sent as an int (0 when falsy)
    :return: whatever the Rust parser returns, or None when the extension
             is not loaded or the call (including argument coercion) failed
    """
    if not _moviepilot_rust:
        return None
    try:
        # int() stays inside the try so a bad max_items also takes the
        # fallback path.
        entries = _moviepilot_rust.parse_rss_items_fast(xml_text or "", int(max_items or 0))
    except BaseException as err:
        # May re-raise; otherwise the failure is logged and None is returned.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust RSS 条目解析失败,回退 Python{err}")
        return None
    return entries
def _coerce_media_type(value: Optional[str]) -> Optional[MediaType]:
    """
    Map the media-type string returned by Rust onto the system MediaType.

    :param value: "movies", "tv", or anything else
    :return: MediaType.MOVIE for "movies", MediaType.TV for "tv",
             None for every other value
    """
    if value == "tv":
        return MediaType.TV
    return MediaType.MOVIE if value == "movies" else None
def _raise_non_rust_panic(err: BaseException) -> None:
"""
只吞掉 Rust 扩展 panic/异常,保留用户中断和进程退出语义。

View File

@@ -2,272 +2,24 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cssparser"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "derive_more"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
dependencies = [
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "ego-tree"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
[[package]]
name = "form_urlencoded"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
dependencies = [
"percent-encoding",
]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "html5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
dependencies = [
"log",
"markup5ever",
"match_token",
]
[[package]]
name = "icu_collections"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
dependencies = [
"displaydoc",
"potential_utf",
"utf8_iter",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locale_core"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_normalizer"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
dependencies = [
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
[[package]]
name = "icu_properties"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
dependencies = [
"icu_collections",
"icu_locale_core",
"icu_properties_data",
"icu_provider",
"zerotrie",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
[[package]]
name = "icu_provider"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
dependencies = [
"displaydoc",
"icu_locale_core",
"writeable",
"yoke",
"zerofrom",
"zerotrie",
"zerovec",
]
[[package]]
name = "idna"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indoc"
version = "2.0.7"
@@ -277,79 +29,12 @@ dependencies = [
"rustversion",
]
[[package]]
name = "itoa"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "libc"
version = "0.2.186"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
[[package]]
name = "litemap"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
dependencies = [
"log",
"tendril",
"web_atoms",
]
[[package]]
name = "match_token"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "memo-map"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b"
[[package]]
name = "memoffset"
version = "0.9.1"
@@ -359,144 +44,25 @@ dependencies = [
"autocfg",
]
[[package]]
name = "minijinja"
version = "2.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2929e494b2280e1e18959bb2e121da03347ae896896fdfaceaab43c88a02803f"
dependencies = [
"memo-map",
"serde",
]
[[package]]
name = "moviepilot-rust"
version = "0.1.0"
dependencies = [
"minijinja",
"once_cell",
"percent-encoding",
"pyo3",
"quick-xml",
"regex",
"scraper",
"url",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "once_cell"
version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "parking_lot"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "percent-encoding"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher",
]
[[package]]
name = "portable-atomic"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
[[package]]
name = "potential_utf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
dependencies = [
"zerovec",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro2"
version = "1.0.106"
@@ -569,15 +135,6 @@ dependencies = [
"syn",
]
[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
]
[[package]]
name = "quote"
version = "1.0.45"
@@ -587,201 +144,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9"
dependencies = [
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"precomputed-hash",
"selectors",
"tendril",
]
[[package]]
name = "selectors"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6"
dependencies = [
"bitflags",
"cssparser",
"derive_more",
"fxhash",
"log",
"new_debug_unreachable",
"phf",
"phf_codegen",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "semver"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "servo_arc"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "siphasher"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]
[[package]]
name = "syn"
version = "2.0.117"
@@ -793,183 +161,20 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "target-lexicon"
version = "0.12.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "tinystr"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unindent"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
[[package]]
name = "url"
version = "2.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"serde",
]
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "web_atoms"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
dependencies = [
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "writeable"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
[[package]]
name = "yoke"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
dependencies = [
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerofrom"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerotrie"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
]
[[package]]
name = "zerovec"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -8,11 +8,4 @@ name = "moviepilot_rust"
crate-type = ["cdylib"]
[dependencies]
minijinja = "2.20"
once_cell = "1.20"
percent-encoding = "2.3"
pyo3 = { version = "0.23", features = ["abi3-py311", "extension-module"] }
quick-xml = "0.38"
regex = "1.11"
scraper = "0.24"
url = "2.5"

View File

@@ -1,9 +1,6 @@
use crate::utils::{get_optional_f64, get_optional_i64, get_optional_string};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyString};
use regex::{Regex, RegexBuilder};
use std::collections::HashMap;
use pyo3::types::{PyList, PyString};
#[derive(Clone, Debug)]
enum RuleExpr {
@@ -22,27 +19,6 @@ enum Token {
LParen,
RParen,
}
/// Rust-side snapshot of one torrent dict, built by `TorrentPayload::from_py_dict`
/// so rule matching can read plain Rust values.
#[derive(Clone, Debug)]
struct TorrentPayload {
// Index passed to `from_py_dict`; reported back to Python in the result tuples.
index: usize,
// Value of the "title" key, empty when absent.
title: String,
// Value of the "description" key, empty when absent.
description: String,
// Value of the "labels" key as a list of strings.
labels: Vec<String>,
// Value of the "size" key, 0.0 when absent.
// NOTE(review): presumably bytes, since `match_size` compares against MiB-scaled bounds — confirm.
size: f64,
// Value of the "seeders" key, 0 when absent.
seeders: i64,
// Value of the "downloadvolumefactor" key; `None` when absent.
downloadvolumefactor: Option<f64>,
// Value of the "pub_minutes" key, 0.0 when absent.
pub_minutes: f64,
// Value of the "episode_count" key, clamped to at least 1.0 by `from_py_dict`.
episode_count: f64,
// Every non-None entry of the source dict, keyed by name, used for field-scoped matching.
fields: HashMap<String, FieldValue>,
}
/// Textual form of one torrent dict entry as stored in `TorrentPayload::fields`.
#[derive(Clone, Debug)]
enum FieldValue {
// A single value rendered through Python `str()`.
Scalar(String),
// A value that could be extracted as a list of strings.
List(Vec<String>),
}
#[pyfunction]
pub(crate) fn parse_filter_rule_fast(py: Python<'_>, expression: &str) -> PyResult<PyObject> {
let tokens = tokenize_rule(expression)?;
@@ -56,85 +32,6 @@ pub(crate) fn parse_filter_rule_fast(py: Python<'_>, expression: &str) -> PyResu
Ok(outer.into())
}
/// Run the filter rule strings over a batch of torrents and return the survivors.
///
/// Each rule string is a `>`-separated list of priority levels. A torrent stays
/// in the running only if it matches some level of every rule string, in order.
/// The returned Python list holds `(index, priority)` tuples, where `index` is the
/// torrent's position in `torrents` and `priority` is `100` minus the zero-based
/// position of the first level matched in the most recently applied rule string
/// (`0` when no rule string was applied).
#[pyfunction]
#[pyo3(signature = (rule_set, rule_strings, torrents, media_info=None))]
pub(crate) fn filter_torrents_fast(
    py: Python<'_>,
    rule_set: &Bound<'_, PyDict>,
    rule_strings: Vec<String>,
    torrents: &Bound<'_, PyList>,
    media_info: Option<&Bound<'_, PyDict>>,
) -> PyResult<PyObject> {
    // Kept as in the original: the closure is empty, so this only releases and
    // re-acquires the GIL once before the work starts.
    py.allow_threads(|| {});

    // Copy every torrent dict into a Rust payload up front.
    let mut payloads = Vec::with_capacity(torrents.len());
    for position in 0..torrents.len() {
        let entry = torrents.get_item(position)?;
        payloads.push(TorrentPayload::from_py_dict(
            position,
            entry.downcast::<PyDict>()?,
        )?);
    }

    let mut parsed_levels: HashMap<String, RuleExpr> = HashMap::new();
    let mut compiled_patterns: HashMap<String, Regex> = HashMap::new();
    let mut priorities: HashMap<usize, i64> = HashMap::new();
    let mut survivors: Vec<usize> = (0..payloads.len()).collect();

    for rule_string in rule_strings {
        if survivors.is_empty() {
            break;
        }
        let levels: Vec<String> = rule_string
            .split('>')
            .map(|level| level.trim().to_string())
            .collect();

        let mut kept = Vec::new();
        for &slot in &survivors {
            let payload = &payloads[slot];
            let mut hit = false;
            for (offset, level) in levels.iter().enumerate() {
                // Levels are parsed lazily, the first time any torrent reaches them.
                if !parsed_levels.contains_key(level) {
                    parsed_levels.insert(level.clone(), parse_rule_expression(level)?);
                }
                let expr = &parsed_levels[level];
                if match_expr(expr, payload, rule_set, media_info, &mut compiled_patterns)? {
                    priorities.insert(payload.index, 100_i64 - offset as i64);
                    hit = true;
                    break;
                }
            }
            if hit {
                kept.push(slot);
            }
        }
        survivors = kept;
    }

    let result = PyList::empty(py);
    for slot in survivors {
        let original_index = payloads[slot].index;
        let priority = priorities.get(&original_index).copied().unwrap_or(0);
        result.append((original_index, priority))?;
    }
    Ok(result.into())
}
/// Tokenize and parse one rule expression into a `RuleExpr` tree.
///
/// Fails with `PyValueError` when tokens are left over after the expression
/// has been parsed.
fn parse_rule_expression(expression: &str) -> PyResult<RuleExpr> {
    let mut parser = RuleParserState::new(tokenize_rule(expression)?);
    let parsed = parser.parse_expression()?;
    if !parser.has_remaining() {
        return Ok(parsed);
    }
    Err(PyValueError::new_err("规则表达式包含无法解析的剩余内容"))
}
/// 将规则字符串切分为名称、逻辑符和括号。
fn tokenize_rule(expression: &str) -> PyResult<Vec<Token>> {
let chars: Vec<char> = expression.chars().collect();
@@ -325,316 +222,3 @@ fn expr_binary_to_py(
list.append(expr_to_py(py, right)?)?;
Ok(list.into())
}
impl TorrentPayload {
    /// Build a filter payload from a Python torrent dict.
    ///
    /// The well-known keys are read with defaults (empty strings, `0`, `0.0`;
    /// `episode_count` is at least `1.0`). In addition, every non-None entry of
    /// the dict is stored in `fields`, as a list when it extracts as
    /// `Vec<String>` and otherwise as its Python `str()` rendering.
    fn from_py_dict(index: usize, dict: &Bound<'_, PyDict>) -> PyResult<Self> {
        // Field initialisers run top to bottom, so keys are read in the same
        // order as before.
        let mut payload = Self {
            index,
            title: get_optional_string(dict, "title")?.unwrap_or_default(),
            description: get_optional_string(dict, "description")?.unwrap_or_default(),
            labels: get_string_list(dict, "labels")?,
            size: get_optional_f64(dict, "size")?.unwrap_or(0.0),
            seeders: get_optional_i64(dict, "seeders")?.unwrap_or(0),
            downloadvolumefactor: get_optional_f64(dict, "downloadvolumefactor")?,
            pub_minutes: get_optional_f64(dict, "pub_minutes")?.unwrap_or(0.0),
            episode_count: get_optional_f64(dict, "episode_count")?
                .map_or(1.0, |count| count.max(1.0)),
            fields: HashMap::new(),
        };

        for (key, value) in dict.iter() {
            let name = key.extract::<String>()?;
            if value.is_none() {
                continue;
            }
            let stored = match value.extract::<Vec<String>>() {
                Ok(items) => FieldValue::List(items),
                Err(_) => FieldValue::Scalar(value.str()?.to_str()?.to_string()),
            };
            payload.fields.insert(name, stored);
        }
        Ok(payload)
    }

    /// Return the text that include/exclude patterns are matched against.
    ///
    /// With no `match_fields`, this is title, description and labels joined by
    /// single spaces. Otherwise it is the non-empty values of the named fields,
    /// in the order given, joined by single spaces.
    fn content_for_matches(&self, match_fields: &[String]) -> String {
        if match_fields.is_empty() {
            let labels = self.labels.join(" ");
            return [self.title.as_str(), self.description.as_str(), labels.as_str()].join(" ");
        }

        let mut parts: Vec<&str> = Vec::new();
        for field in match_fields {
            match self.fields.get(field) {
                Some(FieldValue::Scalar(text)) if !text.is_empty() => parts.push(text.as_str()),
                Some(FieldValue::List(values)) => parts.extend(
                    values
                        .iter()
                        .map(String::as_str)
                        .filter(|item| !item.is_empty()),
                ),
                _ => {}
            }
        }
        parts.join(" ")
    }
}
/// Read `key` from a Python dict as a list of strings.
///
/// A missing key or a `None` value gives an empty list. A value that does not
/// extract as `Vec<String>` becomes a one-element list holding its `str()` form.
fn get_string_list(dict: &Bound<'_, PyDict>, key: &str) -> PyResult<Vec<String>> {
    match dict.get_item(key)? {
        Some(value) if !value.is_none() => match value.extract::<Vec<String>>() {
            Ok(items) => Ok(items),
            Err(_) => Ok(vec![value.str()?.to_str()?.to_string()]),
        },
        _ => Ok(Vec::new()),
    }
}
/// Evaluate a rule expression tree against one torrent.
///
/// `And` and `Or` short-circuit: the right operand is only evaluated when the
/// left one does not already decide the result.
fn match_expr(
    expr: &RuleExpr,
    torrent: &TorrentPayload,
    rule_set: &Bound<'_, PyDict>,
    media_info: Option<&Bound<'_, PyDict>>,
    regex_cache: &mut HashMap<String, Regex>,
) -> PyResult<bool> {
    let outcome = match expr {
        RuleExpr::Name(name) => match_rule(name, torrent, rule_set, media_info, regex_cache)?,
        RuleExpr::Not(inner) => !match_expr(inner, torrent, rule_set, media_info, regex_cache)?,
        RuleExpr::And(left, right) => {
            if match_expr(left, torrent, rule_set, media_info, regex_cache)? {
                match_expr(right, torrent, rule_set, media_info, regex_cache)?
            } else {
                false
            }
        }
        RuleExpr::Or(left, right) => {
            if match_expr(left, torrent, rule_set, media_info, regex_cache)? {
                true
            } else {
                match_expr(right, torrent, rule_set, media_info, regex_cache)?
            }
        }
    };
    Ok(outcome)
}
/// Evaluate a single named rule from `rule_set` against one torrent.
///
/// An unknown rule name never matches. If the rule has a `tmdb` dict and it
/// matches `media_info`, the rule matches immediately and the remaining checks
/// are skipped. Otherwise the torrent must pass, in order: the `include`
/// patterns (at least one, when any are given), the `exclude` patterns (none
/// may match), `size_range`, the minimum `seeders`, an exact
/// `downloadvolumefactor`, and `publish_time`.
fn match_rule(
    rule_name: &str,
    torrent: &TorrentPayload,
    rule_set: &Bound<'_, PyDict>,
    media_info: Option<&Bound<'_, PyDict>>,
    regex_cache: &mut HashMap<String, Regex>,
) -> PyResult<bool> {
    let rule_obj = match rule_set.get_item(rule_name)? {
        Some(found) => found,
        None => return Ok(false),
    };
    let rule = rule_obj.downcast::<PyDict>()?;

    // A tmdb match accepts the torrent without looking at the other criteria.
    if let Some(tmdb_obj) = rule.get_item("tmdb")?.filter(|obj| !obj.is_none()) {
        if let Ok(tmdb) = tmdb_obj.downcast::<PyDict>() {
            if match_tmdb(tmdb, media_info)? {
                return Ok(true);
            }
        }
    }

    let match_fields = get_string_list(rule, "match")?;
    let content = torrent.content_for_matches(&match_fields);
    let includes = get_string_list(rule, "include")?;
    let excludes = get_string_list(rule, "exclude")?;

    // True as soon as one pattern of the slice matches the content.
    let mut any_hit = |patterns: &[String]| -> PyResult<bool> {
        for pattern in patterns {
            if regex_search(pattern, &content, regex_cache)? {
                return Ok(true);
            }
        }
        Ok(false)
    };
    if !includes.is_empty() && !any_hit(&includes)? {
        return Ok(false);
    }
    if any_hit(&excludes)? {
        return Ok(false);
    }

    if let Some(size_range) = get_optional_string(rule, "size_range")? {
        if !match_size(torrent, &size_range)? {
            return Ok(false);
        }
    }
    if let Some(min_seeders) = get_optional_i64(rule, "seeders")? {
        if torrent.seeders < min_seeders {
            return Ok(false);
        }
    }
    if let Some(required_factor) = get_optional_f64(rule, "downloadvolumefactor")? {
        if torrent.downloadvolumefactor != Some(required_factor) {
            return Ok(false);
        }
    }
    match get_optional_string(rule, "publish_time")? {
        Some(window) => Ok(match_publish_time(torrent.pub_minutes, &window)),
        None => Ok(true),
    }
}
/// Case-insensitive regex search with a per-call-site cache of compiled patterns.
///
/// A pattern is compiled the first time it is seen and reused afterwards.
/// A pattern that fails to compile is reported as `PyValueError`.
fn regex_search(
    pattern: &str,
    content: &str,
    cache: &mut HashMap<String, Regex>,
) -> PyResult<bool> {
    if let Some(compiled) = cache.get(pattern) {
        return Ok(compiled.is_match(content));
    }
    let compiled = RegexBuilder::new(pattern)
        .case_insensitive(true)
        .build()
        .map_err(|err| PyValueError::new_err(err.to_string()))?;
    let found = compiled.is_match(content);
    cache.insert(pattern.to_string(), compiled);
    Ok(found)
}
/// Check a rule's `tmdb` attribute constraints against the media info dict.
///
/// Returns `false` when no media info is supplied. Each non-None, non-empty
/// constraint is a comma-separated list of accepted values; it is satisfied
/// when any accepted value, upper-cased, equals one of the media's values for
/// that attribute. All constraints must be satisfied.
fn match_tmdb(tmdb: &Bound<'_, PyDict>, media_info: Option<&Bound<'_, PyDict>>) -> PyResult<bool> {
    let media = match media_info {
        Some(media) => media,
        None => return Ok(false),
    };

    for (attr, value) in tmdb.iter() {
        if value.is_none() {
            continue;
        }
        let attr_name = attr.extract::<String>()?;
        let expected = value.str()?.to_str()?.to_string();
        if expected.is_empty() {
            continue;
        }

        let actual = media_values(media, &attr_name)?;
        if actual.is_empty() {
            return Ok(false);
        }

        let overlaps = expected
            .split(',')
            .filter(|candidate| !candidate.is_empty())
            .map(str::to_uppercase)
            .any(|wanted| actual.contains(&wanted));
        if !overlaps {
            return Ok(false);
        }
    }
    Ok(true)
}
/// Collect the upper-cased, comparable string values of one media attribute.
///
/// A missing or `None` attribute gives an empty list. `production_countries`
/// is read as a list of dicts and contributes each entry's `iso_3166_1` value;
/// any other attribute is taken as a list of strings when it extracts as one,
/// otherwise as its single `str()` rendering.
fn media_values(media: &Bound<'_, PyDict>, attr_name: &str) -> PyResult<Vec<String>> {
    let value = match media.get_item(attr_name)? {
        Some(found) if !found.is_none() => found,
        _ => return Ok(Vec::new()),
    };

    if attr_name != "production_countries" {
        return match value.extract::<Vec<String>>() {
            Ok(items) => Ok(items.iter().map(|item| item.to_uppercase()).collect()),
            Err(_) => Ok(vec![value.str()?.to_str()?.to_uppercase()]),
        };
    }

    let mut codes = Vec::new();
    if let Ok(entries) = value.downcast::<PyList>() {
        for entry in entries.iter() {
            // Entries that are not dicts are ignored.
            let Ok(country) = entry.downcast::<PyDict>() else {
                continue;
            };
            if let Some(code) = country.get_item("iso_3166_1")? {
                codes.push(code.str()?.to_str()?.to_uppercase());
            }
        }
    }
    Ok(codes)
}
/// Match the per-episode size of a torrent against a size-range rule.
///
/// The torrent size is divided by its episode count before comparing. The rule
/// is `min-max`, `>min` or `<max`, with each bound multiplied by 1024 * 1024;
/// all comparisons are inclusive. Any other form does not match. A bound that
/// is not a number is reported as `PyValueError`.
fn match_size(torrent: &TorrentPayload, size_range: &str) -> PyResult<bool> {
    let unit = 1024.0 * 1024.0;
    // Parse one bound and scale it by `unit`.
    let to_scaled = |text: &str| -> PyResult<f64> {
        text.trim()
            .parse::<f64>()
            .map(|number| number * unit)
            .map_err(|err| PyValueError::new_err(err.to_string()))
    };

    let per_episode = torrent.size / torrent.episode_count;
    let spec = size_range.trim();

    if let Some((low, high)) = spec.split_once('-') {
        let low = to_scaled(low)?;
        let high = to_scaled(high)?;
        Ok(low <= per_episode && per_episode <= high)
    } else if let Some(low) = spec.strip_prefix('>') {
        Ok(per_episode >= to_scaled(low)?)
    } else if let Some(high) = spec.strip_prefix('<') {
        Ok(per_episode <= to_scaled(high)?)
    } else {
        Ok(false)
    }
}
/// Match a torrent's age in minutes against a publish-time rule.
///
/// `publish_time` is either a single number (minimum age, inclusive) or
/// `min-max` (inclusive range). Parts that are not numbers are ignored; when no
/// part is a number the rule places no restriction and the function returns
/// `true`.
fn match_publish_time(pub_minutes: f64, publish_time: &str) -> bool {
    // Trim each part before parsing, as `match_size` does, so that a rule
    // written as "10 - 20" keeps its bounds instead of silently matching all.
    let bounds: Vec<f64> = publish_time
        .split('-')
        .filter_map(|part| part.trim().parse::<f64>().ok())
        .collect();
    match bounds.as_slice() {
        [] => true,
        [minimum] => pub_minutes >= *minimum,
        [low, high, ..] => *low <= pub_minutes && pub_minutes <= *high,
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,4 @@
mod filter;
mod indexer;
mod meta;
mod rss;
mod utils;
use pyo3::prelude::*;
@@ -16,13 +12,6 @@ fn is_available() -> bool {
/// Module initialiser for the `moviepilot_rust` extension: registers each
/// Python-callable function on the module object `m`.
#[pymodule]
fn moviepilot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(is_available, m)?)?;
m.add_function(wrap_pyfunction!(meta::is_anime_fast, m)?)?;
m.add_function(wrap_pyfunction!(meta::find_metainfo_fast, m)?)?;
m.add_function(wrap_pyfunction!(meta::parse_video_title_fast, m)?)?;
m.add_function(wrap_pyfunction!(filter::parse_filter_rule_fast, m)?)?;
m.add_function(wrap_pyfunction!(filter::filter_torrents_fast, m)?)?;
m.add_function(wrap_pyfunction!(indexer::build_indexer_search_url_fast, m)?)?;
m.add_function(wrap_pyfunction!(indexer::parse_indexer_torrents_fast, m)?)?;
m.add_function(wrap_pyfunction!(rss::parse_rss_items_fast, m)?)?;
Ok(())
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,204 +0,0 @@
use pyo3::prelude::*;
use pyo3::types::PyDict;
use quick_xml::events::{BytesStart, Event};
use quick_xml::name::QName;
use quick_xml::Reader;
/// Fields collected for one RSS `item` / Atom `entry` while parsing.
#[derive(Default)]
struct RssItem {
    // Text of the `title` element.
    title: String,
    // Text of the `description` / `summary` element.
    description: String,
    // Text of the `link` element, or its `href` attribute.
    link: String,
    // `url` attribute of the `enclosure` element.
    enclosure: String,
    // `length` attribute of the `enclosure` element, 0 when not a number.
    size: i64,
    // Raw text of the `pubDate` / `published` / `updated` element.
    pubdate: String,
    // Text of the `creator` element.
    nickname: String,
}

impl RssItem {
    /// An item is worth emitting when it has a title and at least one of
    /// enclosure or link.
    fn has_output(&self) -> bool {
        if self.title.is_empty() {
            return false;
        }
        !(self.enclosure.is_empty() && self.link.is_empty())
    }
}
/// Identifies which `RssItem` string field the text currently being read
/// should be appended to (see `append_field`).
#[derive(Clone, Copy, PartialEq, Eq)]
enum TextField {
Title,
Description,
Link,
Pubdate,
Nickname,
}
/// Parse RSS `item` / Atom `entry` elements in one pass and return the core
/// fields the Python side needs for further processing.
///
/// Element names are compared after stripping any namespace prefix (see
/// `local_name`). Each emitted element is a dict built by `item_to_py`.
/// Parsing stops once `max_items` items have been collected (the limit is
/// checked after an item is pushed), at end of input, or at the first reader
/// error. Returns `None` only when a reader error occurred and no item had
/// been collected; otherwise returns the items gathered so far.
#[pyfunction]
pub(crate) fn parse_rss_items_fast(
py: Python<'_>,
xml_text: &str,
max_items: usize,
) -> PyResult<Option<Vec<PyObject>>> {
let mut reader = Reader::from_str(xml_text);
reader.config_mut().trim_text(true);
let mut items = Vec::new();
// The item currently being filled; `None` while outside any item/entry.
let mut current_item: Option<RssItem> = None;
// The field that incoming text/CDATA is appended to, if any.
let mut current_field: Option<TextField> = None;
// Nesting depth inside the current item: 1 at the item element itself.
let mut item_depth = 0usize;
let mut parse_failed = false;
loop {
match reader.read_event() {
Ok(Event::Start(event)) => {
let local = local_name(event.name());
// Opening a new item/entry (only when not already inside one).
if current_item.is_none() && (local == "item" || local == "entry") {
current_item = Some(RssItem::default());
current_field = None;
item_depth = 1;
continue;
}
if let Some(item) = current_item.as_mut() {
item_depth += 1;
match local.as_str() {
"title" => current_field = Some(TextField::Title),
"description" | "summary" => current_field = Some(TextField::Description),
"pubDate" | "published" | "updated" => current_field = Some(TextField::Pubdate),
"creator" => current_field = Some(TextField::Nickname),
"link" => {
current_field = Some(TextField::Link);
// Use the `href` attribute when no link has been recorded yet.
if item.link.is_empty() {
if let Some(href) = attr_value(&event, QName(b"href")) {
item.link = href;
}
}
}
"enclosure" => {
if let Some(url) = attr_value(&event, QName(b"url")) {
item.enclosure = url;
}
if let Some(length) = attr_value(&event, QName(b"length")) {
item.size = length.parse::<i64>().unwrap_or(0);
}
}
_ => {}
}
}
}
// Self-closing elements carry their data in attributes only.
Ok(Event::Empty(event)) => {
if let Some(item) = current_item.as_mut() {
match local_name(event.name()).as_str() {
"link" => {
if item.link.is_empty() {
if let Some(href) = attr_value(&event, QName(b"href")) {
item.link = href;
}
}
}
"enclosure" => {
if let Some(url) = attr_value(&event, QName(b"url")) {
item.enclosure = url;
}
if let Some(length) = attr_value(&event, QName(b"length")) {
item.size = length.parse::<i64>().unwrap_or(0);
}
}
_ => {}
}
}
}
Ok(Event::Text(event)) => {
if let (Some(item), Some(field)) = (current_item.as_mut(), current_field) {
// Text that fails to decode is skipped.
if let Ok(value) = event.decode() {
append_field(item, field, value.as_ref());
}
}
}
Ok(Event::CData(event)) => {
if let (Some(item), Some(field)) = (current_item.as_mut(), current_field) {
if let Ok(value) = event.decode() {
append_field(item, field, value.as_ref());
}
}
}
Ok(Event::End(event)) => {
if current_item.is_some() {
let local = local_name(event.name());
if local == "item" || local == "entry" {
let mut item = current_item.take().unwrap_or_default();
// Fall back to the link when no enclosure URL was seen.
if item.enclosure.is_empty() && !item.link.is_empty() {
item.enclosure = item.link.clone();
}
if item.has_output() {
items.push(item_to_py(py, &item)?.into_any().unbind());
if items.len() >= max_items {
break;
}
}
current_field = None;
item_depth = 0;
} else {
item_depth = item_depth.saturating_sub(1);
// Back at the item's own level: stop collecting text.
if item_depth <= 1 {
current_field = None;
}
}
}
}
Ok(Event::Eof) => break,
Err(_) => {
parse_failed = true;
break;
}
_ => {}
}
}
if parse_failed && items.is_empty() {
Ok(None)
} else {
Ok(Some(items))
}
}
/// Convert an internal `RssItem` into a Python dict.
///
/// String values are trimmed. Keys are inserted in the order `title`,
/// `enclosure`, `size`, `description`, `link`, `pubdate_raw`; `nickname` is
/// added last and only when it is non-empty after trimming.
fn item_to_py<'py>(py: Python<'py>, item: &RssItem) -> PyResult<Bound<'py, PyDict>> {
    let out = PyDict::new(py);
    for (key, text) in [("title", &item.title), ("enclosure", &item.enclosure)] {
        out.set_item(key, text.trim())?;
    }
    out.set_item("size", item.size)?;
    for (key, text) in [
        ("description", &item.description),
        ("link", &item.link),
        ("pubdate_raw", &item.pubdate),
    ] {
        out.set_item(key, text.trim())?;
    }
    let nickname = item.nickname.trim();
    if !nickname.is_empty() {
        out.set_item("nickname", nickname)?;
    }
    Ok(out)
}
/// Return the local part of an XML name, i.e. everything after the last `:`.
///
/// A name without a prefix is returned unchanged; a local part that is not
/// valid UTF-8 gives an empty string.
fn local_name(name: QName<'_>) -> String {
    let raw = name.as_ref();
    let start = raw
        .iter()
        .rposition(|byte| *byte == b':')
        .map_or(0, |colon| colon + 1);
    String::from_utf8(raw[start..].to_vec()).unwrap_or_default()
}
/// Read one attribute of an XML element, decoded and with entities unescaped.
///
/// Returns `None` when the attribute is absent, or when looking it up or
/// decoding its value fails.
fn attr_value(event: &BytesStart<'_>, name: QName<'_>) -> Option<String> {
    let attribute = event.try_get_attribute(name).ok().flatten()?;
    let value = attribute
        .decode_and_unescape_value(event.decoder())
        .ok()?;
    Some(value.into_owned())
}
/// Append a chunk of text to the `RssItem` field selected by `field`.
fn append_field(item: &mut RssItem, field: TextField, value: &str) {
    match field {
        TextField::Title => item.title.push_str(value),
        TextField::Description => item.description.push_str(value),
        TextField::Link => item.link.push_str(value),
        TextField::Pubdate => item.pubdate.push_str(value),
        TextField::Nickname => item.nickname.push_str(value),
    }
}

View File

@@ -1,103 +0,0 @@
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyAny, PyDict};
use regex::Regex;
/// Return the first capture group of the first match, parsed as an integer.
///
/// `None` when there is no match, group 1 did not participate, or its text is
/// not a valid `i64`.
pub(crate) fn capture_i64(regex: &Regex, text: &str) -> Option<i64> {
    let captures = regex.captures(text)?;
    captures.get(1)?.as_str().parse::<i64>().ok()
}
/// Collect one integer per match: the first participating capture group whose
/// text parses as `i64`. Used to recognise multiple values in tokens such as
/// S01E02 and ranges. Matches with no such group contribute nothing.
pub(crate) fn capture_all_i64(regex: &Regex, text: &str) -> Vec<i64> {
    regex
        .captures_iter(text)
        .filter_map(|captures| {
            captures
                .iter()
                .skip(1) // group 0 is the whole match
                .flatten()
                .find_map(|group| group.as_str().parse::<i64>().ok())
        })
        .collect()
}
/// Normalise a range and compute how many values it spans.
///
/// With both ends present, the ends are swapped if given in descending order
/// (mirroring the Python side) and the total is `end - begin + 1`. With only a
/// begin, the total is 1. In every other case the inputs are returned
/// unchanged with no total.
pub(crate) fn apply_range_total(
    begin: Option<i64>,
    end: Option<i64>,
) -> (Option<i64>, Option<i64>, Option<i64>) {
    match (begin, end) {
        (Some(first), Some(last)) => {
            let (low, high) = if first > last {
                (last, first)
            } else {
                (first, last)
            };
            (Some(low), Some(high), Some(high - low + 1))
        }
        (Some(_), None) => (begin, end, Some(1)),
        _ => (begin, end, None),
    }
}
/// Convert a Python object to `usize` for use as a filter index.
///
/// Fails with `PyValueError` on a negative value, and with the extraction
/// error when the object is not an integer.
pub(crate) fn py_i64_to_usize(value: &Bound<'_, PyAny>) -> PyResult<usize> {
    match value.extract::<i64>()? {
        negative if negative < 0 => Err(PyValueError::new_err("下标不能为负数")),
        index => Ok(index as usize),
    }
}
/// Read `key` from a Python dict as an optional string.
///
/// A missing key or a `None` value gives `None`; anything else is rendered
/// through Python `str()`.
pub(crate) fn get_optional_string(dict: &Bound<'_, PyDict>, key: &str) -> PyResult<Option<String>> {
    match dict.get_item(key)? {
        Some(value) if !value.is_none() => Ok(Some(value.str()?.to_str()?.to_string())),
        _ => Ok(None),
    }
}
/// Read `key` from a Python dict as an optional integer.
///
/// A missing key or a `None` value gives `None`. A value that extracts as
/// `i64` is returned directly; otherwise its trimmed `str()` rendering is
/// parsed, giving `None` when it is empty or not an integer.
pub(crate) fn get_optional_i64(dict: &Bound<'_, PyDict>, key: &str) -> PyResult<Option<i64>> {
    let value = match dict.get_item(key)? {
        Some(found) if !found.is_none() => found,
        _ => return Ok(None),
    };
    if let Ok(number) = value.extract::<i64>() {
        return Ok(Some(number));
    }
    let rendered = value.str()?;
    let text = rendered.to_str()?.trim();
    Ok(if text.is_empty() {
        None
    } else {
        text.parse::<i64>().ok()
    })
}
/// Read `key` from a Python dict as an optional float.
///
/// A missing key or a `None` value gives `None`. A value that extracts as
/// `f64` is returned directly; otherwise its trimmed `str()` rendering is
/// parsed, giving `None` when it is empty or not a number.
pub(crate) fn get_optional_f64(dict: &Bound<'_, PyDict>, key: &str) -> PyResult<Option<f64>> {
    let value = match dict.get_item(key)? {
        Some(found) if !found.is_none() => found,
        _ => return Ok(None),
    };
    if let Ok(number) = value.extract::<f64>() {
        return Ok(Some(number));
    }
    let rendered = value.str()?;
    let text = rendered.to_str()?.trim();
    Ok(if text.is_empty() {
        None
    } else {
        text.parse::<f64>().ok()
    })
}

View File

@@ -1,9 +1,5 @@
import pytest
from app.core.context import TorrentInfo
from app.modules.filter import FilterModule
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import MediaType
from app.utils import rust_accel
@@ -13,445 +9,19 @@ pytestmark = pytest.mark.skipif(
)
def test_rust_metainfo_fast_path_extracts_emby_override():
def test_rust_filter_rule_parser_matches_boolean_semantics():
"""
Rust 内嵌媒体标签识别应保持 Emby tmdbid 标签优先级
Rust 过滤规则解析应保持 pyparsing 的布尔表达式结构
"""
title, metainfo = rust_accel.find_metainfo("Movie {[tmdbid=111;type=movies]} [tmdbid=222]")
result = rust_accel.parse_filter_rule("HDR & !BLU")
assert title == "Movie"
assert metainfo["tmdbid"] == "222"
assert metainfo["type"] == MediaType.MOVIE
assert result == [["HDR", "and", ["not", "BLU"]]]
def test_rust_video_title_fast_path_extracts_common_resource_fields():
def test_rust_filter_rule_parser_handles_parentheses_and_or():
"""
Rust 影视标题预解析应能提取常见资源字段
Rust 过滤规则解析应保持括号、与、或的优先级语义
"""
result = rust_accel.parse_video_title(
"The 355 2022 BluRay 1080p DTS-HD MA5.1 X265.10bit 60FPS"
)
result = rust_accel.parse_filter_rule("CNSUB & (4K | 1080P) & !BLU")
assert result["year"] == "2022"
assert result["resource_pix"] == "1080p"
assert result["resource_type"] == "BluRay"
assert result["video_encode"] == "x265 10bit"
assert result["video_bit"] == "10bit"
assert result["fps"] == 60
def test_rust_filter_fast_path_matches_priority_semantics():
"""
Rust batch filtering should preserve priority and boolean-expression semantics.
"""
module = FilterModule()
module.rule_set = {
"HDR": {"include": "HDR"},
"DV": {"include": "DOVI"},
"BLU": {"include": "BluRay"},
}
torrents = [
TorrentInfo(title="Movie HDR WEB-DL", description=""),
TorrentInfo(title="Movie DOVI", description=""),
TorrentInfo(title="Movie HDR BluRay", description=""),
]
result = module._FilterModule__filter_torrents_by_rust( # noqa: SLF001
groups=[type("RuleGroup", (), {"rule_string": "HDR & !BLU > DV"})()],
torrent_list=torrents,
mediainfo=None,
)
assert result == torrents[:2]
assert result[0].pri_order == 100
assert result[1].pri_order == 99
def test_rust_indexer_search_url_keeps_existing_query_and_category():
"""
Rust URL generation should keep the path's existing query parameters and apply the category parameter.
"""
spider = SiteSpider(
indexer={
"id": "ttg",
"name": "TTG",
"domain": "https://totheglory.im/",
"search": {
"paths": [{"path": "browse.php?c=M"}],
"params": {"search_field": "{keyword}", "c": "M"},
"imdbid_format": "imdb{imdbid_num}",
},
"category": {
"field": "search_field",
"delimiter": " 分类:",
"movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
},
"torrents": {"list": {}, "fields": {}},
},
keyword="tt0049406",
mtype=MediaType.MOVIE,
)
search_url = spider._SiteSpider__get_search_url() # noqa: SLF001
assert search_url.count("?") == 1
assert "c=M" in search_url
assert "search_field=imdb0049406" in search_url
def test_rust_rss_parser_extracts_common_rss_and_atom_fields():
"""
Rust RSS parsing should cover the core fields of both RSS items and Atom entries.
"""
xml_text = """
<rss><channel>
<item>
<title>Example Torrent</title>
<description><![CDATA[Desc]]></description>
<link>https://example.org/details/1</link>
<enclosure url="https://example.org/download/1.torrent" length="1024" />
<pubDate>Tue, 19 May 2026 08:30:00 GMT</pubDate>
<dc:creator>豆瓣用户</dc:creator>
</item>
<entry>
<title>Atom Torrent</title>
<summary>Atom Desc</summary>
<link href="https://example.org/atom/2" />
<updated>2026-05-19T09:30:00Z</updated>
</entry>
</channel></rss>
"""
items = rust_accel.parse_rss_items(xml_text, 100)
assert items[0]["title"] == "Example Torrent"
assert items[0]["enclosure"] == "https://example.org/download/1.torrent"
assert items[0]["size"] == 1024
assert items[0]["nickname"] == "豆瓣用户"
assert items[1]["title"] == "Atom Torrent"
assert items[1]["enclosure"] == "https://example.org/atom/2"
def test_rust_indexer_page_parser_handles_common_fields():
"""
Rust parsing of ordinary indexer pages should extract the core fields of every list row in one batch.
"""
spider = SiteSpider(
indexer={
"id": "demo",
"name": "Demo",
"domain": "https://example.org/",
"search": {"paths": [{"path": "torrents.php"}]},
"category": {
"movie": [{"id": "401"}],
"tv": [{"id": "402"}],
},
"torrents": {
"list": {"selector": "tr.torrent"},
"fields": {
"title": {"selector": "a.title"},
"description": {"selector": ".desc"},
"details": {"selector": "a.title", "attribute": "href"},
"download": {"selector": "a.dl", "attribute": "href"},
"size": {"selector": ".size"},
"seeders": {"selector": ".seeders"},
"leechers": {"selector": ".leechers"},
"grabs": {"selector": ".grabs"},
"downloadvolumefactor": {"case": {".free": 0}},
"uploadvolumefactor": {"selector": ".up"},
"labels": {"selector": ".label"},
"hr": {"selector": ".hr"},
"category": {"selector": ".cat"},
},
},
},
)
html = """
<table>
<tr class="torrent">
<td><a class="title" href="/details/1">Movie 2024 1080p</a><span class="desc">BluRay</span></td>
<td><a class="dl" href="/download/1">DL</a></td>
<td class="size">1.5 GB</td><td class="seeders">1,234</td><td class="leechers">5/10</td>
<td class="grabs">42</td><td class="free">Free</td><td class="up">2x</td>
<td><span class="label">DIY</span><span class="label">HDR</span></td>
<td class="hr">H&R</td><td class="cat">401</td>
</tr>
</table>
"""
torrents = spider.parse(html)
assert torrents == [{
"title": "Movie 2024 1080p",
"description": "BluRay",
"page_url": "https://example.org/details/1",
"enclosure": "https://example.org/download/1",
"size": 1610612736,
"seeders": 1234,
"peers": 5,
"grabs": 42,
"downloadvolumefactor": 0,
"uploadvolumefactor": 2,
"labels": ["DIY", "HDR"],
"hit_and_run": True,
"category": MediaType.MOVIE.value,
}]
def test_rust_indexer_page_parser_renders_common_title_template():
"""
Rust parsing of ordinary indexer pages should support the title_optional template used by the site-builder project.
"""
spider = SiteSpider(
indexer={
"id": "demo",
"name": "Demo",
"domain": "https://example.org/",
"search": {"paths": [{"path": "torrents.php"}]},
"torrents": {
"list": {"selector": "tr.torrent"},
"fields": {
"title_default": {"selector": "a.title"},
"title_optional": {
"selector": "a.title",
"attribute": "title",
"optional": True,
},
"title": {
"text": (
"{% if fields['title_optional'] %}"
"{{ fields['title_optional'] }}"
"{% else %}"
"{{ fields['title_default'] }}"
"{% endif %}"
)
},
"download": {"selector": "a.dl", "attribute": "href"},
},
},
},
)
html = """
<table>
<tr class="torrent">
<td><a class="title" title="Optional Name" href="/details/1">Default Name</a></td>
<td><a class="dl" href="/download/1">DL</a></td>
</tr>
<tr class="torrent">
<td><a class="title" title="" href="/details/2">Default Fallback</a></td>
<td><a class="dl" href="/download/2">DL</a></td>
</tr>
</table>
"""
torrents = spider.parse(html)
assert [item["title"] for item in torrents] == ["Optional Name", "Default Fallback"]
def test_rust_indexer_page_parser_renders_literal_title_template_without_default_field():
"""
Rust parsing of ordinary indexer pages should render the plain-text fallback of a title_optional template when there is no title_default field.
"""
spider = SiteSpider(
indexer={
"id": "demo",
"name": "Demo",
"domain": "https://example.org/",
"search": {"paths": [{"path": "torrents.php"}]},
"torrents": {
"list": {"selector": "tr.torrent"},
"fields": {
"title_optional": {
"selector": "a.title",
"attribute": "title",
"optional": True,
},
"title": {
"text": (
"{% if fields['title_optional'] %}"
"{{ fields['title_optional'] }}"
"{% else %}"
"For All Mankind S05 2019 2160p ATVP WEB-DL "
"DDP5.1 Atmos DV H 265-HHWEB [新]"
"{% endif %}"
)
},
"download": {"selector": "a.dl", "attribute": "href"},
},
},
},
)
html = """
<table>
<tr class="torrent">
<td><a class="title" title="" href="/details/1">Ignored</a></td>
<td><a class="dl" href="/download/1">DL</a></td>
</tr>
</table>
"""
torrents = spider.parse(html)
assert torrents == [{
"title": "For All Mankind S05 2019 2160p ATVP WEB-DL DDP5.1 Atmos DV H 265-HHWEB [新]",
"enclosure": "https://example.org/download/1",
}]
def test_rust_indexer_page_parser_supports_agsvpt_selector_and_embedded_title_template():
    """
    The Rust generic indexer page parser should be compatible with AGSVPT's
    PyQuery selectors and with a Jinja template embedded in a field value.
    """
    # The same anchor selector is used for both the default title and the details link.
    details_link_selector = 'a[href*="details.php?id="]'
    title_template = (
        "{% if fields['title_optional'] %}"
        "{{ fields['title_optional'] }}"
        "{% else %}"
        "{{ fields['title_default'] }}"
        "{% endif %}"
    )
    field_rules = {
        "title_default": {"selector": details_link_selector},
        "title_optional": {
            "selector": 'a[title][href*="details.php?id="]',
            "attribute": "title",
            "optional": True,
        },
        "title": {"text": title_template},
        "details": {
            "selector": details_link_selector,
            "attribute": "href",
        },
        "download": {
            "selector": 'a[href*="download.php?id="]',
            "attribute": "href",
        },
    }
    indexer_config = {
        "id": "agsvpt",
        "name": "AGSVPT",
        "domain": "https://www.agsvpt.com/",
        "search": {"paths": [{"path": "torrents.php"}]},
        "torrents": {
            "list": {"selector": 'table.torrents > tr:has("table.torrentname")'},
            "fields": field_rules,
        },
    }
    site_spider = SiteSpider(indexer=indexer_config)

    # The anchor's title attribute itself carries a template snippet whose
    # else-branch holds the release name expected below.
    page = """
<table class="torrents">
<tr>
<td><table class="torrentname"><tr><td>
<a href="details.php?id=1" title="{% if fields['title_optional'] %}{% else %}Release that Witch S01 2026 1080p WEB-DL H264 AAC-HHWEB{% endif %}">Ignored</a>
</td></tr></table></td>
<td><a href="download.php?id=1">DL</a></td>
</tr>
</table>
"""
    parsed = site_spider.parse(page)

    # Relative hrefs in the page are expected back as absolute URLs on the indexer domain.
    expected_row = {
        "title": "Release that Witch S01 2026 1080p WEB-DL H264 AAC-HHWEB",
        "page_url": "https://www.agsvpt.com/details.php?id=1",
        "enclosure": "https://www.agsvpt.com/download.php?id=1",
    }
    assert parsed == [expected_row]
def test_rust_indexer_page_parser_renders_common_description_templates():
    """
    The Rust generic indexer page parser should be compatible with the
    description field templates used by the site-builder project.
    """
    # Joins subject and tags when tags is non-empty, otherwise emits the subject alone.
    description_template = (
        "{% if fields['tags']%}"
        "{{ fields['subject']+' '+fields['tags'] }}"
        "{% else %}"
        "{{ fields['subject'] }}"
        "{% endif %}"
    )
    field_rules = {
        "title": {"selector": "a.title"},
        "subject": {"selector": ".subject"},
        "tags": {"selector": ".tags"},
        "description": {"text": description_template},
        "download": {"selector": "a.dl", "attribute": "href"},
    }
    indexer_config = {
        "id": "demo",
        "name": "Demo",
        "domain": "https://example.org/",
        "search": {"paths": [{"path": "torrents.php"}]},
        "torrents": {
            "list": {"selector": "tr.torrent"},
            "fields": field_rules,
        },
    }
    site_spider = SiteSpider(indexer=indexer_config)

    # First row has both subject and tags; the second row's tags span is empty.
    page = """
<table>
<tr class="torrent">
<td><a class="title">Movie 2024</a><span class="subject">BluRay</span><span class="tags">HDR</span></td>
<td><a class="dl" href="/download/1">DL</a></td>
</tr>
<tr class="torrent">
<td><a class="title">Show 2025</a><span class="subject">WEB-DL</span><span class="tags"></span></td>
<td><a class="dl" href="/download/2">DL</a></td>
</tr>
</table>
"""
    parsed = site_spider.parse(page)

    descriptions = [row["description"] for row in parsed]
    assert descriptions == ["BluRay HDR", "WEB-DL"]
def test_rust_indexer_page_parser_supports_remove_and_negative_index():
    """
    The Rust generic indexer page parser should be compatible with the `remove`
    rule and the negative index commonly used in site configurations.
    """
    # description and labels share the same remove rule and the same negative contents index.
    strip_inline_tags = "span,a,img,font,b"
    field_rules = {
        "title": {"selector": ".name", "remove": "a,b"},
        "description": {
            "selector": ".desc",
            "remove": strip_inline_tags,
            "contents": -1,
        },
        "labels": {
            "selector": ".labels > span",
            "remove": strip_inline_tags,
            "contents": -1,
        },
        "download": {"selector": "a.dl", "attribute": "href"},
    }
    indexer_config = {
        "id": "demo",
        "name": "Demo",
        "domain": "https://example.org/",
        "search": {"paths": [{"path": "torrents.php"}]},
        "torrents": {
            "list": {"selector": "tr.torrent"},
            "fields": field_rules,
        },
    }
    site_spider = SiteSpider(indexer=indexer_config)

    page = """
<table>
<tr class="torrent">
<td class="name">Movie<a>删掉</a><b>也删</b>2024</td>
<td class="desc">第一行
<span>标签</span><a>链接</a>
第二行
</td>
<td class="labels"><span><i>DIY</i></span><span><i>HDR</i></span></td>
<td><a class="dl" href="/download/1">DL</a></td>
</tr>
</table>
"""
    parsed = site_spider.parse(page)

    # Only the first parsed row is inspected.
    first_row = parsed[0]
    assert first_row["title"] == "Movie2024"
    assert first_row["description"] == "第一行 第二行"
    assert first_row["labels"] == ["DIY", "HDR"]
assert result == [[["CNSUB", "and", ["4K", "or", "1080P"]], "and", ["not", "BLU"]]]