diff --git a/.dockerignore b/.dockerignore index 6970f86a..f36af573 100644 --- a/.dockerignore +++ b/.dockerignore @@ -73,6 +73,7 @@ test_* build/ dist/ *.egg-info/ +rust/**/target/ # Docker Dockerfile* @@ -81,4 +82,4 @@ docker-compose* # Other app.ico -frozen.spec \ No newline at end of file +frozen.spec diff --git a/.gitignore b/.gitignore index 47d90c82..0c8d817c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ build/ cython_cache/ dist/ +rust/**/target/ nginx/ test.py safety_report.txt diff --git a/README.md b/README.md index dae1fa77..b5e7fdf4 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,17 @@ MCP工具API文档:详见 [docs/mcp-api.md](docs/mcp-api.md) 开发环境准备与本地源码运行说明:[`docs/development-setup.md`](docs/development-setup.md) +本地开发启用 Rust 加速扩展,需先安装 Rust toolchain 并确保 `cargo` 可用: + +```shell +cargo --version +python -m pip install "maturin>=1.9,<2" +python -m maturin develop --release --manifest-path rust/moviepilot_rust/Cargo.toml +python -c "from app.utils import rust_accel; print(rust_accel.is_available())" +``` + +如果输出 `True`,说明当前开发环境已经加载 `moviepilot_rust`。重新修改 Rust 代码后再次执行 `python -m maturin develop --release --manifest-path rust/moviepilot_rust/Cargo.toml` 即可更新本地扩展。 + 插件开发说明: ## 相关项目 diff --git a/app/core/meta/metavideo.py b/app/core/meta/metavideo.py index ccc91699..ec875f49 100644 --- a/app/core/meta/metavideo.py +++ b/app/core/meta/metavideo.py @@ -11,6 +11,7 @@ from app.schemas.types import MediaType from app.utils.string import StringUtils from app.utils.tokens import Tokens from app.core.meta.streamingplatform import StreamingPlatforms +from app.utils import rust_accel class MetaVideo(MetaBase): @@ -102,59 +103,61 @@ class MetaVideo(MetaBase): title = re.sub(r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)', "", title, flags=re.IGNORECASE) # 把年月日去掉 title = re.sub(r'\d{4}[\s._-]\d{1,2}[\s._-]\d{1,2}', "", title) - # 拆分tokens - tokens = Tokens(title) - # 实例化StreamingPlatforms对象 - streaming_platforms = StreamingPlatforms() media_exts = settings.RMT_MEDIAEXT + 
settings.RMT_SUBEXT + settings.RMT_AUDIOEXT - # 解析名称、年份、季、集、资源类型、分辨率等 - token = tokens.get_next() - while token: - self._index += 1 # 更新当前处理的token索引 - # Part - self.__init_part(token, tokens) - # 标题 - if self._continue_flag: - self.__init_name(token, media_exts) - # 年份 - if self._continue_flag: - self.__init_year(token) - # 分辨率 - if self._continue_flag: - self.__init_resource_pix(token) - # 季 - if self._continue_flag: - self.__init_season(token) - # 集 - if self._continue_flag: - self.__init_episode(token) - # 资源类型 - if self._continue_flag: - self.__init_resource_type(token) - # 流媒体平台 - if self._continue_flag: - self.__init_web_source(token, tokens, streaming_platforms) - # 视频编码 - if self._continue_flag: - self.__init_video_encode(token) - # 视频位深 - if self._continue_flag: - self.__init_video_bit(token) - # 音频编码 - if self._continue_flag: - self.__init_audio_encode(token) - # 帧率 - if self._continue_flag: - self.__init_fps(token) - # 取下一个,直到没有为卡 + rust_parse = rust_accel.parse_video_title(title, isfile=isfile, media_exts=media_exts) + if not self.__apply_rust_parse(rust_parse): + # 拆分tokens + tokens = Tokens(title) + # 实例化StreamingPlatforms对象 + streaming_platforms = StreamingPlatforms() + # 解析名称、年份、季、集、资源类型、分辨率等 token = tokens.get_next() - self._continue_flag = True - # 合成质量 - if self._effect: - self._effect.reverse() - self.resource_effect = " ".join(self._effect) - if self._source: - self.resource_type = self._source.strip() + while token: + self._index += 1 # 更新当前处理的token索引 + # Part + self.__init_part(token, tokens) + # 标题 + if self._continue_flag: + self.__init_name(token, media_exts) + # 年份 + if self._continue_flag: + self.__init_year(token) + # 分辨率 + if self._continue_flag: + self.__init_resource_pix(token) + # 季 + if self._continue_flag: + self.__init_season(token) + # 集 + if self._continue_flag: + self.__init_episode(token) + # 资源类型 + if self._continue_flag: + self.__init_resource_type(token) + # 流媒体平台 + if self._continue_flag: + self.__init_web_source(token, 
tokens, streaming_platforms) + # 视频编码 + if self._continue_flag: + self.__init_video_encode(token) + # 视频位深 + if self._continue_flag: + self.__init_video_bit(token) + # 音频编码 + if self._continue_flag: + self.__init_audio_encode(token) + # 帧率 + if self._continue_flag: + self.__init_fps(token) + # 取下一个,直到没有为卡 + token = tokens.get_next() + self._continue_flag = True + # 合成质量 + if self._effect: + self._effect.reverse() + self.resource_effect = " ".join(self._effect) + if self._source: + self.resource_type = self._source.strip() # 提取原盘DIY if self.resource_type and "BluRay" in self.resource_type: if (self.subtitle and re.findall(r'D[Ii]Y', self.subtitle)) \ @@ -185,6 +188,62 @@ class MetaVideo(MetaBase): if not self.video_bit: self.video_bit = self.extract_video_bit(self.video_encode) + def __apply_rust_parse(self, rust_parse: Optional[dict]) -> bool: + """ + 应用 Rust 主识别结果;成功时跳过 Python token 主循环。 + """ + if not rust_parse or not rust_parse.get("complete"): + return False + self.cn_name = rust_parse.get("cn_name") + self.en_name = rust_parse.get("en_name") + if rust_parse.get("year"): + self.year = str(rust_parse.get("year")) + self.part = rust_parse.get("part") + self.__merge_rust_parse(rust_parse) + media_type = rust_parse.get("type") + if media_type == "tv": + self.type = MediaType.TV + elif media_type == "movie": + self.type = MediaType.MOVIE + return True + + def __merge_rust_parse(self, rust_parse: Optional[dict]) -> None: + """ + 合并 Rust 预解析结果,仅补齐 Python 识别未命中的资源字段。 + """ + if not rust_parse: + return + if not self.year and rust_parse.get("year"): + self.year = str(rust_parse.get("year")) + if self.begin_season is None and rust_parse.get("begin_season") is not None: + self.begin_season = int(rust_parse.get("begin_season")) + self.type = MediaType.TV + if self.end_season is None and rust_parse.get("end_season") is not None: + self.end_season = int(rust_parse.get("end_season")) + if not self.total_season and rust_parse.get("total_season"): + self.total_season = 
int(rust_parse.get("total_season")) + if self.begin_episode is None and rust_parse.get("begin_episode") is not None: + self.begin_episode = int(rust_parse.get("begin_episode")) + self.type = MediaType.TV + if self.end_episode is None and rust_parse.get("end_episode") is not None: + self.end_episode = int(rust_parse.get("end_episode")) + if not self.total_episode and rust_parse.get("total_episode"): + self.total_episode = int(rust_parse.get("total_episode")) + if not self.resource_pix and rust_parse.get("resource_pix"): + self.resource_pix = rust_parse.get("resource_pix") + if not self.resource_type and rust_parse.get("resource_type"): + self.resource_type = rust_parse.get("resource_type") + if not self.resource_effect and rust_parse.get("resource_effect"): + self.resource_effect = rust_parse.get("resource_effect") + if not self.video_encode and rust_parse.get("video_encode"): + self.video_encode = rust_parse.get("video_encode") + if not self.video_bit and rust_parse.get("video_bit"): + self.video_bit = rust_parse.get("video_bit") + if not self.audio_encode and rust_parse.get("audio_encode"): + self.audio_encode = rust_parse.get("audio_encode") + if self.fps is None and rust_parse.get("fps") is not None: + self.fps = int(rust_parse.get("fps")) + @staticmethod def __get_title_from_description(description: str) -> Optional[str]: """ diff --git a/app/core/metainfo.py b/app/core/metainfo.py index 10201648..b3000219 100644 --- a/app/core/metainfo.py +++ b/app/core/metainfo.py @@ -12,6 +12,7 @@ from app.core.meta.infopath import ( from app.core.meta.words import WordsMatcher from app.log import logger from app.schemas.types import MediaType +from app.utils import rust_accel _ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE) @@ -168,6 +169,9 @@ def is_anime(name: str) -> bool: :param name: 名称 :return: 是否动漫 """ + rust_result = rust_accel.is_anime(name) + if rust_result is not None: + return rust_result if not name: return False if 
_ANIME_BRACKET_RE.search(name): @@ -185,6 +189,9 @@ def find_metainfo(title: str) -> Tuple[str, dict]: """ 从标题中提取媒体信息 """ + rust_result = rust_accel.find_metainfo(title) + if rust_result is not None: + return rust_result metainfo = _empty_metainfo() if not title: return title, metainfo diff --git a/app/modules/filter/RuleParser.py b/app/modules/filter/RuleParser.py index c02cfd09..164a6980 100644 --- a/app/modules/filter/RuleParser.py +++ b/app/modules/filter/RuleParser.py @@ -2,6 +2,8 @@ import threading from pyparsing import Forward, Literal, Word, alphas, infixNotation, opAssoc, alphanums, Combine, nums, ParseResults +from app.utils import rust_accel + class RuleParser: @@ -48,9 +50,30 @@ class RuleParser: 返回: 解析结果 """ + rust_result = rust_accel.parse_filter_rule(expression) + if rust_result is not None: + return _RustParseResults(rust_result) return self.expr.parseString(expression) +class _RustParseResults(list): + """ + 包装 Rust 解析结果,提供本模块调用方使用的 as_list/asList 接口。 + """ + + def as_list(self) -> list: + """ + 返回兼容 pyparsing.ParseResults.as_list 的列表结构。 + """ + return list(self) + + def asList(self) -> list: # noqa: N802 + """ + 返回兼容 pyparsing.ParseResults.asList 的列表结构。 + """ + return self.as_list() + + if __name__ == '__main__': # 测试代码 expression_str = """ diff --git a/app/modules/filter/__init__.py b/app/modules/filter/__init__.py index f6192835..232d1c72 100644 --- a/app/modules/filter/__init__.py +++ b/app/modules/filter/__init__.py @@ -11,6 +11,7 @@ from app.modules import _ModuleBase from app.modules.filter.RuleParser import RuleParser from app.modules.filter.builtin_rules import BUILTIN_RULE_SET from app.schemas.types import ModuleType, OtherModulesType, SystemConfigKey +from app.utils import rust_accel from app.utils.string import StringUtils @@ -138,6 +139,9 @@ class FilterModule(_ModuleBase): # 查询规则表详情 groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups) if groups: + rust_filtered = 
self.__filter_torrents_by_rust(groups, torrent_list, mediainfo) + if rust_filtered is not None: + return rust_filtered for group in groups: # 过滤种子 torrent_list = self.__filter_torrents( @@ -150,6 +154,46 @@ class FilterModule(_ModuleBase): ) return torrent_list + def __filter_torrents_by_rust(self, groups: list, torrent_list: List[TorrentInfo], + mediainfo: MediaInfo) -> Optional[List[TorrentInfo]]: + """ + 使用 Rust 批量过滤种子;遇到不可支持的规则时返回 None 交由 Python 逻辑处理。 + """ + if not torrent_list: + return [] + payloads = [self.__build_rust_torrent_payload(torrent) for torrent in torrent_list] + media_payload = mediainfo.to_dict() if mediainfo and hasattr(mediainfo, "to_dict") else ( + vars(mediainfo).copy() if mediainfo else None + ) + result = rust_accel.filter_torrents( + rule_set=self.rule_set, + rule_strings=[group.rule_string for group in groups], + torrents=payloads, + media_info=media_payload, + ) + if result is None: + return None + filtered_torrents = [] + for index, pri_order in result: + torrent = torrent_list[int(index)] + torrent.pri_order = int(pri_order) + filtered_torrents.append(torrent) + return filtered_torrents + + @staticmethod + def __build_rust_torrent_payload(torrent: TorrentInfo) -> dict: + """ + 组装 Rust 过滤器需要的纯数据载荷,避免 Rust 直接依赖 Python 业务对象。 + """ + payload = torrent.to_dict() if hasattr(torrent, "to_dict") else vars(torrent).copy() + payload["pub_minutes"] = torrent.pub_minutes() + if payload.get("size"): + meta = MetaInfo(title=torrent.title, subtitle=torrent.description) + payload["episode_count"] = meta.total_episode or 1 + else: + payload["episode_count"] = 1 + return payload + def __filter_torrents(self, rule_string: str, rule_name: str, torrent_list: List[TorrentInfo], mediainfo: MediaInfo, diff --git a/app/modules/indexer/parser/__init__.py b/app/modules/indexer/parser/__init__.py index 7cf2c8b6..9d29c19f 100644 --- a/app/modules/indexer/parser/__init__.py +++ b/app/modules/indexer/parser/__init__.py @@ -11,8 +11,10 @@ from requests import 
Session from app.core.config import settings from app.helper.cloudflare import under_challenge from app.log import logger +from app.utils import rust_accel from app.utils.http import RequestUtils from app.utils.site import SiteUtils +from app.utils.string import StringUtils # 站点框架 @@ -154,6 +156,16 @@ class SiteParserBase(metaclass=ABCMeta): """ return self.schema + @staticmethod + def num_filesize(text) -> int: + """ + 将站点页面中的文件大小文本转换为字节,优先使用 Rust 快路径。 + """ + rust_value = rust_accel.parse_filesize(text) + if rust_value is not None: + return rust_value + return StringUtils.num_filesize(text) + def parse(self): """ 解析站点信息 diff --git a/app/modules/indexer/parser/nexus_php.py b/app/modules/indexer/parser/nexus_php.py index 5851895c..78ec2b8f 100644 --- a/app/modules/indexer/parser/nexus_php.py +++ b/app/modules/indexer/parser/nexus_php.py @@ -93,10 +93,10 @@ class NexusPhpSiteUserInfo(SiteParserBase): html_text = self._prepare_html_text(html_text) upload_match = re.search(r"[^总]上[传傳]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, re.IGNORECASE) - self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0 + self.upload = self.num_filesize(upload_match.group(1).strip()) if upload_match else 0 download_match = re.search(r"[^总子影力]下[载載]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, re.IGNORECASE) - self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0 + self.download = self.num_filesize(download_match.group(1).strip()) if download_match else 0 ratio_match = re.search(r"分享率[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)", html_text) # 计算分享率 calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3) @@ -209,7 +209,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): page_seeding = len(seeding_sizes) for i in range(0, len(seeding_sizes)): - size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + size = 
self.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) seeders = StringUtils.str_int(seeding_seeders[i]) page_seeding_size += size @@ -273,7 +273,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): tmp_seeding_size = 0 tmp_seeding_info = [] for i in range(0, len(seeding_sizes)): - size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + size = self.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) seeders = StringUtils.str_int(seeding_seeders[i]) tmp_seeding_size += size @@ -292,7 +292,7 @@ class NexusPhpSiteUserInfo(SiteParserBase): seeding_size_match = re.search(r"总做种体积:\s+([\d,.\s]+[KMGTPI]*B)", seeding_sizes[0], re.IGNORECASE) tmp_seeding = StringUtils.str_int(seeding_match.group(1)) if ( seeding_match and seeding_match.group(1)) else 0 - tmp_seeding_size = StringUtils.num_filesize( + tmp_seeding_size = self.num_filesize( seeding_size_match.group(1).strip()) if seeding_size_match else 0 if not self.seeding_size: self.seeding_size = tmp_seeding_size diff --git a/app/modules/indexer/parser/zhixing.py b/app/modules/indexer/parser/zhixing.py index ce610bf1..647b8c17 100644 --- a/app/modules/indexer/parser/zhixing.py +++ b/app/modules/indexer/parser/zhixing.py @@ -75,7 +75,7 @@ class ZhixingSiteUserInfo(SiteParserBase): s = s.strip() if re.match(r'^\d+(\.\d+)?$', s): s += ' B' - return StringUtils.num_filesize(s) if s else 0 + return self.num_filesize(s) if s else 0 self.upload = num_filesize_safe(info_dict.get('上传流量')) if '上传流量' in info_dict else 0 self.download = num_filesize_safe(info_dict.get('下载流量')) if '下载流量' in info_dict else 0 @@ -108,7 +108,7 @@ class ZhixingSiteUserInfo(SiteParserBase): if size_td: size_text = size_td.find('a').text if size_td.find('a') else size_td.text.strip() page_seeding += 1 - page_seeding_size += StringUtils.num_filesize(size_text) + page_seeding_size += self.num_filesize(size_text) return page_seeding, page_seeding_size def _parse_message_unread_links(self, html_text: str, msg_links: list) -> 
Optional[str]: @@ -164,7 +164,7 @@ class ZhixingSiteUserInfo(SiteParserBase): s = s.strip() if re.match(r'^\d+(\.\d+)?$', s): s += ' B' - return StringUtils.num_filesize(s) if s else 0 + return self.num_filesize(s) if s else 0 self.seeding = int(self._basic_info.get('当前保种数量', 0)) self.seeding_size = num_filesize_safe(self._basic_info.get('当前保种容量', '')) @@ -181,4 +181,4 @@ class ZhixingSiteUserInfo(SiteParserBase): self.message_unread = str(self.message_unread or 0) self.seeding = str(self.seeding or 0) - self.seeding_size = str(self.seeding_size or 0) \ No newline at end of file + self.seeding_size = str(self.seeding_size or 0) diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index c3059c30..a66525b4 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -12,6 +12,7 @@ from pyquery import PyQuery from app.core.config import settings from app.log import logger from app.schemas.types import MediaType +from app.utils import rust_accel from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils from app.utils.url import UrlUtils @@ -95,6 +96,19 @@ class SiteSpider: """ 获取搜索URL """ + rust_url = rust_accel.build_indexer_search_url({ + "search": self.search, + "batch": self.batch, + "browse": self.browse, + "category": self.category, + "domain": self.domain, + "keyword": self.keyword, + "mtype": self.mtype.value if self.mtype else None, + "cat": self.cat, + "page": self.page, + }) + if rust_url: + return rust_url # 种子搜索相对路径 paths = self.search.get('paths', []) torrentspath = "" @@ -658,6 +672,9 @@ class SiteSpider: """ if not text or not filters or not isinstance(filters, list): return text + rust_text = rust_accel.apply_indexer_text_filters(text, filters) + if rust_text is not None: + return rust_text if not isinstance(text, str): text = str(text) for filter_item in filters: @@ -739,6 +756,16 @@ class SiteSpider: # 清空旧结果 
self.torrents_info_array = [] + rust_torrents = rust_accel.parse_indexer_torrents( + html_text=html_text, + domain=self.domain, + list_config=self.list, + fields=self.fields, + category=self.category, + result_num=int(self.result_num), + ) + if rust_torrents is not None: + return rust_torrents html_doc = None try: # 解析站点文本对象 diff --git a/app/utils/rust_accel.py b/app/utils/rust_accel.py new file mode 100644 index 00000000..0de85356 --- /dev/null +++ b/app/utils/rust_accel.py @@ -0,0 +1,206 @@ +from typing import Any, Dict, List, Optional, Tuple + +from app.log import logger +from app.schemas.types import MediaType + +try: + import moviepilot_rust as _moviepilot_rust +except Exception as err: # pragma: no cover - 取决于运行环境是否安装 Rust 扩展 + _moviepilot_rust = None + _import_error = err +else: + _import_error = None + + +def is_available() -> bool: + """ + 判断 Rust 扩展是否可用。 + """ + return bool(_moviepilot_rust and _moviepilot_rust.is_available()) + + +def import_error() -> Optional[Exception]: + """ + 返回 Rust 扩展导入失败的异常,便于调试构建问题。 + """ + return _import_error + + +def is_anime(name: str) -> Optional[bool]: + """ + 使用 Rust 快路径判断标题是否为动漫格式,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return bool(_moviepilot_rust.is_anime_fast(name or "")) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 动漫识别失败,回退 Python:{err}") + return None + + +def find_metainfo(title: str) -> Optional[Tuple[str, Dict[str, Any]]]: + """ + 使用 Rust 快路径提取标题中的内嵌媒体标签,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + result = _moviepilot_rust.find_metainfo_fast(title or "") + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 内嵌媒体标签识别失败,回退 Python:{err}") + return None + metainfo = { + "tmdbid": result.get("tmdbid"), + "doubanid": result.get("doubanid"), + "type": _coerce_media_type(result.get("type")), + "begin_season": result.get("begin_season"), + "end_season": result.get("end_season"), + "total_season": 
result.get("total_season"), + "begin_episode": result.get("begin_episode"), + "end_episode": result.get("end_episode"), + "total_episode": result.get("total_episode"), + } + return result.get("title"), metainfo + + +def parse_video_title( + title: str, + isfile: bool = False, + media_exts: Optional[List[str]] = None, +) -> Optional[Dict[str, Any]]: + """ + 使用 Rust 执行影视标题主识别流程,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return _moviepilot_rust.parse_video_title_fast(title or "", isfile, media_exts or []) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 影视标题主识别失败,回退 Python:{err}") + return None + + +def parse_filter_rule(expression: str) -> Optional[list]: + """ + 使用 Rust 解析过滤规则表达式,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return _moviepilot_rust.parse_filter_rule_fast(expression) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 过滤规则解析失败,回退 Python:{err}") + return None + + +def filter_torrents( + rule_set: Dict[str, dict], + rule_strings: List[str], + torrents: List[dict], + media_info: Optional[dict] = None, +) -> Optional[list]: + """ + 使用 Rust 批量执行种子过滤,不可用或不兼容时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return _moviepilot_rust.filter_torrents_fast(rule_set, rule_strings, torrents, media_info) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 种子过滤失败,回退 Python:{err}") + return None + + +def apply_indexer_text_filters(text: Any, filters: Optional[List[dict]]) -> Optional[str]: + """ + 使用 Rust 执行 indexer 文本过滤器,不可用或遇到不支持过滤器时返回 None。 + """ + if not _moviepilot_rust or not filters or not isinstance(filters, list): + return None + try: + return _moviepilot_rust.apply_indexer_text_filters_fast(None if text is None else str(text), filters) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 站点文本过滤失败,回退 Python:{err}") + return None + + +def parse_filesize(text: Any) -> 
Optional[int]: + """ + 使用 Rust 将文件大小文本转换为字节,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return int(_moviepilot_rust.parse_filesize_fast(text)) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 文件大小解析失败,回退 Python:{err}") + return None + + +def build_indexer_search_url(config: dict) -> Optional[str]: + """ + 使用 Rust 根据普通 indexer 配置生成搜索 URL,不可用时返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return _moviepilot_rust.build_indexer_search_url_fast(config) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 站点搜索 URL 生成失败,回退 Python:{err}") + return None + + +def parse_indexer_torrents( + html_text: str, + domain: str, + list_config: dict, + fields: dict, + category: Optional[dict], + result_num: int, +) -> Optional[List[dict]]: + """ + 使用 Rust 批量解析普通 indexer 页面,不支持的配置返回 None。 + """ + if not _moviepilot_rust: + return None + try: + return _moviepilot_rust.parse_indexer_torrents_fast( + html_text or "", + domain or "", + list_config or {}, + fields or {}, + category, + int(result_num or 0), + ) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 站点页面解析失败,回退 Python:{err}") + return None + + +def _coerce_media_type(value: Optional[str]) -> Optional[MediaType]: + """ + 将 Rust 返回的媒体类型字符串转换为系统 MediaType。 + """ + if value == "movies": + return MediaType.MOVIE + if value == "tv": + return MediaType.TV + return None + + +def _raise_non_rust_panic(err: BaseException) -> None: + """ + 只吞掉 Rust 扩展 panic/异常,保留用户中断和进程退出语义。 + """ + if isinstance(err, (KeyboardInterrupt, SystemExit)): + raise err diff --git a/docker/Dockerfile b/docker/Dockerfile index 202ffca8..a0d8e2a2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -85,10 +85,24 @@ RUN python3 -m venv ${VENV_PATH} \ && ln -sf /usr/local/bin/uv-pip-compat ${VENV_PATH}/bin/pip3.12 \ && ln -sf /usr/local/bin/uv-pip-compat ${VENV_PATH}/bin/pip-compile \ && ln -sf /usr/local/bin/uv-pip-compat 
${VENV_PATH}/bin/pip-sync \ - && pip install "Cython~=3.1.2" \ + && pip install "Cython~=3.1.2" "maturin>=1.9,<2" \ && pip-compile requirements.in -o requirements.txt \ && pip install -r requirements.txt +# 准备 Rust 扩展 +FROM prepare_venv AS prepare_rust + +ENV PATH="${VENV_PATH}/bin:/root/.cargo/bin:${PATH}" + +WORKDIR /app +COPY rust /app/rust + +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal \ + && cd /app/rust/moviepilot_rust \ + && maturin build --release -o /tmp/wheels \ + && pip install /tmp/wheels/*.whl \ + && rm -rf /tmp/wheels /root/.cargo /root/.rustup + # 下载准备代码 FROM prepare_package AS prepare_code @@ -114,9 +128,9 @@ FROM prepare_package AS final ENV LD_PRELOAD="/usr/local/lib/libjemalloc.so" # python 环境 -COPY --from=prepare_venv --chmod=777 ${VENV_PATH} ${VENV_PATH} -COPY --from=prepare_venv /usr/local/bin/uv /usr/local/bin/uv -COPY --from=prepare_venv /usr/local/bin/uv-pip-compat /usr/local/bin/uv-pip-compat +COPY --from=prepare_rust --chmod=777 ${VENV_PATH} ${VENV_PATH} +COPY --from=prepare_rust /usr/local/bin/uv /usr/local/bin/uv +COPY --from=prepare_rust /usr/local/bin/uv-pip-compat /usr/local/bin/uv-pip-compat # 浏览器运行依赖 RUN playwright install-deps chromium \ diff --git a/docs/cli.md b/docs/cli.md index 3341d07a..50ca9aff 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -159,6 +159,9 @@ moviepilot install deps --config-dir /path/to/moviepilot-config 说明: - 默认会自动选择本地已安装的 `Python 3.11+` 解释器 +- 会在安装 Python 依赖后构建并安装 `moviepilot_rust` 加速扩展,因此本机需要可用的 Rust `cargo` +- 一键安装脚本会自动准备 Rust toolchain 和系统构建工具;手动执行 CLI 安装时,如果未安装 Rust 或本机编译器,请先安装后再执行 `moviepilot install deps` +- 如需临时跳过加速扩展构建,可设置 `MOVIEPILOT_SKIP_RUST_ACCEL=1`,但相关核心处理会回退到 Python 实现,性能收益不会生效 安装前端 release: @@ -220,7 +223,7 @@ moviepilot setup --config-dir /path/to/moviepilot-config `moviepilot setup` 会串行执行: -1. 安装后端依赖 +1. 安装后端依赖并构建 Rust 加速扩展 2. 下载并安装前端 release 3. 下载并同步资源文件 4. 
初始化本地配置 @@ -323,7 +326,7 @@ moviepilot update all --skip-resources 说明: -- `update backend` 会更新 Git 仓库并重新安装后端依赖 +- `update backend` 会更新 Git 仓库并重新安装后端依赖,同时重新构建 Rust 加速扩展 - `update frontend` 会按当前仓库 `version.py` 中的 `FRONTEND_VERSION` 下载并替换前端 release - `update all` 会先更新后端,再按更新后代码中的 `FRONTEND_VERSION` 更新前端,默认也会同步资源文件 - 更新前请先执行 `moviepilot stop` diff --git a/moviepilot b/moviepilot index 7e4cfdcd..f68d9c54 100755 --- a/moviepilot +++ b/moviepilot @@ -105,6 +105,8 @@ Options: --venv PATH 虚拟环境目录,默认 ./venv --recreate 删除并重建虚拟环境 --config-dir PATH 指定配置目录 + 说明 会构建并安装 Rust 加速扩展,需本机可用 cargo 和编译器; + 可临时设置 MOVIEPILOT_SKIP_RUST_ACCEL=1 跳过构建 frontend: --version TAG 前端版本,默认使用 version.py 中的 FRONTEND_VERSION @@ -152,6 +154,10 @@ Options: --superuser-password PWD 预设超级管理员密码 --config-dir PATH 指定配置目录 -h, --help 显示帮助 + +说明: + - 安装后端依赖时会构建并安装 Rust 加速扩展,需本机可用 cargo 和编译器 + - 可临时设置 MOVIEPILOT_SKIP_RUST_ACCEL=1 跳过加速扩展构建 EOF } @@ -188,6 +194,10 @@ Options: --skip-resources 更新 all 时跳过资源同步 --config-dir PATH 指定配置目录 -h, --help 显示帮助 + +说明: + - 更新后端依赖时会重新构建并安装 Rust 加速扩展,需本机可用 cargo 和编译器 + - 可临时设置 MOVIEPILOT_SKIP_RUST_ACCEL=1 跳过加速扩展构建 EOF } diff --git a/requirements.in b/requirements.in index d04d7029..cac8c796 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,5 @@ Cython~=3.1.2 +maturin>=1.9,<2 pydantic>=2.0.0,<3.0.0 pydantic-settings>=2.0.0,<3.0.0 SQLAlchemy~=2.0.41 diff --git a/rust/moviepilot_rust/Cargo.lock b/rust/moviepilot_rust/Cargo.lock new file mode 100644 index 00000000..02c33c23 --- /dev/null +++ b/rust/moviepilot_rust/Cargo.lock @@ -0,0 +1,948 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cssparser" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + 
+[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "html5ever" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4" +dependencies = [ + "log", + "markup5ever", + "match_token", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" 
+version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3" +dependencies = [ + "log", + "tendril", + "web_atoms", +] + +[[package]] +name = "match_token" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "moviepilot-rust" +version = "0.1.0" +dependencies = [ + "once_cell", + "percent-encoding", + "pyo3", + "regex", + "scraper", + "url", +] + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = 
"0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] 
+ +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scraper" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + +[[package]] +name = "selectors" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + 
+[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "siphasher" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version 
= "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies 
= [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "web_atoms" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = 
"0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/rust/moviepilot_rust/Cargo.toml b/rust/moviepilot_rust/Cargo.toml new file mode 100644 index 00000000..387cca7f --- /dev/null +++ b/rust/moviepilot_rust/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "moviepilot-rust" +version = "0.1.0" +edition = "2021" + +[lib] +name = "moviepilot_rust" +crate-type = ["cdylib"] + +[dependencies] +once_cell = "1.20" +percent-encoding = "2.3" +pyo3 = { version = "0.23", features = ["abi3-py311", "extension-module"] } +regex = "1.11" +scraper = "0.24" +url = "2.5" diff --git a/rust/moviepilot_rust/pyproject.toml b/rust/moviepilot_rust/pyproject.toml new file mode 100644 index 00000000..e77f448f --- /dev/null +++ b/rust/moviepilot_rust/pyproject.toml @@ -0,0 +1,13 @@ +[build-system] +requires = ["maturin>=1.9,<2"] +build-backend = "maturin" + +[project] +name = "moviepilot-rust" +version = "0.1.0" +requires-python = ">=3.11" +description = "Rust acceleration helpers for MoviePilot" + +[tool.maturin] +bindings = "pyo3" +strip = 
true diff --git a/rust/moviepilot_rust/src/filter.rs b/rust/moviepilot_rust/src/filter.rs new file mode 100644 index 00000000..69c405e8 --- /dev/null +++ b/rust/moviepilot_rust/src/filter.rs @@ -0,0 +1,640 @@ +use crate::utils::{get_optional_f64, get_optional_i64, get_optional_string}; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList, PyString}; +use regex::{Regex, RegexBuilder}; +use std::collections::HashMap; + +#[derive(Clone, Debug)] +enum RuleExpr { + Name(String), + Not(Box), + And(Box, Box), + Or(Box, Box), +} + +#[derive(Clone, Debug, PartialEq)] +enum Token { + Name(String), + Not, + And, + Or, + LParen, + RParen, +} + +#[derive(Clone, Debug)] +struct TorrentPayload { + index: usize, + title: String, + description: String, + labels: Vec, + size: f64, + seeders: i64, + downloadvolumefactor: Option, + pub_minutes: f64, + episode_count: f64, + fields: HashMap, +} + +#[derive(Clone, Debug)] +enum FieldValue { + Scalar(String), + List(Vec), +} + +#[pyfunction] +pub(crate) fn parse_filter_rule_fast(py: Python<'_>, expression: &str) -> PyResult { + let tokens = tokenize_rule(expression)?; + let mut parser = RuleParserState::new(tokens); + let expr = parser.parse_expression()?; + if parser.has_remaining() { + return Err(PyValueError::new_err("规则表达式包含无法解析的剩余内容")); + } + let outer = PyList::empty(py); + outer.append(expr_to_py(py, &expr)?)?; + Ok(outer.into()) +} + +/// 批量执行种子过滤规则,返回保留项的原始下标和优先级。 +#[pyfunction] +#[pyo3(signature = (rule_set, rule_strings, torrents, media_info=None))] +pub(crate) fn filter_torrents_fast( + py: Python<'_>, + rule_set: &Bound<'_, PyDict>, + rule_strings: Vec, + torrents: &Bound<'_, PyList>, + media_info: Option<&Bound<'_, PyDict>>, +) -> PyResult { + py.allow_threads(|| {}); + let mut payloads = Vec::with_capacity(torrents.len()); + for index in 0..torrents.len() { + let item = torrents.get_item(index)?; + let dict = item.downcast::()?; + payloads.push(TorrentPayload::from_py_dict(index, 
dict)?); + } + + let mut expr_cache: HashMap = HashMap::new(); + let mut regex_cache: HashMap = HashMap::new(); + let mut current_indices: Vec = (0..payloads.len()).collect(); + let mut priorities: HashMap = HashMap::new(); + + for rule_string in rule_strings { + if current_indices.is_empty() { + break; + } + let levels: Vec = rule_string + .split('>') + .map(|level| level.trim().to_string()) + .collect(); + let mut retained = Vec::new(); + for payload_index in ¤t_indices { + let payload = &payloads[*payload_index]; + let mut res_order = 100_i64; + let mut matched = false; + for level in &levels { + let expr = if let Some(cached) = expr_cache.get(level) { + cached.clone() + } else { + let parsed = parse_rule_expression(level)?; + expr_cache.insert(level.clone(), parsed.clone()); + parsed + }; + if match_expr(&expr, payload, rule_set, media_info, &mut regex_cache)? { + matched = true; + priorities.insert(payload.index, res_order); + break; + } + res_order -= 1; + } + if matched { + retained.push(*payload_index); + } + } + current_indices = retained; + } + + let result = PyList::empty(py); + for payload_index in current_indices { + let payload = &payloads[payload_index]; + result.append(( + payload.index, + priorities.get(&payload.index).copied().unwrap_or(0), + ))?; + } + Ok(result.into()) +} + +fn parse_rule_expression(expression: &str) -> PyResult { + let tokens = tokenize_rule(expression)?; + let mut parser = RuleParserState::new(tokens); + let expr = parser.parse_expression()?; + if parser.has_remaining() { + return Err(PyValueError::new_err("规则表达式包含无法解析的剩余内容")); + } + Ok(expr) +} + +/// 将规则字符串切分为名称、逻辑符和括号。 +fn tokenize_rule(expression: &str) -> PyResult> { + let chars: Vec = expression.chars().collect(); + let mut tokens = Vec::new(); + let mut index = 0; + while index < chars.len() { + let ch = chars[index]; + if ch.is_whitespace() { + index += 1; + continue; + } + match ch { + '!' 
=> { + tokens.push(Token::Not); + index += 1; + } + '&' => { + tokens.push(Token::And); + index += 1; + } + '|' => { + tokens.push(Token::Or); + index += 1; + } + '(' => { + tokens.push(Token::LParen); + index += 1; + } + ')' => { + tokens.push(Token::RParen); + index += 1; + } + _ => { + let start = index; + while index < chars.len() && chars[index].is_ascii_alphanumeric() { + index += 1; + } + if start == index { + return Err(PyValueError::new_err(format!("非法规则字符: {ch}"))); + } + let name: String = chars[start..index].iter().collect(); + if !is_valid_rule_name(&name) { + return Err(PyValueError::new_err(format!("非法规则名称: {name}"))); + } + tokens.push(Token::Name(name)); + } + } + } + if tokens.is_empty() { + return Err(PyValueError::new_err("规则表达式不能为空")); + } + Ok(tokens) +} + +/// 判断规则名称是否符合原 pyparsing 语法。 +fn is_valid_rule_name(name: &str) -> bool { + if name.is_empty() { + return false; + } + let mut chars = name.chars(); + let Some(first) = chars.next() else { + return false; + }; + if first.is_ascii_alphabetic() { + return chars.all(|ch| ch.is_ascii_alphanumeric()); + } + if first.is_ascii_digit() { + let mut seen_alpha = false; + for ch in name.chars().skip_while(|ch| ch.is_ascii_digit()) { + if !ch.is_ascii_alphanumeric() { + return false; + } + if ch.is_ascii_alphabetic() { + seen_alpha = true; + } + } + return seen_alpha; + } + false +} + +struct RuleParserState { + tokens: Vec, + index: usize, +} + +impl RuleParserState { + /// 创建规则解析器状态。 + fn new(tokens: Vec) -> Self { + Self { tokens, index: 0 } + } + + /// 解析完整表达式。 + fn parse_expression(&mut self) -> PyResult { + self.parse_or() + } + + /// 返回是否还有未消费 token。 + fn has_remaining(&self) -> bool { + self.index < self.tokens.len() + } + + /// 解析 or 表达式。 + fn parse_or(&mut self) -> PyResult { + let mut expr = self.parse_and()?; + while self.consume(&Token::Or) { + let right = self.parse_and()?; + expr = RuleExpr::Or(Box::new(expr), Box::new(right)); + } + Ok(expr) + } + + /// 解析 and 表达式。 + fn parse_and(&mut 
self) -> PyResult { + let mut expr = self.parse_not()?; + while self.consume(&Token::And) { + let right = self.parse_not()?; + expr = RuleExpr::And(Box::new(expr), Box::new(right)); + } + Ok(expr) + } + + /// 解析 not 表达式。 + fn parse_not(&mut self) -> PyResult { + if self.consume(&Token::Not) { + return Ok(RuleExpr::Not(Box::new(self.parse_not()?))); + } + self.parse_primary() + } + + /// 解析原子或括号表达式。 + fn parse_primary(&mut self) -> PyResult { + let Some(token) = self.tokens.get(self.index).cloned() else { + return Err(PyValueError::new_err("规则表达式意外结束")); + }; + match token { + Token::Name(name) => { + self.index += 1; + Ok(RuleExpr::Name(name)) + } + Token::LParen => { + self.index += 1; + let expr = self.parse_expression()?; + if !self.consume(&Token::RParen) { + return Err(PyValueError::new_err("规则表达式缺少右括号")); + } + Ok(expr) + } + _ => Err(PyValueError::new_err("规则表达式缺少规则名称")), + } + } + + /// 如果下一个 token 匹配则消费它。 + fn consume(&mut self, token: &Token) -> bool { + if self.tokens.get(self.index) == Some(token) { + self.index += 1; + return true; + } + false + } +} + +/// 将规则 AST 转换为 Python 兼容嵌套列表。 +fn expr_to_py(py: Python<'_>, expr: &RuleExpr) -> PyResult { + match expr { + RuleExpr::Name(name) => Ok(PyString::new(py, name).into_any().unbind()), + RuleExpr::Not(inner) => { + let list = PyList::empty(py); + list.append("not")?; + list.append(expr_to_py(py, inner)?)?; + Ok(list.into()) + } + RuleExpr::And(left, right) => expr_binary_to_py(py, "and", left, right), + RuleExpr::Or(left, right) => expr_binary_to_py(py, "or", left, right), + } +} + +/// 将二元规则 AST 转换为 Python 兼容嵌套列表。 +fn expr_binary_to_py( + py: Python<'_>, + operator: &str, + left: &RuleExpr, + right: &RuleExpr, +) -> PyResult { + let list = PyList::empty(py); + list.append(expr_to_py(py, left)?)?; + list.append(operator)?; + list.append(expr_to_py(py, right)?)?; + Ok(list.into()) +} + +impl TorrentPayload { + /// 从 Python 字典构造 Rust 过滤载荷。 + fn from_py_dict(index: usize, dict: &Bound<'_, PyDict>) -> 
PyResult { + let title = get_optional_string(dict, "title")?.unwrap_or_default(); + let description = get_optional_string(dict, "description")?.unwrap_or_default(); + let labels = get_string_list(dict, "labels")?; + let size = get_optional_f64(dict, "size")?.unwrap_or(0.0); + let seeders = get_optional_i64(dict, "seeders")?.unwrap_or(0); + let downloadvolumefactor = get_optional_f64(dict, "downloadvolumefactor")?; + let pub_minutes = get_optional_f64(dict, "pub_minutes")?.unwrap_or(0.0); + let episode_count = get_optional_f64(dict, "episode_count")? + .unwrap_or(1.0) + .max(1.0); + let mut fields = HashMap::new(); + for (key, value) in dict.iter() { + let key = key.extract::()?; + if value.is_none() { + continue; + } + if let Ok(values) = value.extract::>() { + fields.insert(key, FieldValue::List(values)); + } else { + fields.insert(key, FieldValue::Scalar(value.str()?.to_str()?.to_string())); + } + } + Ok(Self { + index, + title, + description, + labels, + size, + seeders, + downloadvolumefactor, + pub_minutes, + episode_count, + fields, + }) + } + + /// 返回指定字段的匹配文本。 + fn content_for_matches(&self, match_fields: &[String]) -> String { + if match_fields.is_empty() { + return format!( + "{} {} {}", + self.title, + self.description, + self.labels.join(" ") + ); + } + let mut parts = Vec::new(); + for field in match_fields { + if let Some(value) = self.fields.get(field) { + match value { + FieldValue::Scalar(text) => { + if !text.is_empty() { + parts.push(text.clone()); + } + } + FieldValue::List(values) => { + parts.extend(values.iter().filter(|v| !v.is_empty()).cloned()) + } + } + } + } + parts.join(" ") + } +} + +/// 从 Python 字典读取字符串列表。 +fn get_string_list(dict: &Bound<'_, PyDict>, key: &str) -> PyResult> { + let Some(value) = dict.get_item(key)? 
else { + return Ok(Vec::new()); + }; + if value.is_none() { + return Ok(Vec::new()); + } + if let Ok(values) = value.extract::>() { + return Ok(values); + } + Ok(vec![value.str()?.to_str()?.to_string()]) +} + +/// 执行规则 AST 匹配。 +fn match_expr( + expr: &RuleExpr, + torrent: &TorrentPayload, + rule_set: &Bound<'_, PyDict>, + media_info: Option<&Bound<'_, PyDict>>, + regex_cache: &mut HashMap, +) -> PyResult { + match expr { + RuleExpr::Name(name) => match_rule(name, torrent, rule_set, media_info, regex_cache), + RuleExpr::Not(inner) => Ok(!match_expr( + inner, + torrent, + rule_set, + media_info, + regex_cache, + )?), + RuleExpr::And(left, right) => { + Ok( + match_expr(left, torrent, rule_set, media_info, regex_cache)? + && match_expr(right, torrent, rule_set, media_info, regex_cache)?, + ) + } + RuleExpr::Or(left, right) => { + Ok( + match_expr(left, torrent, rule_set, media_info, regex_cache)? + || match_expr(right, torrent, rule_set, media_info, regex_cache)?, + ) + } + } +} + +/// 执行单条规则匹配。 +fn match_rule( + rule_name: &str, + torrent: &TorrentPayload, + rule_set: &Bound<'_, PyDict>, + media_info: Option<&Bound<'_, PyDict>>, + regex_cache: &mut HashMap, +) -> PyResult { + let Some(rule_obj) = rule_set.get_item(rule_name)? else { + return Ok(false); + }; + let rule = rule_obj.downcast::()?; + if let Some(tmdb_obj) = rule.get_item("tmdb")? { + if !tmdb_obj.is_none() { + if let Ok(tmdb) = tmdb_obj.downcast::() { + if match_tmdb(tmdb, media_info)? { + return Ok(true); + } + } + } + } + + let match_fields = get_string_list(rule, "match")?; + let content = torrent.content_for_matches(&match_fields); + let includes = get_string_list(rule, "include")?; + let excludes = get_string_list(rule, "exclude")?; + + if !includes.is_empty() { + let mut included = false; + for pattern in &includes { + if regex_search(pattern, &content, regex_cache)? 
{ + included = true; + break; + } + } + if !included { + return Ok(false); + } + } + for exclude in excludes { + if regex_search(&exclude, &content, regex_cache)? { + return Ok(false); + } + } + if let Some(size_range) = get_optional_string(rule, "size_range")? { + if !match_size(torrent, &size_range)? { + return Ok(false); + } + } + if let Some(seeders) = get_optional_i64(rule, "seeders")? { + if torrent.seeders < seeders { + return Ok(false); + } + } + if let Some(downloadvolumefactor) = get_optional_f64(rule, "downloadvolumefactor")? { + if torrent.downloadvolumefactor != Some(downloadvolumefactor) { + return Ok(false); + } + } + if let Some(pubdate) = get_optional_string(rule, "publish_time")? { + if !match_publish_time(torrent.pub_minutes, &pubdate) { + return Ok(false); + } + } + Ok(true) +} + +/// 使用带缓存的忽略大小写正则搜索。 +fn regex_search( + pattern: &str, + content: &str, + cache: &mut HashMap, +) -> PyResult { + if !cache.contains_key(pattern) { + let regex = RegexBuilder::new(pattern) + .case_insensitive(true) + .build() + .map_err(|err| PyValueError::new_err(err.to_string()))?; + cache.insert(pattern.to_string(), regex); + } + Ok(cache + .get(pattern) + .is_some_and(|regex| regex.is_match(content))) +} + +/// 匹配 TMDB 媒体属性规则。 +fn match_tmdb(tmdb: &Bound<'_, PyDict>, media_info: Option<&Bound<'_, PyDict>>) -> PyResult { + let Some(media) = media_info else { + return Ok(false); + }; + for (attr, value) in tmdb.iter() { + if value.is_none() { + continue; + } + let attr_name = attr.extract::()?; + let expected = value.str()?.to_str()?.to_string(); + if expected.is_empty() { + continue; + } + let info_values = media_values(media, &attr_name)?; + if info_values.is_empty() { + return Ok(false); + } + let expected_values: Vec = expected + .split(',') + .filter(|item| !item.is_empty()) + .map(|item| item.to_uppercase()) + .collect(); + if !expected_values.iter().any(|expected_item| { + info_values + .iter() + .any(|info_item| info_item == expected_item) + }) { + return 
Ok(false); + } + } + Ok(true) +} + +/// 获取媒体属性的可比较字符串集合。 +fn media_values(media: &Bound<'_, PyDict>, attr_name: &str) -> PyResult> { + let Some(value) = media.get_item(attr_name)? else { + return Ok(Vec::new()); + }; + if value.is_none() { + return Ok(Vec::new()); + } + if attr_name == "production_countries" { + let Ok(items) = value.downcast::() else { + return Ok(Vec::new()); + }; + let mut values = Vec::new(); + for item in items.iter() { + if let Ok(dict) = item.downcast::() { + if let Some(country) = dict.get_item("iso_3166_1")? { + values.push(country.str()?.to_str()?.to_uppercase()); + } + } + } + return Ok(values); + } + if let Ok(items) = value.extract::>() { + return Ok(items.into_iter().map(|item| item.to_uppercase()).collect()); + } + Ok(vec![value.str()?.to_str()?.to_uppercase()]) +} + +/// 按每集大小匹配大小范围规则。 +fn match_size(torrent: &TorrentPayload, size_range: &str) -> PyResult { + let torrent_size = torrent.size / torrent.episode_count; + let size_range = size_range.trim(); + let unit = 1024.0 * 1024.0; + if let Some((min, max)) = size_range.split_once('-') { + let min = min + .trim() + .parse::() + .map_err(|err| PyValueError::new_err(err.to_string()))? + * unit; + let max = max + .trim() + .parse::() + .map_err(|err| PyValueError::new_err(err.to_string()))? + * unit; + return Ok(min <= torrent_size && torrent_size <= max); + } + if let Some(min) = size_range.strip_prefix('>') { + let min = min + .trim() + .parse::() + .map_err(|err| PyValueError::new_err(err.to_string()))? + * unit; + return Ok(torrent_size >= min); + } + if let Some(max) = size_range.strip_prefix('<') { + let max = max + .trim() + .parse::() + .map_err(|err| PyValueError::new_err(err.to_string()))? 
+ * unit; + return Ok(torrent_size <= max); + } + Ok(false) +} + +/// 匹配发布时间分钟数规则。 +fn match_publish_time(pub_minutes: f64, publish_time: &str) -> bool { + let values: Vec = publish_time + .split('-') + .filter_map(|item| item.parse::().ok()) + .collect(); + if values.len() == 1 { + return pub_minutes >= values[0]; + } + if values.len() >= 2 { + return values[0] <= pub_minutes && pub_minutes <= values[1]; + } + true +} diff --git a/rust/moviepilot_rust/src/indexer.rs b/rust/moviepilot_rust/src/indexer.rs new file mode 100644 index 00000000..ee2ad406 --- /dev/null +++ b/rust/moviepilot_rust/src/indexer.rs @@ -0,0 +1,1053 @@ +use crate::utils::{get_optional_i64, get_optional_string, py_i64_to_usize}; +use once_cell::sync::Lazy; +use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyList}; +use regex::{Regex, RegexBuilder}; +use scraper::{ElementRef, Html, Selector}; +use url::form_urlencoded; +use url::Url; + +const PATH_ENCODE_SET: &AsciiSet = &CONTROLS + .add(b' ') + .add(b'"') + .add(b'#') + .add(b'%') + .add(b'<') + .add(b'>') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'`') + .add(b'{') + .add(b'|') + .add(b'}'); + +static IMDB_ID_RE: Lazy = Lazy::new(|| Regex::new(r"^tt\d+$").unwrap()); + +static FILESIZE_UNIT_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"[KMGTPI]*B?") + .case_insensitive(true) + .build() + .unwrap() +}); +static NUMERIC_FACTOR_RE: Lazy = Lazy::new(|| Regex::new(r"(\d+\.?\d*)").unwrap()); + +enum RowParseResult { + Unsupported, + Empty, + Item(PyObject), +} + +#[pyfunction] +pub(crate) fn apply_indexer_text_filters_fast( + text: &Bound<'_, PyAny>, + filters: &Bound<'_, PyAny>, +) -> PyResult> { + if text.is_none() { + return Ok(None); + } + apply_text_filters(text.str()?.to_str()?.to_string(), filters) +} + +/// 批量解析普通配置 indexer 页面,遇到不支持的选择器配置时返回 None 交给 Python 回退。 +#[pyfunction] +#[pyo3(signature = (html_text, domain, 
list_config, fields, category=None, result_num=100))] +pub(crate) fn parse_indexer_torrents_fast( + py: Python<'_>, + html_text: &str, + domain: &str, + list_config: &Bound<'_, PyDict>, + fields: &Bound<'_, PyDict>, + category: Option<&Bound<'_, PyDict>>, + result_num: usize, +) -> PyResult> { + let Some(list_selector_text) = get_optional_string(list_config, "selector")? else { + return Ok(None); + }; + if list_selector_text.is_empty() { + return Ok(None); + } + let Ok(list_selector) = Selector::parse(&list_selector_text) else { + return Ok(None); + }; + let document = Html::parse_document(html_text); + let result = PyList::empty(py); + for row in document.select(&list_selector).take(result_num) { + match parse_indexer_row(py, row, domain, fields, category)? { + RowParseResult::Unsupported => return Ok(None), + RowParseResult::Empty => {} + RowParseResult::Item(item) => result.append(item)?, + } + } + Ok(Some(result.into())) +} + +/// 执行 indexer 文本过滤器,遇到 Python 专属过滤器时返回 None。 +fn apply_text_filters(mut current: String, filters: &Bound<'_, PyAny>) -> PyResult> { + let Ok(filter_list) = filters.downcast::() else { + return Ok(None); + }; + for item in filter_list.iter() { + let filter = item.downcast::()?; + let method_name = get_optional_string(filter, "name")?; + if current.is_empty() { + break; + } + match method_name.as_deref() { + Some("re_search") => { + let Some(args) = filter.get_item("args")? else { + continue; + }; + let Ok(args_list) = args.downcast::() else { + continue; + }; + if args_list.len() < 2 { + continue; + } + let pattern = args_list.get_item(0)?.extract::()?; + let group_index = py_i64_to_usize(&args_list.get_item(args_list.len() - 1)?)?; + let regex = + Regex::new(&pattern).map_err(|err| PyValueError::new_err(err.to_string()))?; + if let Some(captures) = regex.captures(¤t) { + if let Some(value) = captures.get(group_index) { + current = value.as_str().to_string(); + } + } + } + Some("split") => { + let Some(args) = filter.get_item("args")? 
else { + continue; + }; + let Ok(args_list) = args.downcast::() else { + continue; + }; + if args_list.len() < 2 { + continue; + } + let delimiter = args_list.get_item(0)?.extract::()?; + let index = py_i64_to_usize(&args_list.get_item(args_list.len() - 1)?)?; + if let Some(value) = current.split(&delimiter).nth(index) { + current = value.to_string(); + } + } + Some("replace") => { + let Some(args) = filter.get_item("args")? else { + continue; + }; + let Ok(args_list) = args.downcast::() else { + continue; + }; + if args_list.len() < 2 { + continue; + } + let from = args_list.get_item(0)?.extract::()?; + let to = args_list + .get_item(args_list.len() - 1)? + .extract::()?; + current = current.replace(&from, &to); + } + Some("strip") => { + current = current.trim().to_string(); + } + Some("appendleft") => { + let Some(args) = filter.get_item("args")? else { + continue; + }; + current = format!("{}{}", args.str()?.to_str()?, current); + } + Some("querystring") => { + let Some(args) = filter.get_item("args")? 
else { + continue; + }; + current = query_param_value(¤t, args.str()?.to_str()?).unwrap_or_default(); + } + Some("dateparse") => return Ok(None), + _ => return Ok(None), + } + } + Ok(Some(current.trim().to_string())) +} + +/// 将站点页面中的文件大小文本转换为字节数。 +#[pyfunction] +pub(crate) fn parse_filesize_fast(text: &Bound<'_, PyAny>) -> PyResult { + if text.is_none() { + return Ok(0); + } + Ok(parse_filesize_text(text.str()?.to_str()?)) +} + +/// 将文件大小文本转换为字节数,供 Python 导出函数和 Rust HTML 解析共用。 +fn parse_filesize_text(text: &str) -> i64 { + let raw = text.trim().to_string(); + if raw.is_empty() { + return 0; + } + if raw.chars().all(|ch| ch.is_ascii_digit()) { + return raw.parse::().unwrap_or(0); + } + let normalized = raw.replace([',', ' '], "").to_uppercase(); + let size_text = FILESIZE_UNIT_RE.replace_all(&normalized, "").to_string(); + let Ok(mut size) = size_text.parse::() else { + return 0; + }; + if normalized.contains("PB") || normalized.contains("PIB") { + size *= 1024_f64.powi(5); + } else if normalized.contains("TB") || normalized.contains("TIB") { + size *= 1024_f64.powi(4); + } else if normalized.contains("GB") || normalized.contains("GIB") { + size *= 1024_f64.powi(3); + } else if normalized.contains("MB") || normalized.contains("MIB") { + size *= 1024_f64.powi(2); + } else if normalized.contains("KB") || normalized.contains("KIB") { + size *= 1024_f64; + } + size.round() as i64 +} + +/// 根据普通 indexer 配置构造搜索或浏览 URL。 +#[pyfunction] +pub(crate) fn build_indexer_search_url_fast( + config: &Bound<'_, PyDict>, +) -> PyResult> { + let Some(search_any) = config.get_item("search")? 
else { + return Ok(None); + }; + let search = search_any.downcast::()?; + let domain = get_optional_string(config, "domain")?.unwrap_or_default(); + if domain.is_empty() { + return Ok(None); + } + + let keyword_any = config.get_item("keyword")?; + let keyword_present = keyword_any.as_ref().is_some_and(|value| !value.is_none()); + let mut torrents_path = pick_torrents_path(search, config)?; + let page = get_optional_i64(config, "page")?.unwrap_or(0); + + if keyword_present { + let (mut search_word, search_mode) = + build_search_word(config, keyword_any.as_ref().unwrap())?; + let is_imdbid_search = IMDB_ID_RE.is_match(&search_word); + search_word = format_search_word(search, &search_word)?; + let params_any = search.get_item("params")?; + let Some(params_obj) = params_any else { + let encoded = utf8_percent_encode(&search_word, PATH_ENCODE_SET).to_string(); + return Ok(Some(format!( + "{}{}", + domain, + torrents_path + .replace("{keyword}", &encoded) + .replace("{page}", &page.to_string()) + ))); + }; + let params_dict = params_obj.downcast::()?; + if params_dict.is_empty() { + let encoded = utf8_percent_encode(&search_word, PATH_ENCODE_SET).to_string(); + return Ok(Some(format!( + "{}{}", + domain, + torrents_path + .replace("{keyword}", &encoded) + .replace("{page}", &page.to_string()) + ))); + } + + let mut query_params: Vec<(String, String)> = vec![ + ("search_mode".to_string(), search_mode.to_string()), + ("search_area".to_string(), "0".to_string()), + ("page".to_string(), page.to_string()), + ("notnewword".to_string(), "1".to_string()), + ]; + for (key, value) in params_dict.iter() { + let key = key.extract::()?; + if key == "search_area" && !is_imdbid_search { + continue; + } + let rendered = value.str()?.to_str()?.replace("{keyword}", &search_word); + upsert_query_param(&mut query_params, key, rendered); + } + apply_category_params(config, &mut query_params)?; + return Ok(Some(combine_url(&domain, &torrents_path, &query_params)?)); + } + + let browse_any = 
config.get_item("browse")?; + if let Some(browse_obj) = browse_any { + if !browse_obj.is_none() { + let browse = browse_obj.downcast::()?; + if let Some(path) = get_optional_string(browse, "path")? { + torrents_path = path; + } + if let Some(start) = get_optional_i64(browse, "start")? { + torrents_path = torrents_path.replace("{page}", &(start + page).to_string()); + } + } else if page > 0 { + torrents_path = format!("{torrents_path}?page={page}"); + } + } else if page > 0 { + torrents_path = format!("{torrents_path}?page={page}"); + } + torrents_path = torrents_path + .replace("{page}", &page.to_string()) + .replace("{keyword}", ""); + Ok(Some(format!("{domain}{torrents_path}"))) +} + +fn query_param_value(text: &str, key: &str) -> Option { + let query = if let Ok(url) = Url::parse(text) { + url.query().unwrap_or("").to_string() + } else { + text.split_once('?') + .map(|(_, query)| query.split('#').next().unwrap_or("").to_string()) + .unwrap_or_default() + }; + form_urlencoded::parse(query.as_bytes()) + .find(|(param_key, _)| param_key == key) + .map(|(_, value)| value.to_string()) +} + +/// 解析单行种子信息,覆盖普通配置站点的主字段抽取流程。 +fn parse_indexer_row( + py: Python<'_>, + row: ElementRef<'_>, + domain: &str, + fields: &Bound<'_, PyDict>, + category: Option<&Bound<'_, PyDict>>, +) -> PyResult { + let output = PyDict::new(py); + if !parse_title(py, row, fields, &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_description(py, row, fields, &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_link_field( + py, row, fields, domain, "details", "page_url", true, &output, + )? { + return Ok(RowParseResult::Unsupported); + } + if !parse_link_field( + py, + row, + fields, + domain, + "download", + "enclosure", + false, + &output, + )? { + return Ok(RowParseResult::Unsupported); + } + if !parse_plain_field(py, row, fields, "imdbid", "imdbid", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_size_field(py, row, fields, &output)? 
{ + return Ok(RowParseResult::Unsupported); + } + if !parse_int_field(py, row, fields, "leechers", "peers", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_int_field(py, row, fields, "seeders", "seeders", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_int_field(py, row, fields, "grabs", "grabs", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_factor_field(py, row, fields, "downloadvolumefactor", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_factor_field(py, row, fields, "uploadvolumefactor", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_plain_field(py, row, fields, "date_added", "pubdate", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_plain_field(py, row, fields, "date_elapsed", "date_elapsed", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_plain_field(py, row, fields, "freedate", "freedate", &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_labels_field(py, row, fields, &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_hr_field(py, row, fields, &output)? { + return Ok(RowParseResult::Unsupported); + } + if !parse_category_field(py, row, fields, category, &output)? { + return Ok(RowParseResult::Unsupported); + } + if output.is_empty() { + return Ok(RowParseResult::Empty); + } + Ok(RowParseResult::Item(output.into())) +} + +/// 解析标题字段,支持直接 selector 和常见的 title_default/title_optional 模板。 +fn parse_title( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "title")? else { + return Ok(true); + }; + let mut title = if selector.contains("selector")? { + safe_query(row, &selector)? + } else if let Some(template) = get_optional_string(&selector, "text")? { + let Some(default_selector) = get_field_dict(fields, "title_default")? 
else { + return Ok(false); + }; + let title_default = safe_query(row, &default_selector)?.unwrap_or_default(); + let title_optional = + if let Some(optional_selector) = get_field_dict(fields, "title_optional")? { + safe_query(row, &optional_selector)?.unwrap_or_default() + } else { + String::new() + }; + Some(render_known_template( + &template, + &[ + ("title_default", title_default.as_str()), + ("title_optional", title_optional.as_str()), + ], + )) + } else { + None + }; + title = apply_selector_filters(py, title, &selector)?; + if let Some(value) = title { + output.set_item("title", value)?; + } + Ok(true) +} + +/// 解析描述字段,支持直接 selector 和常见 description 模板。 +fn parse_description( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "description")? else { + return Ok(true); + }; + let mut description = if selector.contains("selector")? || selector.contains("selectors")? { + safe_query(row, &selector)? + } else if let Some(template) = get_optional_string(&selector, "text")? { + let mut values = Vec::new(); + for key in [ + "tags", + "subject", + "description_free_forever", + "description_normal", + ] { + if let Some(field_selector) = get_field_dict(fields, key)? 
{ + let value = safe_query(row, &field_selector)?.unwrap_or_default(); + values.push((key.to_string(), value)); + } + } + let refs: Vec<(&str, &str)> = values + .iter() + .map(|(key, value)| (key.as_str(), value.as_str())) + .collect(); + Some(render_known_template(&template, &refs)) + } else { + None + }; + description = apply_selector_filters(py, description, &selector)?; + if let Some(value) = description { + output.set_item("description", value)?; + } + Ok(true) +} + +/// 解析普通文本字段。 +fn parse_plain_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + source_key: &str, + target_key: &str, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, source_key)? else { + return Ok(true); + }; + if selector.contains("text")? { + return Ok(false); + } + let value = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + if let Some(value) = value { + output.set_item(target_key, value.replace('\n', " ").trim().to_string())?; + } + Ok(true) +} + +/// 解析详情和下载链接,并按 Python 逻辑拼接相对地址。 +fn parse_link_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + domain: &str, + source_key: &str, + target_key: &str, + protocol_relative: bool, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, source_key)? else { + return Ok(true); + }; + let link = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + if let Some(link) = link { + if link.is_empty() { + return Ok(true); + } + output.set_item( + target_key, + normalize_site_link(domain, &link, protocol_relative), + )?; + } + Ok(true) +} + +/// 解析文件大小字段并转换为字节。 +fn parse_size_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "size")? 
else { + return Ok(true); + }; + let value = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + let size = value + .map(|item| parse_filesize_text(item.replace('\n', "").trim())) + .unwrap_or(0); + output.set_item("size", size)?; + Ok(true) +} + +/// 解析整数类字段,兼容 "12/34" 和千分位逗号。 +fn parse_int_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + source_key: &str, + target_key: &str, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, source_key)? else { + return Ok(true); + }; + let value = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + let parsed = value + .as_deref() + .unwrap_or("") + .split('/') + .next() + .unwrap_or("") + .replace(',', "") + .trim() + .parse::() + .unwrap_or(0); + output.set_item(target_key, parsed)?; + Ok(true) +} + +/// 解析上传/下载优惠系数字段。 +fn parse_factor_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + key: &str, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, key)? else { + return Ok(true); + }; + output.set_item(key, 1)?; + if let Some(case_obj) = selector.get_item("case")? { + let case_dict = case_obj.downcast::()?; + for (case_selector_obj, value) in case_dict.iter() { + let case_selector = case_selector_obj.extract::()?; + if selector_exists(row, &case_selector)? 
{ + output.set_item(key, value)?; + return Ok(true); + } + } + return Ok(true); + } + let value = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + if let Some(value) = value { + if let Some(caps) = NUMERIC_FACTOR_RE.captures(&value) { + if let Some(number) = caps + .get(1) + .and_then(|item| item.as_str().parse::().ok()) + { + output.set_item(key, number)?; + } + } + } + Ok(true) +} + +/// 解析标签列表字段。 +fn parse_labels_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "labels")? else { + return Ok(true); + }; + if !selector.contains("selector")? { + output.set_item("labels", PyList::empty(py))?; + return Ok(true); + } + if selector.contains("remove")? { + return Ok(false); + } + let Some(values) = query_all_values(row, &selector)? else { + output.set_item("labels", PyList::empty(py))?; + return Ok(true); + }; + let labels = PyList::empty(py); + for value in values.into_iter().filter(|item| !item.is_empty()) { + labels.append(value)?; + } + output.set_item("labels", labels)?; + Ok(true) +} + +/// 解析 HR 标记字段。 +fn parse_hr_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "hr")? else { + return Ok(true); + }; + let Some(selector_text) = get_selector_text(&selector)? else { + output.set_item("hit_and_run", false)?; + return Ok(true); + }; + output.set_item("hit_and_run", selector_exists(row, &selector_text)?)?; + let _ = py; + Ok(true) +} + +/// 解析分类字段并映射为 MoviePilot 媒体类型中文值。 +fn parse_category_field( + py: Python<'_>, + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + category: Option<&Bound<'_, PyDict>>, + output: &Bound<'_, PyDict>, +) -> PyResult { + let Some(selector) = get_field_dict(fields, "category")? 
else { + return Ok(true); + }; + let value = apply_selector_filters(py, safe_query(row, &selector)?, &selector)?; + let media_type = if let (Some(value), Some(category)) = (value.as_deref(), category) { + let tv_cats = category_ids_for_field(category, "tv")?; + let movie_cats = category_ids_for_field(category, "movie")?; + if tv_cats.iter().any(|item| item == value) && !movie_cats.iter().any(|item| item == value) + { + "电视剧" + } else if movie_cats.iter().any(|item| item == value) { + "电影" + } else { + "未知" + } + } else { + "未知" + }; + output.set_item("category", media_type)?; + Ok(true) +} + +/// 获取字段配置字典。 +fn get_field_dict<'py>( + fields: &Bound<'py, PyDict>, + key: &str, +) -> PyResult>> { + let Some(value) = fields.get_item(key)? else { + return Ok(None); + }; + if value.is_none() { + return Ok(None); + } + Ok(Some(value.downcast_into::()?)) +} + +/// 执行 selector 查询并返回第一个符合 index/contents 规则的文本。 +fn safe_query( + row: ElementRef<'_>, + selector_config: &Bound<'_, PyDict>, +) -> PyResult> { + if selector_config.contains("remove")? { + return Ok(None); + } + let Some(values) = query_all_values(row, selector_config)? else { + return Ok(None); + }; + Ok(select_indexed_value(values, selector_config)) +} + +/// 查询 selector 的全部文本或属性值。 +fn query_all_values( + row: ElementRef<'_>, + selector_config: &Bound<'_, PyDict>, +) -> PyResult>> { + let Some(selector_text) = get_selector_text(selector_config)? 
else { + return Ok(None); + }; + let Ok(selector) = Selector::parse(&selector_text) else { + return Ok(None); + }; + let attribute = get_optional_string(selector_config, "attribute")?; + let mut values = Vec::new(); + for element in row.select(&selector) { + if let Some(attribute) = attribute.as_deref() { + values.push(element.value().attr(attribute).unwrap_or("").to_string()); + } else { + values.push(normalize_element_text(element)); + } + } + Ok(Some(values)) +} + +/// 读取 selector 或 selectors 配置。 +fn get_selector_text(selector_config: &Bound<'_, PyDict>) -> PyResult> { + if let Some(selector) = get_optional_string(selector_config, "selector")? { + if !selector.is_empty() { + return Ok(Some(selector)); + } + } + if let Some(selector) = get_optional_string(selector_config, "selectors")? { + if !selector.is_empty() { + return Ok(Some(selector)); + } + } + Ok(None) +} + +/// 对查询结果应用 contents/index 规则。 +fn select_indexed_value( + values: Vec, + selector_config: &Bound<'_, PyDict>, +) -> Option { + if values.is_empty() { + return None; + } + if let Ok(Some(contents)) = get_optional_i64(selector_config, "contents") { + let index = contents as usize; + if let Some(first) = values.first() { + return first.split('\n').nth(index).map(|item| item.to_string()); + } + } + if let Ok(Some(index)) = get_optional_i64(selector_config, "index") { + return values.get(index as usize).cloned(); + } + values.first().cloned() +} + +/// 应用字段配置中的 filters。 +fn apply_selector_filters( + py: Python<'_>, + value: Option, + selector_config: &Bound<'_, PyDict>, +) -> PyResult> { + let Some(value) = value else { + return Ok(None); + }; + let Some(filters) = selector_config.get_item("filters")? 
else { + return Ok(Some(value)); + }; + if filters.is_none() { + return Ok(Some(value)); + } + let _ = py; + apply_text_filters(value, &filters).map(|filtered| filtered.or_else(|| Some(String::new()))) +} + +/// 规范化元素文本,尽量接近 PyQuery.text() 输出。 +fn normalize_element_text(element: ElementRef<'_>) -> String { + element + .text() + .map(str::trim) + .filter(|item| !item.is_empty()) + .collect::>() + .join(" ") +} + +/// 判断 row 内是否存在指定 selector。 +fn selector_exists(row: ElementRef<'_>, selector_text: &str) -> PyResult { + let Ok(selector) = Selector::parse(selector_text) else { + return Ok(false); + }; + Ok(row.select(&selector).next().is_some()) +} + +/// 拼接详情和下载链接。 +fn normalize_site_link(domain: &str, link: &str, protocol_relative: bool) -> String { + if link.starts_with("http") || link.starts_with("magnet") { + return link.to_string(); + } + if protocol_relative && link.starts_with("//") { + let scheme = domain.split(':').next().unwrap_or("http"); + return format!("{scheme}:{link}"); + } + if !protocol_relative { + if let Ok(base) = Url::parse(&standardize_base_url(domain)) { + if let Some(host) = base.host_str() { + if link.contains(host) { + if link.starts_with('/') { + return format!("{}:{link}", base.scheme()); + } + return format!("{}://{link}", base.scheme()); + } + } + } + } + if let Some(stripped) = link.strip_prefix('/') { + format!("{domain}{stripped}") + } else { + format!("{domain}{link}") + } +} + +/// 渲染常见的 Jinja 字段模板,不支持复杂表达式时由调用方回退 Python。 +fn render_known_template(template: &str, values: &[(&str, &str)]) -> String { + let mut rendered = template.to_string(); + for (key, value) in values { + for pattern in [ + format!("{{{{ fields.{key} }}}}"), + format!("{{{{fields.{key}}}}}"), + format!("{{{{ fields['{key}'] }}}}"), + format!("{{{{fields['{key}']}}}}"), + format!("{{{{ fields[\"{key}\"] }}}}"), + format!("{{{{fields[\"{key}\"]}}}}"), + ] { + rendered = rendered.replace(&pattern, value); + } + } + rendered +} + +/// 读取分类配置中的 ID 列表。 +fn 
category_ids_for_field(category: &Bound<'_, PyDict>, key: &str) -> PyResult> { + let Some(list_obj) = category.get_item(key)? else { + return Ok(Vec::new()); + }; + let Ok(list) = list_obj.downcast::() else { + return Ok(Vec::new()); + }; + let mut values = Vec::new(); + for item in list.iter() { + let dict = item.downcast::()?; + if let Some(id) = get_optional_string(dict, "id")? { + values.push(id); + } + } + Ok(values) +} + +/// 从 indexer paths 配置中选择搜索路径。 +fn pick_torrents_path(search: &Bound<'_, PyDict>, config: &Bound<'_, PyDict>) -> PyResult { + let Some(paths_obj) = search.get_item("paths")? else { + return Ok(String::new()); + }; + let paths = paths_obj.downcast::()?; + if paths.len() == 1 { + let path_item = paths.get_item(0)?; + let path_dict = path_item.downcast::()?; + return Ok(get_optional_string(path_dict, "path")?.unwrap_or_default()); + } + let mtype = get_optional_string(config, "mtype")?; + for item in paths.iter() { + let path = item.downcast::()?; + let path_type = get_optional_string(path, "type")?; + if path_type.as_deref() == Some("all") && mtype.is_none() { + return Ok(get_optional_string(path, "path")?.unwrap_or_default()); + } + if path_type.as_deref() == Some("movie") && mtype.as_deref() == Some("电影") { + return Ok(get_optional_string(path, "path")?.unwrap_or_default()); + } + if path_type.as_deref() == Some("tv") && mtype.as_deref() == Some("电视剧") { + return Ok(get_optional_string(path, "path")?.unwrap_or_default()); + } + } + Ok(String::new()) +} + +/// 根据关键字、批量配置构造搜索词和搜索模式。 +fn build_search_word( + config: &Bound<'_, PyDict>, + keyword: &Bound<'_, PyAny>, +) -> PyResult<(String, i64)> { + if let Ok(values) = keyword.extract::>() { + let batch = config.get_item("batch")?; + let (delimiter, space_replace) = if let Some(batch_obj) = batch { + if batch_obj.is_none() { + (" ".to_string(), " ".to_string()) + } else { + let batch_dict = batch_obj.downcast::()?; + ( + get_optional_string(batch_dict, "delimiter")? 
+ .unwrap_or_else(|| " ".to_string()), + get_optional_string(batch_dict, "space_replace")? + .unwrap_or_else(|| " ".to_string()), + ) + } + } else { + (" ".to_string(), " ".to_string()) + }; + let words: Vec = values + .into_iter() + .map(|value| value.replace(' ', &space_replace)) + .collect(); + return Ok((words.join(&delimiter), 1)); + } + Ok((keyword.str()?.to_str()?.to_string(), 0)) +} + +/// 按 imdbid_format 转换 IMDb ID 搜索词。 +fn format_search_word(search: &Bound<'_, PyDict>, search_word: &str) -> PyResult { + if !IMDB_ID_RE.is_match(search_word) { + return Ok(search_word.to_string()); + } + let Some(format) = get_optional_string(search, "imdbid_format")? else { + return Ok(search_word.to_string()); + }; + Ok(format + .replace("{keyword}", search_word) + .replace("{imdbid}", search_word) + .replace("{imdbid_num}", search_word.trim_start_matches("tt"))) +} + +/// 更新查询参数,保留 Python dict update 的覆盖语义。 +fn upsert_query_param(params: &mut Vec<(String, String)>, key: String, value: String) { + if let Some((_, existing_value)) = params + .iter_mut() + .find(|(existing_key, _)| existing_key == &key) + { + *existing_value = value; + return; + } + params.push((key, value)); +} + +/// 应用电影/电视剧分类查询参数。 +fn apply_category_params( + config: &Bound<'_, PyDict>, + params: &mut Vec<(String, String)>, +) -> PyResult<()> { + let Some(category_obj) = config.get_item("category")? 
else { + return Ok(()); + }; + if category_obj.is_none() { + return Ok(()); + } + let category = category_obj.downcast::()?; + let mtype = get_optional_string(config, "mtype")?; + let cat_ids = collect_category_ids(category, mtype.as_deref())?; + let allowed = get_optional_string(config, "cat")?.map(|value| { + value + .split(',') + .map(|item| item.to_string()) + .collect::>() + }); + for cat_id in cat_ids { + if cat_id.is_empty() { + continue; + } + if let Some(allowed_cats) = &allowed { + if !allowed_cats.iter().any(|item| item == &cat_id) { + continue; + } + } + if let Some(field) = get_optional_string(category, "field")? { + let delimiter = + get_optional_string(category, "delimiter")?.unwrap_or_else(|| " ".to_string()); + let current = params + .iter() + .find(|(key, _)| key == &field) + .map(|(_, value)| value.clone()) + .unwrap_or_default(); + upsert_query_param(params, field, format!("{current}{delimiter}{cat_id}")); + } else { + upsert_query_param(params, format!("cat{cat_id}"), "1".to_string()); + } + } + Ok(()) +} + +/// 收集当前媒体类型可用的分类 ID。 +fn collect_category_ids( + category: &Bound<'_, PyDict>, + mtype: Option<&str>, +) -> PyResult> { + let mut items = Vec::new(); + let keys = match mtype { + Some("电视剧") => vec!["tv"], + Some("电影") => vec!["movie"], + _ => vec!["movie", "tv"], + }; + for key in keys { + if let Some(list_obj) = category.get_item(key)? { + if let Ok(list) = list_obj.downcast::() { + for item in list.iter() { + let cat = item.downcast::()?; + if let Some(cat_id) = get_optional_string(cat, "id")? 
{ + items.push(cat_id); + } + } + } + } + } + Ok(items) +} + +/// 合并 host、path 和查询参数。 +fn combine_url(host: &str, path: &str, query: &[(String, String)]) -> PyResult { + let base = standardize_base_url(host); + let mut url = Url::parse(&base) + .and_then(|base_url| base_url.join(path)) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + let mut query_params: Vec<(String, String)> = url + .query_pairs() + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect(); + for (key, value) in query { + upsert_query_param(&mut query_params, key.clone(), value.clone()); + } + { + let mut pairs = url.query_pairs_mut(); + pairs.clear(); + for (key, value) in query_params { + pairs.append_pair(&key, &value); + } + } + Ok(url.to_string()) +} + +/// 标准化基础 URL,与 Python UrlUtils.standardize_base_url 保持一致。 +fn standardize_base_url(host: &str) -> String { + let mut value = host.to_string(); + if !value.ends_with('/') { + value.push('/'); + } + if !value.starts_with("http://") && !value.starts_with("https://") { + value = format!("http://{value}"); + } + value +} diff --git a/rust/moviepilot_rust/src/lib.rs b/rust/moviepilot_rust/src/lib.rs new file mode 100644 index 00000000..e7e375aa --- /dev/null +++ b/rust/moviepilot_rust/src/lib.rs @@ -0,0 +1,31 @@ +mod filter; +mod indexer; +mod meta; +mod utils; + +use pyo3::prelude::*; + +/// 返回扩展是否已成功加载,用于 Python 侧健康检查。 +#[pyfunction] +fn is_available() -> bool { + true +} + +/// 注册 MoviePilot Rust 扩展模块。 +#[pymodule] +fn moviepilot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(is_available, m)?)?; + m.add_function(wrap_pyfunction!(meta::is_anime_fast, m)?)?; + m.add_function(wrap_pyfunction!(meta::find_metainfo_fast, m)?)?; + m.add_function(wrap_pyfunction!(meta::parse_video_title_fast, m)?)?; + m.add_function(wrap_pyfunction!(filter::parse_filter_rule_fast, m)?)?; + m.add_function(wrap_pyfunction!(filter::filter_torrents_fast, m)?)?; + m.add_function(wrap_pyfunction!( + 
indexer::apply_indexer_text_filters_fast, + m + )?)?; + m.add_function(wrap_pyfunction!(indexer::parse_filesize_fast, m)?)?; + m.add_function(wrap_pyfunction!(indexer::build_indexer_search_url_fast, m)?)?; + m.add_function(wrap_pyfunction!(indexer::parse_indexer_torrents_fast, m)?)?; + Ok(()) +} diff --git a/rust/moviepilot_rust/src/meta.rs b/rust/moviepilot_rust/src/meta.rs new file mode 100644 index 00000000..a6e357a4 --- /dev/null +++ b/rust/moviepilot_rust/src/meta.rs @@ -0,0 +1,1247 @@ +use crate::utils::{apply_range_total, capture_all_i64, capture_i64}; +use once_cell::sync::Lazy; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use regex::{Regex, RegexBuilder}; + +static ANIME_BRACKET_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"【[+0-9XVPI-]+】\s*【") + .case_insensitive(true) + .build() + .unwrap() +}); +static ANIME_DASH_EPISODE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"\s+-\s+[\dv]{1,4}\s+") + .case_insensitive(true) + .build() + .unwrap() +}); +static VIDEO_SEASON_EPISODE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new( + r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}", + ) + .case_insensitive(true) + .build() + .unwrap() +}); +static ANIME_SQUARE_BRACKET_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"\[[+0-9XVPI-]+]\s*\[") + .case_insensitive(true) + .build() + .unwrap() +}); +static BRACED_METAINFO_RE: Lazy = Lazy::new(|| Regex::new(r"\{\[([\s\S]+?)]}").unwrap()); +static BRACED_TMDBID_RE: Lazy = Lazy::new(|| Regex::new(r"tmdbid=(\d+)").unwrap()); +static BRACED_DOUBANID_RE: Lazy = Lazy::new(|| Regex::new(r"doubanid=(\d+)").unwrap()); +static BRACED_TYPE_RE: Lazy = Lazy::new(|| Regex::new(r"type=(\w+)").unwrap()); +static BRACED_BEGIN_SEASON_RE: Lazy = Lazy::new(|| Regex::new(r"(?:^|;)s=(\d+)").unwrap()); +static BRACED_END_SEASON_RE: Lazy = Lazy::new(|| Regex::new(r"s=\d+-(\d+)").unwrap()); +static BRACED_BEGIN_EPISODE_RE: Lazy = Lazy::new(|| Regex::new(r"(?:^|;)e=(\d+)").unwrap()); +static 
BRACED_END_EPISODE_RE: Lazy = Lazy::new(|| Regex::new(r"e=\d+-(\d+)").unwrap()); +static EMBY_TMDB_RE_LIST: Lazy> = Lazy::new(|| { + vec![ + Regex::new(r"\[tmdbid[=\-](\d+)]").unwrap(), + Regex::new(r"\[tmdb[=\-](\d+)]").unwrap(), + Regex::new(r"\{tmdbid[=\-](\d+)}").unwrap(), + Regex::new(r"\{tmdb[=\-](\d+)}").unwrap(), + ] +}); + +static TITLE_SIZE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"[0-9.]+\s*[MGT]i?B") + .case_insensitive(true) + .build() + .unwrap() +}); +static TITLE_DATE_RE: Lazy = + Lazy::new(|| Regex::new(r"\d{4}[\s._-]\d{1,2}[\s._-]\d{1,2}").unwrap()); +static TITLE_YEAR_RANGE_RE: Lazy = + Lazy::new(|| Regex::new(r"([\s.]+)(\d{4})-(\d{4})").unwrap()); +static FIRST_BRACKET_RE: Lazy = Lazy::new(|| Regex::new(r"^[\[【](.+?)[\]】]").unwrap()); +static FIRST_BRACKET_RELEASE_RE: Lazy = + Lazy::new(|| Regex::new(r"[A-Za-z]+\..+(?:19|20)\d{2}").unwrap()); +static FIRST_BRACKET_RESOURCE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"(?:2160|1080|720|480)[PIpi]|4K|UHD|Blu[\-.]?ray|REMUX|WEB[\-.]?DL|HDTV") + .case_insensitive(true) + .build() + .unwrap() +}); +static TOKEN_SPLIT_RE: Lazy = + Lazy::new(|| Regex::new(r"[.\s()\[\]\-【】/~;&|#_「」~]+").unwrap()); +static FULL_SEASON_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"^(?:Season\s+|S)(\d{1,3})$") + .case_insensitive(true) + .build() + .unwrap() +}); +static SEASON_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"S(\d{3})|^S(\d{1,3})$|S(\d{1,3})E") + .case_insensitive(true) + .build() + .unwrap() +}); +static EPISODE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"EP?(\d{2,4})$|^EP?(\d{1,4})$|^S\d{1,2}EP?(\d{1,4})$|S\d{2}EP?(\d{2,4})") + .case_insensitive(true) + .build() + .unwrap() +}); +static RESOURCE_PIX_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"^[SBUHD]*(\d{3,4}[PI]+)|\d{3,4}X(\d{3,4})") + .case_insensitive(true) + .build() + .unwrap() +}); +static RESOURCE_PIX_RE2: Lazy = Lazy::new(|| { + RegexBuilder::new(r"(^[248]+K)") + .case_insensitive(true) + .build() + .unwrap() +}); +static 
SOURCE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"^BLURAY$|^HDTV$|^UHDTV$|^HDDVD$|^WEBRIP$|^DVDRIP$|^BDRIP$|^BLU$|^WEB$|^BD$|^HDRip$|^REMUX$|^UHD$") + .case_insensitive(true) + .build() + .unwrap() +}); +static EFFECT_RE: Lazy = Lazy::new(|| { + RegexBuilder::new( + r"^SDR$|^HDR\d*$|^DOLBY$|^DOVI$|^DV$|^3D$|^REPACK$|^HLG$|^HDR10(\+|Plus)$|^EDR$|^HQ$", + ) + .case_insensitive(true) + .build() + .unwrap() +}); +static VIDEO_ENCODE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"^(H26[45])$|^(x26[45])$|^AVC$|^HEVC$|^VC\d?$|^MPEG\d?$|^Xvid$|^DivX$|^AV1$|^HDR\d*$|^AVS(\+|[23])$") + .case_insensitive(true) + .build() + .unwrap() +}); +static AUDIO_ENCODE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"^DTS\d?$|^DTSHD$|^DTSHDMA$|^Atmos$|^TrueHD\d?$|^AC3$|^\dAudios?$|^DDP\d?$|^DD\+\d?$|^DD\d?$|^LPCM\d?$|^AAC\d?$|^FLAC\d?$|^HD\d?$|^MA\d?$|^HR\d?$|^Opus\d?$|^Vorbis\d?$|^AV[3S]A$") + .case_insensitive(true) + .build() + .unwrap() +}); +static FPS_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"(\d{2,3})FPS") + .case_insensitive(true) + .build() + .unwrap() +}); +static PART_RE: Lazy = Lazy::new(|| { + RegexBuilder::new( + r"(^PART[0-9ABI]{0,2}$|^CD[0-9]{0,2}$|^DVD[0-9]{0,2}$|^DISK[0-9]{0,2}$|^DISC[0-9]{0,2}$)", + ) + .case_insensitive(true) + .build() + .unwrap() +}); +static NAME_NO_CHINESE_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r".*版|.*字幕") + .case_insensitive(true) + .build() + .unwrap() +}); +static VIDEO_BIT_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r"(?i)(8|10|12)[\s._-]*bit") + .build() + .unwrap() +}); + +struct VideoParseState { + tokens: Vec, + media_exts: Vec, + isfile: bool, + cn_name: Option, + en_name: Option, + year: Option, + total_season: i64, + begin_season: Option, + end_season: Option, + total_episode: i64, + begin_episode: Option, + end_episode: Option, + part: Option, + source: String, + effects: Vec, + resource_pix: Option, + web_source: Option, + video_encode: Option, + video_bit: Option, + audio_encode: Option, + fps: Option, + 
media_type: Option, + stop_name_flag: bool, + stop_cnname_flag: bool, + last_token: String, + last_token_type: String, + continue_flag: bool, + unknown_name_str: String, + index: usize, +} + +#[pyfunction] +pub(crate) fn is_anime_fast(name: &str) -> bool { + if name.is_empty() { + return false; + } + if ANIME_BRACKET_RE.is_match(name) { + return true; + } + if ANIME_DASH_EPISODE_RE.is_match(name) { + return true; + } + if VIDEO_SEASON_EPISODE_RE.is_match(name) { + return false; + } + ANIME_SQUARE_BRACKET_RE.is_match(name) +} + +/// 从标题中的内嵌标签提取媒体 ID、类型和季集范围。 +#[pyfunction] +pub(crate) fn find_metainfo_fast(py: Python<'_>, title: &str) -> PyResult { + let result = PyDict::new(py); + let mut cleaned_title = title.to_string(); + let mut tmdbid: Option = None; + let mut doubanid: Option = None; + let mut media_type: Option = None; + let mut begin_season: Option = None; + let mut end_season: Option = None; + let mut begin_episode: Option = None; + let mut end_episode: Option = None; + + for captures in BRACED_METAINFO_RE.captures_iter(title) { + let Some(meta_match) = captures.get(1) else { + continue; + }; + let meta_text = meta_match.as_str(); + let found_tmdb = BRACED_TMDBID_RE + .captures(meta_text) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); + if found_tmdb.is_some() { + tmdbid = found_tmdb.clone(); + } + if let Some(value) = BRACED_DOUBANID_RE + .captures(meta_text) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) + { + doubanid = Some(value); + } + let found_type = BRACED_TYPE_RE + .captures(meta_text) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); + if let Some(value) = found_type.as_deref() { + if value == "movies" || value == "tv" { + media_type = Some(value.to_string()); + } + } + if let Some(value) = capture_i64(&BRACED_BEGIN_SEASON_RE, meta_text) { + begin_season = Some(value); + } + if let Some(value) = capture_i64(&BRACED_END_SEASON_RE, meta_text) { + end_season = Some(value); + } + if let Some(value) = 
capture_i64(&BRACED_BEGIN_EPISODE_RE, meta_text) { + begin_episode = Some(value); + } + if let Some(value) = capture_i64(&BRACED_END_EPISODE_RE, meta_text) { + end_episode = Some(value); + } + if found_tmdb.is_some() + || found_type.is_some() + || begin_season.is_some() + || end_season.is_some() + || begin_episode.is_some() + || end_episode.is_some() + { + cleaned_title = cleaned_title.replace(&format!("{{[{meta_text}]}}"), ""); + } + } + + if let Some(caps) = EMBY_TMDB_RE_LIST[0].captures(&cleaned_title) { + tmdbid = caps.get(1).map(|m| m.as_str().to_string()); + cleaned_title = EMBY_TMDB_RE_LIST[0] + .replace_all(&cleaned_title, "") + .trim() + .to_string(); + } else if tmdbid.is_none() { + for tmdb_re in EMBY_TMDB_RE_LIST.iter().skip(1) { + if let Some(caps) = tmdb_re.captures(&cleaned_title) { + tmdbid = caps.get(1).map(|m| m.as_str().to_string()); + cleaned_title = tmdb_re.replace_all(&cleaned_title, "").trim().to_string(); + break; + } + } + } + + let (begin_season, end_season, total_season) = apply_range_total(begin_season, end_season); + let (begin_episode, end_episode, total_episode) = apply_range_total(begin_episode, end_episode); + + result.set_item("title", cleaned_title)?; + result.set_item("tmdbid", tmdbid)?; + result.set_item("doubanid", doubanid)?; + result.set_item("type", media_type)?; + result.set_item("begin_season", begin_season)?; + result.set_item("end_season", end_season)?; + result.set_item("total_season", total_season)?; + result.set_item("begin_episode", begin_episode)?; + result.set_item("end_episode", end_episode)?; + result.set_item("total_episode", total_episode)?; + Ok(result.into()) +} + +/// 对标题执行影视主识别流程,返回名称、季集、资源和编码等完整主状态。 +#[pyfunction] +#[pyo3(signature = (title, isfile=false, media_exts=None))] +pub(crate) fn parse_video_title_fast( + py: Python<'_>, + title: &str, + isfile: bool, + media_exts: Option>, +) -> PyResult { + let result = PyDict::new(py); + if title.is_empty() { + result.set_item("complete", false)?; + return 
Ok(result.into()); + } + + if isfile && title.chars().all(|ch| ch.is_ascii_digit()) && title.len() < 5 { + result.set_item("complete", true)?; + result.set_item("type", "tv")?; + result.set_item("begin_episode", title.parse::().ok())?; + result.set_item("total_episode", 1)?; + return Ok(result.into()); + } + + if let Some(caps) = FULL_SEASON_RE.captures(title) { + result.set_item("complete", true)?; + result.set_item("type", "tv")?; + if let Some(season) = caps + .get(1) + .and_then(|value| value.as_str().parse::().ok()) + { + result.set_item("begin_season", season)?; + result.set_item("total_season", 1)?; + } + return Ok(result.into()); + } + + let normalized = normalize_video_title(title); + let tokens: Vec = TOKEN_SPLIT_RE + .split(&normalized) + .filter(|token| !token.is_empty()) + .map(|token| token.to_string()) + .collect(); + + let mut parser = VideoParseState::new(tokens, isfile, media_exts.unwrap_or_default()); + parser.parse(); + let mut effects = parser.effects.clone(); + if !effects.is_empty() { + effects.reverse(); + } + + result.set_item("complete", true)?; + result.set_item("cn_name", parser.cn_name)?; + result.set_item("en_name", parser.en_name)?; + result.set_item("year", parser.year)?; + result.set_item("type", parser.media_type)?; + result.set_item("begin_season", parser.begin_season)?; + result.set_item("end_season", parser.end_season)?; + result.set_item( + "total_season", + if parser.total_season > 0 { + Some(parser.total_season) + } else { + None + }, + )?; + result.set_item("begin_episode", parser.begin_episode)?; + result.set_item("end_episode", parser.end_episode)?; + result.set_item( + "total_episode", + if parser.total_episode > 0 { + Some(parser.total_episode) + } else { + None + }, + )?; + result.set_item("part", parser.part)?; + result.set_item( + "resource_type", + if parser.source.is_empty() { + None + } else { + Some(parser.source.trim().to_string()) + }, + )?; + result.set_item( + "resource_effect", + if effects.is_empty() { + 
None + } else { + Some(effects.join(" ")) + }, + )?; + result.set_item("resource_pix", parser.resource_pix)?; + result.set_item("web_source", parser.web_source)?; + result.set_item("video_encode", parser.video_encode)?; + result.set_item("video_bit", parser.video_bit)?; + result.set_item("audio_encode", parser.audio_encode)?; + result.set_item("fps", parser.fps)?; + Ok(result.into()) +} + +fn normalize_video_title(title: &str) -> String { + let mut value = title.to_string(); + if let Some(caps) = FIRST_BRACKET_RE.captures(&value) { + if let Some(content) = caps.get(1) { + if FIRST_BRACKET_RELEASE_RE.is_match(content.as_str()) + && FIRST_BRACKET_RESOURCE_RE.is_match(content.as_str()) + { + value = format!( + "{}{}", + content.as_str(), + value + .get(caps.get(0).map(|m| m.end()).unwrap_or(0)..) + .unwrap_or("") + ); + } else if let Some(full) = caps.get(0) { + value = value.get(full.end()..).unwrap_or("").to_string(); + } + } + } + value = TITLE_YEAR_RANGE_RE.replace_all(&value, "$1$2").to_string(); + value = remove_title_size_markers(&value); + TITLE_DATE_RE.replace_all(&value, "").to_string() +} + +/// 移除标题里的大小标记;单位后紧跟大写字母时保留,兼容 Python 负向前瞻语义。 +fn remove_title_size_markers(title: &str) -> String { + TITLE_SIZE_RE + .replace_all(title, |caps: ®ex::Captures<'_>| { + let Some(matched) = caps.get(0) else { + return String::new(); + }; + let next_char = title + .get(matched.end()..) 
/// Returns `true` only when `token` is a well-formed Roman numeral.
///
/// The accepted grammar is the standard subtractive notation
/// `M* (CM|CD|D?C{0,3}) (XC|XL|L?X{0,3}) (IX|IV|V?I{0,3})`, matched against
/// the whole token, ASCII case-insensitively. The empty string is rejected.
///
/// A plain sum of letter values is not enough here: it accepts any mix of
/// numeral letters, so ordinary words such as "VIVID" or "CIVIL" (and
/// malformed forms like "IIII" or "VX") would be classified as numbers and
/// mis-handled by the name parser. The grammar is therefore validated
/// explicitly; it is hand-rolled because it needs no backtracking.
fn is_roman_numeral(token: &str) -> bool {
    let upper = token.to_ascii_uppercase();
    let symbols = upper.as_bytes();
    if symbols.is_empty() {
        return false;
    }
    // Thousands: any number of leading 'M'.
    let mut pos = symbols.iter().take_while(|&&b| b == b'M').count();
    // Hundreds, tens and ones share one shape: (one, five, ten) symbols.
    for (one, five, ten) in [(b'C', b'D', b'M'), (b'X', b'L', b'C'), (b'I', b'V', b'X')] {
        let subtractive = symbols.get(pos) == Some(&one)
            && matches!(symbols.get(pos + 1), Some(&next) if next == five || next == ten);
        if subtractive {
            // e.g. CM / CD, XC / XL, IX / IV
            pos += 2;
            continue;
        }
        if symbols.get(pos) == Some(&five) {
            pos += 1;
        }
        // At most three repetitions of the unit symbol.
        pos += symbols[pos..]
            .iter()
            .take(3)
            .take_while(|&&b| b == one)
            .count();
    }
    // Valid only if the grammar consumed the entire token.
    pos == symbols.len()
}
/// Appends `value` to an optional string, separated by a single space.
///
/// * An empty `value` leaves `target` untouched.
/// * A `None` or empty `target` becomes exactly `value` (no leading space).
/// * Otherwise `target` becomes `"<target> <value>"`.
fn append_with_space(target: &mut Option<String>, value: &str) {
    if value.is_empty() {
        return;
    }
    let buffer = target.get_or_insert_with(String::new);
    if !buffer.is_empty() {
        buffer.push(' ');
    }
    buffer.push_str(value);
}
value.starts_with('x') || value.starts_with('X') { + return Some(value.to_lowercase()); + } + Some(value.to_uppercase()) +} + +impl VideoParseState { + /// 创建影视标题主解析状态机,保持 Python MetaVideo 原有字段和中间状态语义。 + fn new(tokens: Vec, isfile: bool, media_exts: Vec) -> Self { + Self { + tokens, + media_exts: media_exts + .into_iter() + .map(|item| item.to_lowercase()) + .collect(), + isfile, + cn_name: None, + en_name: None, + year: None, + total_season: 0, + begin_season: None, + end_season: None, + total_episode: 0, + begin_episode: None, + end_episode: None, + part: None, + source: String::new(), + effects: Vec::new(), + resource_pix: None, + web_source: None, + video_encode: None, + video_bit: None, + audio_encode: None, + fps: None, + media_type: None, + stop_name_flag: false, + stop_cnname_flag: false, + last_token: String::new(), + last_token_type: String::new(), + continue_flag: true, + unknown_name_str: String::new(), + index: 0, + } + } + + /// 执行单轮 token 扫描,迁移 Python 侧 MetaVideo.__init__ 的主循环。 + fn parse(&mut self) { + while self.index < self.tokens.len() { + let token = self.tokens[self.index].clone(); + self.index += 1; + self.parse_part(&token); + if self.continue_flag { + self.parse_name(&token); + } + if self.continue_flag { + self.parse_year(&token); + } + if self.continue_flag { + self.parse_resource_pix(&token); + } + if self.continue_flag { + self.parse_season(&token); + } + if self.continue_flag { + self.parse_episode(&token); + } + if self.continue_flag { + self.parse_resource_type(&token); + } + if self.continue_flag { + self.parse_streaming_platform(&token); + } + if self.continue_flag { + self.parse_video_encode(&token); + } + if self.continue_flag { + self.parse_video_bit(&token); + } + if self.continue_flag { + self.parse_audio_encode(&token); + } + if self.continue_flag { + self.parse_fps(&token); + } + self.continue_flag = true; + } + } + + /// 返回当前是否已经识别到任意名称。 + fn has_name(&self) -> bool { + self.cn_name + .as_deref() + .is_some_and(|value| 
!value.is_empty()) + || self + .en_name + .as_deref() + .is_some_and(|value| !value.is_empty()) + } + + /// 识别标题中的 Part/CD/DVD/Disc 信息。 + fn parse_part(&mut self, token: &str) { + if !self.has_name() + || (self.year.is_none() + && self.begin_season.is_none() + && self.begin_episode.is_none() + && self.resource_pix.is_none() + && self.source.is_empty()) + { + return; + } + let Some(caps) = PART_RE.captures(token) else { + return; + }; + if self.part.is_none() { + self.part = caps.get(1).map(|value| value.as_str().to_string()); + } + if let Some(next_value) = self.tokens.get(self.index) { + let upper = next_value.to_uppercase(); + let next_is_part_suffix = (next_value.chars().all(|ch| ch.is_ascii_digit()) + && (next_value.len() == 1 + || (next_value.len() == 2 && next_value.starts_with('0')))) + || matches!(upper.as_str(), "A" | "B" | "C" | "I" | "II" | "III"); + if next_is_part_suffix { + if let Some(part) = &mut self.part { + part.push_str(next_value); + } + self.index += 1; + } + } + self.last_token_type = "part".to_string(); + self.continue_flag = false; + } + + /// 识别中文名和英文名,保持原有停止名称消费的规则。 + fn parse_name(&mut self, token: &str) { + if token.is_empty() { + return; + } + if !self.unknown_name_str.is_empty() { + if self.cn_name.as_deref().unwrap_or("").is_empty() { + if self.en_name.as_deref().unwrap_or("").is_empty() { + self.en_name = Some(self.unknown_name_str.clone()); + } else if Some(self.unknown_name_str.as_str()) != self.year.as_deref() { + append_with_space(&mut self.en_name, &self.unknown_name_str); + } + self.last_token_type = "enname".to_string(); + } + self.unknown_name_str.clear(); + } + if self.stop_name_flag { + return; + } + if token.eq_ignore_ascii_case("AKA") { + self.continue_flag = false; + self.stop_name_flag = true; + return; + } + if is_name_se_word(token) { + self.last_token_type = "name_se_words".to_string(); + return; + } + if contains_chinese(token) { + self.last_token_type = "cnname".to_string(); + if 
self.cn_name.as_deref().unwrap_or("").is_empty() { + self.cn_name = Some(token.to_string()); + } else if !self.stop_cnname_flag { + if is_name_movie_word(token) + || (!NAME_NO_CHINESE_RE.is_match(token) && !contains_name_se_word(token)) + { + append_with_space(&mut self.cn_name, token); + } + self.stop_cnname_flag = true; + } + return; + } + + let roman_digit = is_roman_numeral(token); + if token.chars().all(|ch| ch.is_ascii_digit()) || roman_digit { + if self.last_token_type == "name_se_words" { + return; + } + if self.has_name() { + if token.starts_with('0') { + return; + } + if token.chars().all(|ch| ch.is_ascii_digit()) + && self.last_token_type == "cnname" + && token.parse::().ok().is_some_and(|value| value < 1900) + { + return; + } + if (token.chars().all(|ch| ch.is_ascii_digit()) && token.len() < 4) || roman_digit { + if self.last_token_type == "cnname" { + append_with_space(&mut self.cn_name, token); + } else if self.last_token_type == "enname" { + append_with_space(&mut self.en_name, token); + } + self.continue_flag = false; + } else if token.chars().all(|ch| ch.is_ascii_digit()) + && token.len() == 4 + && self.unknown_name_str.is_empty() + { + self.unknown_name_str = token.to_string(); + } + } else if self.unknown_name_str.is_empty() { + self.unknown_name_str = token.to_string(); + } + } else if SEASON_RE.is_match(token) { + if self + .en_name + .as_deref() + .is_some_and(|name| ends_with_ignore_ascii(name, "SEASON")) + { + append_raw(&mut self.en_name, " "); + } + self.stop_name_flag = true; + } else if EPISODE_RE.is_match(token) + || SOURCE_RE.is_match(token) + || EFFECT_RE.is_match(token) + || RESOURCE_PIX_RE.is_match(token) + { + self.stop_name_flag = true; + } else { + if self.is_media_ext(token) { + return; + } + append_with_space(&mut self.en_name, token); + self.last_token_type = "enname".to_string(); + } + } + + /// 识别年份;识别到年份后停止后续名称消费。 + fn parse_year(&mut self, token: &str) { + if !self.has_name() + || !token.chars().all(|ch| 
ch.is_ascii_digit()) + || token.len() != 4 + || !token + .parse::() + .ok() + .is_some_and(|value| value > 1900 && value < 2050) + { + return; + } + if let Some(existing_year) = self.year.clone() { + if self.en_name.as_deref().is_some_and(|name| !name.is_empty()) { + append_with_space(&mut self.en_name, &existing_year); + } else if self.cn_name.as_deref().is_some_and(|name| !name.is_empty()) { + append_with_space(&mut self.cn_name, &existing_year); + } + } else if self + .en_name + .as_deref() + .is_some_and(|name| ends_with_ignore_ascii(name, "SEASON")) + { + append_raw(&mut self.en_name, " "); + } + self.year = Some(token.to_string()); + self.last_token_type = "year".to_string(); + self.continue_flag = false; + self.stop_name_flag = true; + } + + /// 识别分辨率。 + fn parse_resource_pix(&mut self, token: &str) { + if !self.has_name() { + return; + } + if let Some(pix) = parse_resource_pix(token) { + self.last_token_type = "pix".to_string(); + self.continue_flag = false; + self.stop_name_flag = true; + if self.resource_pix.is_none() { + self.resource_pix = Some(pix); + } + } + } + + /// 识别季信息并计算季总数。 + fn parse_season(&mut self, token: &str) { + let seasons = capture_all_i64(&SEASON_RE, token); + if !seasons.is_empty() { + self.last_token_type = "season".to_string(); + self.media_type = Some("tv".to_string()); + self.stop_name_flag = true; + self.continue_flag = true; + for season in seasons { + if self.begin_season.is_none() { + self.begin_season = Some(season); + self.total_season = 1; + } else if Some(season) > self.begin_season { + self.end_season = Some(season); + self.total_season = season - self.begin_season.unwrap_or(season) + 1; + if self.isfile && self.total_season > 1 { + self.end_season = None; + self.total_season = 1; + } + } + } + return; + } + if token.chars().all(|ch| ch.is_ascii_digit()) { + if self.last_token_type == "SEASON" && self.begin_season.is_none() && token.len() < 3 { + if let Ok(season) = token.parse::() { + self.begin_season = Some(season); + 
    /// Recognizes episode information and maintains the episode count.
    ///
    /// Three kinds of token are handled:
    /// * Tokens matching `EPISODE_RE`: the first value becomes
    ///   `begin_episode`; a later, larger value becomes `end_episode` and
    ///   `total_episode` is the inclusive span. The token is always claimed.
    /// * All-ASCII-digit tokens: may close a range, open an episode, or
    ///   follow a literal `EPISODE` word (see the branch comments).
    /// * The word `EPISODE` (case-insensitive): remembered via
    ///   `last_token_type = "EPISODE"` so a following number can be picked up.
    ///
    /// When `isfile` is set, a computed span larger than 2 is discarded
    /// (`end_episode` cleared, `total_episode` reset to 1).
    fn parse_episode(&mut self, token: &str) {
        let episodes = capture_all_i64(&EPISODE_RE, token);
        if !episodes.is_empty() {
            self.last_token_type = "episode".to_string();
            self.continue_flag = false;
            self.stop_name_flag = true;
            self.media_type = Some("tv".to_string());
            for episode in episodes {
                if self.begin_episode.is_none() {
                    self.begin_episode = Some(episode);
                    self.total_episode = 1;
                } else if Some(episode) > self.begin_episode {
                    // Values not greater than the current begin are ignored.
                    self.end_episode = Some(episode);
                    self.total_episode = episode - self.begin_episode.unwrap_or(episode) + 1;
                    if self.isfile && self.total_episode > 2 {
                        self.end_episode = None;
                        self.total_episode = 1;
                    }
                }
            }
            return;
        }
        if token.chars().all(|ch| ch.is_ascii_digit()) {
            // Digit runs that do not fit in i64 are skipped.
            let Ok(episode) = token.parse::<i64>() else {
                return;
            };
            if self.begin_episode.is_some()
                && self.end_episode.is_none()
                && token.len() < 5
                && Some(episode) > self.begin_episode
                && self.last_token_type == "episode"
            {
                // Bare number right after an episode token: closes the range.
                self.end_episode = Some(episode);
                self.total_episode = episode - self.begin_episode.unwrap_or(episode) + 1;
                if self.isfile && self.total_episode > 2 {
                    self.end_episode = None;
                    self.total_episode = 1;
                }
                self.continue_flag = false;
                self.media_type = Some("tv".to_string());
            } else if self.begin_episode.is_none()
                && token.len() > 1
                && token.len() < 4
                && self.last_token_type != "year"
                && self.last_token_type != "videoencode"
                && token != self.unknown_name_str
            {
                // Stand-alone 2-3 digit number with no episode yet; skipped
                // right after a year or video-encode token, or when it equals
                // the pending unknown name fragment.
                self.begin_episode = Some(episode);
                self.total_episode = 1;
                self.last_token_type = "episode".to_string();
                self.continue_flag = false;
                self.stop_name_flag = true;
                self.media_type = Some("tv".to_string());
            } else if self.last_token_type == "EPISODE"
                && self.begin_episode.is_none()
                && token.len() < 5
            {
                // Number that directly follows the literal word "EPISODE".
                self.begin_episode = Some(episode);
                self.total_episode = 1;
                self.last_token_type = "episode".to_string();
                self.continue_flag = false;
                self.stop_name_flag = true;
                self.media_type = Some("tv".to_string());
            }
        } else if token.eq_ignore_ascii_case("EPISODE") {
            self.last_token_type = "EPISODE".to_string();
        }
    }
    /// Recognizes a streaming platform name and stores it as `web_source`.
    ///
    /// Ignored until a name exists. The token is first looked up on its own
    /// via `streaming_platform_name`. If that fails, it is combined with the
    /// previous and then the next token (joined by `" "` or `"-"`) to catch
    /// two-token names. A platform is only accepted when a WEB / DL / WEBDL /
    /// WEBRIP token appears in a window of nearby tokens.
    fn parse_streaming_platform(&mut self, token: &str) {
        if !self.has_name() {
            return;
        }
        let mut platform_name = streaming_platform_name(token);
        // Number of tokens the platform name spans (1, or 2 when combined).
        let mut query_range = 1usize;
        if platform_name.is_none() {
            // `self.index` already points past the current token, so the
            // previous token sits at `index - 2` and the next one at `index`.
            let prev_token = if self.index >= 2 {
                self.tokens.get(self.index - 2)
            } else {
                None
            };
            let next_token = self.tokens.get(self.index);
            for (adjacent_token, is_next) in [(prev_token, false), (next_token, true)] {
                if adjacent_token.is_none() || platform_name.is_some() {
                    continue;
                }
                let adjacent_token = adjacent_token.unwrap();
                for separator in [" ", "-"] {
                    let combined = if is_next {
                        format!("{token}{separator}{adjacent_token}")
                    } else {
                        format!("{adjacent_token}{separator}{token}")
                    };
                    if let Some(name) = streaming_platform_name(&combined) {
                        platform_name = Some(name);
                        query_range = 2;
                        if is_next {
                            // The following token is part of the name; consume it.
                            self.index += 1;
                        }
                        break;
                    }
                }
            }
        }
        let Some(platform_name) = platform_name else {
            return;
        };
        // Window of surrounding tokens to inspect, derived from the cursor and
        // `query_range`; saturating_sub / min clamp it to the token list.
        let match_start = self.index.saturating_sub(query_range + 1);
        let match_end = self.index.saturating_sub(1);
        let start = match_start.saturating_sub(query_range);
        let end = (match_end + 1 + query_range).min(self.tokens.len());
        if self.tokens[start..end].iter().any(|item| {
            matches!(
                item.to_uppercase().as_str(),
                "WEB" | "DL" | "WEBDL" | "WEBRIP"
            )
        }) {
            self.web_source = Some(platform_name.to_string());
            self.continue_flag = false;
        }
    }
"videoencode".to_string(); + if self.video_encode.is_none() { + let encode = normalize_video_encode_capture(&caps); + if let Some(encode) = encode { + self.last_token = encode.clone(); + self.video_encode = Some(encode); + } + } else if self.video_encode.as_deref() == Some("10bit") { + if let Some(encode) = normalize_video_encode_capture(&caps) { + self.video_encode = Some(format!("{encode} 10bit")); + self.last_token = encode; + } + } + return; + } + let upper = token.to_uppercase(); + if upper == "H" || upper == "X" { + self.continue_flag = false; + self.stop_name_flag = true; + self.last_token_type = "videoencode".to_string(); + self.last_token = if upper == "H" { + upper + } else { + token.to_lowercase() + }; + } else if matches!(token, "264" | "265") + && self.last_token_type == "videoencode" + && matches!(self.last_token.as_str(), "H" | "x") + { + self.video_encode = Some(format!("{}{}", self.last_token, token)); + } else if token.chars().all(|ch| ch.is_ascii_digit()) + && self.last_token_type == "videoencode" + && matches!(self.last_token.as_str(), "VC" | "MPEG") + { + self.video_encode = Some(format!("{}{}", self.last_token, token)); + } else if upper == "10BIT" { + self.last_token_type = "videoencode".to_string(); + if let Some(existing) = &mut self.video_encode { + *existing = format!("{existing} 10bit"); + } else { + self.video_encode = Some("10bit".to_string()); + } + } + } + + /// 识别视频位深字段。 + fn parse_video_bit(&mut self, token: &str) { + if !self.has_name() + || (self.year.is_none() + && self.resource_pix.is_none() + && self.source.is_empty() + && self.begin_season.is_none() + && self.begin_episode.is_none()) + { + return; + } + let Some(video_bit) = parse_video_bit(token) else { + return; + }; + self.continue_flag = false; + self.stop_name_flag = true; + self.last_token_type = "videobit".to_string(); + if self.video_bit.is_none() { + self.video_bit = Some(video_bit); + } + } + + /// 识别音频编码并合并 5.1、DTS-HD MA 等组合。 + fn parse_audio_encode(&mut self, 
token: &str) { + if !self.has_name() + || (self.year.is_none() + && self.resource_pix.is_none() + && self.source.is_empty() + && self.begin_season.is_none() + && self.begin_episode.is_none()) + { + return; + } + if AUDIO_ENCODE_RE.is_match(token) { + self.continue_flag = false; + self.stop_name_flag = true; + self.last_token_type = "audioencode".to_string(); + self.last_token = token.to_uppercase(); + if let Some(existing) = &mut self.audio_encode { + if existing.eq_ignore_ascii_case("DTS") { + *existing = format!("{existing}-{token}"); + } else { + *existing = format!("{existing} {token}"); + } + } else { + self.audio_encode = Some(token.to_string()); + } + } else if is_digit_token(token) && self.last_token_type == "audioencode" { + if let Some(existing) = &mut self.audio_encode { + if is_digit_token(&self.last_token) { + *existing = format!("{existing}.{token}"); + } else if existing + .chars() + .last() + .is_some_and(|ch| ch.is_ascii_digit()) + { + let split_at = existing.len() - 1; + *existing = format!( + "{} {}.{token}", + &existing[..split_at], + &existing[split_at..] 
/// Returns `true` when `token` is non-empty and every character satisfies
/// `char::is_numeric`.
///
/// Intended to mirror Python's `str.isdigit`.
/// NOTE(review): `char::is_numeric` also accepts letter-like and fractional
/// numerals such as 'Ⅳ' or '½', which `str.isdigit` rejects — confirm no
/// caller depends on that difference.
fn is_digit_token(token: &str) -> bool {
    let mut chars = token.chars().peekable();
    chars.peek().is_some() && chars.all(char::is_numeric)
}
begin_value > end_value { + begin = Some(end_value); + end = Some(begin_value); + Some(begin_value - end_value + 1) + } else { + Some(end_value - begin_value + 1) + } + } + (Some(_), None) => Some(1), + _ => None, + }; + (begin, end, total) +} + +/// 将 Python 对象转换为 usize,用于过滤器下标。 +pub(crate) fn py_i64_to_usize(value: &Bound<'_, PyAny>) -> PyResult { + let index = value.extract::()?; + if index < 0 { + return Err(PyValueError::new_err("下标不能为负数")); + } + Ok(index as usize) +} + +/// 从 Python 字典读取可选字符串。 +pub(crate) fn get_optional_string(dict: &Bound<'_, PyDict>, key: &str) -> PyResult> { + let Some(value) = dict.get_item(key)? else { + return Ok(None); + }; + if value.is_none() { + return Ok(None); + } + Ok(Some(value.str()?.to_str()?.to_string())) +} + +/// 从 Python 字典读取可选整数。 +pub(crate) fn get_optional_i64(dict: &Bound<'_, PyDict>, key: &str) -> PyResult> { + let Some(value) = dict.get_item(key)? else { + return Ok(None); + }; + if value.is_none() { + return Ok(None); + } + if let Ok(parsed) = value.extract::() { + return Ok(Some(parsed)); + } + let text = value.str()?.to_str()?.trim().to_string(); + if text.is_empty() { + return Ok(None); + } + Ok(text.parse::().ok()) +} + +/// 从 Python 字典读取可选浮点数。 +pub(crate) fn get_optional_f64(dict: &Bound<'_, PyDict>, key: &str) -> PyResult> { + let Some(value) = dict.get_item(key)? 
else { + return Ok(None); + }; + if value.is_none() { + return Ok(None); + } + if let Ok(parsed) = value.extract::() { + return Ok(Some(parsed)); + } + let text = value.str()?.to_str()?.trim().to_string(); + if text.is_empty() { + return Ok(None); + } + Ok(text.parse::().ok()) +} diff --git a/scripts/bootstrap-local.sh b/scripts/bootstrap-local.sh index b97cfac2..09f6ff0f 100755 --- a/scripts/bootstrap-local.sh +++ b/scripts/bootstrap-local.sh @@ -17,6 +17,7 @@ SUPERUSER_PASSWORD="" OS_NAME="Unknown" PYTHON_BIN="" BREW_BIN="" +RUSTUP_BIN="" PACKAGE_MANAGER="" PACKAGE_INDEX_UPDATED="false" PROMPT_INPUT="/dev/stdin" @@ -227,20 +228,20 @@ find_uv_python() { python_install_hint() { case "$OS_NAME" in macOS) - echo "脚本已尝试自动安装 Git、curl 和 Python 3.11+。" >&2 - echo "如果自动安装失败,请先安装 Homebrew,或手动执行:brew install git curl python@3.11" >&2 + echo "脚本已尝试自动安装 Git、curl、Python 3.11+、Rust toolchain 和构建工具。" >&2 + echo "如果自动安装失败,请先安装 Homebrew 和 Xcode Command Line Tools,或手动执行:brew install git curl python@3.11 rustup-init" >&2 ;; Linux*) - echo "脚本已尝试自动安装 Git、curl 和 Python 3.11+。" >&2 - echo "如果自动安装失败,请先安装 Git、curl 和 Python 3.11+,并确保包含 venv 模块。" >&2 - echo "例如 Debian/Ubuntu: sudo apt install git curl python3.11 python3.11-venv" >&2 - echo "例如 Fedora/RHEL: sudo dnf install git curl python3.11" >&2 + echo "脚本已尝试自动安装 Git、curl、Python 3.11+、Rust toolchain 和构建工具。" >&2 + echo "如果自动安装失败,请先安装 Git、curl、Python 3.11+、cargo,并确保包含 venv 模块。" >&2 + echo "例如 Debian/Ubuntu: sudo apt install git curl python3.11 python3.11-venv build-essential" >&2 + echo "例如 Fedora/RHEL: sudo dnf install git curl python3.11 gcc gcc-c++ make" >&2 ;; Windows) echo "推荐在 WSL、Linux 或 macOS 终端中运行此脚本。" >&2 ;; *) - echo "请先安装 Git、curl 和 Python 3.11 或更高版本。" >&2 + echo "请先安装 Git、curl、Python 3.11 或更高版本、cargo 和 C 编译器。" >&2 ;; esac } @@ -270,6 +271,74 @@ ensure_brew() { fi } +# 检查 cargo 是否已可用,兼容 rustup 安装后 PATH 尚未刷新。 +find_cargo() { + local cargo_bin="" + cargo_bin="$(command -v cargo 2>/dev/null || true)" + if [[ -n "$cargo_bin" ]]; 
# Report (exit status 0) whether the Rust acceleration extension was explicitly
# disabled through MOVIEPILOT_SKIP_RUST_ACCEL.
# The value is compared case-insensitively so that e.g. "True" or "Yes" is
# honoured here exactly as it is by the Python setup script, which lowercases
# the same variable. `tr` is used instead of ${var,,} to stay compatible with
# older bash releases.
rust_accel_should_skip() {
  local flag
  flag="$(printf '%s' "${MOVIEPILOT_SKIP_RUST_ACCEL:-}" | tr '[:upper:]' '[:lower:]')"
  case "$flag" in
    1|true|yes|on)
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}

# Make sure cargo is available for building the acceleration extension.
# Returns 0 when the build is skipped or cargo is usable afterwards, 1 when
# cargo is still missing after the installation attempt.
ensure_rust_toolchain() {
  if rust_accel_should_skip; then
    return 0
  fi

  if [[ -n "$(find_cargo)" ]]; then
    # cargo may live in ~/.cargo/bin without that directory being on PATH yet.
    export PATH="$HOME/.cargo/bin:$PATH"
    return 0
  fi

  echo "==> 自动安装 Rust toolchain,用于构建 MoviePilot 加速扩展"
  case "$PACKAGE_MANAGER" in
    brew)
      ensure_brew
      "$BREW_BIN" install rustup-init
      ;;
    *)
      RUSTUP_BIN="$(command -v rustup 2>/dev/null || true)"
      if [[ -z "$RUSTUP_BIN" ]]; then
        # Restrict the bootstrap download to HTTPS with TLS >= 1.2, as the
        # official rustup install command does, so a redirect cannot
        # downgrade the transport of a script that is piped into sh.
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
      else
        "$RUSTUP_BIN" toolchain install stable --profile minimal
      fi
      ;;
  esac

  export PATH="$HOME/.cargo/bin:$PATH"
  if [[ "$PACKAGE_MANAGER" == "brew" ]]; then
    # Homebrew only provides the installer; run it to get the toolchain.
    RUSTUP_BIN="$(command -v rustup-init 2>/dev/null || true)"
    if [[ -n "$RUSTUP_BIN" ]]; then
      "$RUSTUP_BIN" -y --profile minimal
    fi
  fi

  # Drop the shell's cached command locations so the new cargo is found.
  hash -r
  if [[ -z "$(find_cargo)" ]]; then
    echo "Rust toolchain 安装失败,请手动安装 cargo 后重试。" >&2
    return 1
  fi
}
def run(
    command: list[str],
    cwd: Optional[Path] = None,
    env: Optional[dict[str, str]] = None,
) -> None:
    """
    Echo and execute an external command of the installation flow.

    Args:
        command: Program and arguments, passed to the OS without a shell.
        cwd: Working directory; defaults to the repository root.
        env: Complete environment for the child process; ``None`` inherits
            the current one.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status, so the caller's flow is interrupted.
    """
    # Local import keeps this block independent of the module's import list.
    import shlex

    # shlex.join quotes arguments that contain spaces or shell metacharacters,
    # so the echoed line is unambiguous and can be pasted back into a shell.
    print(f"+ {shlex.join(command)}")
    subprocess.run(command, cwd=str(cwd or ROOT), check=True, env=env)
def _cargo_env_path() -> str:
    """
    Build a PATH value that searches ``~/.cargo/bin`` before the current PATH.

    This covers rustup's default per-user install location when the running
    shell has not picked up the updated environment yet.

    Returns:
        The cargo bin directory followed by the existing ``PATH`` entries,
        joined with ``os.pathsep``.
    """
    cargo_bin = str(Path.home() / ".cargo" / "bin")
    current_path = os.environ.get("PATH", "")
    # Skip an unset/empty PATH: joining it would leave a trailing separator,
    # i.e. an empty entry, which PATH lookup treats as the current directory.
    return os.pathsep.join(part for part in (cargo_bin, current_path) if part)
os.environ.copy() + env["PATH"] = _cargo_env_path() + run( + [ + str(venv_python), + "-m", + "maturin", + "develop", + "--release", + "--manifest-path", + str(RUST_ACCEL_MANIFEST), + ], + env=env, + ) + + def ensure_supported_python(python_bin: str) -> None: version = get_python_version(python_bin) if version < MIN_PYTHON_VERSION: @@ -2628,7 +2728,11 @@ def init_local( def install_deps(*, python_bin: str, venv_dir: Path, recreate: bool) -> Path: + """ + 创建或复用本地虚拟环境,并安装后端依赖、Rust 扩展和浏览器运行时。 + """ ensure_supported_python(python_bin) + ensure_rust_accel_ready() venv_dir = venv_dir.expanduser().resolve() venv_python = get_venv_python(venv_dir) venv_pip = get_venv_pip(venv_dir) @@ -2653,6 +2757,7 @@ def install_deps(*, python_bin: str, venv_dir: Path, recreate: bool) -> Path: print_step("安装项目依赖") run([str(venv_pip), "install", "-r", str(ROOT / "requirements.txt")]) + install_rust_accel(venv_python) install_browser_runtime(venv_python) return venv_python diff --git a/tests/test_local_setup_config_dir.py b/tests/test_local_setup_config_dir.py index bbdd0a2d..142f19b5 100644 --- a/tests/test_local_setup_config_dir.py +++ b/tests/test_local_setup_config_dir.py @@ -68,6 +68,8 @@ class LocalSetupConfigDirTests(unittest.TestCase): venv_pip = venv_dir / "bin" / "pip" with patch.object(module, "ensure_supported_python"), \ + patch.object(module, "ensure_rust_accel_ready") as rust_ready, \ + patch.object(module, "install_rust_accel") as install_rust, \ patch.object( module, "configure_venv_pip_compat", @@ -86,8 +88,75 @@ class LocalSetupConfigDirTests(unittest.TestCase): run_mock.assert_any_call( [str(venv_pip), "install", "-r", str(module.ROOT / "requirements.txt")] ) + rust_ready.assert_called_once_with() + install_rust.assert_called_once_with(venv_python) install_browser.assert_called_once_with(venv_python) + def test_install_rust_accel_runs_maturin_develop(self): + """ + 验证本地 CLI 安装会通过 maturin 将 Rust 扩展安装进虚拟环境。 + """ + module = load_local_setup_module() + + with 
def test_ensure_rust_accel_ready_allows_skip(self):
    """
    Verify that an explicit skip no longer requires cargo on the machine.

    Besides checking that no error is raised, assert that the toolchain
    probes are never consulted, so the test cannot pass by accident.
    """
    module = load_local_setup_module()

    with tempfile.TemporaryDirectory() as temp_dir:
        manifest = Path(temp_dir) / "Cargo.toml"
        manifest.write_text("[package]\nname = \"moviepilot_rust\"\n")

        with patch.object(module, "RUST_ACCEL_MANIFEST", manifest), \
                patch.object(module, "_rust_accel_should_skip", return_value=True), \
                patch.object(module, "_find_cargo", return_value=None) as find_cargo, \
                patch.object(module, "_find_native_linker", return_value=None) as find_linker:
            module.ensure_rust_accel_ready()

    find_cargo.assert_not_called()
    find_linker.assert_not_called()
def test_rust_metainfo_fast_path_extracts_emby_override():
    """
    The Rust embedded-tag parser must keep the Emby tmdbid tag priority.
    """
    raw_title = "Movie {[tmdbid=111;type=movies]} [tmdbid=222]"

    parsed = rust_accel.find_metainfo(raw_title)
    cleaned_title = parsed[0]
    tags = parsed[1]

    assert cleaned_title == "Movie"
    assert tags["tmdbid"] == "222"
    assert tags["type"] == MediaType.MOVIE
def test_rust_filesize_parser_matches_site_units():
    """
    The Rust file-size parser must cover the units commonly used by sites.
    """
    expected_bytes = (
        ("1.5 GB", 1610612736),
        ("2 TiB", 2199023255552),
        ("42", 42),
    )
    for text, size in expected_bytes:
        assert rust_accel.parse_filesize(text) == size
{"selector": ".hr"}, + "category": {"selector": ".cat"}, + }, + }, + }, + ) + html = """ + + + + + + + + + +
Movie 2024 1080pBluRayDL1.5 GB1,2345/1042Free2xDIYHDRH&R401
+ """ + + torrents = spider.parse(html) + + assert torrents == [{ + "title": "Movie 2024 1080p", + "description": "BluRay", + "page_url": "https://example.org/details/1", + "enclosure": "https://example.org/download/1", + "size": 1610612736, + "seeders": 1234, + "peers": 5, + "grabs": 42, + "downloadvolumefactor": 0, + "uploadvolumefactor": 2, + "labels": ["DIY", "HDR"], + "hit_and_run": True, + "category": MediaType.MOVIE.value, + }]