feat: add Rust acceleration for core parsing

This commit is contained in:
jxxghp
2026-05-22 19:57:26 +08:00
parent 7daeb17d85
commit bd4d493f34
28 changed files with 5012 additions and 77 deletions

View File

@@ -11,6 +11,7 @@ from app.schemas.types import MediaType
from app.utils.string import StringUtils
from app.utils.tokens import Tokens
from app.core.meta.streamingplatform import StreamingPlatforms
from app.utils import rust_accel
class MetaVideo(MetaBase):
@@ -102,59 +103,61 @@ class MetaVideo(MetaBase):
title = re.sub(r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)', "", title, flags=re.IGNORECASE)
# 把年月日去掉
title = re.sub(r'\d{4}[\s._-]\d{1,2}[\s._-]\d{1,2}', "", title)
# 拆分tokens
tokens = Tokens(title)
# 实例化StreamingPlatforms对象
streaming_platforms = StreamingPlatforms()
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
# 解析名称、年份、季、集、资源类型、分辨率等
token = tokens.get_next()
while token:
self._index += 1 # 更新当前处理的token索引
# Part
self.__init_part(token, tokens)
# 标题
if self._continue_flag:
self.__init_name(token, media_exts)
# 年份
if self._continue_flag:
self.__init_year(token)
# 分辨率
if self._continue_flag:
self.__init_resource_pix(token)
# 季
if self._continue_flag:
self.__init_season(token)
# 集
if self._continue_flag:
self.__init_episode(token)
# 资源类型
if self._continue_flag:
self.__init_resource_type(token)
# 流媒体平台
if self._continue_flag:
self.__init_web_source(token, tokens, streaming_platforms)
# 视频编码
if self._continue_flag:
self.__init_video_encode(token)
# 视频位深
if self._continue_flag:
self.__init_video_bit(token)
# 音频编码
if self._continue_flag:
self.__init_audio_encode(token)
# 帧率
if self._continue_flag:
self.__init_fps(token)
# 取下一个,直到没有为止
rust_parse = rust_accel.parse_video_title(title, isfile=isfile, media_exts=media_exts)
if not self.__apply_rust_parse(rust_parse):
# 拆分tokens
tokens = Tokens(title)
# 实例化StreamingPlatforms对象
streaming_platforms = StreamingPlatforms()
# 解析名称、年份、季、集、资源类型、分辨率等
token = tokens.get_next()
self._continue_flag = True
# 合成质量
if self._effect:
self._effect.reverse()
self.resource_effect = " ".join(self._effect)
if self._source:
self.resource_type = self._source.strip()
while token:
self._index += 1 # 更新当前处理的token索引
# Part
self.__init_part(token, tokens)
# 标题
if self._continue_flag:
self.__init_name(token, media_exts)
# 年份
if self._continue_flag:
self.__init_year(token)
# 分辨率
if self._continue_flag:
self.__init_resource_pix(token)
# 季
if self._continue_flag:
self.__init_season(token)
# 集
if self._continue_flag:
self.__init_episode(token)
# 资源类型
if self._continue_flag:
self.__init_resource_type(token)
# 流媒体平台
if self._continue_flag:
self.__init_web_source(token, tokens, streaming_platforms)
# 视频编码
if self._continue_flag:
self.__init_video_encode(token)
# 视频位深
if self._continue_flag:
self.__init_video_bit(token)
# 音频编码
if self._continue_flag:
self.__init_audio_encode(token)
# 帧率
if self._continue_flag:
self.__init_fps(token)
# 取下一个,直到没有为止
token = tokens.get_next()
self._continue_flag = True
# 合成质量
if self._effect:
self._effect.reverse()
self.resource_effect = " ".join(self._effect)
if self._source:
self.resource_type = self._source.strip()
# 提取原盘DIY
if self.resource_type and "BluRay" in self.resource_type:
if (self.subtitle and re.findall(r'D[Ii]Y', self.subtitle)) \
@@ -185,6 +188,62 @@ class MetaVideo(MetaBase):
if not self.video_bit:
self.video_bit = self.extract_video_bit(self.video_encode)
def __apply_rust_parse(self, rust_parse: Optional[dict]) -> bool:
    """
    Adopt the Rust primary-parse result as this object's parsed fields.

    :param rust_parse: dict produced by the Rust title parser, or None
    :return: False when there is no result or it is not flagged "complete"
             (nothing is modified in that case); True once the result has
             been applied, which lets the caller skip the Python token loop
    """
    if not (rust_parse and rust_parse.get("complete")):
        return False

    # Names are taken verbatim from the Rust result, overwriting current values.
    for name_attr in ("cn_name", "en_name"):
        setattr(self, name_attr, rust_parse.get(name_attr))

    parsed_year = rust_parse.get("year")
    if parsed_year:
        self.year = str(parsed_year)
    self.part = rust_parse.get("part")

    # Season/episode/resource fields go through the fill-only-if-empty merge.
    self.__merge_rust_parse(rust_parse)

    # An explicit type in the result wins over the type implied by the merge.
    declared_type = rust_parse.get("type")
    if declared_type == "movie":
        self.type = MediaType.MOVIE
    elif declared_type == "tv":
        self.type = MediaType.TV
    return True
def __merge_rust_parse(self, rust_parse: Optional[dict]) -> None:
    """
    Merge a Rust parse result into this object, filling only fields that are
    still empty; values already present on the object are never overwritten.

    :param rust_parse: dict produced by the Rust title parser, or None (no-op)
    """
    if not rust_parse:
        return

    year_value = rust_parse.get("year")
    if not self.year and year_value:
        self.year = str(year_value)

    # Seasons first, then episodes. begin/end count as "unset" only when None
    # (0 is a legitimate value), while total_* uses plain truthiness.
    for begin_key, end_key, total_key in (
        ("begin_season", "end_season", "total_season"),
        ("begin_episode", "end_episode", "total_episode"),
    ):
        begin_value = rust_parse.get(begin_key)
        if getattr(self, begin_key) is None and begin_value is not None:
            setattr(self, begin_key, int(begin_value))
            # Any season/episode number marks the media as a TV show.
            self.type = MediaType.TV

        end_value = rust_parse.get(end_key)
        if getattr(self, end_key) is None and end_value is not None:
            setattr(self, end_key, int(end_value))

        total_value = rust_parse.get(total_key)
        if not getattr(self, total_key) and total_value:
            setattr(self, total_key, int(total_value))

    # Resource descriptors are copied as-is when missing locally.
    for resource_key in (
        "resource_pix",
        "resource_type",
        "resource_effect",
        "video_encode",
        "video_bit",
        "audio_encode",
    ):
        resource_value = rust_parse.get(resource_key)
        if not getattr(self, resource_key) and resource_value:
            setattr(self, resource_key, resource_value)

    fps_value = rust_parse.get("fps")
    if self.fps is None and fps_value is not None:
        self.fps = int(fps_value)
@staticmethod
def __get_title_from_description(description: str) -> Optional[str]:
"""

View File

@@ -12,6 +12,7 @@ from app.core.meta.infopath import (
from app.core.meta.words import WordsMatcher
from app.log import logger
from app.schemas.types import MediaType
from app.utils import rust_accel
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
@@ -168,6 +169,9 @@ def is_anime(name: str) -> bool:
:param name: 名称
:return: 是否动漫
"""
rust_result = rust_accel.is_anime(name)
if rust_result is not None:
return rust_result
if not name:
return False
if _ANIME_BRACKET_RE.search(name):
@@ -185,6 +189,9 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
"""
从标题中提取媒体信息
"""
rust_result = rust_accel.find_metainfo(title)
if rust_result is not None:
return rust_result
metainfo = _empty_metainfo()
if not title:
return title, metainfo

View File

@@ -2,6 +2,8 @@ import threading
from pyparsing import Forward, Literal, Word, alphas, infixNotation, opAssoc, alphanums, Combine, nums, ParseResults
from app.utils import rust_accel
class RuleParser:
@@ -48,9 +50,30 @@ class RuleParser:
返回:
解析结果
"""
rust_result = rust_accel.parse_filter_rule(expression)
if rust_result is not None:
return _RustParseResults(rust_result)
return self.expr.parseString(expression)
class _RustParseResults(list):
    """
    List subclass wrapping the Rust rule-parse output so callers can keep
    using the as_list()/asList() accessors they call on pyparsing results.
    """

    def as_list(self) -> list:
        """
        Return the items as a new plain list (shallow copy), mirroring the
        name of pyparsing.ParseResults.as_list.
        """
        return [*self]

    def asList(self) -> list:  # noqa: N802
        """
        camelCase alias of as_list(), mirroring pyparsing.ParseResults.asList.
        """
        return self.as_list()
if __name__ == '__main__':
# 测试代码
expression_str = """

View File

@@ -11,6 +11,7 @@ from app.modules import _ModuleBase
from app.modules.filter.RuleParser import RuleParser
from app.modules.filter.builtin_rules import BUILTIN_RULE_SET
from app.schemas.types import ModuleType, OtherModulesType, SystemConfigKey
from app.utils import rust_accel
from app.utils.string import StringUtils
@@ -138,6 +139,9 @@ class FilterModule(_ModuleBase):
# 查询规则表详情
groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups)
if groups:
rust_filtered = self.__filter_torrents_by_rust(groups, torrent_list, mediainfo)
if rust_filtered is not None:
return rust_filtered
for group in groups:
# 过滤种子
torrent_list = self.__filter_torrents(
@@ -150,6 +154,46 @@ class FilterModule(_ModuleBase):
)
return torrent_list
def __filter_torrents_by_rust(self, groups: list, torrent_list: List[TorrentInfo],
                              mediainfo: MediaInfo) -> Optional[List[TorrentInfo]]:
    """
    Run all rule groups over the torrents in a single Rust call.

    :param groups: rule groups; each one's rule_string is passed to Rust
    :param torrent_list: candidate torrents
    :param mediainfo: media the torrents are matched against (may be falsy)
    :return: the kept torrents with pri_order updated, [] for an empty input,
             or None when the Rust wrapper returned None so the caller should
             run the Python filtering instead
    """
    if not torrent_list:
        return []

    # Rust only receives plain dicts, one per torrent, in list order.
    payloads = [self.__build_rust_torrent_payload(item) for item in torrent_list]

    if not mediainfo:
        media_payload = None
    elif hasattr(mediainfo, "to_dict"):
        media_payload = mediainfo.to_dict()
    else:
        media_payload = vars(mediainfo).copy()

    result = rust_accel.filter_torrents(
        rule_set=self.rule_set,
        rule_strings=[group.rule_string for group in groups],
        torrents=payloads,
        media_info=media_payload,
    )
    if result is None:
        return None

    # Each result entry is (index into torrent_list, priority order).
    kept = []
    for raw_index, raw_order in result:
        matched = torrent_list[int(raw_index)]
        matched.pri_order = int(raw_order)
        kept.append(matched)
    return kept
@staticmethod
def __build_rust_torrent_payload(torrent: TorrentInfo) -> dict:
    """
    Flatten a torrent into the plain dict handed to the Rust filter, so the
    extension does not have to touch Python business objects.

    :param torrent: torrent to serialise
    :return: torrent.to_dict() when available, otherwise a copy of its
             attribute dict; both extended with "pub_minutes" and
             "episode_count"
    """
    if hasattr(torrent, "to_dict"):
        payload = torrent.to_dict()
    else:
        payload = vars(torrent).copy()
    payload["pub_minutes"] = torrent.pub_minutes()

    # The title is only parsed for an episode count when a size is present;
    # otherwise the count defaults to 1.
    episode_count = 1
    if payload.get("size"):
        parsed_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
        episode_count = parsed_meta.total_episode or 1
    payload["episode_count"] = episode_count
    return payload
def __filter_torrents(self, rule_string: str, rule_name: str,
torrent_list: List[TorrentInfo],
mediainfo: MediaInfo,

View File

@@ -11,8 +11,10 @@ from requests import Session
from app.core.config import settings
from app.helper.cloudflare import under_challenge
from app.log import logger
from app.utils import rust_accel
from app.utils.http import RequestUtils
from app.utils.site import SiteUtils
from app.utils.string import StringUtils
# 站点框架
@@ -154,6 +156,16 @@ class SiteParserBase(metaclass=ABCMeta):
"""
return self.schema
@staticmethod
def num_filesize(text) -> int:
    """
    Convert a file-size text scraped from a site page into bytes.

    The Rust fast path is tried first; when it yields None the conversion
    is delegated to StringUtils.num_filesize.

    :param text: size text to convert
    :return: size in bytes
    """
    fast_bytes = rust_accel.parse_filesize(text)
    return StringUtils.num_filesize(text) if fast_bytes is None else fast_bytes
def parse(self):
"""
解析站点信息

View File

@@ -93,10 +93,10 @@ class NexusPhpSiteUserInfo(SiteParserBase):
html_text = self._prepare_html_text(html_text)
upload_match = re.search(r"[^总]上[传傳]量?[:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text,
re.IGNORECASE)
self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0
self.upload = self.num_filesize(upload_match.group(1).strip()) if upload_match else 0
download_match = re.search(r"[^总子影力]下[载載]量?[:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text,
re.IGNORECASE)
self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0
self.download = self.num_filesize(download_match.group(1).strip()) if download_match else 0
ratio_match = re.search(r"分享率[:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)", html_text)
# 计算分享率
calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
@@ -209,7 +209,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
size = self.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
page_seeding_size += size
@@ -273,7 +273,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
tmp_seeding_size = 0
tmp_seeding_info = []
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
size = self.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
tmp_seeding_size += size
@@ -292,7 +292,7 @@ class NexusPhpSiteUserInfo(SiteParserBase):
seeding_size_match = re.search(r"总做种体积:\s+([\d,.\s]+[KMGTPI]*B)", seeding_sizes[0], re.IGNORECASE)
tmp_seeding = StringUtils.str_int(seeding_match.group(1)) if (
seeding_match and seeding_match.group(1)) else 0
tmp_seeding_size = StringUtils.num_filesize(
tmp_seeding_size = self.num_filesize(
seeding_size_match.group(1).strip()) if seeding_size_match else 0
if not self.seeding_size:
self.seeding_size = tmp_seeding_size

View File

@@ -75,7 +75,7 @@ class ZhixingSiteUserInfo(SiteParserBase):
s = s.strip()
if re.match(r'^\d+(\.\d+)?$', s):
s += ' B'
return StringUtils.num_filesize(s) if s else 0
return self.num_filesize(s) if s else 0
self.upload = num_filesize_safe(info_dict.get('上传流量')) if '上传流量' in info_dict else 0
self.download = num_filesize_safe(info_dict.get('下载流量')) if '下载流量' in info_dict else 0
@@ -108,7 +108,7 @@ class ZhixingSiteUserInfo(SiteParserBase):
if size_td:
size_text = size_td.find('a').text if size_td.find('a') else size_td.text.strip()
page_seeding += 1
page_seeding_size += StringUtils.num_filesize(size_text)
page_seeding_size += self.num_filesize(size_text)
return page_seeding, page_seeding_size
def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]:
@@ -164,7 +164,7 @@ class ZhixingSiteUserInfo(SiteParserBase):
s = s.strip()
if re.match(r'^\d+(\.\d+)?$', s):
s += ' B'
return StringUtils.num_filesize(s) if s else 0
return self.num_filesize(s) if s else 0
self.seeding = int(self._basic_info.get('当前保种数量', 0))
self.seeding_size = num_filesize_safe(self._basic_info.get('当前保种容量', ''))
@@ -181,4 +181,4 @@ class ZhixingSiteUserInfo(SiteParserBase):
self.message_unread = str(self.message_unread or 0)
self.seeding = str(self.seeding or 0)
self.seeding_size = str(self.seeding_size or 0)
self.seeding_size = str(self.seeding_size or 0)

View File

@@ -12,6 +12,7 @@ from pyquery import PyQuery
from app.core.config import settings
from app.log import logger
from app.schemas.types import MediaType
from app.utils import rust_accel
from app.utils.http import RequestUtils, AsyncRequestUtils
from app.utils.string import StringUtils
from app.utils.url import UrlUtils
@@ -95,6 +96,19 @@ class SiteSpider:
"""
获取搜索URL
"""
rust_url = rust_accel.build_indexer_search_url({
"search": self.search,
"batch": self.batch,
"browse": self.browse,
"category": self.category,
"domain": self.domain,
"keyword": self.keyword,
"mtype": self.mtype.value if self.mtype else None,
"cat": self.cat,
"page": self.page,
})
if rust_url:
return rust_url
# 种子搜索相对路径
paths = self.search.get('paths', [])
torrentspath = ""
@@ -658,6 +672,9 @@ class SiteSpider:
"""
if not text or not filters or not isinstance(filters, list):
return text
rust_text = rust_accel.apply_indexer_text_filters(text, filters)
if rust_text is not None:
return rust_text
if not isinstance(text, str):
text = str(text)
for filter_item in filters:
@@ -739,6 +756,16 @@ class SiteSpider:
# 清空旧结果
self.torrents_info_array = []
rust_torrents = rust_accel.parse_indexer_torrents(
html_text=html_text,
domain=self.domain,
list_config=self.list,
fields=self.fields,
category=self.category,
result_num=int(self.result_num),
)
if rust_torrents is not None:
return rust_torrents
html_doc = None
try:
# 解析站点文本对象

206
app/utils/rust_accel.py Normal file
View File

@@ -0,0 +1,206 @@
from typing import Any, Dict, List, Optional, Tuple
from app.log import logger
from app.schemas.types import MediaType
# Optional native extension: every wrapper in this module checks
# `_moviepilot_rust` and returns a "not handled" value when it is None.
try:
    import moviepilot_rust as _moviepilot_rust
except Exception as err:  # pragma: no cover - depends on whether the Rust extension is installed
    _moviepilot_rust = None
    # Keep the failure so import_error() can report why the import failed.
    _import_error = err
else:
    _import_error = None
def is_available() -> bool:
    """
    Report whether the Rust extension is loaded and declares itself usable.

    :return: True only when the extension module was imported and its own
             is_available() returns a truthy value; False when the module is
             missing or the probe itself fails
    """
    if not _moviepilot_rust:
        return False
    try:
        return bool(_moviepilot_rust.is_available())
    except (KeyboardInterrupt, SystemExit):
        # User interrupts and interpreter shutdown must keep propagating.
        raise
    except BaseException:
        # A panic or a missing symbol in the extension means "not usable";
        # an availability probe should answer False rather than crash, which
        # matches how the other wrappers in this module degrade.
        return False
def import_error() -> Optional[Exception]:
    """
    Expose the exception captured while importing the Rust extension.

    :return: the stored import exception, or None when the import succeeded;
             intended for diagnosing build/installation problems
    """
    return _import_error
def is_anime(name: str) -> Optional[bool]:
    """
    Ask the Rust fast path whether a title looks like an anime release.

    :param name: title to inspect; a falsy value is sent as ""
    :return: the Rust verdict coerced to bool, or None when the extension is
             not loaded or the call failed (caller should fall back to Python)
    """
    verdict = None
    if _moviepilot_rust:
        try:
            verdict = bool(_moviepilot_rust.is_anime_fast(name or ""))
        except BaseException as err:
            # Re-raises interrupts/exit; anything else is logged and ignored.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 动漫识别失败,回退 Python{err}")
    return verdict
def find_metainfo(title: str) -> Optional[Tuple[str, Dict[str, Any]]]:
    """
    Extract media tags embedded in a title through the Rust fast path.

    :param title: raw title; a falsy value is sent as ""
    :return: (title returned by Rust, metainfo dict) on success, where the
             metainfo dict carries tmdbid, doubanid, type and the
             begin/end/total season and episode fields; None when the
             extension is not loaded, the call failed, or it did not return
             a dict (caller should fall back to Python)
    """
    if not _moviepilot_rust:
        return None
    try:
        result = _moviepilot_rust.find_metainfo_fast(title or "")
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 内嵌媒体标签识别失败,回退 Python{err}")
        return None
    if not isinstance(result, dict):
        # The field extraction below runs outside the try block, so an
        # unexpected payload (e.g. None) must lead to the Python fallback
        # instead of an AttributeError escaping to the caller.
        return None

    # Plain values are copied unchanged.
    metainfo = {
        key: result.get(key)
        for key in (
            "tmdbid",
            "doubanid",
            "begin_season",
            "end_season",
            "total_season",
            "begin_episode",
            "end_episode",
            "total_episode",
        )
    }
    # The type arrives as a string and is mapped to the MediaType enum.
    metainfo["type"] = _coerce_media_type(result.get("type"))
    return result.get("title"), metainfo
def parse_video_title(
    title: str,
    isfile: bool = False,
    media_exts: Optional[List[str]] = None,
) -> Optional[Dict[str, Any]]:
    """
    Run the main video-title recognition in Rust.

    :param title: title to parse; a falsy value is sent as ""
    :param isfile: passed through to the extension unchanged
    :param media_exts: media file extensions; None/empty is sent as []
    :return: whatever the extension returns, or None when the extension is
             not loaded or the call failed (caller should fall back to Python)
    """
    if not _moviepilot_rust:
        return None
    try:
        parsed = _moviepilot_rust.parse_video_title_fast(title or "", isfile, media_exts or [])
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 影视标题主识别失败,回退 Python{err}")
        return None
    else:
        return parsed
def parse_filter_rule(expression: str) -> Optional[list]:
    """
    Parse a filter-rule expression with the Rust extension.

    :param expression: rule expression, passed through unchanged
    :return: the extension's result, or None when the extension is not
             loaded or the call failed (caller should fall back to Python)
    """
    parsed = None
    if _moviepilot_rust:
        try:
            parsed = _moviepilot_rust.parse_filter_rule_fast(expression)
        except BaseException as err:
            # Re-raises interrupts/exit; anything else is logged and ignored.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 过滤规则解析失败,回退 Python{err}")
    return parsed
def filter_torrents(
    rule_set: Dict[str, dict],
    rule_strings: List[str],
    torrents: List[dict],
    media_info: Optional[dict] = None,
) -> Optional[list]:
    """
    Filter a batch of torrents in a single Rust call.

    All four arguments are forwarded positionally and unchanged.

    :param rule_set: rule definitions
    :param rule_strings: rule expressions to apply
    :param torrents: torrent payload dicts
    :param media_info: optional media payload dict
    :return: the extension's result, or None when the extension is not
             loaded or the call failed (caller should fall back to Python)
    """
    if not _moviepilot_rust:
        return None
    try:
        outcome = _moviepilot_rust.filter_torrents_fast(rule_set, rule_strings, torrents, media_info)
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 种子过滤失败,回退 Python{err}")
        return None
    else:
        return outcome
def apply_indexer_text_filters(text: Any, filters: Optional[List[dict]]) -> Optional[str]:
    """
    Apply indexer text filters through the Rust extension.

    :param text: value to filter; None is sent as None, anything else is
                 converted with str()
    :param filters: filter definitions; must be a non-empty list
    :return: the extension's result, or None when the extension is not
             loaded, filters is empty or not a list, or the call failed
             (caller should fall back to Python)
    """
    if not (_moviepilot_rust and filters and isinstance(filters, list)):
        return None
    try:
        text_arg = None if text is None else str(text)
        return _moviepilot_rust.apply_indexer_text_filters_fast(text_arg, filters)
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 站点文本过滤失败,回退 Python{err}")
        return None
def parse_filesize(text: Any) -> Optional[int]:
    """
    Convert a file-size text to bytes with the Rust extension.

    :param text: size text, passed through unchanged
    :return: the extension's result converted with int(), or None when the
             extension is not loaded or the call/conversion failed (caller
             should fall back to Python)
    """
    size_in_bytes = None
    if _moviepilot_rust:
        try:
            size_in_bytes = int(_moviepilot_rust.parse_filesize_fast(text))
        except BaseException as err:
            # Re-raises interrupts/exit; anything else is logged and ignored.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 文件大小解析失败,回退 Python{err}")
    return size_in_bytes
def build_indexer_search_url(config: dict) -> Optional[str]:
    """
    Build an indexer search URL from a plain config dict using Rust.

    :param config: indexer/search configuration, passed through unchanged
    :return: the extension's result, or None when the extension is not
             loaded or the call failed (caller should fall back to Python)
    """
    if not _moviepilot_rust:
        return None
    try:
        search_url = _moviepilot_rust.build_indexer_search_url_fast(config)
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 站点搜索 URL 生成失败,回退 Python{err}")
        return None
    else:
        return search_url
def parse_indexer_torrents(
    html_text: str,
    domain: str,
    list_config: dict,
    fields: dict,
    category: Optional[dict],
    result_num: int,
) -> Optional[List[dict]]:
    """
    Parse an indexer result page into torrent dicts using Rust.

    Falsy arguments are normalised before the call: html_text and domain to
    "", list_config and fields to {}, result_num to 0; category is passed
    through unchanged.

    :param html_text: page HTML
    :param domain: site domain
    :param list_config: list-selector configuration
    :param fields: field-selector configuration
    :param category: optional category configuration
    :param result_num: result limit, converted with int()
    :return: the extension's result, or None when the extension is not
             loaded or the call failed (caller should fall back to Python)
    """
    if not _moviepilot_rust:
        return None
    try:
        fast_parse = _moviepilot_rust.parse_indexer_torrents_fast
        call_args = (
            html_text or "",
            domain or "",
            list_config or {},
            fields or {},
            category,
            int(result_num or 0),
        )
        return fast_parse(*call_args)
    except BaseException as err:
        # Re-raises interrupts/exit; anything else is logged and ignored.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 站点页面解析失败,回退 Python{err}")
        return None
def _coerce_media_type(value: Optional[str]) -> Optional[MediaType]:
"""
将 Rust 返回的媒体类型字符串转换为系统 MediaType。
"""
if value == "movies":
return MediaType.MOVIE
if value == "tv":
return MediaType.TV
return None
def _raise_non_rust_panic(err: BaseException) -> None:
    """
    Let user interrupts and interpreter exit escape a broad except block.

    :param err: exception caught around a call into the Rust extension
    :raises KeyboardInterrupt, SystemExit: re-raised when err is one of
            these; every other exception makes this function return normally
            so the caller can log it and fall back to Python
    """
    must_propagate = (KeyboardInterrupt, SystemExit)
    if not isinstance(err, must_propagate):
        return
    raise err