mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-07 05:32:41 +08:00
perf(search): 按站点并行过滤搜索结果
This commit is contained in:
@@ -592,6 +592,66 @@ class SearchChain(ChainBase):
|
||||
torrent_list=torrent_list,
|
||||
mediainfo=mediainfo) or []
|
||||
|
||||
def __do_site_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
    """
    Run the filtering pipeline over the torrents of a single site.

    The extra subscription parameters (``filter_params``) are applied first;
    the rule-group filter (``__do_filter``) then runs on whatever is left.
    Both ``filter_params`` and ``rule_groups`` are read from the enclosing
    scope, and each stage is skipped when its setting is empty.

    :param torrent_list: torrents to filter
    :return: the torrents that passed every enabled stage; an empty list
             when the input is empty
    """
    if not torrent_list:
        return []

    survivors = torrent_list

    # Stage 1: extra-parameter filter, using one helper for the whole batch.
    if filter_params:
        helper = TorrentHelper()
        survivors = [
            item for item in survivors
            if helper.filter_torrent(item, filter_params)
        ]

    # Stage 2: rule-group filter, only when something is left to check.
    if rule_groups and survivors:
        return __do_filter(survivors)

    return survivors
|
||||
|
||||
def __do_parallel_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
    """
    Filter torrents concurrently, one ``__do_site_filter`` task per site.

    The input is returned untouched when it is empty or when neither
    ``filter_params`` nor ``rule_groups`` (both read from the enclosing
    scope) is set. Torrents are grouped by ``(site, site_name)``; with at
    most one group the list is filtered directly on the calling thread.
    Otherwise every group is submitted to a thread pool and ``progress`` is
    updated after each site finishes, on a 0-50 scale.

    :param torrent_list: torrents to filter
    :return: for the multi-site path, the torrents of ``torrent_list`` that
             survived filtering, in their original input order
    """
    if not torrent_list:
        return torrent_list
    if not (filter_params or rule_groups):
        return torrent_list

    # Bucket the torrents per site, keyed by (site id, site name).
    buckets: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {}
    for item in torrent_list:
        buckets.setdefault((item.site, item.site_name), []).append(item)

    site_total = len(buckets)
    if site_total <= 1:
        # Only one site: filter it directly without a thread pool.
        return __do_site_filter(torrent_list)

    # Never start more workers than sites; fall back to one worker per site
    # when the configured pool size is empty or zero.
    pool_size = min(site_total, settings.CONF.threadpool or site_total)

    # Survivors are tracked by object identity, so the final pass can keep
    # the input order no matter which site finishes first.
    kept_ids = set()
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        pending = [
            pool.submit(__do_site_filter, batch)
            for batch in buckets.values()
        ]
        for done_count, task in enumerate(as_completed(pending), start=1):
            kept_ids.update(id(item) for item in task.result() or [])
            progress.update(
                value=done_count / site_total * 50,
                text=f'正在过滤,已完成 {done_count} / {site_total} 个站点 ...'
            )

    return [item for item in torrent_list if id(item) in kept_ids]
|
||||
|
||||
if not torrents:
|
||||
logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
|
||||
return []
|
||||
@@ -605,14 +665,14 @@ class SearchChain(ChainBase):
|
||||
# 匹配订阅附加参数
|
||||
if filter_params:
|
||||
logger.info(f'开始附加参数过滤,附加参数:{filter_params} ...')
|
||||
torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)]
|
||||
# 开始过滤规则过滤
|
||||
if rule_groups is None:
|
||||
# 取搜索过滤规则
|
||||
rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups)
|
||||
if rule_groups:
|
||||
logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...')
|
||||
torrents = __do_filter(torrents)
|
||||
torrents = __do_parallel_filter(torrents)
|
||||
if rule_groups:
|
||||
if not torrents:
|
||||
logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
|
||||
return []
|
||||
|
||||
@@ -15,10 +15,6 @@ from app.utils.string import StringUtils
|
||||
|
||||
class FilterModule(_ModuleBase):
|
||||
CONFIG_WATCH = {SystemConfigKey.CustomFilterRules.value}
|
||||
# 规则解析器
|
||||
parser: RuleParser = None
|
||||
# 媒体信息
|
||||
media: MediaInfo = None
|
||||
|
||||
# 保留一份只读内置规则定义,方便查询工具准确区分“内置规则”和“自定义规则”。
|
||||
builtin_rule_set: Dict[str, dict] = deepcopy(BUILTIN_RULE_SET)
|
||||
@@ -30,7 +26,6 @@ class FilterModule(_ModuleBase):
|
||||
self.rulehelper = RuleHelper()
|
||||
|
||||
def init_module(self) -> None:
|
||||
self.parser = RuleParser()
|
||||
# 每次重载都先恢复为纯内置规则,避免旧的自定义规则残留在内存里。
|
||||
self.rule_set = deepcopy(self.builtin_rule_set)
|
||||
self.__init_custom_rules()
|
||||
@@ -90,7 +85,7 @@ class FilterModule(_ModuleBase):
|
||||
"""
|
||||
if not rule_groups:
|
||||
return torrent_list
|
||||
self.media = mediainfo
|
||||
parser = RuleParser()
|
||||
# 查询规则表详情
|
||||
groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups)
|
||||
if groups:
|
||||
@@ -99,12 +94,16 @@ class FilterModule(_ModuleBase):
|
||||
torrent_list = self.__filter_torrents(
|
||||
rule_string=group.rule_string,
|
||||
rule_name=group.name,
|
||||
torrent_list=torrent_list
|
||||
torrent_list=torrent_list,
|
||||
mediainfo=mediainfo,
|
||||
parser=parser,
|
||||
)
|
||||
return torrent_list
|
||||
|
||||
def __filter_torrents(self, rule_string: str, rule_name: str,
|
||||
torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
|
||||
torrent_list: List[TorrentInfo],
|
||||
mediainfo: MediaInfo,
|
||||
parser: RuleParser) -> List[TorrentInfo]:
|
||||
"""
|
||||
过滤种子
|
||||
"""
|
||||
@@ -112,7 +111,7 @@ class FilterModule(_ModuleBase):
|
||||
ret_torrents = []
|
||||
for torrent in torrent_list:
|
||||
# 能命中优先级的才返回
|
||||
if not self.__get_order(torrent, rule_string):
|
||||
if not self.__get_order(torrent, rule_string, mediainfo, parser):
|
||||
logger.debug(f"种子 {torrent.site_name} - {torrent.title} {torrent.description or ''} "
|
||||
f"不匹配 {rule_name} 过滤规则")
|
||||
continue
|
||||
@@ -120,7 +119,8 @@ class FilterModule(_ModuleBase):
|
||||
|
||||
return ret_torrents
|
||||
|
||||
def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentInfo]:
|
||||
def __get_order(self, torrent: TorrentInfo, rule_str: str,
|
||||
mediainfo: MediaInfo, parser: RuleParser) -> Optional[TorrentInfo]:
|
||||
"""
|
||||
获取种子匹配的规则优先级,值越大越优先,未匹配时返回None
|
||||
"""
|
||||
@@ -133,8 +133,8 @@ class FilterModule(_ModuleBase):
|
||||
|
||||
for rule_group in rule_groups:
|
||||
# 解析规则组
|
||||
parsed_group = self.parser.parse(rule_group.strip())
|
||||
if self.__match_group(torrent, parsed_group.as_list()[0]):
|
||||
parsed_group = parser.parse(rule_group.strip())
|
||||
if self.__match_group(torrent, parsed_group.as_list()[0], mediainfo):
|
||||
# 出现匹配时中断
|
||||
matched = True
|
||||
logger.debug(f"种子 {torrent.site_name} - {torrent.title} 优先级为 {100 - res_order + 1}")
|
||||
@@ -145,27 +145,31 @@ class FilterModule(_ModuleBase):
|
||||
|
||||
return None if not matched else torrent
|
||||
|
||||
def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str]) -> Optional[bool]:
|
||||
def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str],
|
||||
mediainfo: MediaInfo) -> Optional[bool]:
|
||||
"""
|
||||
判断种子是否匹配规则组
|
||||
"""
|
||||
if not isinstance(rule_group, list):
|
||||
# 不是列表,说明是规则名称
|
||||
return self.__match_rule(torrent, rule_group)
|
||||
return self.__match_rule(torrent, rule_group, mediainfo)
|
||||
elif isinstance(rule_group, list) and len(rule_group) == 1:
|
||||
# 只有一个规则项
|
||||
return self.__match_group(torrent, rule_group[0])
|
||||
return self.__match_group(torrent, rule_group[0], mediainfo)
|
||||
elif rule_group[0] == "not":
|
||||
# 非操作
|
||||
return not self.__match_group(torrent, rule_group[1:])
|
||||
return not self.__match_group(torrent, rule_group[1:], mediainfo)
|
||||
elif rule_group[1] == "and":
|
||||
# 与操作
|
||||
return self.__match_group(torrent, rule_group[0]) and self.__match_group(torrent, rule_group[2:])
|
||||
return self.__match_group(torrent, rule_group[0], mediainfo) \
|
||||
and self.__match_group(torrent, rule_group[2:], mediainfo)
|
||||
elif rule_group[1] == "or":
|
||||
# 或操作
|
||||
return self.__match_group(torrent, rule_group[0]) or self.__match_group(torrent, rule_group[2:])
|
||||
return self.__match_group(torrent, rule_group[0], mediainfo) \
|
||||
or self.__match_group(torrent, rule_group[2:], mediainfo)
|
||||
|
||||
def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool:
|
||||
def __match_rule(self, torrent: TorrentInfo, rule_name: str,
|
||||
mediainfo: MediaInfo) -> bool:
|
||||
"""
|
||||
判断种子是否匹配规则项
|
||||
"""
|
||||
@@ -176,7 +180,7 @@ class FilterModule(_ModuleBase):
|
||||
# TMDB规则
|
||||
tmdb = self.rule_set[rule_name].get("tmdb")
|
||||
# 符合TMDB规则的直接返回True,即不过滤
|
||||
if tmdb and self.__match_tmdb(tmdb):
|
||||
if tmdb and self.__match_tmdb(tmdb, mediainfo):
|
||||
logger.debug(f"种子 {torrent.site_name} - {torrent.title} 符合 {rule_name} 的TMDB规则,匹配成功")
|
||||
return True
|
||||
# 匹配项:标题、副标题、标签
|
||||
@@ -259,18 +263,19 @@ class FilterModule(_ModuleBase):
|
||||
|
||||
return True
|
||||
|
||||
def __match_tmdb(self, tmdb: dict) -> bool:
|
||||
@staticmethod
|
||||
def __match_tmdb(tmdb: dict, mediainfo: MediaInfo) -> bool:
|
||||
"""
|
||||
判断种子是否匹配TMDB规则
|
||||
"""
|
||||
|
||||
def __get_media_value(key: str):
|
||||
try:
|
||||
return getattr(self.media, key)
|
||||
return getattr(mediainfo, key)
|
||||
except ValueError:
|
||||
return ""
|
||||
|
||||
if not self.media:
|
||||
if not mediainfo:
|
||||
return False
|
||||
|
||||
for attr, value in tmdb.items():
|
||||
|
||||
Reference in New Issue
Block a user