perf(search): 按站点并行过滤搜索结果

2026-06-23 08:34:28 +08:00 · 2026-05-05 09:01:18 +08:00
parent 39f9550f86
commit 924d54dfd3
2 changed files with 90 additions and 25 deletions
--- a/app/chain/search.py
+++ b/app/chain/search.py
@@ -592,6 +592,66 @@ class SearchChain(ChainBase):
                                        torrent_list=torrent_list,
                                        mediainfo=mediainfo) or []

+        def __do_site_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
+            """
+            Run the filter pipeline for a single site's torrents: extra params first, then rule groups.
+            """
+            if not torrent_list:
+                return []
+
+            filtered_torrents = torrent_list
+            if filter_params:  # free variable taken from the enclosing scope
+                torrenthelper = TorrentHelper()  # one helper per call, reused for every torrent
+                filtered_torrents = [
+                    torrent for torrent in filtered_torrents
+                    if torrenthelper.filter_torrent(torrent, filter_params)
+                ]
+
+            if rule_groups and filtered_torrents:  # skip the rule-group pass when nothing survived
+                filtered_torrents = __do_filter(filtered_torrents)
+
+            return filtered_torrents
+
+        def __do_parallel_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
+            """
+            Filter torrents concurrently, one task per site, keeping each site's internal order unchanged.
+            """
+            if not torrent_list or (not filter_params and not rule_groups):  # nothing to filter, or no filter set
+                return torrent_list
+
+            site_torrents: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {}  # grouped per site
+            for torrent in torrent_list:
+                site_key = (torrent.site, torrent.site_name)  # group key: the (site, site_name) pair
+                if site_key not in site_torrents:
+                    site_torrents[site_key] = []
+                site_torrents[site_key].append(torrent)
+
+            if len(site_torrents) <= 1:  # a single group: filter inline, no thread pool
+                return __do_site_filter(torrent_list)
+
+            finished_count = 0
+            filtered_by_site: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {}
+            max_workers = min(len(site_torrents), settings.CONF.threadpool or len(site_torrents))  # capped at group count
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                all_tasks = {  # maps each submitted future back to its site key
+                    executor.submit(__do_site_filter, site_torrent_list): site_key
+                    for site_key, site_torrent_list in site_torrents.items()
+                }
+                for future in as_completed(all_tasks):  # results are consumed in completion order
+                    finished_count += 1
+                    filtered_by_site[all_tasks[future]] = future.result() or []  # falsy result stored as []
+                    progress.update(
+                        value=finished_count / len(site_torrents) * 50,  # equals 50 once every site is done
+                        text=f'正在过滤，已完成 {finished_count} / {len(site_torrents)} 个站点 ...'
+                    )
+
+            filtered_ids = {  # identities (id()) of every torrent that survived filtering
+                id(torrent)
+                for filtered_torrents in filtered_by_site.values()
+                for torrent in filtered_torrents
+            }
+            return [torrent for torrent in torrent_list if id(torrent) in filtered_ids]  # NOTE(review): input order kept; needs filters to return the same objects
+
        if not torrents:
            logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
            return []
@@ -605,14 +665,14 @@ class SearchChain(ChainBase):
        # 匹配订阅附加参数
        if filter_params:
            logger.info(f'开始附加参数过滤，附加参数：{filter_params} ...')
-            torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)]
        # 开始过滤规则过滤
        if rule_groups is None:
            # 取搜索过滤规则
            rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups)
        if rule_groups:
            logger.info(f'开始过滤规则/剧集过滤，使用规则组：{rule_groups} ...')
-            torrents = __do_filter(torrents)
+        torrents = __do_parallel_filter(torrents)
+        if rule_groups:
            if not torrents:
                logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
                return []
--- a/app/modules/filter/init.py
+++ b/app/modules/filter/init.py
@@ -15,10 +15,6 @@ from app.utils.string import StringUtils

 class FilterModule(_ModuleBase):
    CONFIG_WATCH = {SystemConfigKey.CustomFilterRules.value}
-    # 规则解析器
-    parser: RuleParser = None
-    # 媒体信息
-    media: MediaInfo = None

    # 保留一份只读内置规则定义，方便查询工具准确区分“内置规则”和“自定义规则”。
    builtin_rule_set: Dict[str, dict] = deepcopy(BUILTIN_RULE_SET)
@@ -30,7 +26,6 @@ class FilterModule(_ModuleBase):
        self.rulehelper = RuleHelper()

    def init_module(self) -> None:
-        self.parser = RuleParser()
        # 每次重载都先恢复为纯内置规则，避免旧的自定义规则残留在内存里。
        self.rule_set = deepcopy(self.builtin_rule_set)
        self.__init_custom_rules()
@@ -90,7 +85,7 @@ class FilterModule(_ModuleBase):
        """
        if not rule_groups:
            return torrent_list
-        self.media = mediainfo
+        parser = RuleParser()
        # 查询规则表详情
        groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups)
        if groups:
@@ -99,12 +94,16 @@ class FilterModule(_ModuleBase):
                torrent_list = self.__filter_torrents(
                    rule_string=group.rule_string,
                    rule_name=group.name,
-                    torrent_list=torrent_list
+                    torrent_list=torrent_list,
+                    mediainfo=mediainfo,
+                    parser=parser,
                )
        return torrent_list

    def __filter_torrents(self, rule_string: str, rule_name: str,
-                          torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
+                          torrent_list: List[TorrentInfo],
+                          mediainfo: MediaInfo,
+                          parser: RuleParser) -> List[TorrentInfo]:
        """
        过滤种子
        """
@@ -112,7 +111,7 @@ class FilterModule(_ModuleBase):
        ret_torrents = []
        for torrent in torrent_list:
            # 能命中优先级的才返回
-            if not self.__get_order(torrent, rule_string):
+            if not self.__get_order(torrent, rule_string, mediainfo, parser):
                logger.debug(f"种子 {torrent.site_name} - {torrent.title} {torrent.description or ''} "
                             f"不匹配 {rule_name} 过滤规则")
                continue
@@ -120,7 +119,8 @@ class FilterModule(_ModuleBase):

        return ret_torrents

-    def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentInfo]:
+    def __get_order(self, torrent: TorrentInfo, rule_str: str,
+                    mediainfo: MediaInfo, parser: RuleParser) -> Optional[TorrentInfo]:
        """
        获取种子匹配的规则优先级，值越大越优先，未匹配时返回None
        """
@@ -133,8 +133,8 @@ class FilterModule(_ModuleBase):

        for rule_group in rule_groups:
            # 解析规则组
-            parsed_group = self.parser.parse(rule_group.strip())
-            if self.__match_group(torrent, parsed_group.as_list()[0]):
+            parsed_group = parser.parse(rule_group.strip())
+            if self.__match_group(torrent, parsed_group.as_list()[0], mediainfo):
                # 出现匹配时中断
                matched = True
                logger.debug(f"种子 {torrent.site_name} - {torrent.title} 优先级为 {100 - res_order + 1}")
@@ -145,27 +145,31 @@ class FilterModule(_ModuleBase):

        return None if not matched else torrent

-    def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str]) -> Optional[bool]:
+    def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str],
+                      mediainfo: MediaInfo) -> Optional[bool]:
        """
        判断种子是否匹配规则组
        """
        if not isinstance(rule_group, list):
            # 不是列表，说明是规则名称
-            return self.__match_rule(torrent, rule_group)
+            return self.__match_rule(torrent, rule_group, mediainfo)
        elif isinstance(rule_group, list) and len(rule_group) == 1:
            # 只有一个规则项
-            return self.__match_group(torrent, rule_group[0])
+            return self.__match_group(torrent, rule_group[0], mediainfo)
        elif rule_group[0] == "not":
            # 非操作
-            return not self.__match_group(torrent, rule_group[1:])
+            return not self.__match_group(torrent, rule_group[1:], mediainfo)
        elif rule_group[1] == "and":
            # 与操作
-            return self.__match_group(torrent, rule_group[0]) and self.__match_group(torrent, rule_group[2:])
+            return self.__match_group(torrent, rule_group[0], mediainfo) \
+                and self.__match_group(torrent, rule_group[2:], mediainfo)
        elif rule_group[1] == "or":
            # 或操作
-            return self.__match_group(torrent, rule_group[0]) or self.__match_group(torrent, rule_group[2:])
+            return self.__match_group(torrent, rule_group[0], mediainfo) \
+                or self.__match_group(torrent, rule_group[2:], mediainfo)

-    def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool:
+    def __match_rule(self, torrent: TorrentInfo, rule_name: str,
+                     mediainfo: MediaInfo) -> bool:
        """
        判断种子是否匹配规则项
        """
@@ -176,7 +180,7 @@ class FilterModule(_ModuleBase):
        # TMDB规则
        tmdb = self.rule_set[rule_name].get("tmdb")
        # 符合TMDB规则的直接返回True，即不过滤
-        if tmdb and self.__match_tmdb(tmdb):
+        if tmdb and self.__match_tmdb(tmdb, mediainfo):
            logger.debug(f"种子 {torrent.site_name} - {torrent.title} 符合 {rule_name} 的TMDB规则，匹配成功")
            return True
        # 匹配项：标题、副标题、标签
@@ -259,18 +263,19 @@ class FilterModule(_ModuleBase):

        return True

-    def __match_tmdb(self, tmdb: dict) -> bool:
+    @staticmethod
+    def __match_tmdb(tmdb: dict, mediainfo: MediaInfo) -> bool:
        """
        判断种子是否匹配TMDB规则
        """

        def __get_media_value(key: str):
            try:
-                return getattr(self.media, key)
+                return getattr(mediainfo, key)
            except ValueError:
                return ""

-        if not self.media:
+        if not mediainfo:
            return False

        for attr, value in tmdb.items():