feat：协程搜索 part2

2026-07-02 13:21:35 +08:00 · 2025-07-31 21:39:36 +08:00
parent 3fc2c7d6cc
commit 039d876e3f
1 changed files with 104 additions and 192 deletions
--- a/app/chain/search.py
+++ b/app/chain/search.py
@@ -3,7 +3,7 @@ import pickle
 import traceback
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
-from typing import Dict
+from typing import Dict, Tuple
 from typing import List, Optional

 from app.chain import ChainBase
@@ -168,50 +168,14 @@ class SearchChain(ChainBase):
            await self.async_save_cache(pickle.dumps(contexts), self.__result_temp_file)
        return contexts

-    def process(self, mediainfo: MediaInfo,
-                keyword: Optional[str] = None,
-                no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
-                sites: List[int] = None,
-                rule_groups: List[str] = None,
-                area: Optional[str] = "title",
-                custom_words: List[str] = None,
-                filter_params: Dict[str, str] = None) -> List[Context]:
+    @staticmethod
+    def __prepare_params(mediainfo: MediaInfo,
+                         keyword: Optional[str] = None,
+                         no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None
+                         ) -> Tuple[Dict[int, List[int]], List[str]]:
        """
-        根据媒体信息搜索种子资源，精确匹配，应用过滤规则，同时根据no_exists过滤本地已存在的资源
-        :param mediainfo: 媒体信息
-        :param keyword: 搜索关键词
-        :param no_exists: 缺失的媒体信息
-        :param sites: 站点ID列表，为空时搜索所有站点
-        :param rule_groups: 过滤规则组名称列表
-        :param area: 搜索范围，title or imdbid
-        :param custom_words: 自定义识别词列表
-        :param filter_params: 过滤参数
+        准备搜索参数
        """
-
-        def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
-            """
-            执行优先级过滤
-            """
-            return self.filter_torrents(rule_groups=rule_groups,
-                                        torrent_list=torrent_list,
-                                        mediainfo=mediainfo) or []
-
-        # 豆瓣标题处理
-        if not mediainfo.tmdb_id:
-            meta = MetaInfo(title=mediainfo.title)
-            mediainfo.title = meta.name
-            mediainfo.season = meta.begin_season
-        logger.info(f'开始搜索资源，关键词：{keyword or mediainfo.title} ...')
-
-        # 补充媒体信息
-        if not mediainfo.names:
-            mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type,
-                                                        tmdbid=mediainfo.tmdb_id,
-                                                        doubanid=mediainfo.douban_id)
-            if not mediainfo:
-                logger.error(f'媒体信息识别失败！')
-                return []
-
        # 缺失的季集
        mediakey = mediainfo.tmdb_id or mediainfo.douban_id
        if no_exists and no_exists.get(mediakey):
@@ -235,14 +199,27 @@ class SearchChain(ChainBase):
                                                       mediainfo.hk_title,
                                                       mediainfo.tw_title,
                                                       mediainfo.sg_title] if k]))
+        return season_episodes, keywords
+
+    def __parse_result(self, torrents: List[TorrentInfo],
+                       mediainfo: MediaInfo,
+                       keyword: Optional[str] = None,
+                       rule_groups: List[str] = None,
+                       season_episodes: Dict[int, List[int]] = None,
+                       custom_words: List[str] = None,
+                       filter_params: Dict[str, str] = None) -> List[Context]:
+        """
+        处理搜索结果
+        """
+
+        def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
+            """
+            执行优先级过滤
+            """
+            return self.filter_torrents(rule_groups=rule_groups,
+                                        torrent_list=torrent_list,
+                                        mediainfo=mediainfo) or []

-        # 执行搜索
-        torrents: List[TorrentInfo] = self.__search_all_sites(
-            mediainfo=mediainfo,
-            keywords=keywords,
-            sites=sites,
-            area=area
-        )
        if not torrents:
            logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
            return []
@@ -355,6 +332,67 @@ class SearchChain(ChainBase):
        # 返回
        return contexts

+    def process(self, mediainfo: MediaInfo,
+                keyword: Optional[str] = None,
+                no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
+                sites: List[int] = None,
+                rule_groups: List[str] = None,
+                area: Optional[str] = "title",
+                custom_words: List[str] = None,
+                filter_params: Dict[str, str] = None) -> List[Context]:
+        """
+        根据媒体信息搜索种子资源，精确匹配，应用过滤规则，同时根据no_exists过滤本地已存在的资源
+        :param mediainfo: 媒体信息
+        :param keyword: 搜索关键词
+        :param no_exists: 缺失的媒体信息
+        :param sites: 站点ID列表，为空时搜索所有站点
+        :param rule_groups: 过滤规则组名称列表
+        :param area: 搜索范围，title or imdbid
+        :param custom_words: 自定义识别词列表
+        :param filter_params: 过滤参数
+        """
+
+        # 豆瓣标题处理
+        if not mediainfo.tmdb_id:
+            meta = MetaInfo(title=mediainfo.title)
+            mediainfo.title = meta.name
+            mediainfo.season = meta.begin_season
+        logger.info(f'开始搜索资源，关键词：{keyword or mediainfo.title} ...')
+
+        # 补充媒体信息
+        if not mediainfo.names:
+            mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type,
+                                                        tmdbid=mediainfo.tmdb_id,
+                                                        doubanid=mediainfo.douban_id)
+            if not mediainfo:
+                logger.error(f'媒体信息识别失败！')
+                return []
+
+        # 准备搜索参数
+        season_episodes, keywords = self.__prepare_params(
+            mediainfo=mediainfo,
+            keyword=keyword,
+            no_exists=no_exists
+        )
+
+        # 执行搜索
+        torrents: List[TorrentInfo] = self.__search_all_sites(
+            mediainfo=mediainfo,
+            keywords=keywords,
+            sites=sites,
+            area=area
+        )
+        # 处理结果
+        return self.__parse_result(
+            torrents=torrents,
+            mediainfo=mediainfo,
+            keyword=keyword,
+            rule_groups=rule_groups,
+            season_episodes=season_episodes,
+            custom_words=custom_words,
+            filter_params=filter_params
+        )
+
    async def async_process(self, mediainfo: MediaInfo,
                            keyword: Optional[str] = None,
                            no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
@@ -375,14 +413,6 @@ class SearchChain(ChainBase):
        :param filter_params: 过滤参数
        """

-        def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
-            """
-            执行优先级过滤
-            """
-            return self.filter_torrents(rule_groups=rule_groups,
-                                        torrent_list=torrent_list,
-                                        mediainfo=mediainfo) or []
-
        # 豆瓣标题处理
        if not mediainfo.tmdb_id:
            meta = MetaInfo(title=mediainfo.title)
@@ -399,29 +429,12 @@ class SearchChain(ChainBase):
                logger.error(f'媒体信息识别失败！')
                return []

-        # 缺失的季集
-        mediakey = mediainfo.tmdb_id or mediainfo.douban_id
-        if no_exists and no_exists.get(mediakey):
-            # 过滤剧集
-            season_episodes = {sea: info.episodes
-                               for sea, info in no_exists[mediakey].items()}
-        elif mediainfo.season:
-            # 豆瓣只搜索当前季
-            season_episodes = {mediainfo.season: []}
-        else:
-            season_episodes = None
-
-        # 搜索关键词
-        if keyword:
-            keywords = [keyword]
-        else:
-            # 去重去空，但要保持顺序
-            keywords = list(dict.fromkeys([k for k in [mediainfo.title,
-                                                       mediainfo.original_title,
-                                                       mediainfo.en_title,
-                                                       mediainfo.hk_title,
-                                                       mediainfo.tw_title,
-                                                       mediainfo.sg_title] if k]))
+        # 准备搜索参数
+        season_episodes, keywords = self.__prepare_params(
+            mediainfo=mediainfo,
+            keyword=keyword,
+            no_exists=no_exists
+        )

        # 执行搜索
        torrents: List[TorrentInfo] = await self.__async_search_all_sites(
@@ -430,117 +443,16 @@ class SearchChain(ChainBase):
            sites=sites,
            area=area
        )
-        if not torrents:
-            logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
-            return []
-
-        # 开始新进度
-        progress = ProgressHelper()
-        progress.start(ProgressKey.Search)
-
-        # 开始过滤
-        progress.update(value=0, text=f'开始过滤，总 {len(torrents)} 个资源，请稍候...',
-                        key=ProgressKey.Search)
-        # 匹配订阅附加参数
-        if filter_params:
-            logger.info(f'开始附加参数过滤，附加参数：{filter_params} ...')
-            torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)]
-        # 开始过滤规则过滤
-        if rule_groups is None:
-            # 取搜索过滤规则
-            rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups)
-        if rule_groups:
-            logger.info(f'开始过滤规则/剧集过滤，使用规则组：{rule_groups} ...')
-            torrents = __do_filter(torrents)
-            if not torrents:
-                logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
-                return []
-            logger.info(f"过滤规则/剧集过滤完成，剩余 {len(torrents)} 个资源")
-
-        # 过滤完成
-        progress.update(value=50, text=f'过滤完成，剩余 {len(torrents)} 个资源', key=ProgressKey.Search)
-
-        # 总数
-        _total = len(torrents)
-        # 已处理数
-        _count = 0
-
-        # 开始匹配
-        _match_torrents = []
-        torrenthelper = TorrentHelper()
-        try:
-            # 英文标题应该在别名/原标题中，不需要再匹配
-            logger.info(f"开始匹配结果 标题：{mediainfo.title}，原标题：{mediainfo.original_title}，别名：{mediainfo.names}")
-            progress.update(value=51, text=f'开始匹配，总 {_total} 个资源 ...', key=ProgressKey.Search)
-            for torrent in torrents:
-                if global_vars.is_system_stopped:
-                    break
-                _count += 1
-                progress.update(value=(_count / _total) * 96,
-                                text=f'正在匹配 {torrent.site_name}，已完成 {_count} / {_total} ...',
-                                key=ProgressKey.Search)
-                if not torrent.title:
-                    continue
-
-                # 识别元数据
-                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description,
-                                        custom_words=custom_words)
-                if torrent.title != torrent_meta.org_string:
-                    logger.info(f"种子名称应用识别词后发生改变：{torrent.title} => {torrent_meta.org_string}")
-                # 季集数过滤
-                if season_episodes \
-                        and not torrenthelper.match_season_episodes(torrent=torrent,
-                                                                    meta=torrent_meta,
-                                                                    season_episodes=season_episodes):
-                    continue
-                # 比对IMDBID
-                if torrent.imdbid \
-                        and mediainfo.imdb_id \
-                        and torrent.imdbid == mediainfo.imdb_id:
-                    logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源：{torrent.site_name} - {torrent.title}')
-                    _match_torrents.append((torrent, torrent_meta))
-                    continue
-
-                # 比对种子
-                if torrenthelper.match_torrent(mediainfo=mediainfo,
-                                               torrent_meta=torrent_meta,
-                                               torrent=torrent):
-                    # 匹配成功
-                    _match_torrents.append((torrent, torrent_meta))
-                    continue
-            # 匹配完成
-            logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
-            progress.update(value=97,
-                            text=f'匹配完成，共匹配到 {len(_match_torrents)} 个资源',
-                            key=ProgressKey.Search)
-
-            # 去掉mediainfo中多余的数据
-            mediainfo.clear()
-            # 组装上下文
-            contexts = [Context(torrent_info=t[0],
-                                media_info=mediainfo,
-                                meta_info=t[1]) for t in _match_torrents]
-        finally:
-            torrents.clear()
-            del torrents
-            _match_torrents.clear()
-            del _match_torrents
-
-        # 排序
-        progress.update(value=99,
-                        text=f'正在对 {len(contexts)} 个资源进行排序，请稍候...',
-                        key=ProgressKey.Search)
-        contexts = torrenthelper.sort_torrents(contexts)
-
-        # 结束进度
-        logger.info(f'搜索完成，共 {len(contexts)} 个资源')
-        progress.update(value=100,
-                        text=f'搜索完成，共 {len(contexts)} 个资源',
-                        key=ProgressKey.Search)
-        progress.end(ProgressKey.Search)
-
-        # 返回
-        return contexts
+        # 处理结果
+        return self.__parse_result(
+            torrents=torrents,
+            mediainfo=mediainfo,
+            keyword=keyword,
+            rule_groups=rule_groups,
+            season_episodes=season_episodes,
+            custom_words=custom_words,
+            filter_params=filter_params
+        )

    def __search_all_sites(self, keywords: List[str],
                           mediainfo: Optional[MediaInfo] = None,