diff --git a/app/chain/search.py b/app/chain/search.py index e369091f..416d46cd 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -3,7 +3,7 @@ import pickle import traceback from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime -from typing import Dict +from typing import Dict, Tuple from typing import List, Optional from app.chain import ChainBase @@ -168,50 +168,14 @@ class SearchChain(ChainBase): await self.async_save_cache(pickle.dumps(contexts), self.__result_temp_file) return contexts - def process(self, mediainfo: MediaInfo, - keyword: Optional[str] = None, - no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None, - sites: List[int] = None, - rule_groups: List[str] = None, - area: Optional[str] = "title", - custom_words: List[str] = None, - filter_params: Dict[str, str] = None) -> List[Context]: + @staticmethod + def __prepare_params(mediainfo: MediaInfo, + keyword: Optional[str] = None, + no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None + ) -> Tuple[Dict[int, List[int]], List[str]]: """ - 根据媒体信息搜索种子资源,精确匹配,应用过滤规则,同时根据no_exists过滤本地已存在的资源 - :param mediainfo: 媒体信息 - :param keyword: 搜索关键词 - :param no_exists: 缺失的媒体信息 - :param sites: 站点ID列表,为空时搜索所有站点 - :param rule_groups: 过滤规则组名称列表 - :param area: 搜索范围,title or imdbid - :param custom_words: 自定义识别词列表 - :param filter_params: 过滤参数 + 准备搜索参数 """ - - def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: - """ - 执行优先级过滤 - """ - return self.filter_torrents(rule_groups=rule_groups, - torrent_list=torrent_list, - mediainfo=mediainfo) or [] - - # 豆瓣标题处理 - if not mediainfo.tmdb_id: - meta = MetaInfo(title=mediainfo.title) - mediainfo.title = meta.name - mediainfo.season = meta.begin_season - logger.info(f'开始搜索资源,关键词:{keyword or mediainfo.title} ...') - - # 补充媒体信息 - if not mediainfo.names: - mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type, - tmdbid=mediainfo.tmdb_id, - doubanid=mediainfo.douban_id) - if not mediainfo: - logger.error(f'媒体信息识别失败!') - return [] - # 缺失的季集 mediakey = mediainfo.tmdb_id or mediainfo.douban_id if no_exists and no_exists.get(mediakey): @@ -235,14 +199,27 @@ class SearchChain(ChainBase): mediainfo.hk_title, mediainfo.tw_title, mediainfo.sg_title] if k])) + return season_episodes, keywords + + def __parse_result(self, torrents: List[TorrentInfo], + mediainfo: MediaInfo, + keyword: Optional[str] = None, + rule_groups: List[str] = None, + season_episodes: Dict[int, List[int]] = None, + custom_words: List[str] = None, + filter_params: Dict[str, str] = None) -> List[Context]: + """ + 处理搜索结果 + """ + + def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: + """ + 执行优先级过滤 + """ + return self.filter_torrents(rule_groups=rule_groups, + torrent_list=torrent_list, + mediainfo=mediainfo) or [] - # 执行搜索 - torrents: List[TorrentInfo] = self.__search_all_sites( - mediainfo=mediainfo, - keywords=keywords, - sites=sites, - area=area - ) if not torrents: logger.warn(f'{keyword or mediainfo.title} 未搜索到资源') return [] @@ -355,6 +332,67 @@ class SearchChain(ChainBase): # 返回 return contexts + def process(self, mediainfo: MediaInfo, + keyword: Optional[str] = None, + no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None, + sites: List[int] = None, + rule_groups: List[str] = None, + area: Optional[str] = "title", + custom_words: List[str] = None, + filter_params: Dict[str, str] = None) -> List[Context]: + """ + 根据媒体信息搜索种子资源,精确匹配,应用过滤规则,同时根据no_exists过滤本地已存在的资源 + :param mediainfo: 媒体信息 + :param keyword: 搜索关键词 + :param no_exists: 缺失的媒体信息 + :param sites: 站点ID列表,为空时搜索所有站点 + :param rule_groups: 过滤规则组名称列表 + :param area: 搜索范围,title or imdbid + :param custom_words: 自定义识别词列表 + :param filter_params: 过滤参数 + """ + + # 豆瓣标题处理 + if not mediainfo.tmdb_id: + meta = MetaInfo(title=mediainfo.title) + mediainfo.title = meta.name + mediainfo.season = meta.begin_season + logger.info(f'开始搜索资源,关键词:{keyword or mediainfo.title} ...') + + # 补充媒体信息 + if not mediainfo.names: + mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type, + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) + if not mediainfo: + logger.error(f'媒体信息识别失败!') + return [] + + # 准备搜索参数 + season_episodes, keywords = self.__prepare_params( + mediainfo=mediainfo, + keyword=keyword, + no_exists=no_exists + ) + + # 执行搜索 + torrents: List[TorrentInfo] = self.__search_all_sites( + mediainfo=mediainfo, + keywords=keywords, + sites=sites, + area=area + ) + # 处理结果 + return self.__parse_result( + torrents=torrents, + mediainfo=mediainfo, + keyword=keyword, + rule_groups=rule_groups, + season_episodes=season_episodes, + custom_words=custom_words, + filter_params=filter_params + ) + async def async_process(self, mediainfo: MediaInfo, keyword: Optional[str] = None, no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None, @@ -375,14 +413,6 @@ class SearchChain(ChainBase): :param filter_params: 过滤参数 """ - def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: - """ - 执行优先级过滤 - """ - return self.filter_torrents(rule_groups=rule_groups, - torrent_list=torrent_list, - mediainfo=mediainfo) or [] - # 豆瓣标题处理 if not mediainfo.tmdb_id: meta = MetaInfo(title=mediainfo.title) @@ -399,29 +429,12 @@ class SearchChain(ChainBase): logger.error(f'媒体信息识别失败!') return [] - # 缺失的季集 - mediakey = mediainfo.tmdb_id or mediainfo.douban_id - if no_exists and no_exists.get(mediakey): - # 过滤剧集 - season_episodes = {sea: info.episodes - for sea, info in no_exists[mediakey].items()} - elif mediainfo.season: - # 豆瓣只搜索当前季 - season_episodes = {mediainfo.season: []} - else: - season_episodes = None - - # 搜索关键词 - if keyword: - keywords = [keyword] - else: - # 去重去空,但要保持顺序 - keywords = list(dict.fromkeys([k for k in [mediainfo.title, - mediainfo.original_title, - mediainfo.en_title, - mediainfo.hk_title, - mediainfo.tw_title, - mediainfo.sg_title] if k])) + # 准备搜索参数 + season_episodes, keywords = self.__prepare_params( + mediainfo=mediainfo, + keyword=keyword, + no_exists=no_exists + ) # 执行搜索 torrents: List[TorrentInfo] = await self.__async_search_all_sites( @@ -430,117 +443,16 @@ class SearchChain(ChainBase): sites=sites, area=area ) - if not torrents: - logger.warn(f'{keyword or mediainfo.title} 未搜索到资源') - return [] - - # 开始新进度 - progress = ProgressHelper() - progress.start(ProgressKey.Search) - - # 开始过滤 - progress.update(value=0, text=f'开始过滤,总 {len(torrents)} 个资源,请稍候...', - key=ProgressKey.Search) - # 匹配订阅附加参数 - if filter_params: - logger.info(f'开始附加参数过滤,附加参数:{filter_params} ...') - torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)] - # 开始过滤规则过滤 - if rule_groups is None: - # 取搜索过滤规则 - rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups) - if rule_groups: - logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...') - torrents = __do_filter(torrents) - if not torrents: - logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') - return [] - logger.info(f"过滤规则/剧集过滤完成,剩余 {len(torrents)} 个资源") - - # 过滤完成 - progress.update(value=50, text=f'过滤完成,剩余 {len(torrents)} 个资源', key=ProgressKey.Search) - - # 总数 - _total = len(torrents) - # 已处理数 - _count = 0 - - # 开始匹配 - _match_torrents = [] - torrenthelper = TorrentHelper() - try: - # 英文标题应该在别名/原标题中,不需要再匹配 - logger.info(f"开始匹配结果 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}") - progress.update(value=51, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search) - for torrent in torrents: - if global_vars.is_system_stopped: - break - _count += 1 - progress.update(value=(_count / _total) * 96, - text=f'正在匹配 {torrent.site_name},已完成 {_count} / {_total} ...', - key=ProgressKey.Search) - if not torrent.title: - continue - - # 识别元数据 - torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description, - custom_words=custom_words) - if torrent.title != torrent_meta.org_string: - logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}") - # 季集数过滤 - if season_episodes \ - and not torrenthelper.match_season_episodes(torrent=torrent, - meta=torrent_meta, - season_episodes=season_episodes): - continue - # 比对IMDBID - if torrent.imdbid \ - and mediainfo.imdb_id \ - and torrent.imdbid == mediainfo.imdb_id: - logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源:{torrent.site_name} - {torrent.title}') - _match_torrents.append((torrent, torrent_meta)) - continue - - # 比对种子 - if torrenthelper.match_torrent(mediainfo=mediainfo, - torrent_meta=torrent_meta, - torrent=torrent): - # 匹配成功 - _match_torrents.append((torrent, torrent_meta)) - continue - # 匹配完成 - logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") - progress.update(value=97, - text=f'匹配完成,共匹配到 {len(_match_torrents)} 个资源', - key=ProgressKey.Search) - - # 去掉mediainfo中多余的数据 - mediainfo.clear() - # 组装上下文 - contexts = [Context(torrent_info=t[0], - media_info=mediainfo, - meta_info=t[1]) for t in _match_torrents] - finally: - torrents.clear() - del torrents - _match_torrents.clear() - del _match_torrents - - # 排序 - progress.update(value=99, - text=f'正在对 {len(contexts)} 个资源进行排序,请稍候...', - key=ProgressKey.Search) - contexts = torrenthelper.sort_torrents(contexts) - - # 结束进度 - logger.info(f'搜索完成,共 {len(contexts)} 个资源') - progress.update(value=100, - text=f'搜索完成,共 {len(contexts)} 个资源', - key=ProgressKey.Search) - progress.end(ProgressKey.Search) - - # 返回 - return contexts + # 处理结果 + return self.__parse_result( + torrents=torrents, + mediainfo=mediainfo, + keyword=keyword, + rule_groups=rule_groups, + season_episodes=season_episodes, + custom_words=custom_words, + filter_params=filter_params + ) def __search_all_sites(self, keywords: List[str], mediainfo: Optional[MediaInfo] = None,