feat:协程搜索 part2

This commit is contained in:
jxxghp
2025-07-31 21:39:36 +08:00
parent 3fc2c7d6cc
commit 039d876e3f

View File

@@ -3,7 +3,7 @@ import pickle
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Dict
from typing import Dict, Tuple
from typing import List, Optional
from app.chain import ChainBase
@@ -168,50 +168,14 @@ class SearchChain(ChainBase):
await self.async_save_cache(pickle.dumps(contexts), self.__result_temp_file)
return contexts
def process(self, mediainfo: MediaInfo,
keyword: Optional[str] = None,
no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
sites: List[int] = None,
rule_groups: List[str] = None,
area: Optional[str] = "title",
custom_words: List[str] = None,
filter_params: Dict[str, str] = None) -> List[Context]:
@staticmethod
def __prepare_params(mediainfo: MediaInfo,
keyword: Optional[str] = None,
no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None
) -> Tuple[Dict[int, List[int]], List[str]]:
"""
根据媒体信息搜索种子资源精确匹配应用过滤规则同时根据no_exists过滤本地已存在的资源
:param mediainfo: 媒体信息
:param keyword: 搜索关键词
:param no_exists: 缺失的媒体信息
:param sites: 站点ID列表为空时搜索所有站点
:param rule_groups: 过滤规则组名称列表
:param area: 搜索范围title or imdbid
:param custom_words: 自定义识别词列表
:param filter_params: 过滤参数
准备搜索参数
"""
def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
"""
执行优先级过滤
"""
return self.filter_torrents(rule_groups=rule_groups,
torrent_list=torrent_list,
mediainfo=mediainfo) or []
# 豆瓣标题处理
if not mediainfo.tmdb_id:
meta = MetaInfo(title=mediainfo.title)
mediainfo.title = meta.name
mediainfo.season = meta.begin_season
logger.info(f'开始搜索资源,关键词:{keyword or mediainfo.title} ...')
# 补充媒体信息
if not mediainfo.names:
mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type,
tmdbid=mediainfo.tmdb_id,
doubanid=mediainfo.douban_id)
if not mediainfo:
logger.error(f'媒体信息识别失败!')
return []
# 缺失的季集
mediakey = mediainfo.tmdb_id or mediainfo.douban_id
if no_exists and no_exists.get(mediakey):
@@ -235,14 +199,27 @@ class SearchChain(ChainBase):
mediainfo.hk_title,
mediainfo.tw_title,
mediainfo.sg_title] if k]))
return season_episodes, keywords
def __parse_result(self, torrents: List[TorrentInfo],
mediainfo: MediaInfo,
keyword: Optional[str] = None,
rule_groups: List[str] = None,
season_episodes: Dict[int, List[int]] = None,
custom_words: List[str] = None,
filter_params: Dict[str, str] = None) -> List[Context]:
"""
处理搜索结果
"""
def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
"""
执行优先级过滤
"""
return self.filter_torrents(rule_groups=rule_groups,
torrent_list=torrent_list,
mediainfo=mediainfo) or []
# 执行搜索
torrents: List[TorrentInfo] = self.__search_all_sites(
mediainfo=mediainfo,
keywords=keywords,
sites=sites,
area=area
)
if not torrents:
logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
return []
@@ -355,6 +332,67 @@ class SearchChain(ChainBase):
# 返回
return contexts
def process(self, mediainfo: MediaInfo,
keyword: Optional[str] = None,
no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
sites: List[int] = None,
rule_groups: List[str] = None,
area: Optional[str] = "title",
custom_words: List[str] = None,
filter_params: Dict[str, str] = None) -> List[Context]:
"""
根据媒体信息搜索种子资源精确匹配应用过滤规则同时根据no_exists过滤本地已存在的资源
:param mediainfo: 媒体信息
:param keyword: 搜索关键词
:param no_exists: 缺失的媒体信息
:param sites: 站点ID列表为空时搜索所有站点
:param rule_groups: 过滤规则组名称列表
:param area: 搜索范围title or imdbid
:param custom_words: 自定义识别词列表
:param filter_params: 过滤参数
"""
# 豆瓣标题处理
if not mediainfo.tmdb_id:
meta = MetaInfo(title=mediainfo.title)
mediainfo.title = meta.name
mediainfo.season = meta.begin_season
logger.info(f'开始搜索资源,关键词:{keyword or mediainfo.title} ...')
# 补充媒体信息
if not mediainfo.names:
mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type,
tmdbid=mediainfo.tmdb_id,
doubanid=mediainfo.douban_id)
if not mediainfo:
logger.error(f'媒体信息识别失败!')
return []
# 准备搜索参数
season_episodes, keywords = self.__prepare_params(
mediainfo=mediainfo,
keyword=keyword,
no_exists=no_exists
)
# 执行搜索
torrents: List[TorrentInfo] = self.__search_all_sites(
mediainfo=mediainfo,
keywords=keywords,
sites=sites,
area=area
)
# 处理结果
return self.__parse_result(
torrents=torrents,
mediainfo=mediainfo,
keyword=keyword,
rule_groups=rule_groups,
season_episodes=season_episodes,
custom_words=custom_words,
filter_params=filter_params
)
async def async_process(self, mediainfo: MediaInfo,
keyword: Optional[str] = None,
no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None,
@@ -375,14 +413,6 @@ class SearchChain(ChainBase):
:param filter_params: 过滤参数
"""
def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
"""
执行优先级过滤
"""
return self.filter_torrents(rule_groups=rule_groups,
torrent_list=torrent_list,
mediainfo=mediainfo) or []
# 豆瓣标题处理
if not mediainfo.tmdb_id:
meta = MetaInfo(title=mediainfo.title)
@@ -399,29 +429,12 @@ class SearchChain(ChainBase):
logger.error(f'媒体信息识别失败!')
return []
# 缺失的季集
mediakey = mediainfo.tmdb_id or mediainfo.douban_id
if no_exists and no_exists.get(mediakey):
# 过滤剧集
season_episodes = {sea: info.episodes
for sea, info in no_exists[mediakey].items()}
elif mediainfo.season:
# 豆瓣只搜索当前季
season_episodes = {mediainfo.season: []}
else:
season_episodes = None
# 搜索关键词
if keyword:
keywords = [keyword]
else:
# 去重去空,但要保持顺序
keywords = list(dict.fromkeys([k for k in [mediainfo.title,
mediainfo.original_title,
mediainfo.en_title,
mediainfo.hk_title,
mediainfo.tw_title,
mediainfo.sg_title] if k]))
# 准备搜索参数
season_episodes, keywords = self.__prepare_params(
mediainfo=mediainfo,
keyword=keyword,
no_exists=no_exists
)
# 执行搜索
torrents: List[TorrentInfo] = await self.__async_search_all_sites(
@@ -430,117 +443,16 @@ class SearchChain(ChainBase):
sites=sites,
area=area
)
if not torrents:
logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
return []
# 开始新进度
progress = ProgressHelper()
progress.start(ProgressKey.Search)
# 开始过滤
progress.update(value=0, text=f'开始过滤,总 {len(torrents)} 个资源,请稍候...',
key=ProgressKey.Search)
# 匹配订阅附加参数
if filter_params:
logger.info(f'开始附加参数过滤,附加参数:{filter_params} ...')
torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)]
# 开始过滤规则过滤
if rule_groups is None:
# 取搜索过滤规则
rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups)
if rule_groups:
logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...')
torrents = __do_filter(torrents)
if not torrents:
logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
return []
logger.info(f"过滤规则/剧集过滤完成,剩余 {len(torrents)} 个资源")
# 过滤完成
progress.update(value=50, text=f'过滤完成,剩余 {len(torrents)} 个资源', key=ProgressKey.Search)
# 总数
_total = len(torrents)
# 已处理数
_count = 0
# 开始匹配
_match_torrents = []
torrenthelper = TorrentHelper()
try:
# 英文标题应该在别名/原标题中,不需要再匹配
logger.info(f"开始匹配结果 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}")
progress.update(value=51, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search)
for torrent in torrents:
if global_vars.is_system_stopped:
break
_count += 1
progress.update(value=(_count / _total) * 96,
text=f'正在匹配 {torrent.site_name},已完成 {_count} / {_total} ...',
key=ProgressKey.Search)
if not torrent.title:
continue
# 识别元数据
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description,
custom_words=custom_words)
if torrent.title != torrent_meta.org_string:
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
# 季集数过滤
if season_episodes \
and not torrenthelper.match_season_episodes(torrent=torrent,
meta=torrent_meta,
season_episodes=season_episodes):
continue
# 比对IMDBID
if torrent.imdbid \
and mediainfo.imdb_id \
and torrent.imdbid == mediainfo.imdb_id:
logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源{torrent.site_name} - {torrent.title}')
_match_torrents.append((torrent, torrent_meta))
continue
# 比对种子
if torrenthelper.match_torrent(mediainfo=mediainfo,
torrent_meta=torrent_meta,
torrent=torrent):
# 匹配成功
_match_torrents.append((torrent, torrent_meta))
continue
# 匹配完成
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
progress.update(value=97,
text=f'匹配完成,共匹配到 {len(_match_torrents)} 个资源',
key=ProgressKey.Search)
# 去掉mediainfo中多余的数据
mediainfo.clear()
# 组装上下文
contexts = [Context(torrent_info=t[0],
media_info=mediainfo,
meta_info=t[1]) for t in _match_torrents]
finally:
torrents.clear()
del torrents
_match_torrents.clear()
del _match_torrents
# 排序
progress.update(value=99,
text=f'正在对 {len(contexts)} 个资源进行排序,请稍候...',
key=ProgressKey.Search)
contexts = torrenthelper.sort_torrents(contexts)
# 结束进度
logger.info(f'搜索完成,共 {len(contexts)} 个资源')
progress.update(value=100,
text=f'搜索完成,共 {len(contexts)} 个资源',
key=ProgressKey.Search)
progress.end(ProgressKey.Search)
# 返回
return contexts
# 处理结果
return self.__parse_result(
torrents=torrents,
mediainfo=mediainfo,
keyword=keyword,
rule_groups=rule_groups,
season_episodes=season_episodes,
custom_words=custom_words,
filter_params=filter_params
)
def __search_all_sites(self, keywords: List[str],
mediainfo: Optional[MediaInfo] = None,