feat: add exact subtitle search

This commit is contained in:
jxxghp
2026-06-09 17:04:17 +08:00
parent e3c5a94c52
commit 210aac0937
8 changed files with 903 additions and 7 deletions

View File

@@ -29,6 +29,15 @@ def _parse_site_list(sites: Optional[str]) -> Optional[List[int]]:
return [int(site) for site in sites.split(",") if site] if sites else None
def _parse_media_type(mtype: Optional[str]) -> Optional[MediaType]:
    """
    Turn a media type string from the request into a ``MediaType`` member.

    ``MediaType.from_agent`` is tried first; when it yields nothing the raw
    value is handed to the ``MediaType`` constructor instead.

    :param mtype: media type text, may be empty or ``None``
    :return: the resolved ``MediaType``, or ``None`` when no value was given
    """
    if not mtype:
        return None
    agent_type = MediaType.from_agent(mtype)
    if agent_type:
        return agent_type
    return MediaType(mtype)
def _sse_event(data: dict) -> str:
"""
转换为SSE事件
@@ -180,7 +189,7 @@ async def search_by_id_stream(
根据TMDBID/豆瓣ID渐进式搜索站点资源返回格式为SSE
"""
media_type = MediaType(mtype) if mtype else None
media_type = _parse_media_type(mtype)
media_season = int(season) if season else None
site_list = _parse_site_list(sites)
media_chain = MediaChain()
@@ -386,10 +395,7 @@ async def search_by_id(
"""
根据TMDBID/豆瓣ID精确搜索站点资源 tmdb:/douban:/bangumi:
"""
if mtype:
media_type = MediaType(mtype)
else:
media_type = None
media_type = _parse_media_type(mtype)
if season:
media_season = int(season)
else:
@@ -643,6 +649,193 @@ async def search_subtitle_by_title(
)
async def _build_subtitle_search_source(
    mediaid: str,
    mtype: Optional[str] = None,
    title: Optional[str] = None,
    year: Optional[str] = None,
    season: Optional[str] = None,
    episode: Optional[str] = None,
    sites: Optional[str] = None,
    stream: bool = False,
) -> Any:
    """
    Resolve a media id into a pending exact subtitle search.

    Handles ``tmdb:``, ``douban:`` and ``bangumi:`` ids, converting between
    sources according to ``settings.RECOGNIZE_SOURCE``. Any other id is first
    offered to ``ChainEventType.MediaRecognizeConvert`` listeners and finally
    falls back to recognising the media from ``title``.

    :param mediaid: prefixed media id, e.g. ``tmdb:123``
    :param mtype: media type text, parsed with ``_parse_media_type``
    :param title: title used by the recognition fallback
    :param year: year applied to the fallback metadata
    :param season: season number as text
    :param episode: episode number as text
    :param sites: comma separated site ids, parsed with ``_parse_site_list``
    :param stream: build the streaming search instead of the plain one
    :return: ``(search, message)``. ``search`` is the not-yet-consumed result of
             ``SearchChain.async_search_subtitles_by_id`` (or its ``_stream``
             variant when ``stream`` is true); it is ``None`` together with a
             non-empty ``message`` when the media cannot be resolved.
    """
    # Non-numeric season/episode text is reported through the error channel
    # instead of escaping as an unhandled ValueError.
    try:
        media_season = int(season) if season else None
        media_episode = int(episode) if episode else None
    except ValueError:
        return None, "季或集参数无效"
    media_type = _parse_media_type(mtype)
    site_list = _parse_site_list(sites)
    media_chain = MediaChain()
    search_chain = SearchChain()

    def call_search(**kwargs):
        """
        Build the plain search coroutine or the streaming iterator for the given id.
        """
        # media_season is read at call time, so a season picked up during id
        # conversion below is honoured.
        params = {
            **kwargs,
            "mtype": media_type,
            "season": media_season,
            "episode": media_episode,
            "sites": site_list,
            "cache_local": True,
        }
        if stream:
            return search_chain.async_search_subtitles_by_id_stream(**params)
        return search_chain.async_search_subtitles_by_id(**params)

    def parse_numeric_id(prefix: str) -> Optional[int]:
        """
        Strip the prefix from the media id and convert it, None when not numeric.
        """
        try:
            return int(mediaid.replace(prefix, ""))
        except ValueError:
            return None

    if mediaid.startswith("tmdb:"):
        tmdbid = parse_numeric_id("tmdb:")
        if tmdbid is None:
            return None, "未知的媒体ID"
        if settings.RECOGNIZE_SOURCE == "douban":
            doubaninfo = await media_chain.async_get_doubaninfo_by_tmdbid(
                tmdbid=tmdbid, mtype=media_type
            )
            if not doubaninfo:
                return None, "未识别到豆瓣媒体信息"
            return call_search(doubanid=doubaninfo.get("id")), ""
        return call_search(tmdbid=tmdbid), ""

    if mediaid.startswith("douban:"):
        doubanid = mediaid.replace("douban:", "")
        if settings.RECOGNIZE_SOURCE == "themoviedb":
            tmdbinfo = await media_chain.async_get_tmdbinfo_by_doubanid(
                doubanid=doubanid, mtype=media_type
            )
            if not tmdbinfo:
                return None, "未识别到TMDB媒体信息"
            # Only adopt the converted season when the caller gave none.
            if tmdbinfo.get("season") and not media_season:
                media_season = tmdbinfo.get("season")
            return call_search(tmdbid=tmdbinfo.get("id")), ""
        return call_search(doubanid=doubanid), ""

    if mediaid.startswith("bangumi:"):
        bangumiid = parse_numeric_id("bangumi:")
        if bangumiid is None:
            return None, "未知的媒体ID"
        if settings.RECOGNIZE_SOURCE == "themoviedb":
            tmdbinfo = await media_chain.async_get_tmdbinfo_by_bangumiid(
                bangumiid=bangumiid
            )
            if not tmdbinfo:
                return None, "未识别到TMDB媒体信息"
            return call_search(tmdbid=tmdbinfo.get("id")), ""
        doubaninfo = await media_chain.async_get_doubaninfo_by_bangumiid(
            bangumiid=bangumiid
        )
        if not doubaninfo:
            return None, "未识别到豆瓣媒体信息"
        return call_search(doubanid=doubaninfo.get("id")), ""

    # Unknown prefix: let event listeners convert the id first.
    event_data = MediaRecognizeConvertEventData(
        mediaid=mediaid, convert_type=settings.RECOGNIZE_SOURCE
    )
    event = await eventmanager.async_send_event(
        ChainEventType.MediaRecognizeConvert, event_data
    )
    if event and event.event_data and event.event_data.media_dict:
        event_data = event.event_data
        search_id = event_data.media_dict.get("id")
        if event_data.convert_type == "themoviedb":
            return call_search(tmdbid=search_id), ""
        if event_data.convert_type == "douban":
            return call_search(doubanid=search_id), ""

    # Last resort: recognise the media from the supplied title.
    if not title:
        return None, "未知的媒体ID"
    meta = MetaInfo(title)
    if year:
        meta.year = year
    if media_type:
        meta.type = media_type
    if media_season:
        meta.type = MediaType.TV
        meta.begin_season = media_season
    mediainfo = await media_chain.async_recognize_by_meta(
        meta,
        obtain_images=False,
    )
    if not mediainfo:
        return None, "未识别到媒体信息"
    if settings.RECOGNIZE_SOURCE == "themoviedb":
        return call_search(tmdbid=mediainfo.tmdb_id), ""
    return call_search(doubanid=mediainfo.douban_id), ""
@router.get("/subtitle/media/{mediaid}/stream", summary="渐进式精确搜索字幕")
async def search_subtitle_by_id_stream(
    request: Request,
    mediaid: str,
    mtype: Optional[str] = None,
    title: Optional[str] = None,
    year: Optional[str] = None,
    season: Optional[str] = None,
    episode: Optional[str] = None,
    sites: Optional[str] = None,
    _: schemas.TokenPayload = Depends(verify_resource_token),
) -> Any:
    """
    Stream an exact subtitle search for a media id as server-sent events.

    The media id is resolved by ``_build_subtitle_search_source`` in streaming
    mode; when that fails a single error event carrying its message is sent.
    """
    search_args = dict(
        mediaid=mediaid,
        mtype=mtype,
        title=title,
        year=year,
        season=season,
        episode=episode,
        sites=sites,
        stream=True,
    )
    subtitles, message = await _build_subtitle_search_source(**search_args)

    async def event_source():
        """
        Yield the events of the exact subtitle search, or one error event.
        """
        if subtitles:
            async for search_event in subtitles:
                yield search_event
        else:
            yield {"type": "error", "success": False, "message": message or "未搜索到任何字幕"}

    sse_body = _stream_search_events(request, event_source())
    return StreamingResponse(sse_body, media_type="text/event-stream")
@router.get("/subtitle/media/{mediaid}", summary="精确搜索字幕", response_model=schemas.Response)
async def search_subtitle_by_id(
    mediaid: str,
    mtype: Optional[str] = None,
    title: Optional[str] = None,
    year: Optional[str] = None,
    season: Optional[str] = None,
    episode: Optional[str] = None,
    sites: Optional[str] = None,
    _: schemas.TokenPayload = Depends(verify_token),
) -> Any:
    """
    Run an exact subtitle search for a media id and return all matches at once.

    Responds with ``success=False`` when the media id cannot be resolved or
    when the search yields no subtitles.
    """
    pending_search, message = await _build_subtitle_search_source(
        mediaid=mediaid,
        mtype=mtype,
        title=title,
        year=year,
        season=season,
        episode=episode,
        sites=sites,
    )
    if not pending_search:
        return schemas.Response(success=False, message=message or "未搜索到任何字幕")

    found = await pending_search
    if found:
        payload = [subtitle.to_dict() for subtitle in found]
        return schemas.Response(success=True, data=payload)
    return schemas.Response(success=False, message="未搜索到任何字幕")
@router.post("/recommend", summary="AI推荐资源", response_model=schemas.Response)
async def recommend_search_results(
filtered_indices: Optional[List[int]] = Body(

View File

@@ -204,6 +204,7 @@ class SearchChain(ChainBase):
"title": str(params.get("title") or ""),
"year": str(params.get("year") or ""),
"season": str(params.get("season") or ""),
"episode": str(params.get("episode") or ""),
"sites": str(params.get("sites") or ""),
"result_type": str(params.get("result_type") or "torrent"),
}
@@ -218,6 +219,7 @@ class SearchChain(ChainBase):
title: Optional[str] = None,
year: Optional[str] = None,
season: Optional[int] = None,
episode: Optional[int] = None,
sites: Optional[List[int]] = None,
result_type: Optional[str] = "torrent",
) -> None:
@@ -232,6 +234,7 @@ class SearchChain(ChainBase):
"title": title,
"year": year,
"season": season,
"episode": episode,
"sites": self._stringify_sites(sites),
"result_type": result_type or "torrent",
}
@@ -248,6 +251,7 @@ class SearchChain(ChainBase):
title: Optional[str] = None,
year: Optional[str] = None,
season: Optional[int] = None,
episode: Optional[int] = None,
sites: Optional[List[int]] = None,
result_type: Optional[str] = "torrent",
) -> None:
@@ -262,6 +266,7 @@ class SearchChain(ChainBase):
"title": title,
"year": year,
"season": season,
"episode": episode,
"sites": self._stringify_sites(sites),
"result_type": result_type or "torrent",
}
@@ -650,6 +655,101 @@ class SearchChain(ChainBase):
"total_items": len(subtitles)
}
async def async_search_subtitles_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None,
                                       mtype: Optional[MediaType] = None, season: Optional[int] = None,
                                       episode: Optional[int] = None, sites: Optional[List[int]] = None,
                                       cache_local: bool = False) -> List[SubtitleInfo]:
    """
    Exact subtitle search by TMDB id or Douban id; no filter rules are applied.

    :param tmdbid: TMDB ID
    :param doubanid: Douban ID
    :param mtype: media type, movie or TV
    :param season: season number
    :param episode: episode number
    :param sites: list of site ids to search
    :param cache_local: when true, the search parameters and the result are
                        saved locally and any running AI recommendation is
                        cancelled first
    :return: the matched subtitles, or an empty list when the media cannot be
             recognised
    """
    if cache_local:
        self.cancel_ai_recommend()
        await self.async_save_last_search_params(
            keyword=self._build_search_keyword(tmdbid=tmdbid, doubanid=doubanid),
            mtype=mtype,
            area="title",
            season=season,
            episode=episode,
            sites=sites,
            result_type="subtitle",
        )
    mediainfo = await self.async_recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype)
    if not mediainfo:
        # Log whichever id was actually supplied; a Douban-only lookup has no tmdbid.
        logger.error(f'{tmdbid or doubanid} 媒体信息识别失败!')
        return []
    subtitles = await self.__async_search_subtitles_for_media(
        mediainfo=mediainfo,
        tmdbid=tmdbid,
        doubanid=doubanid,
        season=season,
        episode=episode,
        sites=sites,
    )
    if cache_local:
        await self.async_save_cache(subtitles, self.__subtitle_result_temp_file)
    return subtitles
async def async_search_subtitles_by_id_stream(
        self,
        tmdbid: Optional[int] = None,
        doubanid: Optional[str] = None,
        mtype: Optional[MediaType] = None,
        season: Optional[int] = None,
        episode: Optional[int] = None,
        sites: Optional[List[int]] = None,
        cache_local: bool = False,
) -> AsyncIterator[dict]:
    """
    Progressive exact subtitle search by TMDB id or Douban id.

    Re-yields the events of the per-media stream search. The matched subtitle
    objects carried by the final ``done`` event are removed from the event
    before it is yielded and, when ``cache_local`` is set, saved locally.

    :param tmdbid: TMDB ID
    :param doubanid: Douban ID
    :param mtype: media type, movie or TV
    :param season: season number
    :param episode: episode number
    :param sites: list of site ids to search
    :param cache_local: save search parameters and the final result locally
    :return: async iterator of event dicts; a single ``error`` event is
             yielded when the media cannot be recognised
    """
    if cache_local:
        self.cancel_ai_recommend()
        await self.async_save_last_search_params(
            keyword=self._build_search_keyword(tmdbid=tmdbid, doubanid=doubanid),
            mtype=mtype,
            area="title",
            season=season,
            episode=episode,
            sites=sites,
            result_type="subtitle",
        )
    mediainfo = await self.async_recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype)
    if not mediainfo:
        # Log whichever id was actually supplied; a Douban-only lookup has no tmdbid.
        logger.error(f'{tmdbid or doubanid} 媒体信息识别失败!')
        yield {
            "type": "error",
            "success": False,
            "message": "媒体信息识别失败"
        }
        return
    subtitles: List[SubtitleInfo] = []
    cache_saved = False
    async for event in self.__async_search_subtitles_for_media_stream(
            mediainfo=mediainfo,
            tmdbid=tmdbid,
            doubanid=doubanid,
            season=season,
            episode=episode,
            sites=sites):
        if event.get("type") == "done":
            subtitles = event.get("subtitles") or []
            event = {
                key: value
                for key, value in event.items()
                if key != "subtitles"
            }
            # Persist before handing out the final event: a consumer that stops
            # iterating once it sees "done" never resumes this generator, so
            # code placed after the last yield would not run.
            if cache_local:
                await self.async_save_cache(subtitles, self.__subtitle_result_temp_file)
                cache_saved = True
        yield event
    # No "done" event was produced: still overwrite the cache, as before.
    if cache_local and not cache_saved:
        await self.async_save_cache(subtitles, self.__subtitle_result_temp_file)
async def async_search_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None,
mtype: MediaType = None, area: Optional[str] = "title", season: Optional[int] = None,
sites: List[int] = None, cache_local: bool = False) -> List[Context]:
@@ -1341,6 +1441,321 @@ class SearchChain(ChainBase):
"contexts": contexts
}
@staticmethod
def __build_subtitle_season_episodes(mediainfo: MediaInfo,
                                     season: Optional[int] = None,
                                     episode: Optional[int] = None) -> Optional[Dict[int, List[int]]]:
    """
    Build the season/episode constraint used when matching subtitles.

    Only TV media gets a constraint. The explicit ``season`` wins over the
    season stored on ``mediainfo``; without an ``episode`` the constraint is
    limited to the season (empty episode list).

    :return: ``{season: [episode]}`` / ``{season: []}``, or ``None`` when the
             media is not TV or no season is known
    """
    if mediainfo.type != MediaType.TV:
        return None
    target_season = mediainfo.season if season is None else season
    if target_season is None:
        return None
    wanted_episodes = [] if episode is None else [episode]
    return {target_season: wanted_episodes}
@staticmethod
def __build_subtitle_torrent(subtitle: SubtitleInfo, title: Optional[str] = None) -> TorrentInfo:
    """
    Wrap a subtitle result in a lightweight ``TorrentInfo`` so the existing
    title matching logic can be reused.

    :param subtitle: subtitle result to convert
    :param title: title to use; falls back to the subtitle title, then to its
                  file name
    """
    # Attributes copied one-to-one from the subtitle onto the torrent.
    mirrored_fields = (
        "site", "site_name", "site_cookie", "site_ua", "site_proxy", "site_order",
        "description", "enclosure", "page_url", "size", "grabs", "pubdate",
        "date_elapsed",
    )
    torrent_kwargs = {field: getattr(subtitle, field) for field in mirrored_fields}
    torrent_kwargs["title"] = title or subtitle.title or subtitle.file_name
    return TorrentInfo(**torrent_kwargs)
@staticmethod
def __build_subtitle_names(subtitle: SubtitleInfo) -> List[str]:
    """
    Collect the candidate names of a subtitle for exact matching.

    The subtitle title and the download file name are stripped; blank values
    and duplicates are dropped while the order (title first) is kept.
    """
    candidates: List[str] = []
    for raw_name in (subtitle.title, subtitle.file_name):
        if not raw_name:
            continue
        cleaned = raw_name.strip()
        if cleaned and cleaned not in candidates:
            candidates.append(cleaned)
    return candidates
@staticmethod
def __build_subtitle_meta(title: str,
                          subtitle: SubtitleInfo,
                          custom_words: Optional[List[str]] = None) -> MetaInfo:
    """
    Recognise a subtitle name.

    :param title: candidate name to recognise
    :param subtitle: subtitle whose description is passed as the sub-title
    :param custom_words: custom recognition words forwarded to ``MetaInfo``
    """
    meta_kwargs = {
        "title": title,
        "subtitle": subtitle.description,
        "custom_words": custom_words,
    }
    return MetaInfo(**meta_kwargs)
@staticmethod
def __match_subtitle_episode(meta: MetaInfo,
                             season_episodes: Optional[Dict[int, List[int]]],
                             episode: Optional[int] = None) -> bool:
    """
    Check whether the season/episode recognised from a subtitle name falls
    inside the target season/episode constraint.

    :param meta: metadata recognised from the subtitle name
    :param season_episodes: target constraint; falsy means "accept anything"
    :param episode: when given, the recognised episode list must contain it
    """
    if not season_episodes:
        return True
    probe_torrent = TorrentInfo(title=meta.org_string)
    season_ok = TorrentHelper.match_season_episodes(
        torrent=probe_torrent,
        meta=meta,
        season_episodes=season_episodes)
    if not season_ok:
        return False
    if episode is None:
        return True
    return bool(meta.episode_list) and episode in meta.episode_list
def __parse_subtitle_result(self,
                            subtitles: List[SubtitleInfo],
                            mediainfo: MediaInfo,
                            keyword: Optional[str] = None,
                            season_episodes: Optional[Dict[int, List[int]]] = None,
                            episode: Optional[int] = None,
                            custom_words: Optional[List[str]] = None) -> List[SubtitleInfo]:
    """
    Recognise subtitle search results and keep only those that match the
    media exactly; no filter rules are involved.

    A subtitle is kept as soon as one of its candidate names passes both the
    season/episode check and the title match against ``mediainfo``.

    :param subtitles: raw subtitle candidates
    :param mediainfo: target media
    :param keyword: search keyword, only used in the "nothing found" log line
    :param season_episodes: season/episode constraint
    :param episode: target episode number
    :param custom_words: custom recognition words
    :return: matched subtitles with duplicates removed
    """
    if not subtitles:
        logger.warn(f'{keyword or mediainfo.title} 未搜索到字幕')
        return []

    def name_matches(candidate: SubtitleInfo, candidate_name: str) -> bool:
        """
        Check one candidate name of a subtitle against the target media.
        """
        name_meta = self.__build_subtitle_meta(
            title=candidate_name,
            subtitle=candidate,
            custom_words=custom_words,
        )
        episode_ok = self.__match_subtitle_episode(
            meta=name_meta,
            season_episodes=season_episodes,
            episode=episode)
        if not episode_ok:
            return False
        name_torrent = self.__build_subtitle_torrent(
            subtitle=candidate,
            title=candidate_name,
        )
        return bool(TorrentHelper.match_torrent(
            mediainfo=mediainfo,
            torrent_meta=name_meta,
            torrent=name_torrent))

    match_subtitles = []
    logger.info(f"开始匹配字幕 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}")
    for subtitle in subtitles:
        # Stop early when the system is shutting down.
        if global_vars.is_system_stopped:
            break
        # any() stops at the first matching name, so each subtitle is added once.
        if any(name_matches(subtitle, name) for name in self.__build_subtitle_names(subtitle)):
            match_subtitles.append(subtitle)
    logger.info(f"字幕匹配完成,共匹配到 {len(match_subtitles)} 个字幕")
    return self.__remove_duplicate_subtitles(match_subtitles)
@staticmethod
def __remove_duplicate_subtitles(subtitles: List[SubtitleInfo]) -> List[SubtitleInfo]:
    """
    Drop duplicate subtitle results.

    Two results are duplicates when site name, torrent id, subtitle id, title
    and enclosure are all equal; the later one replaces the earlier one while
    the position of the first occurrence is kept.
    """
    unique_by_key = {}
    for subtitle in subtitles:
        unique_by_key[
            f"{subtitle.site_name}_{subtitle.torrent_id}_{subtitle.subtitle_id}_{subtitle.title}_{subtitle.enclosure}"
        ] = subtitle
    return list(unique_by_key.values())
async def __async_search_subtitles_for_media(self,
                                             mediainfo: MediaInfo,
                                             tmdbid: Optional[int] = None,
                                             doubanid: Optional[str] = None,
                                             season: Optional[int] = None,
                                             episode: Optional[int] = None,
                                             sites: List[int] = None,
                                             custom_words: List[str] = None) -> List[SubtitleInfo]:
    """
    Search all sites for subtitles of the given media and match them exactly.

    :param mediainfo: recognised media; re-recognised when it carries no names
    :param tmdbid: TMDB ID used as key of the season constraint
    :param doubanid: Douban ID used as key when no TMDB ID is given
    :param season: season number
    :param episode: episode number
    :param sites: list of site ids to search
    :param custom_words: custom recognition words
    :return: matched subtitles, empty when the media cannot be recognised
    """
    if not mediainfo.tmdb_id:
        # Without a TMDB id, take name and season from the parsed title.
        title_meta = MetaInfo(title=mediainfo.title)
        mediainfo.title = title_meta.name
        mediainfo.season = title_meta.begin_season
    logger.info(f'开始精确搜索字幕,关键词:{mediainfo.title} ...')

    if not mediainfo.names:
        mediainfo = await self.async_recognize_media(mtype=mediainfo.type,
                                                     tmdbid=mediainfo.tmdb_id,
                                                     doubanid=mediainfo.douban_id)
        if not mediainfo:
            logger.error('媒体信息识别失败!')
            return []

    if season is None:
        no_exists = None
    else:
        wanted_episodes = [episode] if episode is not None else []
        no_exists = {
            tmdbid or doubanid: {
                season: NotExistMediaInfo(episodes=wanted_episodes)
            }
        }
    season_episodes, keywords = self.__prepare_params(
        mediainfo=mediainfo,
        no_exists=no_exists,
    )
    # The subtitle-specific constraint takes precedence when one can be built.
    subtitle_constraint = self.__build_subtitle_season_episodes(
        mediainfo=mediainfo,
        season=season,
        episode=episode,
    )
    if subtitle_constraint:
        season_episodes = subtitle_constraint

    subtitles: List[SubtitleInfo] = []
    for search_count, search_word in enumerate(keywords):
        if search_count > 0:
            logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
            await asyncio.sleep(random.randint(1, 10))
        site_results = await self.__async_search_subtitles_all_sites(
            keyword=search_word,
            sites=sites,
        )
        if site_results:
            subtitles.extend(site_results)
        # Unless multi-name search is enabled, the first keyword with hits ends the search.
        if not settings.SEARCH_MULTIPLE_NAME and subtitles:
            logger.info(f"共搜索到 {len(subtitles)} 个字幕,停止搜索")
            break

    return await run_in_threadpool(
        self.__parse_subtitle_result,
        subtitles=subtitles,
        mediainfo=mediainfo,
        keyword=mediainfo.title,
        season_episodes=season_episodes,
        episode=episode,
        custom_words=custom_words,
    )
async def __async_search_subtitles_for_media_stream(
self,
mediainfo: MediaInfo,
tmdbid: Optional[int] = None,
doubanid: Optional[str] = None,
season: Optional[int] = None,
episode: Optional[int] = None,
sites: List[int] = None,
custom_words: List[str] = None,
) -> AsyncIterator[Dict[str, Any]]:
"""
Progressively search all sites for subtitles of the given media and match them exactly.

Yields, in order: one "append" event per batch of raw site results while
searching, a "progress" event before matching, a "replace" event with the
matched items, and a final "done" event that additionally carries the matched
subtitle objects under "subtitles". A single "error" event is yielded when
the media cannot be recognised.

:param mediainfo: recognised media; re-recognised when it carries no names
:param tmdbid: TMDB ID used as key of the season constraint
:param doubanid: Douban ID used as key when no TMDB ID is given
:param season: season number
:param episode: episode number
:param sites: list of site ids to search
:param custom_words: custom recognition words
"""
# Without a TMDB id, take name and season from the parsed title.
if not mediainfo.tmdb_id:
meta = MetaInfo(title=mediainfo.title)
mediainfo.title = meta.name
mediainfo.season = meta.begin_season
logger.info(f'开始渐进式精确搜索字幕,关键词:{mediainfo.title} ...')
# Media without names is recognised again before searching.
if not mediainfo.names:
mediainfo = await self.async_recognize_media(mtype=mediainfo.type,
tmdbid=mediainfo.tmdb_id,
doubanid=mediainfo.douban_id)
if not mediainfo:
logger.error('媒体信息识别失败!')
yield {
"type": "error",
"success": False,
"message": "媒体信息识别失败"
}
return
# Describe the wanted season/episode, keyed by the media id, for __prepare_params.
no_exists = None
if season is not None:
no_exists = {
tmdbid or doubanid: {
season: NotExistMediaInfo(episodes=[episode] if episode is not None else [])
}
}
season_episodes, keywords = self.__prepare_params(
mediainfo=mediainfo,
no_exists=no_exists,
)
# The subtitle-specific constraint takes precedence when one can be built.
season_episodes = self.__build_subtitle_season_episodes(
mediainfo=mediainfo,
season=season,
episode=episode,
) or season_episodes
subtitles: List[SubtitleInfo] = []
search_count = 0
for search_word in keywords:
# Pause 1-10 seconds between consecutive keyword searches.
if search_count > 0:
logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...")
await asyncio.sleep(random.randint(1, 10))
async for event in self.__async_search_subtitles_all_sites_stream(
keyword=search_word,
sites=sites):
# Forward raw candidates right away; total_items counts everything collected so far.
result = event.pop("items", []) or []
subtitles.extend(result)
yield {
**event,
"type": "append",
"stage": "searching",
"items": [subtitle.to_dict() for subtitle in result],
"total_items": len(subtitles)
}
search_count += 1
# Unless multi-name search is enabled, the first keyword with hits ends the search.
if not settings.SEARCH_MULTIPLE_NAME and subtitles:
logger.info(f"共搜索到 {len(subtitles)} 个字幕,停止搜索")
break
yield {
"type": "progress",
"stage": "filtering",
"value": 98,
"text": f"正在识别匹配 {len(subtitles)} 个候选字幕 ..."
}
# Matching is a synchronous method, so it is dispatched through run_in_threadpool.
match_subtitles = await run_in_threadpool(
self.__parse_subtitle_result,
subtitles=subtitles,
mediainfo=mediainfo,
keyword=mediainfo.title,
season_episodes=season_episodes,
episode=episode,
custom_words=custom_words,
)
final_items = [subtitle.to_dict() for subtitle in match_subtitles]
# Replace the raw candidates shown so far with the matched ones.
yield {
"type": "replace",
"stage": "filtered",
"value": 100,
"text": f"识别匹配完成,共 {len(match_subtitles)} 个字幕",
"items": final_items,
"total_items": len(match_subtitles)
}
# "subtitles" holds the matched objects themselves, alongside their dict form in "items".
yield {
"type": "done",
"stage": "done",
"text": f"搜索完成,共 {len(match_subtitles)} 个字幕",
"items": final_items,
"total_items": len(match_subtitles),
"subtitles": match_subtitles
}
def __search_all_sites(self, keyword: str,
mediainfo: Optional[MediaInfo] = None,
sites: List[int] = None,

View File

@@ -866,7 +866,17 @@ class SiteSpider:
self.is_error = True
return []
if self.search_type != "subtitles":
if self.search_type == "subtitles":
rust_subtitles = rust_accel.parse_indexer_subtitles(
html_text=html_text,
domain=self.domain,
list_config=self.list,
fields=self.fields,
result_num=self.result_num
)
if rust_subtitles is not None:
return rust_subtitles
else:
rust_torrents = rust_accel.parse_indexer_torrents(
html_text=html_text,
domain=self.domain,

View File

@@ -118,6 +118,32 @@ def parse_indexer_torrents(
return None
def parse_indexer_subtitles(
        html_text: str,
        domain: str,
        list_config: dict,
        fields: dict,
        result_num: int = 100
) -> Optional[List[dict]]:
    """
    Parse the subtitle list of a regular configured site in one batch with Rust.

    :param html_text: page HTML to parse
    :param domain: site domain
    :param list_config: list selector configuration of the site
    :param fields: field selector configuration of the site
    :param result_num: result count forwarded to the extension
    :return: parsed rows, or ``None`` when the accelerator is disabled or the
             extension call fails, so the caller can fall back to Python
    """
    if not is_enabled():
        return None
    try:
        parsed_rows = _moviepilot_rust.parse_indexer_subtitles_fast(
            html_text,
            domain,
            list_config,
            fields,
            result_num
        )
    except BaseException as err:
        # The helper decides which errors must propagate; the rest only trigger the fallback.
        _raise_non_rust_panic(err)
        logger.debug(f"Rust 字幕列表解析失败,使用 Python 解析兜底:{err}")
        return None
    return parsed_rows
def parse_rss_items(xml_text: str, max_items: int = 1000) -> Optional[List[dict]]:
"""
使用 Rust 解析 RSS/Atom 条目,不可用或异常时返回 None。

View File

@@ -88,6 +88,8 @@ MoviePilot 也提供普通 REST API 给前端和自动化客户端使用。所
| GET | `/api/v1/search/title/stream` | 按关键字渐进式搜索站点种子资源,返回 SSE,参数:`keyword`、`page`、`sites` |
| GET | `/api/v1/search/subtitle/title` | 按关键字搜索站点字幕资源,参数:`keyword`、`page`、`sites` |
| GET | `/api/v1/search/subtitle/title/stream` | 按关键字渐进式搜索站点字幕资源,返回 SSE,参数:`keyword`、`page`、`sites` |
| GET | `/api/v1/search/subtitle/media/{mediaid}` | 按媒体 ID 精确搜索站点字幕资源,`mediaid` 支持 `tmdb:123`、`douban:123`、`bangumi:123`,参数:`mtype`、`title`、`year`、`season`、`episode`、`sites` |
| GET | `/api/v1/search/subtitle/media/{mediaid}/stream` | 按媒体 ID 渐进式精确搜索站点字幕资源,返回 SSE,参数同上 |
| GET | `/api/v1/search/last` | 获取上一次种子搜索结果 |
| GET | `/api/v1/search/last/context` | 获取上一次搜索结果及可复用搜索参数,`params.result_type` 为 `torrent` 或 `subtitle` |
| POST | `/api/v1/search/recommend` | 获取 AI 推荐资源,请求体:`filtered_indices``check_only``force` |

View File

@@ -107,7 +107,7 @@ All endpoints are under the base URL `{MP_HOST}`. Path parameters are shown as `
| GET | `/api/v1/bangumi/person/{person_id}` | Person detail |
| GET | `/api/v1/bangumi/person/credits/{person_id}` | Person filmography. Params: `page`, `count` |
### Search / Torrents / Subtitles (9 endpoints)
### Search / Torrents / Subtitles (11 endpoints)
| Method | Path | Description |
|--------|------|-------------|
@@ -117,6 +117,8 @@ All endpoints are under the base URL `{MP_HOST}`. Path parameters are shown as `
| GET | `/api/v1/search/title/stream` | Stream fuzzy torrent search with SSE. Params: `keyword`, `page`, `sites` |
| GET | `/api/v1/search/subtitle/title` | Fuzzy search site subtitles by keyword. Params: `keyword`, `page`, `sites` |
| GET | `/api/v1/search/subtitle/title/stream` | Stream fuzzy site subtitle search with SSE. Params: `keyword`, `page`, `sites` |
| GET | `/api/v1/search/subtitle/media/{mediaid}` | Exact subtitle search by media ID (format: `tmdb:123` / `douban:123` / `bangumi:123`). Params: `mtype`, `title`, `year`, `season`, `episode`, `sites` |
| GET | `/api/v1/search/subtitle/media/{mediaid}/stream` | Stream exact subtitle search by media ID with SSE. Params: `mtype`, `title`, `year`, `season`, `episode`, `sites` |
| GET | `/api/v1/search/last` | Get latest search results |
| GET | `/api/v1/search/last/context` | Get latest search results with replayable params. `params.result_type` is `torrent` or `subtitle` |
| POST | `/api/v1/search/recommend` | AI recommended resources. Body: `filtered_indices`, `check_only`, `force` |

View File

@@ -1,3 +1,5 @@
import os
import time
from datetime import datetime, timezone
from types import SimpleNamespace
@@ -389,6 +391,136 @@ def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links():
}]
def test_rust_indexer_subtitle_parser_dispatches_to_extension(monkeypatch):
    """
    The Rust subtitle parsing entry point should pass the site configuration
    through to the extension function unchanged.
    """
    recorded_calls = []
    stub_rows = [{"title": "Green Snake"}]

    def fake_parse_indexer_subtitles_fast(html_text, domain, list_config, fields, result_num):
        """
        Record the arguments the extension entry point was called with.
        """
        recorded_calls.append((html_text, domain, list_config, fields, result_num))
        return stub_rows

    monkeypatch.setattr(
        rust_accel,
        "_moviepilot_rust",
        SimpleNamespace(
            is_available=lambda: True,
            parse_indexer_subtitles_fast=fake_parse_indexer_subtitles_fast,
        ),
    )

    list_config = {"selector": "#subtitles-table > div"}
    fields = {
        "language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"},
        "title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'},
    }
    call_kwargs = {
        "html_text": "<div></div>",
        "domain": "https://hhanclub.net/",
        "list_config": list_config,
        "fields": fields,
        "result_num": 100,
    }

    result = rust_accel.parse_indexer_subtitles(**call_kwargs)

    assert result == stub_rows
    assert recorded_calls == [("<div></div>", "https://hhanclub.net/", list_config, fields, 100)]
# Opt-in benchmark: only runs when MP_RUST_PERF_TEST=1 is set in the environment.
@pytest.mark.skipif(
os.environ.get("MP_RUST_PERF_TEST") != "1",
reason="性能测试仅在显式开启 MP_RUST_PERF_TEST=1 时运行",
)
def test_rust_subtitle_parser_is_several_times_faster_than_python(monkeypatch):
"""
Rust subtitle parsing should be markedly faster than the Python fallback on
the production SiteSpider path (at least 3x, as asserted at the end).
"""
# Skip when the installed extension predates the subtitle entry point.
if not hasattr(rust_accel._moviepilot_rust, "parse_indexer_subtitles_fast"):
pytest.skip("当前 Rust 扩展未包含字幕解析入口")
def subtitle_row(index: int) -> str:
"""
Build one subtitle card row in the new hhanclub layout; many rows are
generated to enlarge the sample and stabilise the comparison.
"""
return f"""
<div class="grid grid-cols-[10%_60%_10%_10%_10%]">
<div><img src="pic/flag/china.gif"></div>
<div>
<a href="downloadsubs.php?torrentid={index}&amp;subid={index + 1000}">
Example Show S01E03 1080p WEB-DL CHS {index}
</a>
<a href="https://hhanclub.net/userdetails.php?id={index}"><b>tester{index}</b></a>
</div>
<div><div>111.99&nbsp;KB</div></div>
<div><span title="2026-04-21 20:54:37">1月18天</span></div>
<div><a href="report.php?subtitle={index + 1000}">举报</a></div>
</div>
"""
# 600 rows, matching the result_num configured below.
html = f'<div id="subtitles-table">{"".join(subtitle_row(index) for index in range(600))}</div>'
indexer = {
"id": "hhanclub",
"name": "憨憨",
"domain": "https://hhanclub.net/",
"public": False,
"subtitles": {
"list": {"selector": "#subtitles-table > div"},
"fields": {
"language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"},
"title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'},
"download": {
"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]',
"attribute": "href",
},
"size": {"selector": "div:nth-child(3)"},
"date_added": {"selector": "div:nth-child(4) span", "attribute": "title"},
"date_elapsed": {"selector": "div:nth-child(4) span"},
"grabs": {"defualt_value": 0},
"uploader": {"selector": 'div:nth-child(2) a[href*="userdetails.php"]'},
"report": {"selector": 'div:nth-child(5) a[href*="report.php"]', "attribute": "href"},
},
"result_num": 600,
},
}
def best_time(parse_func):
"""
Run the parser five times and keep the shortest time, reducing the effect
of occasional scheduling jitter on the speed-up check.
"""
elapsed_times = []
result = None
for _ in range(5):
start = time.perf_counter()
result = parse_func()
elapsed_times.append(time.perf_counter() - start)
return min(elapsed_times), result
def parse_with_python():
"""
Force the Rust subtitle parser off and measure the Python fallback path.
"""
# The patched entry point returns None, which SiteSpider treats as "Rust unavailable".
with monkeypatch.context() as patch_context:
patch_context.setattr(rust_accel, "parse_indexer_subtitles", lambda **_kwargs: None)
return SiteSpider(indexer, keyword="Example Show", search_type="subtitles").parse(html)
def parse_with_rust():
"""
Use the Rust subtitle parsing path as configured in production.
"""
return SiteSpider(indexer, keyword="Example Show", search_type="subtitles").parse(html)
monkeypatch.setattr(settings, "RUST_ACCEL", True)
python_time, python_result = best_time(parse_with_python)
rust_time, rust_result = best_time(parse_with_rust)
# Both paths must produce the same rows before their timings are compared.
assert len(rust_result) == len(python_result) == 600
assert rust_result[0] == python_result[0]
assert rust_time * 3 <= python_time, (
f"Rust 字幕解析未达到 3 倍性能要求python={python_time:.6f}s, rust={rust_time:.6f}s"
)
def test_rust_indexer_parser_handles_default_values_and_template_arithmetic():
"""
Rust indexer 解析应支持 defualt_value、Jinja int filter 和模板算术表达式。

View File

@@ -0,0 +1,116 @@
import pytest
from app.api.endpoints.search import _parse_media_type
from app.chain.search import SearchChain
from app.core.context import MediaInfo, SubtitleInfo
from app.schemas.types import MediaType
def test_search_media_type_parser_accepts_agent_values():
    """
    The search entry point should accept both the movie/tv values and the
    Chinese media type values.
    """
    expected_by_input = {
        "movie": MediaType.MOVIE,
        "tv": MediaType.TV,
        "电影": MediaType.MOVIE,
        "电视剧": MediaType.TV,
    }
    for raw_value, expected_type in expected_by_input.items():
        assert _parse_media_type(raw_value) == expected_type
def test_exact_subtitle_match_keeps_same_tv_episode(monkeypatch):
    """
    Exact subtitle search should recognise subtitle names and keep only the
    results that belong to the requested episode of the same show.
    """
    chain = object.__new__(SearchChain)

    def fail_filter(*_args, **_kwargs):
        """
        Exact subtitle search must never call the resource filter rules.
        """
        pytest.fail("字幕精确搜索不应调用过滤规则")

    monkeypatch.setattr(chain, "filter_torrents", fail_filter)

    media_fields = {
        "type": MediaType.TV,
        "title": "Example Show",
        "original_title": "Example Show",
        "en_title": "Example Show",
        "year": "2024",
        "season": 1,
        "names": ["Example Show"],
        "season_years": {1: "2024"},
    }
    mediainfo = MediaInfo(**media_fields)

    # Right episode, wrong episode, wrong season, wrong show - in that order.
    candidate_titles = [
        "Example Show S01E03 1080p WEB-DL CHS",
        "Example Show S01E04 1080p WEB-DL CHS",
        "Example Show S02E03 1080p WEB-DL CHS",
        "Other Show S01E03 1080p WEB-DL CHS",
    ]
    subtitles = [
        SubtitleInfo(site_name="SiteA", title=candidate_title, subtitle_id=str(position))
        for position, candidate_title in enumerate(candidate_titles, start=1)
    ]

    result = chain._SearchChain__parse_subtitle_result(
        subtitles=subtitles,
        mediainfo=mediainfo,
        season_episodes={1: [3]},
        episode=3,
    )

    matched_ids = [item.subtitle_id for item in result]
    assert matched_ids == ["1"]
def test_exact_subtitle_match_uses_file_name_candidate():
    """
    Exact subtitle search should recognise the download file name as well as
    the subtitle title.
    """
    chain = object.__new__(SearchChain)
    media_fields = {
        "type": MediaType.TV,
        "title": "Example Show",
        "original_title": "Example Show",
        "en_title": "Example Show",
        "year": "2024",
        "season": 1,
        "names": ["Example Show"],
        "season_years": {1: "2024"},
    }
    mediainfo = MediaInfo(**media_fields)

    # Both share a title without episode info; only the file names differ.
    file_names = [
        "Example.Show.S01E03.1080p.WEB-DL.CHS.srt",
        "Example.Show.S01E04.1080p.WEB-DL.CHS.srt",
    ]
    subtitles = [
        SubtitleInfo(
            site_name="SiteA",
            title="Example Show subtitle package",
            file_name=file_name,
            subtitle_id=str(position),
        )
        for position, file_name in enumerate(file_names, start=1)
    ]

    result = chain._SearchChain__parse_subtitle_result(
        subtitles=subtitles,
        mediainfo=mediainfo,
        season_episodes={1: [3]},
        episode=3,
    )

    matched_ids = [item.subtitle_id for item in result]
    assert matched_ids == ["1"]
def test_subtitle_search_params_keep_episode():
    """
    Cached parameters of an exact subtitle search should keep the episode so
    the same episode can be searched again after a refresh.
    """
    raw_params = {
        "keyword": "tmdb:123",
        "type": MediaType.TV,
        "season": 1,
        "episode": 3,
        "sites": "1,2",
        "result_type": "subtitle",
    }

    normalized = SearchChain._normalize_search_params(raw_params)

    assert normalized["episode"] == "3"
    assert normalized["result_type"] == "subtitle"