diff --git a/app/api/endpoints/search.py b/app/api/endpoints/search.py index 72830ba2..43ea280a 100644 --- a/app/api/endpoints/search.py +++ b/app/api/endpoints/search.py @@ -29,6 +29,15 @@ def _parse_site_list(sites: Optional[str]) -> Optional[List[int]]: return [int(site) for site in sites.split(",") if site] if sites else None +def _parse_media_type(mtype: Optional[str]) -> Optional[MediaType]: + """ + 解析媒体类型,兼容前端和 Agent 使用的 movie/tv 取值。 + """ + if not mtype: + return None + return MediaType.from_agent(mtype) or MediaType(mtype) + + def _sse_event(data: dict) -> str: """ 转换为SSE事件 @@ -180,7 +189,7 @@ async def search_by_id_stream( 根据TMDBID/豆瓣ID渐进式搜索站点资源,返回格式为SSE """ - media_type = MediaType(mtype) if mtype else None + media_type = _parse_media_type(mtype) media_season = int(season) if season else None site_list = _parse_site_list(sites) media_chain = MediaChain() @@ -386,10 +395,7 @@ async def search_by_id( """ 根据TMDBID/豆瓣ID精确搜索站点资源 tmdb:/douban:/bangumi: """ - if mtype: - media_type = MediaType(mtype) - else: - media_type = None + media_type = _parse_media_type(mtype) if season: media_season = int(season) else: @@ -643,6 +649,193 @@ async def search_subtitle_by_title( ) +async def _build_subtitle_search_source( + mediaid: str, + mtype: Optional[str] = None, + title: Optional[str] = None, + year: Optional[str] = None, + season: Optional[str] = None, + episode: Optional[str] = None, + sites: Optional[str] = None, + stream: bool = False, +) -> Any: + """ + 根据媒体ID构建字幕精确搜索调用,兼容多种媒体ID来源。 + """ + media_type = _parse_media_type(mtype) + media_season = int(season) if season else None + media_episode = int(episode) if episode else None + site_list = _parse_site_list(sites) + media_chain = MediaChain() + search_chain = SearchChain() + + def call_search(**kwargs): + """ + 根据调用模式返回普通搜索协程或流式搜索迭代器。 + """ + params = { + **kwargs, + "mtype": media_type, + "season": media_season, + "episode": media_episode, + "sites": site_list, + "cache_local": True, + } + if stream: + return search_chain.async_search_subtitles_by_id_stream(**params) + return search_chain.async_search_subtitles_by_id(**params) + + if mediaid.startswith("tmdb:"): + tmdbid = int(mediaid.replace("tmdb:", "")) + if settings.RECOGNIZE_SOURCE == "douban": + doubaninfo = await media_chain.async_get_doubaninfo_by_tmdbid( + tmdbid=tmdbid, mtype=media_type + ) + if not doubaninfo: + return None, "未识别到豆瓣媒体信息" + return call_search(doubanid=doubaninfo.get("id")), "" + return call_search(tmdbid=tmdbid), "" + + if mediaid.startswith("douban:"): + doubanid = mediaid.replace("douban:", "") + if settings.RECOGNIZE_SOURCE == "themoviedb": + tmdbinfo = await media_chain.async_get_tmdbinfo_by_doubanid( + doubanid=doubanid, mtype=media_type + ) + if not tmdbinfo: + return None, "未识别到TMDB媒体信息" + if tmdbinfo.get("season") and not media_season: + media_season = tmdbinfo.get("season") + return call_search(tmdbid=tmdbinfo.get("id")), "" + return call_search(doubanid=doubanid), "" + + if mediaid.startswith("bangumi:"): + bangumiid = int(mediaid.replace("bangumi:", "")) + if settings.RECOGNIZE_SOURCE == "themoviedb": + tmdbinfo = await media_chain.async_get_tmdbinfo_by_bangumiid( + bangumiid=bangumiid + ) + if not tmdbinfo: + return None, "未识别到TMDB媒体信息" + return call_search(tmdbid=tmdbinfo.get("id")), "" + doubaninfo = await media_chain.async_get_doubaninfo_by_bangumiid( + bangumiid=bangumiid + ) + if not doubaninfo: + return None, "未识别到豆瓣媒体信息" + return call_search(doubanid=doubaninfo.get("id")), "" + + event_data = MediaRecognizeConvertEventData( + mediaid=mediaid, convert_type=settings.RECOGNIZE_SOURCE + ) + event = await eventmanager.async_send_event( + ChainEventType.MediaRecognizeConvert, event_data + ) + if event and event.event_data and event.event_data.media_dict: + event_data = event.event_data + search_id = event_data.media_dict.get("id") + if event_data.convert_type == "themoviedb": + return call_search(tmdbid=search_id), "" + if event_data.convert_type == "douban": + return call_search(doubanid=search_id), "" + + if not title: + return None, "未知的媒体ID" + + meta = MetaInfo(title) + if year: + meta.year = year + if media_type: + meta.type = media_type + if media_season: + meta.type = MediaType.TV + meta.begin_season = media_season + mediainfo = await media_chain.async_recognize_by_meta( + meta, + obtain_images=False, + ) + if not mediainfo: + return None, "未识别到媒体信息" + if settings.RECOGNIZE_SOURCE == "themoviedb": + return call_search(tmdbid=mediainfo.tmdb_id), "" + return call_search(doubanid=mediainfo.douban_id), "" + + +@router.get("/subtitle/media/{mediaid}/stream", summary="渐进式精确搜索字幕") +async def search_subtitle_by_id_stream( + request: Request, + mediaid: str, + mtype: Optional[str] = None, + title: Optional[str] = None, + year: Optional[str] = None, + season: Optional[str] = None, + episode: Optional[str] = None, + sites: Optional[str] = None, + _: schemas.TokenPayload = Depends(verify_resource_token), +) -> Any: + """ + 根据TMDBID/豆瓣ID渐进式精确搜索站点字幕资源,返回格式为SSE。 + """ + subtitles, message = await _build_subtitle_search_source( + mediaid=mediaid, + mtype=mtype, + title=title, + year=year, + season=season, + episode=episode, + sites=sites, + stream=True, + ) + + async def event_source(): + """ + 输出字幕精确搜索流事件。 + """ + if not subtitles: + yield {"type": "error", "success": False, "message": message or "未搜索到任何字幕"} + return + async for event in subtitles: + yield event + + return StreamingResponse( + _stream_search_events(request, event_source()), media_type="text/event-stream" + ) + + +@router.get("/subtitle/media/{mediaid}", summary="精确搜索字幕", response_model=schemas.Response) +async def search_subtitle_by_id( + mediaid: str, + mtype: Optional[str] = None, + title: Optional[str] = None, + year: Optional[str] = None, + season: Optional[str] = None, + episode: Optional[str] = None, + sites: Optional[str] = None, + _: schemas.TokenPayload = Depends(verify_token), +) -> Any: + """ + 根据TMDBID/豆瓣ID精确搜索站点字幕资源。 + """ + subtitles, message = await _build_subtitle_search_source( + mediaid=mediaid, + mtype=mtype, + title=title, + year=year, + season=season, + episode=episode, + sites=sites, + ) + if not subtitles: + return schemas.Response(success=False, message=message or "未搜索到任何字幕") + + subtitles = await subtitles + if not subtitles: + return schemas.Response(success=False, message="未搜索到任何字幕") + return schemas.Response( + success=True, data=[subtitle.to_dict() for subtitle in subtitles] + ) + + @router.post("/recommend", summary="AI推荐资源", response_model=schemas.Response) async def recommend_search_results( filtered_indices: Optional[List[int]] = Body( diff --git a/app/chain/search.py b/app/chain/search.py index 7d04780b..0d9d1b44 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -204,6 +204,7 @@ class SearchChain(ChainBase): "title": str(params.get("title") or ""), "year": str(params.get("year") or ""), "season": str(params.get("season") or ""), + "episode": str(params.get("episode") or ""), "sites": str(params.get("sites") or ""), "result_type": str(params.get("result_type") or "torrent"), } @@ -218,6 +219,7 @@ class SearchChain(ChainBase): title: Optional[str] = None, year: Optional[str] = None, season: Optional[int] = None, + episode: Optional[int] = None, sites: Optional[List[int]] = None, result_type: Optional[str] = "torrent", ) -> None: @@ -232,6 +234,7 @@ class SearchChain(ChainBase): "title": title, "year": year, "season": season, + "episode": episode, "sites": self._stringify_sites(sites), "result_type": result_type or "torrent", } @@ -248,6 +251,7 @@ class SearchChain(ChainBase): title: Optional[str] = None, year: Optional[str] = None, season: Optional[int] = None, + episode: Optional[int] = None, sites: Optional[List[int]] = None, result_type: Optional[str] = "torrent", ) -> None: @@ -262,6 +266,7 @@ class SearchChain(ChainBase): "title": title, "year": year, "season": season, + "episode": episode, "sites": self._stringify_sites(sites), "result_type": result_type or "torrent", } @@ -650,6 +655,101 @@ class SearchChain(ChainBase): "total_items": len(subtitles) } + async def async_search_subtitles_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None, + mtype: MediaType = None, season: Optional[int] = None, + episode: Optional[int] = None, sites: List[int] = None, + cache_local: bool = False) -> List[SubtitleInfo]: + """ + 根据TMDBID/豆瓣ID异步精确搜索字幕,不应用过滤规则。 + :param tmdbid: TMDB ID + :param doubanid: 豆瓣 ID + :param mtype: 媒体,电影 or 电视剧 + :param season: 季数 + :param episode: 集数 + :param sites: 站点ID列表 + :param cache_local: 是否缓存到本地 + """ + if cache_local: + self.cancel_ai_recommend() + await self.async_save_last_search_params( + keyword=self._build_search_keyword(tmdbid=tmdbid, doubanid=doubanid), + mtype=mtype, + area="title", + season=season, + episode=episode, + sites=sites, + result_type="subtitle", + ) + mediainfo = await self.async_recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype) + if not mediainfo: + logger.error(f'{tmdbid} 媒体信息识别失败!') + return [] + subtitles = await self.__async_search_subtitles_for_media( + mediainfo=mediainfo, + tmdbid=tmdbid, + doubanid=doubanid, + season=season, + episode=episode, + sites=sites, + ) + if cache_local: + await self.async_save_cache(subtitles, self.__subtitle_result_temp_file) + return subtitles + + async def async_search_subtitles_by_id_stream( + self, + tmdbid: Optional[int] = None, + doubanid: Optional[str] = None, + mtype: MediaType = None, + season: Optional[int] = None, + episode: Optional[int] = None, + sites: List[int] = None, + cache_local: bool = False, + ) -> AsyncIterator[dict]: + """ + 根据TMDBID/豆瓣ID渐进式精确搜索字幕,先返回站点候选,再返回标题和剧集匹配后的结果。 + """ + if cache_local: + self.cancel_ai_recommend() + await self.async_save_last_search_params( + keyword=self._build_search_keyword(tmdbid=tmdbid, doubanid=doubanid), + mtype=mtype, + area="title", + season=season, + episode=episode, + sites=sites, + result_type="subtitle", + ) + mediainfo = await self.async_recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype) + if not mediainfo: + logger.error(f'{tmdbid} 媒体信息识别失败!') + yield { + "type": "error", + "success": False, + "message": "媒体信息识别失败" + } + return + + subtitles: List[SubtitleInfo] = [] + async for event in self.__async_search_subtitles_for_media_stream( + mediainfo=mediainfo, + tmdbid=tmdbid, + doubanid=doubanid, + season=season, + episode=episode, + sites=sites): + if event.get("type") == "done": + subtitles = event.get("subtitles") or [] + event = { + key: value + for key, value in event.items() + if key != "subtitles" + } + yield event + + if cache_local: + await self.async_save_cache(subtitles, self.__subtitle_result_temp_file) + async def async_search_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None, mtype: MediaType = None, area: Optional[str] = "title", season: Optional[int] = None, sites: List[int] = None, cache_local: bool = False) -> List[Context]: @@ -1341,6 +1441,321 @@ class SearchChain(ChainBase): "contexts": contexts } + @staticmethod + def __build_subtitle_season_episodes(mediainfo: MediaInfo, + season: Optional[int] = None, + episode: Optional[int] = None) -> Optional[Dict[int, List[int]]]: + """ + 构造字幕匹配用季集约束,未指定集数时只约束到同一季。 + """ + if mediainfo.type != MediaType.TV: + return None + media_season = season if season is not None else mediainfo.season + if media_season is None: + return None + return {media_season: [episode] if episode is not None else []} + + @staticmethod + def __build_subtitle_torrent(subtitle: SubtitleInfo, title: Optional[str] = None) -> TorrentInfo: + """ + 将字幕结果转换为轻量资源对象,复用既有标题匹配逻辑。 + """ + return TorrentInfo( + site=subtitle.site, + site_name=subtitle.site_name, + site_cookie=subtitle.site_cookie, + site_ua=subtitle.site_ua, + site_proxy=subtitle.site_proxy, + site_order=subtitle.site_order, + title=title or subtitle.title or subtitle.file_name, + description=subtitle.description, + enclosure=subtitle.enclosure, + page_url=subtitle.page_url, + size=subtitle.size, + grabs=subtitle.grabs, + pubdate=subtitle.pubdate, + date_elapsed=subtitle.date_elapsed, + ) + + @staticmethod + def __build_subtitle_names(subtitle: SubtitleInfo) -> List[str]: + """ + 提取字幕标题和下载文件名,作为精确匹配的名称候选。 + """ + return list(dict.fromkeys( + name.strip() + for name in (subtitle.title, subtitle.file_name) + if name and name.strip() + )) + + @staticmethod + def __build_subtitle_meta(title: str, + subtitle: SubtitleInfo, + custom_words: Optional[List[str]] = None) -> MetaInfo: + """ + 识别字幕名称。 + """ + return MetaInfo( + title=title, + subtitle=subtitle.description, + custom_words=custom_words, + ) + + @staticmethod + def __match_subtitle_episode(meta: MetaInfo, + season_episodes: Optional[Dict[int, List[int]]], + episode: Optional[int] = None) -> bool: + """ + 判断字幕识别出的季集是否落在目标媒体季集内。 + """ + if not season_episodes: + return True + subtitle_torrent = TorrentInfo(title=meta.org_string) + if not TorrentHelper.match_season_episodes( + torrent=subtitle_torrent, + meta=meta, + season_episodes=season_episodes): + return False + if episode is not None: + return bool(meta.episode_list) and episode in meta.episode_list + return True + + def __parse_subtitle_result(self, + subtitles: List[SubtitleInfo], + mediainfo: MediaInfo, + keyword: Optional[str] = None, + season_episodes: Optional[Dict[int, List[int]]] = None, + episode: Optional[int] = None, + custom_words: Optional[List[str]] = None) -> List[SubtitleInfo]: + """ + 识别并精确匹配字幕搜索结果,不使用任何过滤规则。 + """ + if not subtitles: + logger.warn(f'{keyword or mediainfo.title} 未搜索到字幕') + return [] + + match_subtitles = [] + logger.info(f"开始匹配字幕 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}") + for subtitle in subtitles: + if global_vars.is_system_stopped: + break + subtitle_names = self.__build_subtitle_names(subtitle) + if not subtitle_names: + continue + + for subtitle_name in subtitle_names: + subtitle_meta = self.__build_subtitle_meta( + title=subtitle_name, + subtitle=subtitle, + custom_words=custom_words, + ) + if not self.__match_subtitle_episode( + meta=subtitle_meta, + season_episodes=season_episodes, + episode=episode): + continue + + subtitle_torrent = self.__build_subtitle_torrent( + subtitle=subtitle, + title=subtitle_name, + ) + if TorrentHelper.match_torrent( + mediainfo=mediainfo, + torrent_meta=subtitle_meta, + torrent=subtitle_torrent): + match_subtitles.append(subtitle) + break + + logger.info(f"字幕匹配完成,共匹配到 {len(match_subtitles)} 个字幕") + return self.__remove_duplicate_subtitles(match_subtitles) + + @staticmethod + def __remove_duplicate_subtitles(subtitles: List[SubtitleInfo]) -> List[SubtitleInfo]: + """ + 去除重复的字幕结果。 + """ + return list({ + f"{subtitle.site_name}_{subtitle.torrent_id}_{subtitle.subtitle_id}_{subtitle.title}_{subtitle.enclosure}": subtitle + for subtitle in subtitles + }.values()) + + async def __async_search_subtitles_for_media(self, + mediainfo: MediaInfo, + tmdbid: Optional[int] = None, + doubanid: Optional[str] = None, + season: Optional[int] = None, + episode: Optional[int] = None, + sites: List[int] = None, + custom_words: List[str] = None) -> List[SubtitleInfo]: + """ + 根据媒体信息搜索并精确匹配字幕结果。 + """ + if not mediainfo.tmdb_id: + meta = MetaInfo(title=mediainfo.title) + mediainfo.title = meta.name + mediainfo.season = meta.begin_season + logger.info(f'开始精确搜索字幕,关键词:{mediainfo.title} ...') + + if not mediainfo.names: + mediainfo = await self.async_recognize_media(mtype=mediainfo.type, + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) + if not mediainfo: + logger.error('媒体信息识别失败!') + return [] + + no_exists = None + if season is not None: + no_exists = { + tmdbid or doubanid: { + season: NotExistMediaInfo(episodes=[episode] if episode is not None else []) + } + } + season_episodes, keywords = self.__prepare_params( + mediainfo=mediainfo, + no_exists=no_exists, + ) + season_episodes = self.__build_subtitle_season_episodes( + mediainfo=mediainfo, + season=season, + episode=episode, + ) or season_episodes + + subtitles: List[SubtitleInfo] = [] + search_count = 0 + for search_word in keywords: + if search_count > 0: + logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...") + await asyncio.sleep(random.randint(1, 10)) + subtitles.extend( + await self.__async_search_subtitles_all_sites( + keyword=search_word, + sites=sites, + ) or [] + ) + search_count += 1 + if not settings.SEARCH_MULTIPLE_NAME and subtitles: + logger.info(f"共搜索到 {len(subtitles)} 个字幕,停止搜索") + break + + return await run_in_threadpool( + self.__parse_subtitle_result, + subtitles=subtitles, + mediainfo=mediainfo, + keyword=mediainfo.title, + season_episodes=season_episodes, + episode=episode, + custom_words=custom_words, + ) + + async def __async_search_subtitles_for_media_stream( + self, + mediainfo: MediaInfo, + tmdbid: Optional[int] = None, + doubanid: Optional[str] = None, + season: Optional[int] = None, + episode: Optional[int] = None, + sites: List[int] = None, + custom_words: List[str] = None, + ) -> AsyncIterator[Dict[str, Any]]: + """ + 根据媒体信息渐进式搜索并精确匹配字幕结果。 + """ + if not mediainfo.tmdb_id: + meta = MetaInfo(title=mediainfo.title) + mediainfo.title = meta.name + mediainfo.season = meta.begin_season + logger.info(f'开始渐进式精确搜索字幕,关键词:{mediainfo.title} ...') + + if not mediainfo.names: + mediainfo = await self.async_recognize_media(mtype=mediainfo.type, + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) + if not mediainfo: + logger.error('媒体信息识别失败!') + yield { + "type": "error", + "success": False, + "message": "媒体信息识别失败" + } + return + + no_exists = None + if season is not None: + no_exists = { + tmdbid or doubanid: { + season: NotExistMediaInfo(episodes=[episode] if episode is not None else []) + } + } + season_episodes, keywords = self.__prepare_params( + mediainfo=mediainfo, + no_exists=no_exists, + ) + season_episodes = self.__build_subtitle_season_episodes( + mediainfo=mediainfo, + season=season, + episode=episode, + ) or season_episodes + + subtitles: List[SubtitleInfo] = [] + search_count = 0 + for search_word in keywords: + if search_count > 0: + logger.info(f"已搜索 {search_count} 次,强制休眠 1-10 秒 ...") + await asyncio.sleep(random.randint(1, 10)) + + async for event in self.__async_search_subtitles_all_sites_stream( + keyword=search_word, + sites=sites): + result = event.pop("items", []) or [] + subtitles.extend(result) + yield { + **event, + "type": "append", + "stage": "searching", + "items": [subtitle.to_dict() for subtitle in result], + "total_items": len(subtitles) + } + + search_count += 1 + if not settings.SEARCH_MULTIPLE_NAME and subtitles: + logger.info(f"共搜索到 {len(subtitles)} 个字幕,停止搜索") + break + + yield { + "type": "progress", + "stage": "filtering", + "value": 98, + "text": f"正在识别匹配 {len(subtitles)} 个候选字幕 ..." + } + + match_subtitles = await run_in_threadpool( + self.__parse_subtitle_result, + subtitles=subtitles, + mediainfo=mediainfo, + keyword=mediainfo.title, + season_episodes=season_episodes, + episode=episode, + custom_words=custom_words, + ) + final_items = [subtitle.to_dict() for subtitle in match_subtitles] + yield { + "type": "replace", + "stage": "filtered", + "value": 100, + "text": f"识别匹配完成,共 {len(match_subtitles)} 个字幕", + "items": final_items, + "total_items": len(match_subtitles) + } + yield { + "type": "done", + "stage": "done", + "text": f"搜索完成,共 {len(match_subtitles)} 个字幕", + "items": final_items, + "total_items": len(match_subtitles), + "subtitles": match_subtitles + } + def __search_all_sites(self, keyword: str, mediainfo: Optional[MediaInfo] = None, sites: List[int] = None, diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index 64d9f364..09f87fbd 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -866,7 +866,17 @@ class SiteSpider: self.is_error = True return [] - if self.search_type != "subtitles": + if self.search_type == "subtitles": + rust_subtitles = rust_accel.parse_indexer_subtitles( + html_text=html_text, + domain=self.domain, + list_config=self.list, + fields=self.fields, + result_num=self.result_num + ) + if rust_subtitles is not None: + return rust_subtitles + else: rust_torrents = rust_accel.parse_indexer_torrents( html_text=html_text, domain=self.domain, diff --git a/app/utils/rust_accel.py b/app/utils/rust_accel.py index cd43f32d..448fd7dd 100644 --- a/app/utils/rust_accel.py +++ b/app/utils/rust_accel.py @@ -118,6 +118,32 @@ def parse_indexer_torrents( return None +def parse_indexer_subtitles( + html_text: str, + domain: str, + list_config: dict, + fields: dict, + result_num: int = 100 +) -> Optional[List[dict]]: + """ + 使用 Rust 批量解析普通配置站点字幕列表,不可用时返回 None。 + """ + if not is_enabled(): + return None + try: + return _moviepilot_rust.parse_indexer_subtitles_fast( + html_text, + domain, + list_config, + fields, + result_num + ) + except BaseException as err: + _raise_non_rust_panic(err) + logger.debug(f"Rust 字幕列表解析失败,使用 Python 解析兜底:{err}") + return None + + def parse_rss_items(xml_text: str, max_items: int = 1000) -> Optional[List[dict]]: """ 使用 Rust 解析 RSS/Atom 条目,不可用或异常时返回 None。 diff --git a/docs/mcp-api.md b/docs/mcp-api.md index 006429fa..126353ac 100644 --- a/docs/mcp-api.md +++ b/docs/mcp-api.md @@ -88,6 +88,8 @@ MoviePilot 也提供普通 REST API 给前端和自动化客户端使用。所 | GET | `/api/v1/search/title/stream` | 按关键字渐进式搜索站点种子资源,返回 SSE,参数:`keyword`、`page`、`sites` | | GET | `/api/v1/search/subtitle/title` | 按关键字搜索站点字幕资源,参数:`keyword`、`page`、`sites` | | GET | `/api/v1/search/subtitle/title/stream` | 按关键字渐进式搜索站点字幕资源,返回 SSE,参数:`keyword`、`page`、`sites` | +| GET | `/api/v1/search/subtitle/media/{mediaid}` | 按媒体 ID 精确搜索站点字幕资源,`mediaid` 支持 `tmdb:123`、`douban:123`、`bangumi:123`,参数:`mtype`、`title`、`year`、`season`、`episode`、`sites` | +| GET | `/api/v1/search/subtitle/media/{mediaid}/stream` | 按媒体 ID 渐进式精确搜索站点字幕资源,返回 SSE,参数同上 | | GET | `/api/v1/search/last` | 获取上一次种子搜索结果 | | GET | `/api/v1/search/last/context` | 获取上一次搜索结果及可复用搜索参数,`params.result_type` 为 `torrent` 或 `subtitle` | | POST | `/api/v1/search/recommend` | 获取 AI 推荐资源,请求体:`filtered_indices`、`check_only`、`force` | diff --git a/skills/moviepilot-api/SKILL.md b/skills/moviepilot-api/SKILL.md index 5475a1bc..7da30dbc 100644 --- a/skills/moviepilot-api/SKILL.md +++ b/skills/moviepilot-api/SKILL.md @@ -107,7 +107,7 @@ All endpoints are under the base URL `{MP_HOST}`. Path parameters are shown as ` | GET | `/api/v1/bangumi/person/{person_id}` | Person detail | | GET | `/api/v1/bangumi/person/credits/{person_id}` | Person filmography. Params: `page`, `count` | -### Search / Torrents / Subtitles (9 endpoints) +### Search / Torrents / Subtitles (11 endpoints) | Method | Path | Description | |--------|------|-------------| @@ -117,6 +117,8 @@ All endpoints are under the base URL `{MP_HOST}`. Path parameters are shown as ` | GET | `/api/v1/search/title/stream` | Stream fuzzy torrent search with SSE. Params: `keyword`, `page`, `sites` | | GET | `/api/v1/search/subtitle/title` | Fuzzy search site subtitles by keyword. Params: `keyword`, `page`, `sites` | | GET | `/api/v1/search/subtitle/title/stream` | Stream fuzzy site subtitle search with SSE. Params: `keyword`, `page`, `sites` | +| GET | `/api/v1/search/subtitle/media/{mediaid}` | Exact subtitle search by media ID (format: `tmdb:123` / `douban:123` / `bangumi:123`). Params: `mtype`, `title`, `year`, `season`, `episode`, `sites` | +| GET | `/api/v1/search/subtitle/media/{mediaid}/stream` | Stream exact subtitle search by media ID with SSE. Params: `mtype`, `title`, `year`, `season`, `episode`, `sites` | | GET | `/api/v1/search/last` | Get latest search results | | GET | `/api/v1/search/last/context` | Get latest search results with replayable params. `params.result_type` is `torrent` or `subtitle` | | POST | `/api/v1/search/recommend` | AI recommended resources. Body: `filtered_indices`, `check_only`, `force` | diff --git a/tests/test_rust_accel.py b/tests/test_rust_accel.py index 8c5d19fc..4c6774ab 100644 --- a/tests/test_rust_accel.py +++ b/tests/test_rust_accel.py @@ -1,3 +1,5 @@ +import os +import time from datetime import datetime, timezone from types import SimpleNamespace @@ -389,6 +391,136 @@ def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links(): }] +def test_rust_indexer_subtitle_parser_dispatches_to_extension(monkeypatch): + """ + Rust 字幕解析入口应将站点配置透传给扩展函数。 + """ + calls = [] + expected = [{"title": "Green Snake"}] + + def fake_parse_indexer_subtitles_fast(html_text, domain, list_config, fields, result_num): + """ + 记录字幕解析扩展入口调用参数。 + """ + calls.append((html_text, domain, list_config, fields, result_num)) + return expected + + fake_extension = SimpleNamespace( + is_available=lambda: True, + parse_indexer_subtitles_fast=fake_parse_indexer_subtitles_fast, + ) + monkeypatch.setattr(rust_accel, "_moviepilot_rust", fake_extension) + fields = { + "language_icon": {"selector": "div:nth-child(1) img", "attribute": "src"}, + "title": {"selector": 'div:nth-child(2) a[href*="downloadsubs.php"]'}, + } + list_config = {"selector": "#subtitles-table > div"} + + result = rust_accel.parse_indexer_subtitles( + html_text="
", + domain="https://hhanclub.net/", + list_config=list_config, + fields=fields, + result_num=100, + ) + + assert result == expected + assert calls == [("", "https://hhanclub.net/", list_config, fields, 100)] + + +@pytest.mark.skipif( + os.environ.get("MP_RUST_PERF_TEST") != "1", + reason="性能测试仅在显式开启 MP_RUST_PERF_TEST=1 时运行", +) +def test_rust_subtitle_parser_is_several_times_faster_than_python(monkeypatch): + """ + Rust 字幕解析在生产 SiteSpider 路径下应显著快于 Python 兜底解析。 + """ + if not hasattr(rust_accel._moviepilot_rust, "parse_indexer_subtitles_fast"): + pytest.skip("当前 Rust 扩展未包含字幕解析入口") + + def subtitle_row(index: int) -> str: + """ + 构造憨憨新版字幕卡片行,放大样本以稳定性能对比。 + """ + return f""" + + """ + + html = f'