feat:支持指定剧集组识别和刮削

This commit is contained in:
jxxghp
2025-04-03 18:35:02 +08:00
parent 21e120a4f8
commit 078b60cc1e
25 changed files with 256 additions and 100 deletions

View File

@@ -75,8 +75,8 @@ class DoubanModule(_ModuleBase):
def recognize_media(self, meta: MetaBase = None,
mtype: MediaType = None,
doubanid: str = None,
cache: bool = True,
doubanid: Optional[str] = None,
cache: Optional[bool] = True,
**kwargs) -> Optional[MediaInfo]:
"""
识别媒体信息

View File

@@ -1,3 +1,4 @@
import re
from typing import Optional, List, Tuple, Union, Dict
import cn2an
@@ -85,6 +86,7 @@ class TheMovieDbModule(_ModuleBase):
def recognize_media(self, meta: MetaBase = None,
mtype: MediaType = None,
tmdbid: Optional[int] = None,
episode_group: Optional[str] = None,
cache: Optional[bool] = True,
**kwargs) -> Optional[MediaInfo]:
"""
@@ -92,6 +94,7 @@ class TheMovieDbModule(_ModuleBase):
:param meta: 识别的元数据
:param mtype: 识别的媒体类型与tmdbid配套
:param tmdbid: tmdbid
:param episode_group: 剧集组
:param cache: 是否使用缓存
:return: 识别的媒体信息,包括剧集信息
"""
@@ -116,6 +119,11 @@ class TheMovieDbModule(_ModuleBase):
meta.tmdbid = tmdbid
cache_info = self.cache.get(meta)
# 查询剧集组
group_episodes = []
if episode_group:
group_episodes = self.tmdb.get_tv_group_episodes(episode_group)
# 识别匹配
if not cache_info or not cache:
info = None
@@ -143,7 +151,8 @@ class TheMovieDbModule(_ModuleBase):
year=meta.year,
mtype=meta.type,
season_year=meta.year,
season_number=meta.begin_season)
season_number=meta.begin_season,
group_episodes=group_episodes)
if not info:
# 去掉年份再查一次
info = self.tmdb.match(name=name,
@@ -157,7 +166,8 @@ class TheMovieDbModule(_ModuleBase):
if not info:
info = self.tmdb.match(name=name,
year=meta.year,
mtype=MediaType.TV)
mtype=MediaType.TV,
group_episodes=group_episodes)
if not info:
# 去掉年份和类型再查一次
info = self.tmdb.match_multi(name=name)
@@ -207,11 +217,61 @@ class TheMovieDbModule(_ModuleBase):
logger.info(f"{tmdbid} TMDB识别结果{mediainfo.type.value} "
f"{mediainfo.title_year}")
# 补充剧集年份为季年份
# 使用剧集组的集信息和年份
if mediainfo.type == MediaType.TV and mediainfo.episode_groups:
episode_years = self.tmdb.get_tv_episode_years(mediainfo.episode_groups)
if episode_years:
mediainfo.season_years = episode_years
if group_episodes:
# 指定剧集组时
seasons = {}
season_info = []
season_years = {}
for group_episode in group_episodes:
# 季
season = group_episode.get("order")
# 集列表
episodes = group_episode.get("episodes")
if not episodes:
continue
seasons[season] = [ep.get("episode_number") for ep in episodes]
season_info[season] = episodes
# 当前季第一季时间
first_date = episodes[0].get("air_date")
if re.match(r"^\d{4}-\d{2}-\d{2}$", first_date):
season_years[season] = str(first_date).split("-")[0]
# 每季集清单
if seasons:
mediainfo.seasons = seasons
mediainfo.number_of_seasons = len(seasons)
# 每季集详情
if season_info:
mediainfo.season_info = season_info
# 每季年份
if season_years:
mediainfo.season_years = season_years
# 所有剧集组
mediainfo.episode_group = episode_group
mediainfo.episode_groups = group_episodes
else:
# 每季年份
season_years = {}
for group in mediainfo.episode_groups:
if group.get('type') != 6:
# 只处理剧集部分
continue
group_episodes = self.tmdb.get_tv_group_episodes(group.get('id'))
if not group_episodes:
continue
for group_episode in group_episodes:
season = group_episode.get('order')
episodes = group_episode.get('episodes')
if not episodes:
continue
# 当前季第一季时间
first_date = episodes[0].get("air_date")
# 判断是不是日期格式
if re.match(r"^\d{4}-\d{2}-\d{2}$", first_date):
season_years[season] = str(first_date).split("-")[0]
if season_years:
mediainfo.season_years = season_years
return mediainfo
else:
logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息")
@@ -431,13 +491,17 @@ class TheMovieDbModule(_ModuleBase):
return [schemas.TmdbSeason(**season)
for season in tmdb_info.get("seasons", []) if season.get("season_number")]
def tmdb_episodes(self, tmdbid: int, season: int, episode_group: Optional[str] = None) -> List[schemas.TmdbEpisode]:
    """
    Query all episodes of one season of a TMDB series.

    :param tmdbid: TMDB id of the series
    :param season: season number; when ``episode_group`` is given it is
                   matched against each group's ``order`` field
    :param episode_group: optional episode group id; when set, the episodes
                          are taken from that group instead of the regular
                          season detail
    :return: list of episodes, empty when nothing was found
    """
    if episode_group:
        # get_tv_group_episodes returns a list of groups (one entry per
        # "season" of the episode group), not a single season dict, so pick
        # the entry whose order equals the requested season.
        groups = self.tmdb.get_tv_group_episodes(episode_group) or []
        season_info = next(
            (group for group in groups if group.get("order") == season),
            None,
        )
    else:
        season_info = self.tmdb.get_tv_season_detail(tmdbid=tmdbid, season=season)
    if not season_info or not season_info.get("episodes"):
        return []
    return [schemas.TmdbEpisode(**episode) for episode in season_info.get("episodes")]

View File

@@ -32,7 +32,10 @@ class TmdbScraper:
else:
if season is not None:
# 查询季信息
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
if mediainfo.episode_group:
seasoninfo = self.tmdb.get_tv_group_episodes(mediainfo.episode_group)
else:
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
if episode:
# 集元数据文件
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
@@ -61,7 +64,10 @@ class TmdbScraper:
# 只需要集的图片
if episode:
# 集的图片
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
if mediainfo.episode_group:
seasoninfo = self.tmdb.get_tv_group_episodes(mediainfo.episode_group)
else:
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
if seasoninfo:
episodeinfo = self.__get_episode_detail(seasoninfo, episode)
if episodeinfo and episodeinfo.get("still_path"):

View File

@@ -1,3 +1,4 @@
import re
import traceback
from typing import Optional, List
from urllib.parse import quote
@@ -187,7 +188,8 @@ class TmdbApi:
mtype: MediaType,
year: Optional[str] = None,
season_year: Optional[str] = None,
season_number: Optional[int] = None) -> Optional[dict]:
season_number: Optional[int] = None,
group_episodes: Optional[List[dict]] = None) -> Optional[dict]:
"""
搜索tmdb中的媒体信息匹配返回一条尽可能正确的信息
:param name: 检索的名称
@@ -195,6 +197,7 @@ class TmdbApi:
:param year: 年份,如要是季集需要是首播年份(first_air_date)
:param season_year: 当前季集年份
:param season_number: 季集,整数
:param group_episodes: 集数组信息
:return: TMDB的INFO同时会将mtype赋值到media_type中
"""
if not self.search:
@@ -222,7 +225,8 @@ class TmdbApi:
f"正在识别{mtype.value}{name}, 季集={season_number}, 季集年份={season_year} ...")
info = self.__search_tv_by_season(name,
season_year,
season_number)
season_number,
group_episodes)
if not info:
year_range = [year]
if year:
@@ -332,12 +336,14 @@ class TmdbApi:
return tv
return {}
def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]:
def __search_tv_by_season(self, name: str, season_year: str, season_number: int,
group_episodes: Optional[List[dict]] = None) -> Optional[dict]:
"""
根据电视剧的名称和季的年份及序号匹配TMDB
:param name: 识别的文件名或者种子名
:param season_year: 季的年份
:param season_number: 季序号
:param group_episodes: 集数组信息
:return: 匹配的媒体信息
"""
@@ -345,12 +351,25 @@ class TmdbApi:
if not tv_info:
return False
try:
seasons = self.__get_tv_seasons(tv_info)
for season, season_info in seasons.items():
if season_info.get("air_date"):
if season_info.get("air_date")[0:4] == str(_season_year) \
and season == int(season_number):
return True
if group_episodes:
for group_episode in group_episodes:
season = group_episode.get('order')
if season != season_number:
continue
episodes = group_episode.get('episodes')
if not episodes:
continue
first_date = episodes[0].get("air_date")
if re.match(r"^\d{4}-\d{2}-\d{2}$", first_date):
if str(_season_year) == str(first_date).split("-")[0]:
return True
else:
seasons = self.__get_tv_seasons(tv_info)
for season, season_info in seasons.items():
if season_info.get("air_date"):
if season_info.get("air_date")[0:4] == str(_season_year) \
and season == int(season_number):
return True
except Exception as e1:
logger.error(f"连接TMDB出错{e1}")
print(traceback.format_exc())
@@ -1317,6 +1336,19 @@ class TmdbApi:
logger.error(str(e))
return []
def get_tv_group_episodes(self, group_id: str) -> List[dict]:
    """
    Fetch the episode listing of a TV episode group.

    :param group_id: id of the episode group
    :return: the result of ``self.tv.group_episodes``, or an empty list when
             no TV client is available, the result is empty, or the call
             raises
    """
    if self.tv:
        try:
            logger.debug(f"正在获取剧集组:{group_id}...")
            fetched = self.tv.group_episodes(group_id)
            return fetched if fetched else []
        except Exception as err:
            # Best effort: log the failure and fall through to the empty result
            logger.error(str(err))
    return []
def get_person_detail(self, person_id: int) -> dict:
"""
获取人物详情
@@ -1377,37 +1409,6 @@ class TmdbApi:
"""
self.tmdb.cache_clear()
def get_tv_episode_years(self, episode_groups: List[dict]) -> dict:
    """
    Build a mapping of season order to first-air year from episode groups.

    Only groups whose ``type`` equals 6 are queried. For every sub-group
    returned by ``self.tv.group_episodes`` the air date of its first episode
    supplies the year.

    :param episode_groups: episode group entries, each read for ``type`` and ``id``
    :return: dict keyed by the sub-group ``order`` with the year as a string;
             empty when there is no input, no usable date, or an error occurs
    """
    try:
        if not episode_groups:
            return {}
        episode_years = {}
        for episode_group in episode_groups:
            if episode_group.get('type') != 6:
                # Only handle the episode part
                continue
            logger.debug(f"正在获取剧集组年份:{episode_group.get('id')}...")
            group_episodes = self.tv.group_episodes(episode_group.get('id'))
            if not group_episodes:
                continue
            for group_episode in group_episodes:
                order = group_episode.get('order')
                episodes = group_episode.get('episodes')
                if not episodes:
                    continue
                # Air date of the first episode of this season
                first_date = episodes[0].get("air_date")
                # Skip missing dates and anything that is not made of three
                # dash-separated parts; the previous check compared the split
                # list itself to 3, so malformed dates were never rejected.
                date_parts = str(first_date).split("-") if first_date else []
                if len(date_parts) != 3:
                    continue
                episode_years[order] = date_parts[0]
        return episode_years
    except Exception as e:
        logger.error(str(e))
        return {}
def close(self):
"""
关闭连接