From de4dbf283b43d0af19068b8c1451ac8ac68ffc30 Mon Sep 17 00:00:00 2001 From: DDSRem <1448139087@qq.com> Date: Wed, 25 Mar 2026 09:12:23 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=87=E4=BB=B6=E5=90=8D=E4=B8=BA?= =?UTF-8?q?=E8=BE=85=E5=8A=A9=E4=B8=AD=E6=96=87=E6=A0=87=E7=AD=BE=E6=97=B6?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=E7=88=B6=E7=9B=AE=E5=BD=95=E6=A0=87=E9=A2=98?= =?UTF-8?q?=E8=AF=86=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当文件名(stem)为纯中文压制/字幕辅助标签(如"简英双语特效")且父目录包含 拉丁片名时,清空文件元数据的标题信息,改由父目录标题合并填充,避免识别失败。 新增 infopath 模块集中管理辅助标签判断逻辑与关键词正则。 --- app/core/meta/infopath.py | 41 ++++++++++++++++++++++++ app/core/metainfo.py | 6 ++++ tests/test_metainfo.py | 67 +++++++++++++++++++++++++++++++++------ 3 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 app/core/meta/infopath.py diff --git a/app/core/meta/infopath.py b/app/core/meta/infopath.py new file mode 100644 index 00000000..4d679473 --- /dev/null +++ b/app/core/meta/infopath.py @@ -0,0 +1,41 @@ +import regex as re + +from app.core.meta.metabase import MetaBase +from app.utils.string import StringUtils + +AUXILIARY_CN_STEM_FULLMATCH_RE = re.compile( + r"^(双语|字幕|特效|内封|外挂|官译|简体|繁体|繁中|简中|中英|简英|多语|" + r"国英|台粤|音轨|评论|国配|台配|粤语|韩语|日语|杜比|全景声|无损|中字|" + r"国语|原声)+$" +) + + +def should_use_parent_title_for_file_stem( + stem: str, parent_dir_name: str, file_meta: MetaBase +) -> bool: + """ + 文件名(无后缀)是否仅为简繁体/字幕/特效等辅助说明,应改用父目录标题识别。 + 要求: + - stem 纯中文且能被辅助关键词完全覆盖(无残留有意义汉字) + - 父目录含拉丁字母,避免纯中文资源目录误把正片中文名当标签清空 + """ + if not file_meta.isfile or not stem or not parent_dir_name: + return False + if file_meta.tmdbid or file_meta.doubanid: + return False + if not re.search(r"[A-Za-z]{2,}", parent_dir_name): + return False + if not StringUtils.is_all_chinese(stem): + return False + if len(stem) > 16: + return False + if not AUXILIARY_CN_STEM_FULLMATCH_RE.match(stem): + return False + if re.search(r"[第共]\s*[0-9一二三四五六七八九十百零]+\s*[季集话話]", stem): + return False + return True + + +def clear_parsed_title_for_parent_merge(meta: MetaBase) -> None: + meta.cn_name = None + meta.en_name = None diff --git a/app/core/metainfo.py b/app/core/metainfo.py index 12ff758b..ded4d803 100644 --- a/app/core/metainfo.py +++ b/app/core/metainfo.py @@ -5,6 +5,10 @@ import regex as re from app.core.config import settings from app.core.meta import MetaAnime, MetaVideo, MetaBase +from app.core.meta.infopath import ( + clear_parsed_title_for_parent_merge, + should_use_parent_title_for_file_stem, +) from app.core.meta.words import WordsMatcher from app.log import logger from app.schemas.types import MediaType @@ -71,6 +75,8 @@ def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase: """ # 文件元数据,不包含后缀 file_meta = MetaInfo(title=path.name, custom_words=custom_words) + if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta): + clear_parsed_title_for_parent_merge(file_meta) # 上级目录元数据 dir_meta = MetaInfo(title=path.parent.name, custom_words=custom_words) if file_meta.type == MediaType.TV or dir_meta.type != MediaType.TV: diff --git a/tests/test_metainfo.py b/tests/test_metainfo.py index e54f16bf..e1fefcd6 100644 --- a/tests/test_metainfo.py +++ b/tests/test_metainfo.py @@ -18,7 +18,11 @@ class MetaInfoTest(TestCase): if info.get("path"): meta_info = MetaInfoPath(path=Path(info.get("path"))) else: - meta_info = MetaInfo(title=info.get("title"), subtitle=info.get("subtitle"), custom_words=["#"]) + meta_info = MetaInfo( + title=info.get("title"), + subtitle=info.get("subtitle"), + custom_words=["#"], + ) target = { "type": meta_info.type.value, "cn_name": meta_info.cn_name or "", @@ -31,14 +35,17 @@ class MetaInfoTest(TestCase): "pix": meta_info.resource_pix or "", "video_codec": meta_info.video_encode or "", "audio_codec": meta_info.audio_encode or "", - "fps": meta_info.fps or None + "fps": meta_info.fps or None, } # 检查tmdbid if info.get("target").get("tmdbid"): target["tmdbid"] = meta_info.tmdbid - self.assertEqual(target, info.get("target")) + expected = info.get("target") + if "fps" not in expected: + target.pop("fps", None) + self.assertEqual(target, expected) def test_emby_format_ids(self): """ @@ -47,21 +54,33 @@ class MetaInfoTest(TestCase): # 测试文件路径 test_paths = [ # 文件名中包含tmdbid - ("/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv", 18165), + ( + "/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv", + 18165, + ), # 目录名中包含tmdbid ("/movies/Inception (2010) [tmdbid-27205]/Inception.2010.1080p.mkv", 27205), # 父目录名中包含tmdbid - ("/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv", 1396), + ( + "/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv", + 1396, + ), # 祖父目录名中包含tmdbid - ("/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv", 1399), + ( + "/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv", + 1399, + ), # 测试{tmdb-xxx}格式 ("/movies/Avatar (2009) {tmdb-19995}/Avatar.2009.1080p.mkv", 19995), ] for path_str, expected_tmdbid in test_paths: meta = MetaInfoPath(Path(path_str)) - self.assertEqual(meta.tmdbid, expected_tmdbid, - f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}") + self.assertEqual( + meta.tmdbid, + expected_tmdbid, + f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}", + ) def test_metainfopath_with_custom_words(self): """测试 MetaInfoPath 使用自定义识别词""" @@ -93,7 +112,37 @@ class MetaInfoTest(TestCase): title = "电影替换词.2024.mkv" meta = MetaInfo(title=title, custom_words=custom_words) # 验证 apply_words 属性存在 - self.assertTrue(hasattr(meta, 'apply_words')) + self.assertTrue(hasattr(meta, "apply_words")) # 如果替换词被应用,应该记录在 apply_words 中 if meta.apply_words: self.assertIn("替换词 => 新词", meta.apply_words) + + def test_metainfopath_auxiliary_chinese_stem_uses_parent_title(self): + """ + 文件名为简英双语/特效等压制标签、父目录为拉丁片名时,应合并父目录标题与年份。 + """ + path = Path( + "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK/简英双语特效.mp4" + ) + meta = MetaInfoPath(path) + self.assertEqual(meta.en_name, "Marty Supreme") + self.assertEqual(meta.year, "2025") + + def test_metainfopath_chinese_parent_not_replaced_by_auxiliary_rule(self): + """ + 纯中文父目录(无拉丁字母)时不触发辅助文件名规则,避免误伤。 + """ + path = Path("/movies/流浪地球 (2023)/简体中字.mkv") + meta = MetaInfoPath(path) + self.assertTrue(meta.cn_name) + self.assertIn("简体", meta.cn_name) + + def test_metainfopath_cn_title_containing_keyword_not_cleared(self): + """ + 中文片名恰好包含辅助关键词子串时(如"粤语残片"含"粤语"), + 不应被当作辅助标签清空。 + """ + path = Path("/Some Movie 2024/粤语残片.mkv") + meta = MetaInfoPath(path) + # stem 含有非关键词汉字"残片",不应被全量匹配命中 + self.assertIn("粤语残片", meta.cn_name)