mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-06 20:42:43 +08:00
feat: 文件名为辅助中文标签时使用父目录标题识别
当文件名(stem)为纯中文压制/字幕辅助标签(如"简英双语特效")且父目录包含拉丁片名时,清空文件元数据的标题信息,改由父目录标题合并填充,避免识别失败。新增 infopath 模块集中管理辅助标签判断逻辑与关键词正则。
This commit is contained in:
41
app/core/meta/infopath.py
Normal file
41
app/core/meta/infopath.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import regex as re
|
||||
|
||||
from app.core.meta.metabase import MetaBase
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
# Chinese keywords that describe subtitles, audio tracks or encoding rather
# than a title. The order is kept as the alternation order of the pattern.
_AUXILIARY_CN_KEYWORDS = (
    "双语", "字幕", "特效", "内封", "外挂", "官译", "简体",
    "繁体", "繁中", "简中", "中英", "简英", "多语",
    "国英", "台粤", "音轨", "评论", "国配", "台配", "粤语",
    "韩语", "日语", "杜比", "全景声", "无损", "中字",
    "国语", "原声",
)

# Matches a string made up exclusively of one or more auxiliary keywords.
AUXILIARY_CN_STEM_FULLMATCH_RE = re.compile(
    "^(" + "|".join(_AUXILIARY_CN_KEYWORDS) + ")+$"
)
|
||||
|
||||
|
||||
def should_use_parent_title_for_file_stem(
    stem: str, parent_dir_name: str, file_meta: MetaBase
) -> bool:
    """
    Tell whether a file stem is only an auxiliary Chinese tag (simplified /
    traditional, subtitle, effects, ...) so that the title should be taken
    from the parent directory instead.

    All of the following must hold:
    - ``file_meta`` is a file and carries neither a TMDB id nor a Douban id
    - the parent directory name contains a run of at least two Latin letters,
      so that a purely Chinese resource directory does not get a real Chinese
      title cleared as if it were a tag
    - the stem is at most 16 characters long, is all Chinese, and is covered
      completely by the auxiliary keywords with no leftover characters

    :param stem: file name without its suffix
    :param parent_dir_name: name of the directory containing the file
    :param file_meta: metadata parsed from the file name
    :return: True when the parent directory title should be used
    """
    if not (file_meta.isfile and stem and parent_dir_name):
        return False
    # The file name already yielded an explicit media id.
    if file_meta.tmdbid or file_meta.doubanid:
        return False
    if not re.search(r"[A-Za-z]{2,}", parent_dir_name):
        return False
    # Cheap length bound first, then the per-character check.
    if len(stem) > 16:
        return False
    if not StringUtils.is_all_chinese(stem):
        return False
    # fullmatch() instead of match(): "$" in the pattern would also accept a
    # trailing newline, whereas the whole stem must consist of keywords.
    # No separate season/episode check follows: such markers need 第 or 共,
    # which no auxiliary keyword contains, so they can never pass this match.
    return AUXILIARY_CN_STEM_FULLMATCH_RE.fullmatch(stem) is not None
|
||||
|
||||
|
||||
def clear_parsed_title_for_parent_merge(meta: MetaBase) -> None:
    """
    Reset the Chinese and English title fields of ``meta`` to None.

    :param meta: metadata object whose ``cn_name`` and ``en_name`` are cleared
    """
    for title_attr in ("cn_name", "en_name"):
        setattr(meta, title_attr, None)
|
||||
@@ -5,6 +5,10 @@ import regex as re
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.meta import MetaAnime, MetaVideo, MetaBase
|
||||
from app.core.meta.infopath import (
|
||||
clear_parsed_title_for_parent_merge,
|
||||
should_use_parent_title_for_file_stem,
|
||||
)
|
||||
from app.core.meta.words import WordsMatcher
|
||||
from app.log import logger
|
||||
from app.schemas.types import MediaType
|
||||
@@ -71,6 +75,8 @@ def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
|
||||
"""
|
||||
# 文件元数据,不包含后缀
|
||||
file_meta = MetaInfo(title=path.name, custom_words=custom_words)
|
||||
if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
|
||||
clear_parsed_title_for_parent_merge(file_meta)
|
||||
# 上级目录元数据
|
||||
dir_meta = MetaInfo(title=path.parent.name, custom_words=custom_words)
|
||||
if file_meta.type == MediaType.TV or dir_meta.type != MediaType.TV:
|
||||
|
||||
@@ -18,7 +18,11 @@ class MetaInfoTest(TestCase):
|
||||
if info.get("path"):
|
||||
meta_info = MetaInfoPath(path=Path(info.get("path")))
|
||||
else:
|
||||
meta_info = MetaInfo(title=info.get("title"), subtitle=info.get("subtitle"), custom_words=["#"])
|
||||
meta_info = MetaInfo(
|
||||
title=info.get("title"),
|
||||
subtitle=info.get("subtitle"),
|
||||
custom_words=["#"],
|
||||
)
|
||||
target = {
|
||||
"type": meta_info.type.value,
|
||||
"cn_name": meta_info.cn_name or "",
|
||||
@@ -31,14 +35,17 @@ class MetaInfoTest(TestCase):
|
||||
"pix": meta_info.resource_pix or "",
|
||||
"video_codec": meta_info.video_encode or "",
|
||||
"audio_codec": meta_info.audio_encode or "",
|
||||
"fps": meta_info.fps or None
|
||||
"fps": meta_info.fps or None,
|
||||
}
|
||||
|
||||
# 检查tmdbid
|
||||
if info.get("target").get("tmdbid"):
|
||||
target["tmdbid"] = meta_info.tmdbid
|
||||
|
||||
self.assertEqual(target, info.get("target"))
|
||||
expected = info.get("target")
|
||||
if "fps" not in expected:
|
||||
target.pop("fps", None)
|
||||
self.assertEqual(target, expected)
|
||||
|
||||
def test_emby_format_ids(self):
|
||||
"""
|
||||
@@ -47,21 +54,33 @@ class MetaInfoTest(TestCase):
|
||||
# 测试文件路径
|
||||
test_paths = [
|
||||
# 文件名中包含tmdbid
|
||||
("/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv", 18165),
|
||||
(
|
||||
"/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv",
|
||||
18165,
|
||||
),
|
||||
# 目录名中包含tmdbid
|
||||
("/movies/Inception (2010) [tmdbid-27205]/Inception.2010.1080p.mkv", 27205),
|
||||
# 父目录名中包含tmdbid
|
||||
("/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv", 1396),
|
||||
(
|
||||
"/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv",
|
||||
1396,
|
||||
),
|
||||
# 祖父目录名中包含tmdbid
|
||||
("/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv", 1399),
|
||||
(
|
||||
"/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv",
|
||||
1399,
|
||||
),
|
||||
# 测试{tmdb-xxx}格式
|
||||
("/movies/Avatar (2009) {tmdb-19995}/Avatar.2009.1080p.mkv", 19995),
|
||||
]
|
||||
|
||||
for path_str, expected_tmdbid in test_paths:
|
||||
meta = MetaInfoPath(Path(path_str))
|
||||
self.assertEqual(meta.tmdbid, expected_tmdbid,
|
||||
f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}")
|
||||
self.assertEqual(
|
||||
meta.tmdbid,
|
||||
expected_tmdbid,
|
||||
f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}",
|
||||
)
|
||||
|
||||
def test_metainfopath_with_custom_words(self):
|
||||
"""测试 MetaInfoPath 使用自定义识别词"""
|
||||
@@ -93,7 +112,37 @@ class MetaInfoTest(TestCase):
|
||||
title = "电影替换词.2024.mkv"
|
||||
meta = MetaInfo(title=title, custom_words=custom_words)
|
||||
# 验证 apply_words 属性存在
|
||||
self.assertTrue(hasattr(meta, 'apply_words'))
|
||||
self.assertTrue(hasattr(meta, "apply_words"))
|
||||
# 如果替换词被应用,应该记录在 apply_words 中
|
||||
if meta.apply_words:
|
||||
self.assertIn("替换词 => 新词", meta.apply_words)
|
||||
|
||||
def test_metainfopath_auxiliary_chinese_stem_uses_parent_title(self):
    """
    When the file name is only a release tag (bilingual subtitles, effects,
    ...) and the parent directory carries a Latin title, the title and year
    of the parent directory should be merged into the result.
    """
    release_dir = Path(
        "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK"
    )
    parsed = MetaInfoPath(release_dir / "简英双语特效.mp4")
    self.assertEqual(parsed.en_name, "Marty Supreme")
    self.assertEqual(parsed.year, "2025")
||||
|
||||
def test_metainfopath_chinese_parent_not_replaced_by_auxiliary_rule(self):
    """
    With a purely Chinese parent directory (no Latin letters) the auxiliary
    file-name rule must not fire, to avoid clearing legitimate titles.
    """
    parsed = MetaInfoPath(Path("/movies/流浪地球 (2023)/简体中字.mkv"))
    chinese_title = parsed.cn_name
    self.assertTrue(chinese_title)
    self.assertIn("简体", chinese_title)
|
||||
|
||||
def test_metainfopath_cn_title_containing_keyword_not_cleared(self):
    """
    A Chinese title that merely contains an auxiliary keyword as a substring
    (e.g. "粤语残片" contains "粤语") must not be treated as an auxiliary tag
    and cleared.
    """
    parsed = MetaInfoPath(Path("/Some Movie 2024/粤语残片.mkv"))
    # The stem holds the non-keyword characters "残片", so the keyword-only
    # full match must not succeed.
    self.assertIn("粤语残片", parsed.cn_name)
|
||||
|
||||
Reference in New Issue
Block a user