feat: 文件名为辅助中文标签时使用父目录标题识别

当文件名(stem)为纯中文压制/字幕辅助标签(如"简英双语特效")且父目录包含
拉丁片名时,清空文件元数据的标题信息,改由父目录标题合并填充,避免识别失败。

新增 infopath 模块集中管理辅助标签判断逻辑与关键词正则。
This commit is contained in:
DDSRem
2026-03-25 09:12:23 +08:00
committed by jxxghp
parent 10807a6fb7
commit de4dbf283b
3 changed files with 105 additions and 9 deletions

41
app/core/meta/infopath.py Normal file
View File

@@ -0,0 +1,41 @@
import regex as re
from app.core.meta.metabase import MetaBase
from app.utils.string import StringUtils
# Matches a stem that consists solely of one or more auxiliary release tags
# (subtitle language, effects, dubbing / audio-track labels and similar).
# The pattern is anchored with ^ and $ and the keyword group is repeated, so
# every character of the stem has to be covered by one of the listed keywords.
AUXILIARY_CN_STEM_FULLMATCH_RE = re.compile(
r"^(双语|字幕|特效|内封|外挂|官译|简体|繁体|繁中|简中|中英|简英|多语|"
r"国英|台粤|音轨|评论|国配|台配|粤语|韩语|日语|杜比|全景声|无损|中字|"
r"国语|原声)+$"
)
def should_use_parent_title_for_file_stem(
    stem: str, parent_dir_name: str, file_meta: MetaBase
) -> bool:
    """
    Tell whether a file stem is nothing but an auxiliary tag (simplified /
    traditional Chinese, subtitles, effects, ...), in which case the parent
    directory title should be used for recognition instead.

    True is returned only when all of the following hold:
    - file_meta.isfile is truthy and both stem and parent_dir_name are non-empty
    - file_meta carries neither a tmdbid nor a doubanid
    - parent_dir_name contains a run of at least two Latin letters, so that a
      purely Chinese resource directory does not get its real Chinese title
      mistaken for a tag and cleared
    - stem is all Chinese, at most 16 characters long, and fully covered by the
      auxiliary keywords (no meaningful characters left over)
    - stem carries no season / episode marker such as "第3季" or "共12集"

    :param stem: file name without its suffix
    :param parent_dir_name: name of the directory that contains the file
    :param file_meta: metadata parsed from the file name
    :return: True when the parent directory title should be used
    """
    # All three inputs must be usable before any pattern work is attempted.
    if not (file_meta.isfile and stem and parent_dir_name):
        return False

    # An explicit media id was found; leave the parsed metadata alone.
    if file_meta.tmdbid or file_meta.doubanid:
        return False

    # The parent directory needs something that looks like a Latin title.
    latin_run = re.search(r"[A-Za-z]{2,}", parent_dir_name)
    if not latin_run:
        return False

    # Only short, purely Chinese stems can be auxiliary tags.
    if not StringUtils.is_all_chinese(stem) or len(stem) > 16:
        return False

    # Every character must belong to one of the auxiliary keywords.
    if not AUXILIARY_CN_STEM_FULLMATCH_RE.match(stem):
        return False

    # Season / episode markers mean the stem still carries real information.
    return not re.search(
        r"[第共]\s*[0-9一二三四五六七八九十百零]+\s*[季集话話]", stem
    )
def clear_parsed_title_for_parent_merge(meta: MetaBase) -> None:
    """
    Reset the parsed Chinese and English names on the given metadata to None.

    Intended for the case where the file name turned out to be only an
    auxiliary tag, so that the title can be taken from elsewhere.

    :param meta: metadata object whose cn_name and en_name are cleared in place
    """
    for title_attr in ("cn_name", "en_name"):
        setattr(meta, title_attr, None)

View File

@@ -5,6 +5,10 @@ import regex as re
from app.core.config import settings
from app.core.meta import MetaAnime, MetaVideo, MetaBase
from app.core.meta.infopath import (
clear_parsed_title_for_parent_merge,
should_use_parent_title_for_file_stem,
)
from app.core.meta.words import WordsMatcher
from app.log import logger
from app.schemas.types import MediaType
@@ -71,6 +75,8 @@ def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
"""
# 文件元数据,不包含后缀
file_meta = MetaInfo(title=path.name, custom_words=custom_words)
if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
clear_parsed_title_for_parent_merge(file_meta)
# 上级目录元数据
dir_meta = MetaInfo(title=path.parent.name, custom_words=custom_words)
if file_meta.type == MediaType.TV or dir_meta.type != MediaType.TV:

View File

@@ -18,7 +18,11 @@ class MetaInfoTest(TestCase):
if info.get("path"):
meta_info = MetaInfoPath(path=Path(info.get("path")))
else:
meta_info = MetaInfo(title=info.get("title"), subtitle=info.get("subtitle"), custom_words=["#"])
meta_info = MetaInfo(
title=info.get("title"),
subtitle=info.get("subtitle"),
custom_words=["#"],
)
target = {
"type": meta_info.type.value,
"cn_name": meta_info.cn_name or "",
@@ -31,14 +35,17 @@ class MetaInfoTest(TestCase):
"pix": meta_info.resource_pix or "",
"video_codec": meta_info.video_encode or "",
"audio_codec": meta_info.audio_encode or "",
"fps": meta_info.fps or None
"fps": meta_info.fps or None,
}
# 检查tmdbid
if info.get("target").get("tmdbid"):
target["tmdbid"] = meta_info.tmdbid
self.assertEqual(target, info.get("target"))
expected = info.get("target")
if "fps" not in expected:
target.pop("fps", None)
self.assertEqual(target, expected)
def test_emby_format_ids(self):
"""
@@ -47,21 +54,33 @@ class MetaInfoTest(TestCase):
# 测试文件路径
test_paths = [
# 文件名中包含tmdbid
("/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv", 18165),
(
"/movies/The Vampire Diaries (2009) [tmdbid=18165]/The.Vampire.Diaries.S01E01.1080p.mkv",
18165,
),
# 目录名中包含tmdbid
("/movies/Inception (2010) [tmdbid-27205]/Inception.2010.1080p.mkv", 27205),
# 父目录名中包含tmdbid
("/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv", 1396),
(
"/movies/Breaking Bad (2008) [tmdb=1396]/Season 1/Breaking.Bad.S01E01.1080p.mkv",
1396,
),
# 祖父目录名中包含tmdbid
("/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv", 1399),
(
"/tv/Game of Thrones (2011) {tmdb=1399}/Season 1/Game.of.Thrones.S01E01.1080p.mkv",
1399,
),
# 测试{tmdb-xxx}格式
("/movies/Avatar (2009) {tmdb-19995}/Avatar.2009.1080p.mkv", 19995),
]
for path_str, expected_tmdbid in test_paths:
meta = MetaInfoPath(Path(path_str))
self.assertEqual(meta.tmdbid, expected_tmdbid,
f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}")
self.assertEqual(
meta.tmdbid,
expected_tmdbid,
f"路径 {path_str} 期望的tmdbid为 {expected_tmdbid},实际识别为 {meta.tmdbid}",
)
def test_metainfopath_with_custom_words(self):
"""测试 MetaInfoPath 使用自定义识别词"""
@@ -93,7 +112,37 @@ class MetaInfoTest(TestCase):
title = "电影替换词.2024.mkv"
meta = MetaInfo(title=title, custom_words=custom_words)
# 验证 apply_words 属性存在
self.assertTrue(hasattr(meta, 'apply_words'))
self.assertTrue(hasattr(meta, "apply_words"))
# 如果替换词被应用,应该记录在 apply_words 中
if meta.apply_words:
self.assertIn("替换词 => 新词", meta.apply_words)
def test_metainfopath_auxiliary_chinese_stem_uses_parent_title(self):
    """
    When the file name is only a release tag (bilingual subtitles, effects,
    ...) and the parent directory carries a Latin title, the title and year
    of the parent directory should be merged into the result.
    """
    parsed = MetaInfoPath(
        Path(
            "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK/简英双语特效.mp4"
        )
    )
    # Both values can only come from the parent directory name.
    self.assertEqual(parsed.en_name, "Marty Supreme")
    self.assertEqual(parsed.year, "2025")
def test_metainfopath_chinese_parent_not_replaced_by_auxiliary_rule(self):
    """
    A purely Chinese parent directory (no Latin letters) must not trigger the
    auxiliary file name rule, to avoid false positives.
    """
    parsed = MetaInfoPath(Path("/movies/流浪地球 (2023)/简体中字.mkv"))
    chinese_title = parsed.cn_name
    # The name parsed from the file itself must survive.
    self.assertTrue(chinese_title)
    self.assertIn("简体", chinese_title)
def test_metainfopath_cn_title_containing_keyword_not_cleared(self):
    """
    A Chinese title that merely contains an auxiliary keyword as a substring
    (e.g. "粤语残片" contains "粤语") must not be treated as an auxiliary tag
    and cleared.
    """
    parsed = MetaInfoPath(Path("/Some Movie 2024/粤语残片.mkv"))
    # The stem holds the non-keyword characters "残片", so the full-match
    # keyword pattern must not hit.
    self.assertIn("粤语残片", parsed.cn_name)