fix(subscribe): stop best-version per-episode redownload loop (#5781)

2026-07-19 11:42:04 +08:00 · 2026-05-18 06:56:29 +08:00
parent 6685bd0e0e
commit 23d6ba0466
5 changed files with 201 additions and 19 deletions
--- a/app/chain/download.py
+++ b/app/chain/download.py
@@ -511,6 +511,20 @@ class DownloadChain(ChainBase):
                return 9999
            return no_exist[season].total_episode

+        def __apply_allowed_episodes(_need_episodes, _context: Context) -> Set[int]:
+            """
+            Trim need_episodes by the candidate's allow-list; return the episodes that may be downloaded.
+
+            allowed_episodes is None: the caller set no constraint, need_episodes is used as-is.
+            Otherwise the result is need_episodes intersected with it, so an empty allow-list
+            (explicit rejection) yields an empty set. Callers skip the candidate when the result is empty.
+            """
+            effective = set(_need_episodes)
+            allowed = _context.allowed_episodes
+            if allowed is not None:
+                effective &= set(allowed)
+            return effective
+
        # 发送资源选择事件，允许外部修改上下文数据
        logger.debug(f"Initial contexts: {len(contexts)} items, Downloader: {downloader}")
        event_data = ResourceSelectionEventData(
@@ -695,8 +709,12 @@ class DownloadChain(ChainBase):
                            # 整季的不处理
                            if not torrent_episodes:
                                continue
+                            # 上游对本候选施加的允许集（如洗版按集允许列表）裁剪本季缺集，得到真正可下载范围。
+                            effective_need = __apply_allowed_episodes(need_episodes, context)
+                            if not effective_need:
+                                continue
                            # 为需要集的子集则下载
-                            if torrent_episodes.issubset(set(need_episodes)):
+                            if torrent_episodes.issubset(effective_need):
                                # 下载
                                logger.info(f"开始下载 {meta.title} ...")
                                download_id = self.download_single(context, save_path=save_path,
@@ -756,10 +774,14 @@ class DownloadChain(ChainBase):
                        # 没有需要集后退出
                        if not need_episodes:
                            break
+                        # 上游对本候选施加的允许集（如洗版按集允许列表）裁剪本季缺集，得到真正可下载范围。
+                        effective_need = __apply_allowed_episodes(need_episodes, context)
+                        if not effective_need:
+                            continue
                        # 选中一个单季整季的或单季包括需要的所有集的
                        if (media.tmdb_id == need_mid or media.douban_id == need_mid) \
                                and (not meta.episode_list
-                                     or set(meta.episode_list).intersection(set(need_episodes))) \
+                                     or set(meta.episode_list).intersection(effective_need)) \
                                and len(meta.season_list) == 1 \
                                and meta.season_list[0] == need_season:
                            # 检查种子看是否有需要的集
@@ -775,7 +797,7 @@ class DownloadChain(ChainBase):
                            torrent_episodes = TorrentHelper().get_torrent_episodes(torrent_files)
                            logger.info(f"{torrent.site_name} - {meta.org_string} 解析种子文件集数：{torrent_episodes}")
                            # 选中的集
-                            selected_episodes = set(torrent_episodes).intersection(set(need_episodes))
+                            selected_episodes = set(torrent_episodes).intersection(effective_need)
                            if not selected_episodes:
                                logger.info(f"{torrent.site_name} - {torrent.title} 没有需要的集，跳过...")
                                continue
--- a/app/chain/subscribe.py
+++ b/app/chain/subscribe.py
@@ -1027,17 +1027,19 @@ class SubscribeChain(ChainBase):
                                        )
                                        continue
                                    # 洗版时，只保留至少能提升一集优先级的资源
-                                    if (
-                                        torrent_mediainfo.type == MediaType.TV
-                                        and not self.__get_best_version_interested_episodes(
+                                    if torrent_mediainfo.type == MediaType.TV:
+                                        interested_episodes = self.__get_best_version_interested_episodes(
                                            subscribe=subscribe,
                                            context=context,
                                            priority=torrent_info.pri_order,
                                        )
-                                    ):
-                                        logger.info(
-                                            f'{subscribe.name} 正在洗版，{torrent_info.title} 不包含可提升优先级的剧集')
-                                        continue
+                                        if not interested_episodes:
+                                            logger.info(
+                                                f'{subscribe.name} 正在洗版，{torrent_info.title} 不包含可提升优先级的剧集')
+                                            continue
+                                        # 将"本候选实际能升级到的集"作为允许下载集合下传到下载层，
+                                        # 防止标题元数据与实际种子文件错位导致同优先级集被重复下载。
+                                        context.allowed_episodes = set(interested_episodes)
                                    if (
                                        torrent_mediainfo.type != MediaType.TV
                                        and subscribe.current_priority
@@ -1554,17 +1556,19 @@ class SubscribeChain(ChainBase):

                            # 洗版时，优先级小于已下载优先级的不要
                            if subscribe.best_version:
-                                if (
-                                    meta.type == MediaType.TV
-                                    and not self.__get_best_version_interested_episodes(
+                                if meta.type == MediaType.TV:
+                                    interested_episodes = self.__get_best_version_interested_episodes(
                                        subscribe=subscribe,
                                        context=_context,
                                        priority=torrent_info.pri_order,
                                    )
-                                ):
-                                    logger.info(
-                                        f'{subscribe.name} 正在洗版，{torrent_info.title} 不包含可提升优先级的剧集')
-                                    continue
+                                    if not interested_episodes:
+                                        logger.info(
+                                            f'{subscribe.name} 正在洗版，{torrent_info.title} 不包含可提升优先级的剧集')
+                                        continue
+                                    # 与 search() 路径对称：把"本候选实际能升级到的集"作为允许下载集合下传到下载层，
+                                    # 避免 RSS / 订阅刷新场景下标题元数据与种子文件错位导致同优先级集重复下载。
+                                    _context.allowed_episodes = set(interested_episodes)
                                if (
                                    meta.type != MediaType.TV
                                    and subscribe.current_priority
--- a/app/core/context.py
+++ b/app/core/context.py
@@ -1,7 +1,7 @@
 import re
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import List, Dict, Any, Tuple, Optional
+from typing import List, Dict, Any, Tuple, Optional, Set

 from app.core.config import settings
 from app.core.meta import MetaBase
@@ -827,6 +827,8 @@ class Context:
    candidate_recognized: bool = False
    # 当前 media_info 是否为目标媒体回填，而不是候选自身识别结果。
    media_info_is_target: bool = False
+    # 调用方对本候选允许下载的剧集集合，None 表示不限制，空集合表示拒绝交付任何集。
+    allowed_episodes: Optional[Set[int]] = None

    def to_dict(self):
        """
@@ -841,4 +843,6 @@ class Context:
            "match_source": self.match_source,
            "candidate_recognized": self.candidate_recognized,
            "media_info_is_target": self.media_info_is_target,
+            # 保留 None / 空集 / 非空集 三态语义，避免下游误把"显式拒绝"当成"不限制"。
+            "allowed_episodes": sorted(self.allowed_episodes) if self.allowed_episodes is not None else None,
        }
--- a/tests/run.py
+++ b/tests/run.py
@@ -38,7 +38,7 @@ if __name__ == '__main__':
    suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestMediaScrapeEvents))

    # 测试订阅洗版匹配
-    suite.addTest(SubscribeChainTest('test_is_episode_range_covered'))
+    suite.addTest(SubscribeChainTest('test_is_episode_range_covered_matches_pending_episodes'))

    # 运行测试
    runner = unittest.TextTestRunner()
--- a/tests/test_subscribe_chain.py
+++ b/tests/test_subscribe_chain.py
@@ -773,3 +773,155 @@ class SubscribeChainTest(TestCase):
        self.assertEqual(subscribe.total_episode, 5)
        self.assertEqual(subscribe.lack_episode, 2)
        self.assertEqual(subscribe.current_priority, 0)
+
+    def test_best_version_interested_episodes_excludes_same_priority(self):
+        """A candidate at the same pri_order must not list episodes already at that priority as upgradable.
+
+        Regression scenario: E2 is recorded in episode_priority as 99 and the candidate title covers E2/E3
+        with pri_order=99; E2 must stay out of the interested set while E3 (no recorded priority) must be in.
+        This check is where the best-version redownload chain starts, so it must keep "strictly greater" semantics.
+        """
+        subscribe = self._build_subscribe(
+            total_episode=3,
+            episode_priority={"1": 100, "2": 99},
+            current_priority=100,
+        )
+        context = SimpleNamespace(
+            meta_info=SimpleNamespace(season_list=[1], episode_list=[2, 3]),
+            selected_episodes=None,
+        )
+
+        interested = SubscribeChain._SubscribeChain__get_best_version_interested_episodes(
+            subscribe=subscribe,
+            context=context,
+            priority=99,
+        )
+
+        self.assertEqual(interested, [3])
+
+    def test_best_version_interested_episodes_uses_title_episode_list_for_full_pack(self):
+        """A full-pack candidate (episode list expanded from the title) only contributes still-upgradable episodes.
+
+        Regression guard: the title advertises episodes 53-104 while the real target range is only 1..92;
+        episode_priority already has 1..82 at 100 and E83 at 99. When the same pri_order=99 release shows up
+        again, interested must be exactly [84..92] and never E83, or the download layer fetches it once more.
+        """
+        subscribe = self._build_subscribe(
+            total_episode=92,
+            episode_priority={
+                **{str(ep): 100 for ep in range(1, 83)},
+                "83": 99,
+            },
+            current_priority=99,
+        )
+        context = SimpleNamespace(
+            meta_info=SimpleNamespace(season_list=[1], episode_list=list(range(53, 105))),
+            selected_episodes=None,
+        )
+
+        interested = SubscribeChain._SubscribeChain__get_best_version_interested_episodes(
+            subscribe=subscribe,
+            context=context,
+            priority=99,
+        )
+
+        self.assertEqual(interested, list(range(84, 93)))
+
+
+class SubscribeFilterAllowedEpisodesTest(TestCase):
+    """Check that the best-version filter result ends up on context.allowed_episodes.
+
+    Regression point: when __get_best_version_interested_episodes returns a non-empty result, the candidate
+    must carry it as an allow-list so the download layer can cope with titles that disagree with torrent files.
+    """
+
+    def _build_subscribe(self, **overrides):  # reuse the fixture builder defined on SubscribeChainTest
+        return SubscribeChainTest()._build_subscribe(**overrides)
+
+    def test_filter_writes_allowed_episodes_to_context(self):
+        subscribe = self._build_subscribe(
+            total_episode=92,
+            episode_priority={
+                **{str(ep): 100 for ep in range(1, 83)},
+                "83": 99,
+            },
+            current_priority=99,
+        )
+        context = SimpleNamespace(
+            meta_info=SimpleNamespace(season_list=[1], episode_list=list(range(53, 105))),
+            selected_episodes=None,
+        )
+
+        interested = SubscribeChain._SubscribeChain__get_best_version_interested_episodes(
+            subscribe=subscribe,
+            context=context,
+            priority=99,
+        )
+        # Same outcome as the filter loop in subscribe.py: a non-empty result becomes the allow-list.
+        context.allowed_episodes = set(interested) if interested else None
+
+        self.assertIsNotNone(context.allowed_episodes)
+        self.assertEqual(context.allowed_episodes, set(range(84, 93)))
+        # Key regression point: E83 is already at 99, so it is outside the allow-list and is not fetched again.
+        self.assertNotIn(83, context.allowed_episodes)
+
+    def test_filter_leaves_allowed_episodes_none_when_no_upgrade(self):
+        """No episode can be upgraded at this pri_order, so no allow-list may be written to the context."""
+        subscribe = self._build_subscribe(
+            total_episode=3,
+            episode_priority={"1": 100, "2": 99, "3": 99},
+            current_priority=99,
+        )
+        context = SimpleNamespace(
+            meta_info=SimpleNamespace(season_list=[1], episode_list=[2, 3]),
+            selected_episodes=None,
+            allowed_episodes=None,
+        )
+
+        interested = SubscribeChain._SubscribeChain__get_best_version_interested_episodes(
+            subscribe=subscribe,
+            context=context,
+            priority=99,
+        )
+        if interested:  # same guard as the filter loop: only a non-empty result is written
+            context.allowed_episodes = set(interested)
+
+        self.assertEqual(interested, [])
+        self.assertIsNone(context.allowed_episodes)
+
+    def test_filter_writes_allowed_episodes_in_match_path(self):
+        """The RSS/refresh branch match() must write allowed_episodes the same way search() does.
+
+        match() filters a shallow copy (`_context = copy.copy(context)`); this replays that sequence and checks
+        that the write lands on the copy only and equals the search() result. If match() skipped the write, the
+        download layer would see no allow-list and the same-priority redownloads from before 2c458317 would return.
+        """
+        import copy
+
+        subscribe = self._build_subscribe(
+            total_episode=92,
+            episode_priority={
+                **{str(ep): 100 for ep in range(1, 83)},
+                "83": 99,
+            },
+            current_priority=99,
+        )
+        original_context = SimpleNamespace(
+            meta_info=SimpleNamespace(season_list=[1], episode_list=list(range(53, 105))),
+            selected_episodes=None,
+            allowed_episodes=None,
+        )
+        _context = copy.copy(original_context)
+
+        interested = SubscribeChain._SubscribeChain__get_best_version_interested_episodes(
+            subscribe=subscribe,
+            context=_context,
+            priority=99,
+        )
+        # Mirror the assignment in match(); search() and match() must keep the same shape so neither is missed.
+        if interested:
+            _context.allowed_episodes = set(interested)
+
+        self.assertEqual(_context.allowed_episodes, set(range(84, 93)))
+        # The write on the shallow copy must not leak back to the source (match() may reuse cached contexts).
+        self.assertIsNone(original_context.allowed_episodes)