From cba52c57e6113a27efc2edc9c66f8dc5fc04cd53 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Wed, 10 Jun 2026 08:46:10 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20RAR=20=E5=AD=97=E5=B9=95?= =?UTF-8?q?=E5=8C=85=E4=B8=8B=E8=BD=BD=E8=AF=86=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/chain/download.py | 24 ++++++++- app/modules/subtitle/__init__.py | 53 ++++++++++++------- app/utils/system.py | 78 ++++++++++++++++++++++++++++ docker/Dockerfile | 1 + docs/development-setup.md | 1 + tests/test_download_chain.py | 87 ++++++++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 20 deletions(-) diff --git a/app/chain/download.py b/app/chain/download.py index f1e70939..79b2c617 100644 --- a/app/chain/download.py +++ b/app/chain/download.py @@ -36,6 +36,11 @@ class DownloadChain(ChainBase): 下载处理链 """ + _SUBTITLE_ARCHIVE_FORMATS = { + ".zip": "zip", + ".rar": "rar", + } + @staticmethod def _safe_subtitle_file_name(file_name: str, fallback_name: str) -> str: """ @@ -51,7 +56,14 @@ class DownloadChain(ChainBase): """ 判断是否为字幕压缩包。 """ - return Path(file_name).suffix.lower() == ".zip" + return Path(file_name).suffix.lower() in DownloadChain._SUBTITLE_ARCHIVE_FORMATS + + @classmethod + def _subtitle_archive_format(cls, file_name: str) -> Optional[str]: + """ + 获取字幕压缩包格式。 + """ + return cls._SUBTITLE_ARCHIVE_FORMATS.get(Path(file_name).suffix.lower()) @staticmethod def _is_subtitle_file(file_name: str) -> bool: @@ -154,7 +166,15 @@ class DownloadChain(ChainBase): try: temp_file.write_bytes(response.content) if self._is_subtitle_archive(file_name): - shutil.unpack_archive(temp_file, temp_extract_dir, format='zip') + try: + SystemUtils.unpack_archive( + temp_file, + temp_extract_dir, + archive_format=self._subtitle_archive_format(file_name), + ) + except Exception as err: + logger.error(f"字幕压缩包解压失败:{temp_file} - {str(err)}") + return [] for sub_file in SystemUtils.list_files(temp_extract_dir, settings.RMT_SUBEXT): uploaded_path = self._upload_subtitle_file( storage_chain=storage_chain, diff --git a/app/modules/subtitle/__init__.py b/app/modules/subtitle/__init__.py index d305ccf3..1cad07c7 100644 --- a/app/modules/subtitle/__init__.py +++ b/app/modules/subtitle/__init__.py @@ -28,6 +28,11 @@ class SubtitleModule(_ModuleBase): 字幕下载模块 """ + _SUBTITLE_ARCHIVE_FORMATS = { + ".zip": "zip", + ".rar": "rar", + } + # 站点详情页字幕下载元素识别XPATH _SITE_SUBTITLE_XPATH = [ '//td[@class="rowhead"][text()="字幕"]/following-sibling::td//a[not(@class)]', @@ -233,40 +238,52 @@ class SubtitleModule(_ModuleBase): ua=torrent.site_ua, proxies=settings.PROXY if torrent.site_proxy else None, ) + settings.TEMP_PATH.mkdir(parents=True, exist_ok=True) for sublink in sublink_list: logger.info(f"找到字幕下载链接:{sublink},开始下载...") # 下载 ret = request.get_res(sublink) if ret and ret.status_code == 200: - # 保存ZIP file_name = TorrentHelper.get_url_filename(ret, sublink) if not file_name: logger.warn(f"链接不是字幕文件:{sublink}") continue - if file_name.lower().endswith(".zip"): - # ZIP包 - zip_file = settings.TEMP_PATH / file_name + archive_format = self._SUBTITLE_ARCHIVE_FORMATS.get(Path(file_name).suffix.lower()) + if archive_format: + archive_file = settings.TEMP_PATH / file_name # 保存 - zip_file.write_bytes(ret.content) + archive_file.write_bytes(ret.content) # 解压路径 - zip_path = zip_file.with_name(zip_file.stem) - # 解压文件 - shutil.unpack_archive(zip_file, zip_path, format='zip') - # 遍历转移文件 - for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT): - target_sub_file = Path(working_dir_item.path) / Path(sub_file.name) - if storageChain.get_file_item(storage, target_sub_file): - logger.info(f"字幕文件已存在:{target_sub_file}") - continue - logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...") - storageChain.upload_file(working_dir_item, sub_file) + archive_path = archive_file.with_name(archive_file.stem) + try: + # 解压文件 + SystemUtils.unpack_archive( + archive_file, + archive_path, + archive_format=archive_format, + ) + # 遍历转移文件 + for sub_file in SystemUtils.list_files(archive_path, settings.RMT_SUBEXT): + target_sub_file = Path(working_dir_item.path) / Path(sub_file.name) + if storageChain.get_file_item(storage, target_sub_file): + logger.info(f"字幕文件已存在:{target_sub_file}") + continue + logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...") + storageChain.upload_file(working_dir_item, sub_file) + except Exception as err: + logger.error(f"字幕压缩包解压失败:{archive_file} - {str(err)}") # 删除临时文件 try: - shutil.rmtree(zip_path) - zip_file.unlink() + if archive_path.exists(): + shutil.rmtree(archive_path) + if archive_file.exists(): + archive_file.unlink() except Exception as err: logger.error(f"删除临时文件失败:{str(err)}") else: + if Path(file_name).suffix.lower() not in settings.RMT_SUBEXT: + logger.warn(f"链接不是支持的字幕文件:{sublink} - {file_name}") + continue sub_file = settings.TEMP_PATH / file_name # 保存 sub_file.write_bytes(ret.content) diff --git a/app/utils/system.py b/app/utils/system.py index e4afd4c7..42731399 100644 --- a/app/utils/system.py +++ b/app/utils/system.py @@ -262,6 +262,84 @@ class SystemUtils: _scan_directory(directory, recursive) return files + @staticmethod + def unpack_archive(archive_file: Path, extract_dir: Path, archive_format: Optional[str] = None) -> None: + """ + 解压压缩包,并补充标准库未覆盖的 RAR 格式支持。 + + :param archive_file: 待解压的压缩包文件 + :param extract_dir: 解压目标目录 + :param archive_format: 压缩包格式,未指定时按文件后缀推断 + """ + if archive_format == "rar" or (not archive_format and archive_file.suffix.lower() == ".rar"): + SystemUtils.__unpack_rar_archive(archive_file, extract_dir) + return + shutil.unpack_archive(archive_file, extract_dir, format=archive_format) + + @staticmethod + def __unpack_rar_archive(archive_file: Path, extract_dir: Path) -> None: + """ + 调用系统解压工具处理 RAR 压缩包。 + """ + extract_dir.mkdir(parents=True, exist_ok=True) + commands = [] + if shutil.which("unar"): + commands.append([ + "unar", + "-quiet", + "-force-overwrite", + "-output-directory", + extract_dir.as_posix(), + archive_file.as_posix(), + ]) + if shutil.which("unrar"): + commands.append([ + "unrar", + "x", + "-o+", + "-idq", + archive_file.as_posix(), + f"{extract_dir.as_posix()}/", + ]) + if shutil.which("7z"): + commands.append([ + "7z", + "x", + "-y", + f"-o{extract_dir.as_posix()}", + archive_file.as_posix(), + ]) + if shutil.which("bsdtar"): + commands.append([ + "bsdtar", + "-xf", + archive_file.as_posix(), + "-C", + extract_dir.as_posix(), + ]) + if not commands: + raise RuntimeError("未找到可用的 RAR 解压工具,请安装 unar、unrar、7z 或 bsdtar") + + errors = [] + for command in commands: + try: + result = subprocess.run( + command, + check=False, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=120, + ) + except Exception as err: + errors.append(f"{command[0]}:{str(err)}") + continue + if result.returncode == 0: + return + output = (result.stderr or result.stdout or "").strip() + errors.append(f"{command[0]}:{output or f'返回码 {result.returncode}'}") + raise RuntimeError(f"RAR 压缩包解压失败:{';'.join(errors)}") + @staticmethod def exits_files(directory: Path, extensions: list, min_filesize: int = 0, recursive: bool = True) -> bool: """ diff --git a/docker/Dockerfile b/docker/Dockerfile index 0f42173c..51d079b3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ netcat-openbsd \ lsof \ nano \ + unar \ libjemalloc2 \ && dpkg-reconfigure --frontend noninteractive tzdata \ && curl https://rclone.org/install.sh | bash \ diff --git a/docs/development-setup.md b/docs/development-setup.md index 132c355d..9e13fc41 100644 --- a/docs/development-setup.md +++ b/docs/development-setup.md @@ -9,6 +9,7 @@ - **Python 3.11 或更高版本** - **pip** (Python 包管理器) - **Git** (用于版本控制) +- **RAR 解压工具**:本地开发如需测试或使用 `.rar` 字幕包解压,请安装 `unar`、`unrar`、`7z` 或 `bsdtar` 之一;Docker 镜像会内置 `unar`。 Rust 加速扩展通过 `moviepilot-rust` PyPI 包安装,主项目本地开发不再需要 Rust toolchain。需要修改或发布 Rust 扩展时,请在 `MoviePilot-Rust` 仓库中构建。 diff --git a/tests/test_download_chain.py b/tests/test_download_chain.py index 07f64bdd..7c90d7cd 100644 --- a/tests/test_download_chain.py +++ b/tests/test_download_chain.py @@ -85,6 +85,20 @@ class _FakeSubtitleResponse: """ content = b"subtitle-content" + headers = {} + + +class _FakeSubtitleResponseWithHeader: + """ + 模拟带下载文件名响应头的字幕 API 响应。 + """ + + content = b"archive-content" + headers = { + "content-disposition": ( + 'attachment; filename="Hypnosis_AKA_Saimin_(1999)_480i_JAPANESE_NTSC_DVD_REMUX_MPEG-2_DD_2.0-MeeSta.rar"' + ) + } def test_download_single_submits_download_added_to_background(monkeypatch): @@ -176,6 +190,79 @@ def test_save_subtitle_response_creates_missing_temp_directory(monkeypatch, tmp_ assert storage_chain.uploaded_files +def test_save_subtitle_response_accepts_rar_filename_from_header(monkeypatch, tmp_path): + """ + PHP 下载链接应按响应头文件名识别 RAR 字幕压缩包,而不是按 URL 后缀误拒绝。 + """ + storage_chain = _FakeSubtitleStorageChain() + temp_path = tmp_path / "temp" + extracted_dir = temp_path / "Hypnosis_AKA_Saimin_(1999)_480i_JAPANESE_NTSC_DVD_REMUX_MPEG-2_DD_2.0-MeeSta" + extracted_subtitle = extracted_dir / "Hypnosis_AKA_Saimin_(1999).srt" + + def fake_unpack_archive(archive_file, extract_dir, archive_format=None): + assert archive_format == "rar" + assert archive_file.suffix == ".rar" + extract_dir.mkdir(parents=True, exist_ok=True) + extracted_subtitle.write_text("subtitle", encoding="utf-8") + + monkeypatch.setattr( + download_module, + "settings", + SimpleNamespace(TEMP_PATH=temp_path, RMT_SUBEXT=settings.RMT_SUBEXT), + ) + monkeypatch.setattr(download_module, "StorageChain", lambda: storage_chain) + monkeypatch.setattr(download_module.SystemUtils, "unpack_archive", fake_unpack_archive) + + chain = DownloadChain.__new__(DownloadChain) + subtitle = SubtitleInfo( + title="Hypnosis", + enclosure="https://audiences.me/downloadsubs.php?torrentid=666519&subid=2195", + ) + + saved_files = chain._save_subtitle_response( + subtitle=subtitle, + response=_FakeSubtitleResponseWithHeader(), + target_dir=Path("/downloads"), + ) + + assert saved_files == ["/downloads/Hypnosis_AKA_Saimin_(1999).srt"] + assert storage_chain.uploaded_files == [extracted_subtitle] + + +def test_save_subtitle_response_rejects_unsupported_filename_from_header(monkeypatch, tmp_path): + """ + 响应头文件名不是字幕或支持的压缩包时,应继续拒绝保存。 + """ + storage_chain = _FakeSubtitleStorageChain() + temp_path = tmp_path / "temp" + response = SimpleNamespace( + content=b"error", + headers={"content-disposition": 'attachment; filename="error.html"'}, + ) + + monkeypatch.setattr( + download_module, + "settings", + SimpleNamespace(TEMP_PATH=temp_path, RMT_SUBEXT=settings.RMT_SUBEXT), + ) + monkeypatch.setattr(download_module, "StorageChain", lambda: storage_chain) + + chain = DownloadChain.__new__(DownloadChain) + subtitle = SubtitleInfo( + title="Hypnosis", + enclosure="https://audiences.me/downloadsubs.php?torrentid=666519&subid=2195", + ) + + saved_files = chain._save_subtitle_response( + subtitle=subtitle, + response=response, + target_dir=Path("/downloads"), + ) + + assert saved_files == [] + assert storage_chain.uploaded_files == [] + + class _FakeBatchTorrentHelper: """ 为批量下载测试提供稳定排序和种子文件集数解析。