Merge pull request #5294 from cddjr/mteam_subtitle

2026-07-03 13:51:28 +08:00 · 2025-12-30 06:57:15 +08:00
parent 8c657ce41d 0b01a6aa91
commit 2c70f990c2
2 changed files with 209 additions and 71 deletions
--- a/app/modules/indexer/spider/mtorrent.py
+++ b/app/modules/indexer/spider/mtorrent.py
@@ -2,6 +2,7 @@ import base64
 import json
 import re
 from typing import Tuple, List, Optional
+from urllib.parse import urlparse

 from app.core.config import settings
 from app.db.systemconfig_oper import SystemConfigOper
@@ -25,6 +26,9 @@ class MTorrentSpider:
    _size = 100
    _searchurl = "https://api.%s/api/torrent/search"
    _downloadurl = "https://api.%s/api/torrent/genDlToken"
+    _subtitle_list_url = "https://api.%s/api/subtitle/list"
+    _subtitle_genlink_url = "https://api.%s/api/subtitle/genlink"
+    _subtitle_download_url ="https://api.%s/api/subtitle/dlV2?credential=%s"
    _pageurl = "%sdetail/%s"
    _timeout = 15

@@ -262,3 +266,110 @@ class MTorrentSpider:
        # base64编码
        base64_str = base64.b64encode(json.dumps(params).encode('utf-8')).decode('utf-8')
        return f"[{base64_str}]{url}"
+
+    def get_subtitle_links(self, page_url: str) -> List[str]:
+        """
+        Return the subtitle download links for a torrent detail page.
+
+        :param page_url: URL of the torrent detail page
+        :type page_url: str
+        :return: subtitle download links; empty when no torrent id can be derived
+        :rtype: List[str]
+        """
+        if not page_url:
+            return []
+        # The torrent id is the last path segment; tolerate a trailing "/"
+        torrent_id = urlparse(page_url).path.rstrip("/").rsplit("/", 1)[-1].strip()
+        if not torrent_id:
+            return []
+        return self.get_subtitle_links_by_id(torrent_id)
+
+    def get_subtitle_links_by_id(self, torrent_id: str) -> List[str]:
+        """
+        Resolve every subtitle attached to a torrent into a download link.
+
+        :param torrent_id: torrent ID
+        :return: download links gathered before any error occurred
+        """
+        links: List[str] = []
+        try:
+            subtitle_ids = self.__subtitle_ids(torrent_id) or []
+            for subtitle_id in subtitle_ids:
+                link = self.__subtitle_genlink(subtitle_id)
+                if link:
+                    links.append(link)
+        except Exception as e:
+            logger.error(f"{self._name} 获取字幕失败：{e}")
+        return links
+
+    def __subtitle_ids(self, torrent_id: str) -> Optional[List[str]]:
+        """
+        Fetch the IDs of the subtitles attached to a torrent.
+
+        :param torrent_id: torrent ID
+        :type torrent_id: str
+        :return: subtitle IDs, or None when the request or the API call fails
+        :rtype: List[str] | None
+        """
+        url = self._subtitle_list_url % self._domain
+        # Send the request
+        res = RequestUtils(
+            headers={
+                "Accept": "application/json, text/plain, */*",
+                "User-Agent": f"{self._ua}",
+                "x-api-key": self._apikey,
+            },
+            proxies=self._proxy,
+            timeout=self._timeout,
+        ).post_res(url, data={"id": torrent_id})
+        if res and res.status_code == 200:
+            result = res.json()
+            if int(result.get("code", -1)) == 0:
+                return [item["id"] for item in (result.get("data") or []) if "id" in item]
+            else:
+                logger.warn(
+                    f"{self._name} 获取字幕列表失败，返回：{result.get('message', '未知')}"
+                )
+                return None
+        elif res is not None:
+            logger.warn(f"{self._name} 获取字幕列表失败，错误码：{res.status_code}")
+            return None
+        else:
+            logger.warn(f"{self._name} 获取字幕列表失败，无法连接 {self._domain}")
+            return None
+
+    def __subtitle_genlink(self, subtitle_id: str) -> Optional[str]:
+        """
+        Request a download link for one subtitle.
+
+        :param subtitle_id: subtitle ID
+        :type subtitle_id: str
+        :return: download link, or None when the request or the API call fails
+        :rtype: str | None
+        """
+        url = self._subtitle_genlink_url % self._domain
+        # Send the request
+        res = RequestUtils(
+            headers={
+                "Accept": "application/json, text/plain, */*",
+                "User-Agent": f"{self._ua}",
+                "x-api-key": self._apikey,
+            },
+            proxies=self._proxy,
+            timeout=self._timeout,
+        ).post_res(url, data={"id": subtitle_id})
+        if res and res.status_code == 200:
+            result = res.json()
+            if int(result.get("code", -1)) == 0 and isinstance(result.get("data"), str):
+                return self._subtitle_download_url % (self._domain, result["data"])
+            else:
+                logger.warn(
+                    f"{self._name} 获取字幕下载链接失败，返回：{result.get('message', '未知')}"
+                )
+                return None
+        elif res is not None:
+            logger.warn(f"{self._name} 获取字幕下载链接失败，错误码：{res.status_code}")
+            return None
+        else:
+            logger.warn(f"{self._name} 获取字幕下载链接失败，无法连接 {self._domain}")
+            return None
--- a/app/modules/subtitle/init.py
+++ b/app/modules/subtitle/init.py
@@ -8,9 +8,13 @@ from lxml import etree
 from app.chain.storage import StorageChain
 from app.core.config import settings
 from app.core.context import Context
+from app.db.site_oper import SiteOper
+from app.helper.sites import SitesHelper  # noqa
 from app.helper.torrent import TorrentHelper
 from app.log import logger
 from app.modules import _ModuleBase
+from app.modules.indexer.spider.mtorrent import MTorrentSpider
+from app.schemas import TorrentInfo
 from app.schemas.file import FileURI 
 from app.schemas.types import ModuleType, OtherModulesType
 from app.utils.http import RequestUtils
@@ -25,7 +29,9 @@ class SubtitleModule(_ModuleBase):

    # 站点详情页字幕下载链接识别XPATH
    _SITE_SUBTITLE_XPATH = [
+        '//td[@class="rowhead"][text()="字幕"]/following-sibling::td//a[not(@class)]/@href',
        '//td[@class="rowhead"][text()="字幕"]/following-sibling::td//a/@href',
+        '//div[contains(@class, "font-bold")][text()="字幕"]/following-sibling::div[1]//a[not(@class)]/@href', # 憨憨
    ]

    def init_module(self) -> None:
@@ -65,6 +71,54 @@ class SubtitleModule(_ModuleBase):
    def test(self):
        pass

+    def _get_subtitle_links(self, torrent: TorrentInfo):
+        """
+        Collect the subtitle download links for ``torrent``.
+        :return: list of links (may be empty), or None when the page request fails
+        """
+        # Sites accessed through an API need dedicated handling
+        if torrent.site is not None:
+            site = SiteOper().get(torrent.site)
+            # Guard: the lookup may yield no site record (assumed Optional, confirm)
+            if site and (indexer := SitesHelper().get_indexer(site.domain)):
+                if indexer.get("parser") == "mTorrent":
+                    return MTorrentSpider(indexer).get_subtitle_links(torrent.page_url)
+                # TODO: other sites that are accessed through an API
+        # Regular sites: fetch the detail page and extract the links via XPath
+        request = RequestUtils(cookies=torrent.site_cookie, ua=torrent.site_ua)
+        res = request.get_res(torrent.page_url)
+        if res and res.status_code == 200:
+            if not res.text:
+                logger.warn(f"读取页面代码失败：{torrent.page_url}")
+                return []
+            html = etree.HTML(res.text)
+            try:
+                sublink_list = []
+                for xpath in self._SITE_SUBTITLE_XPATH:
+                    sublinks = html.xpath(xpath)
+                    if sublinks:
+                        for sublink in sublinks:
+                            if not sublink:
+                                continue
+                            if not sublink.startswith("http"):
+                                base_url = StringUtils.get_base_url(torrent.page_url)
+                                if sublink.startswith("/"):
+                                    sublink = "%s%s" % (base_url, sublink)
+                                else:
+                                    sublink = "%s/%s" % (base_url, sublink)
+                            sublink_list.append(sublink)
+                        # Links were found; the remaining XPath patterns can be skipped
+                        break
+                return sublink_list
+            finally:
+                if html is not None:
+                    del html
+        elif res is not None:
+            logger.warn(f"连接 {torrent.page_url} 失败，状态码：{res.status_code}")
+        else:
+            logger.warn(f"无法打开链接：{torrent.page_url}")
+        return None
+
    def download_added(self, context: Context, download_dir: Path, torrent_content: Union[str, bytes] = None):
        """
        添加下载任务成功后，从站点下载字幕，保存到下载目录
@@ -117,83 +171,56 @@ class SubtitleModule(_ModuleBase):
            logger.error(f"下载目录不存在，无法保存字幕：{download_dir / folder_name}")
            return
        # 读取网站代码
+        sublink_list = self._get_subtitle_links(torrent)
+        if not sublink_list:
+            logger.warn(f"{torrent.page_url} 页面未找到字幕下载链接")
+            return
+        # 下载所有字幕文件
        request = RequestUtils(cookies=torrent.site_cookie, ua=torrent.site_ua)
-        res = request.get_res(torrent.page_url)
-        if res and res.status_code == 200:
-            if not res.text:
-                logger.warn(f"读取页面代码失败：{torrent.page_url}")
-                return
-            html = etree.HTML(res.text)
-            try:
-                sublink_list = []
-                for xpath in self._SITE_SUBTITLE_XPATH:
-                    sublinks = html.xpath(xpath)
-                    if sublinks:
-                        for sublink in sublinks:
-                            if not sublink:
-                                continue
-                            if not sublink.startswith("http"):
-                                base_url = StringUtils.get_base_url(torrent.page_url)
-                                if sublink.startswith("/"):
-                                    sublink = "%s%s" % (base_url, sublink)
-                                else:
-                                    sublink = "%s/%s" % (base_url, sublink)
-                            sublink_list.append(sublink)
-            finally:
-                if html is not None:
-                    del html
-            # 下载所有字幕文件
-            for sublink in sublink_list:
-                logger.info(f"找到字幕下载链接：{sublink}，开始下载...")
-                # 下载
-                ret = request.get_res(sublink)
-                if ret and ret.status_code == 200:
-                    # 保存ZIP
-                    file_name = TorrentHelper.get_url_filename(ret, sublink)
-                    if not file_name:
-                        logger.warn(f"链接不是字幕文件：{sublink}")
-                        continue
-                    if file_name.lower().endswith(".zip"):
-                        # ZIP包
-                        zip_file = settings.TEMP_PATH / file_name
-                        # 保存
-                        zip_file.write_bytes(ret.content)
-                        # 解压路径
-                        zip_path = zip_file.with_name(zip_file.stem)
-                        # 解压文件
-                        shutil.unpack_archive(zip_file, zip_path, format='zip')
-                        # 遍历转移文件
-                        for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT):
-                            target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
-                            if storageChain.get_file_item(storage, target_sub_file):
-                                logger.info(f"字幕文件已存在：{target_sub_file}")
-                                continue
-                            logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...")
-                            storageChain.upload_file(working_dir_item, sub_file)
-                        # 删除临时文件
-                        try:
-                            shutil.rmtree(zip_path)
-                            zip_file.unlink()
-                        except Exception as err:
-                            logger.error(f"删除临时文件失败：{str(err)}")
-                    else:
-                        sub_file = settings.TEMP_PATH / file_name
-                        # 保存
-                        sub_file.write_bytes(ret.content)
+        for sublink in sublink_list:
+            logger.info(f"找到字幕下载链接：{sublink}，开始下载...")
+            # 下载
+            ret = request.get_res(sublink)
+            if ret and ret.status_code == 200:
+                # 保存ZIP
+                file_name = TorrentHelper.get_url_filename(ret, sublink)
+                if not file_name:
+                    logger.warn(f"链接不是字幕文件：{sublink}")
+                    continue
+                if file_name.lower().endswith(".zip"):
+                    # ZIP包
+                    zip_file = settings.TEMP_PATH / file_name
+                    # 保存
+                    zip_file.write_bytes(ret.content)
+                    # 解压路径
+                    zip_path = zip_file.with_name(zip_file.stem)
+                    # 解压文件
+                    shutil.unpack_archive(zip_file, zip_path, format='zip')
+                    # 遍历转移文件
+                    for sub_file in SystemUtils.list_files(zip_path, settings.RMT_SUBEXT):
                        target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
                        if storageChain.get_file_item(storage, target_sub_file):
                            logger.info(f"字幕文件已存在：{target_sub_file}")
                            continue
                        logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...")
                        storageChain.upload_file(working_dir_item, sub_file)
+                    # 删除临时文件
+                    try:
+                        shutil.rmtree(zip_path)
+                        zip_file.unlink()
+                    except Exception as err:
+                        logger.error(f"删除临时文件失败：{str(err)}")
                else:
-                    logger.error(f"下载字幕文件失败：{sublink}")
-                    continue
-            if sublink_list:
-                logger.info(f"{torrent.page_url} 页面字幕下载完成")
+                    sub_file = settings.TEMP_PATH / file_name
+                    # 保存
+                    sub_file.write_bytes(ret.content)
+                    target_sub_file = Path(working_dir_item.path) / Path(sub_file.name)
+                    if storageChain.get_file_item(storage, target_sub_file):
+                        logger.info(f"字幕文件已存在：{target_sub_file}")
+                        continue
+                    logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...")
+                    storageChain.upload_file(working_dir_item, sub_file)
            else:
-                logger.warn(f"{torrent.page_url} 页面未找到字幕下载链接")
-        elif res is not None:
-            logger.warn(f"连接 {torrent.page_url} 失败，状态码：{res.status_code}")
-        else:
-            logger.warn(f"无法打开链接：{torrent.page_url}")
+                logger.error(f"下载字幕文件失败：{sublink}")
+                continue
+        logger.info(f"{torrent.page_url} 页面字幕下载完成")