Compare commits

...

17 Commits

Author SHA1 Message Date
jxxghp
b8f4cd5fea v2.2.9
- 资源包升级以提升安全性
- 优化了页面数据刷新机制

注意:本次升级后会默认清理一次种子识别缓存
2025-02-14 19:35:49 +08:00
jxxghp
aa1557ad9e fix setup 2025-02-14 17:37:10 +08:00
jxxghp
f03da6daca fix setup 2025-02-14 17:17:16 +08:00
jxxghp
30eb4385d4 fix sites 2025-02-14 13:44:18 +08:00
jxxghp
4c9afcc1a8 fix 2025-02-14 13:32:20 +08:00
jxxghp
dd47432a45 fix 2025-02-14 12:55:32 +08:00
jxxghp
0ba6974bd6 fix #3843
fix #3829
2025-02-13 08:08:13 +08:00
jxxghp
827d8f6d84 add workflow framework 2025-02-12 17:49:01 +08:00
jxxghp
943a462c69 Merge pull request #3885 from InfinityPacer/feature/security 2025-02-11 17:21:04 +08:00
InfinityPacer
a1bc773fb5 feat(security): add AVIF support 2025-02-11 17:10:50 +08:00
InfinityPacer
ac169b7d22 feat(security): add cache default extension for files without suffix 2025-02-11 17:09:43 +08:00
jxxghp
eecbbfea3a 更新 version.py 2025-02-10 22:28:06 +08:00
jxxghp
635ddb044e add depends for DiscoverMediaSource 2025-02-10 22:05:56 +08:00
jxxghp
1a6123489d 更新 config.py 2025-02-10 07:52:40 +08:00
jxxghp
4e69195a8d Merge pull request #3876 from InfinityPacer/feature/security 2025-02-10 07:11:28 +08:00
InfinityPacer
e48c8ee652 Revert "fix is_safe_url"
This reverts commit 5e2ad34864.
2025-02-10 02:22:53 +08:00
InfinityPacer
7df07b86b9 feat(security): add cmvideo image for http with port 2025-02-10 02:19:08 +08:00
26 changed files with 203 additions and 802 deletions

25
app/actions/__init__.py Normal file
View File

@@ -0,0 +1,25 @@
from abc import ABC, abstractmethod
from pydantic.main import BaseModel
from app.schemas import ActionContext
class BaseAction(BaseModel, ABC):
"""
工作流动作基类
"""
@property
@abstractmethod
def name(self) -> str:
pass
@property
@abstractmethod
def description(self) -> str:
pass
@abstractmethod
async def execute(self, params: dict, context: ActionContext) -> ActionContext:
raise NotImplementedError

View File

@@ -2,7 +2,7 @@ from fastapi import APIRouter
from app.api.endpoints import login, user, site, message, webhook, subscribe, \
media, douban, search, plugin, tmdb, history, system, download, dashboard, \
transfer, mediaserver, bangumi, storage, discover, recommend
transfer, mediaserver, bangumi, storage, discover, recommend, workflow
api_router = APIRouter()
api_router.include_router(login.router, prefix="/login", tags=["login"])
@@ -26,3 +26,4 @@ api_router.include_router(mediaserver.router, prefix="/mediaserver", tags=["medi
api_router.include_router(bangumi.router, prefix="/bangumi", tags=["bangumi"])
api_router.include_router(discover.router, prefix="/discover", tags=["discover"])
api_router.include_router(recommend.router, prefix="/recommend", tags=["recommend"])
api_router.include_router(workflow.router, prefix="/workflow", tags=["workflow"])

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Optional, Union
import aiofiles
import pillow_avif # noqa 用于自动注册AVIF支持
from PIL import Image
from fastapi import APIRouter, Depends, HTTPException, Header, Request, Response
from fastapi.responses import StreamingResponse
@@ -50,7 +51,6 @@ def fetch_image(
"""
处理图片缓存逻辑支持HTTP缓存和磁盘缓存
"""
if not url:
raise HTTPException(status_code=404, detail="URL not provided")
@@ -68,6 +68,10 @@ def fetch_image(
sanitized_path = SecurityUtils.sanitize_url_path(url)
cache_path = settings.CACHE_PATH / "images" / sanitized_path
# 没有文件类型,则添加后缀,在恶意文件类型和实际需求下的折衷选择
if not cache_path.suffix:
cache_path = cache_path.with_suffix(".jpg")
# 确保缓存路径和文件类型合法
if not SecurityUtils.is_safe_path(settings.CACHE_PATH, cache_path, settings.SECURITY_IMAGE_SUFFIXES):
raise HTTPException(status_code=400, detail="Invalid cache path or file type")
@@ -88,7 +92,8 @@ def fetch_image(
# 请求远程图片
referer = "https://movie.douban.com/" if "doubanio.com" in url else None
proxies = settings.PROXY if proxy else None
response = RequestUtils(ua=settings.USER_AGENT, proxies=proxies, referer=referer).get_res(url=url)
response = RequestUtils(ua=settings.USER_AGENT, proxies=proxies, referer=referer,
accept_type="image/avif,image/webp,image/apng,*/*").get_res(url=url)
if not response:
raise HTTPException(status_code=502, detail="Failed to fetch the image from the remote server")

View File

@@ -0,0 +1,3 @@
from fastapi import APIRouter
router = APIRouter()

View File

@@ -7,7 +7,6 @@ from pathlib import Path
from typing import Optional, Any, Tuple, List, Set, Union, Dict
from qbittorrentapi import TorrentFilesList
from ruamel.yaml import CommentedMap
from transmission_rpc import File
from app.core.config import settings
@@ -77,7 +76,7 @@ class ChainBase(metaclass=ABCMeta):
"""
cache_path = settings.TEMP_PATH / filename
if cache_path.exists():
Path(cache_path).unlink()
cache_path.unlink()
def run_module(self, method: str, *args, **kwargs) -> Any:
"""
@@ -308,7 +307,7 @@ class ChainBase(metaclass=ABCMeta):
"""
return self.run_module("search_collections", name=name)
def search_torrents(self, site: CommentedMap,
def search_torrents(self, site: dict,
keywords: List[str],
mtype: MediaType = None,
page: int = 0) -> List[TorrentInfo]:
@@ -323,7 +322,7 @@ class ChainBase(metaclass=ABCMeta):
return self.run_module("search_torrents", site=site, keywords=keywords,
mtype=mtype, page=page)
def refresh_torrents(self, site: CommentedMap) -> List[TorrentInfo]:
def refresh_torrents(self, site: dict) -> List[TorrentInfo]:
"""
获取站点最新一页的种子,多个站点需要多线程处理
:param site: 站点

View File

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path
from typing import List
import pillow_avif # noqa 用于自动注册AVIF支持
from PIL import Image
from app.chain import ChainBase
@@ -116,6 +117,10 @@ class RecommendChain(ChainBase, metaclass=Singleton):
sanitized_path = SecurityUtils.sanitize_url_path(url)
cache_path = settings.CACHE_PATH / "images" / sanitized_path
# 没有文件类型,则添加后缀,在恶意文件类型和实际需求下的折衷选择
if not cache_path.suffix:
cache_path = cache_path.with_suffix(".jpg")
# 确保缓存路径和文件类型合法
if not SecurityUtils.is_safe_path(settings.CACHE_PATH, cache_path, settings.SECURITY_IMAGE_SUFFIXES):
logger.debug(f"Invalid cache path or file type for URL: {url}, sanitized path: {sanitized_path}")

View File

@@ -6,7 +6,6 @@ from typing import Optional, Tuple, Union, Dict
from urllib.parse import urljoin
from lxml import etree
from ruamel.yaml import CommentedMap
from app.chain import ChainBase
from app.core.config import global_vars, settings
@@ -55,7 +54,7 @@ class SiteChain(ChainBase):
"yemapt.org": self.__yema_test,
}
def refresh_userdata(self, site: CommentedMap = None) -> Optional[SiteUserData]:
def refresh_userdata(self, site: dict = None) -> Optional[SiteUserData]:
"""
刷新站点的用户数据
:param site: 站点

View File

@@ -670,13 +670,10 @@ class TransferChain(ChainBase, metaclass=Singleton):
self.jobview.add_task(task, state=curr_task.state if curr_task else "waiting")
# 获取集数据
if not task.episodes_info and task.mediainfo.type == MediaType.TV:
if task.meta.begin_season is None:
task.meta.begin_season = 1
task.mediainfo.season = task.mediainfo.season or task.meta.begin_season
if task.mediainfo.type == MediaType.TV and not task.episodes_info:
task.episodes_info = self.tmdbchain.tmdb_episodes(
tmdbid=task.mediainfo.tmdb_id,
season=task.mediainfo.season
season=task.mediainfo.season or task.meta.begin_season or 1
)
# 查询整理目标目录

View File

@@ -247,7 +247,7 @@ class ConfigModel(BaseModel):
)
# 允许的图片文件后缀格式
SECURITY_IMAGE_SUFFIXES: List[str] = Field(
default_factory=lambda: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".svg"]
default_factory=lambda: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".svg", ".avif"]
)
# 重命名时支持的S0别名
RENAME_FORMAT_S0_NAMES: List[str] = Field(

24
app/core/workflow.py Normal file
View File

@@ -0,0 +1,24 @@
class WorkFlowManager:
"""
工作流管理器
"""
def __init__(self):
self.workflows = {}
def register(self, workflow):
"""
注册工作流
:param workflow: 工作流对象
:return:
"""
self.workflows[workflow.name] = workflow
def get_workflow(self, name):
"""
获取工作流
:param name: 工作流名称
:return:
"""
return self.workflows.get(name)

35
app/db/models/workflow.py Normal file
View File

@@ -0,0 +1,35 @@
from datetime import datetime
from sqlalchemy import Column, Integer, JSON, Sequence, String
from app.db import Base
class Workflow(Base):
"""
工作流表
"""
# ID
id = Column(Integer, Sequence('id'), primary_key=True, index=True)
# 名称
name = Column(String, index=True, nullable=False)
# 描述
description = Column(String)
# 定时器
timer = Column(String)
# 状态N-新建 R-运行中 P-暂停 S-成功 F-失败
state = Column(String, nullable=False, index=True, default='N')
# 当前执行动作
current_action = Column(String)
# 任务执行结果
result = Column(String)
# 已执行次数
run_count = Column(Integer, default=0)
# 任务列表
actions = Column(JSON, default=list)
# 执行上下文
context = Column(JSON, default=dict)
# 创建时间
add_time = Column(String, default=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# 最后执行时间
last_time = Column(String)

View File

@@ -1,17 +1,14 @@
from datetime import datetime
from typing import List, Optional, Tuple, Union
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.core.context import TorrentInfo
from app.db.site_oper import SiteOper
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper, SiteSpider
from app.log import logger
from app.modules import _ModuleBase
from app.modules.indexer.parser import SiteParserBase
from app.modules.indexer.spider import TorrentSpider
from app.modules.indexer.spider.haidan import HaiDanSpider
from app.modules.indexer.spider.mtorrent import MTorrentSpider
from app.modules.indexer.spider.tnode import TNodeSpider
@@ -76,7 +73,7 @@ class IndexerModule(_ModuleBase):
def init_setting(self) -> Tuple[str, Union[str, bool]]:
pass
def search_torrents(self, site: CommentedMap,
def search_torrents(self, site: dict,
keywords: List[str] = None,
mtype: MediaType = None,
page: int = 0) -> List[TorrentInfo]:
@@ -204,7 +201,7 @@ class IndexerModule(_ModuleBase):
return __remove_duplicate(torrents)
@staticmethod
def __spider_search(indexer: CommentedMap,
def __spider_search(indexer: dict,
search_word: str = None,
mtype: MediaType = None,
page: int = 0) -> Tuple[bool, List[dict]]:
@@ -217,14 +214,14 @@ class IndexerModule(_ModuleBase):
:param: timeout: 超时时间
:return: 是否发生错误, 种子列表
"""
_spider = TorrentSpider(indexer=indexer,
mtype=mtype,
keyword=search_word,
page=page)
_spider = SiteSpider(indexer=indexer,
mtype=mtype,
keyword=search_word,
page=page)
return _spider.is_error, _spider.get_torrents()
def refresh_torrents(self, site: CommentedMap) -> Optional[List[TorrentInfo]]:
def refresh_torrents(self, site: dict) -> Optional[List[TorrentInfo]]:
"""
获取站点最新一页的种子,多个站点需要多线程处理
:param site: 站点
@@ -232,7 +229,7 @@ class IndexerModule(_ModuleBase):
"""
return self.search_torrents(site=site)
def refresh_userdata(self, site: CommentedMap) -> Optional[SiteUserData]:
def refresh_userdata(self, site: dict) -> Optional[SiteUserData]:
"""
刷新站点的用户数据
:param site: 站点

View File

@@ -1,742 +0,0 @@
import copy
import datetime
import re
import traceback
from typing import List
from urllib.parse import quote, urlencode, urlparse, parse_qs
from jinja2 import Template
from pyquery import PyQuery
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.log import logger
from app.schemas.types import MediaType
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
class TorrentSpider:
# 是否出现错误
is_error: bool = False
# 索引器ID
indexerid: int = None
# 索引器名称
indexername: str = None
# 站点域名
domain: str = None
# 站点Cookie
cookie: str = None
# 站点UA
ua: str = None
# Requests 代理
proxies: dict = None
# playwright 代理
proxy_server: dict = None
# 是否渲染
render: bool = False
# Referer
referer: str = None
# 搜索关键字
keyword: str = None
# 媒体类型
mtype: MediaType = None
# 搜索路径、方式配置
search: dict = {}
# 批量搜索配置
batch: dict = {}
# 浏览配置
browse: dict = {}
# 站点分类配置
category: dict = {}
# 站点种子列表配置
list: dict = {}
# 站点种子字段配置
fields: dict = {}
# 页码
page: int = 0
# 搜索条数, 默认: 100条
result_num: int = 100
# 单个种子信息
torrents_info: dict = {}
# 种子列表
torrents_info_array: list = []
# 搜索超时, 默认: 15秒
_timeout = 15
def __init__(self,
indexer: CommentedMap,
keyword: [str, list] = None,
page: int = 0,
referer: str = None,
mtype: MediaType = None):
"""
设置查询参数
:param indexer: 索引器
:param keyword: 搜索关键字,如果数组则为批量搜索
:param page: 页码
:param referer: Referer
:param mtype: 媒体类型
"""
if not indexer:
return
self.keyword = keyword
self.mtype = mtype
self.indexerid = indexer.get('id')
self.indexername = indexer.get('name')
self.search = indexer.get('search')
self.batch = indexer.get('batch')
self.browse = indexer.get('browse')
self.category = indexer.get('category')
self.list = indexer.get('torrents').get('list', {})
self.fields = indexer.get('torrents').get('fields')
self.render = indexer.get('render')
self.domain = indexer.get('domain')
self.result_num = int(indexer.get('result_num') or 100)
self._timeout = int(indexer.get('timeout') or 15)
self.page = page
if self.domain and not str(self.domain).endswith("/"):
self.domain = self.domain + "/"
if indexer.get('ua'):
self.ua = indexer.get('ua') or settings.USER_AGENT
else:
self.ua = settings.USER_AGENT
if indexer.get('proxy'):
self.proxies = settings.PROXY
self.proxy_server = settings.PROXY_SERVER
if indexer.get('cookie'):
self.cookie = indexer.get('cookie')
if referer:
self.referer = referer
self.torrents_info_array = []
def get_torrents(self) -> List[dict]:
"""
开始请求
"""
if not self.search or not self.domain:
return []
# 种子搜索相对路径
paths = self.search.get('paths', [])
torrentspath = ""
if len(paths) == 1:
torrentspath = paths[0].get('path', '')
else:
for path in paths:
if path.get("type") == "all" and not self.mtype:
torrentspath = path.get('path')
break
elif path.get("type") == "movie" and self.mtype == MediaType.MOVIE:
torrentspath = path.get('path')
break
elif path.get("type") == "tv" and self.mtype == MediaType.TV:
torrentspath = path.get('path')
break
# 精确搜索
if self.keyword:
if isinstance(self.keyword, list):
# 批量查询
if self.batch:
delimiter = self.batch.get('delimiter') or ' '
space_replace = self.batch.get('space_replace') or ' '
search_word = delimiter.join([str(k).replace(' ',
space_replace) for k in self.keyword])
else:
search_word = " ".join(self.keyword)
# 查询模式:或
search_mode = "1"
else:
# 单个查询
search_word = self.keyword
# 查询模式与
search_mode = "0"
# 搜索URL
indexer_params = self.search.get("params", {}).copy()
if indexer_params:
search_area = indexer_params.get('search_area')
# search_area非0表示支持imdbid搜索
if (search_area and
(not self.keyword or not self.keyword.startswith('tt'))):
# 支持imdbid搜索但关键字不是imdbid时不启用imdbid搜索
indexer_params.pop('search_area')
# 变量字典
inputs_dict = {
"keyword": search_word
}
# 查询参数,默认查询标题
params = {
"search_mode": search_mode,
"search_area": 0,
"page": self.page or 0,
"notnewword": 1
}
# 额外参数
for key, value in indexer_params.items():
params.update({
"%s" % key: str(value).format(**inputs_dict)
})
# 分类条件
if self.category:
if self.mtype == MediaType.TV:
cats = self.category.get("tv") or []
elif self.mtype == MediaType.MOVIE:
cats = self.category.get("movie") or []
else:
cats = (self.category.get("movie") or []) + (self.category.get("tv") or [])
for cat in cats:
if self.category.get("field"):
value = params.get(self.category.get("field"), "")
params.update({
"%s" % self.category.get("field"): value + self.category.get("delimiter",
' ') + cat.get("id")
})
else:
params.update({
"cat%s" % cat.get("id"): 1
})
searchurl = self.domain + torrentspath + "?" + urlencode(params)
else:
# 变量字典
inputs_dict = {
"keyword": quote(search_word),
"page": self.page or 0
}
# 无额外参数
searchurl = self.domain + str(torrentspath).format(**inputs_dict)
# 列表浏览
else:
# 变量字典
inputs_dict = {
"page": self.page or 0,
"keyword": ""
}
# 有单独浏览路径
if self.browse:
torrentspath = self.browse.get("path")
if self.browse.get("start"):
start_page = int(self.browse.get("start")) + int(self.page or 0)
inputs_dict.update({
"page": start_page
})
elif self.page:
torrentspath = torrentspath + f"?page={self.page}"
# 搜索Url
searchurl = self.domain + str(torrentspath).format(**inputs_dict)
logger.info(f"开始请求:{searchurl}")
if self.render:
# 浏览器仿真
page_source = PlaywrightHelper().get_page_source(
url=searchurl,
cookies=self.cookie,
ua=self.ua,
proxies=self.proxy_server,
timeout=self._timeout
)
else:
# requests请求
ret = RequestUtils(
ua=self.ua,
cookies=self.cookie,
timeout=self._timeout,
referer=self.referer,
proxies=self.proxies
).get_res(searchurl, allow_redirects=True)
page_source = RequestUtils.get_decoded_html_content(ret,
settings.ENCODING_DETECTION_PERFORMANCE_MODE,
settings.ENCODING_DETECTION_MIN_CONFIDENCE)
# 解析
return self.parse(page_source)
def __get_title(self, torrent):
# title default text
if 'title' not in self.fields:
return
selector = self.fields.get('title', {})
if 'selector' in selector:
title = torrent(selector.get('selector', '')).clone()
self.__remove(title, selector)
items = self.__attribute_or_text(title, selector)
self.torrents_info['title'] = self.__index(items, selector)
elif 'text' in selector:
render_dict = {}
if "title_default" in self.fields:
title_default_selector = self.fields.get('title_default', {})
title_default_item = torrent(title_default_selector.get('selector', '')).clone()
self.__remove(title_default_item, title_default_selector)
items = self.__attribute_or_text(title_default_item, selector)
title_default = self.__index(items, title_default_selector)
render_dict.update({'title_default': title_default})
if "title_optional" in self.fields:
title_optional_selector = self.fields.get('title_optional', {})
title_optional_item = torrent(title_optional_selector.get('selector', '')).clone()
self.__remove(title_optional_item, title_optional_selector)
items = self.__attribute_or_text(title_optional_item, title_optional_selector)
title_optional = self.__index(items, title_optional_selector)
render_dict.update({'title_optional': title_optional})
self.torrents_info['title'] = Template(selector.get('text')).render(fields=render_dict)
self.torrents_info['title'] = self.__filter_text(self.torrents_info.get('title'),
selector.get('filters'))
def __get_description(self, torrent):
# title optional text
if 'description' not in self.fields:
return
selector = self.fields.get('description', {})
if "selector" in selector \
or "selectors" in selector:
description = torrent(selector.get('selector', selector.get('selectors', ''))).clone()
if description:
self.__remove(description, selector)
items = self.__attribute_or_text(description, selector)
self.torrents_info['description'] = self.__index(items, selector)
elif "text" in selector:
render_dict = {}
if "tags" in self.fields:
tags_selector = self.fields.get('tags', {})
tags_item = torrent(tags_selector.get('selector', '')).clone()
self.__remove(tags_item, tags_selector)
items = self.__attribute_or_text(tags_item, tags_selector)
tag = self.__index(items, tags_selector)
render_dict.update({'tags': tag})
if "subject" in self.fields:
subject_selector = self.fields.get('subject', {})
subject_item = torrent(subject_selector.get('selector', '')).clone()
self.__remove(subject_item, subject_selector)
items = self.__attribute_or_text(subject_item, subject_selector)
subject = self.__index(items, subject_selector)
render_dict.update({'subject': subject})
if "description_free_forever" in self.fields:
description_free_forever_selector = self.fields.get("description_free_forever", {})
description_free_forever_item = torrent(description_free_forever_selector.get("selector", '')).clone()
self.__remove(description_free_forever_item, description_free_forever_selector)
items = self.__attribute_or_text(description_free_forever_item, description_free_forever_selector)
description_free_forever = self.__index(items, description_free_forever_selector)
render_dict.update({"description_free_forever": description_free_forever})
if "description_normal" in self.fields:
description_normal_selector = self.fields.get("description_normal", {})
description_normal_item = torrent(description_normal_selector.get("selector", '')).clone()
self.__remove(description_normal_item, description_normal_selector)
items = self.__attribute_or_text(description_normal_item, description_normal_selector)
description_normal = self.__index(items, description_normal_selector)
render_dict.update({"description_normal": description_normal})
self.torrents_info['description'] = Template(selector.get('text')).render(fields=render_dict)
self.torrents_info['description'] = self.__filter_text(self.torrents_info.get('description'),
selector.get('filters'))
def __get_detail(self, torrent):
# details page text
if 'details' not in self.fields:
return
selector = self.fields.get('details', {})
details = torrent(selector.get('selector', '')).clone()
self.__remove(details, selector)
items = self.__attribute_or_text(details, selector)
item = self.__index(items, selector)
detail_link = self.__filter_text(item, selector.get('filters'))
if detail_link:
if not detail_link.startswith("http"):
if detail_link.startswith("//"):
self.torrents_info['page_url'] = self.domain.split(":")[0] + ":" + detail_link
elif detail_link.startswith("/"):
self.torrents_info['page_url'] = self.domain + detail_link[1:]
else:
self.torrents_info['page_url'] = self.domain + detail_link
else:
self.torrents_info['page_url'] = detail_link
def __get_download(self, torrent):
# download link text
if 'download' not in self.fields:
return
selector = self.fields.get('download', {})
download = torrent(selector.get('selector', '')).clone()
self.__remove(download, selector)
items = self.__attribute_or_text(download, selector)
item = self.__index(items, selector)
download_link = self.__filter_text(item, selector.get('filters'))
if download_link:
if not download_link.startswith("http") \
and not download_link.startswith("magnet"):
_scheme, _domain = StringUtils.get_url_netloc(self.domain)
if _domain in download_link:
if download_link.startswith("/"):
self.torrents_info['enclosure'] = f"{_scheme}:{download_link}"
else:
self.torrents_info['enclosure'] = f"{_scheme}://{download_link}"
else:
if download_link.startswith("/"):
self.torrents_info['enclosure'] = f"{self.domain}{download_link[1:]}"
else:
self.torrents_info['enclosure'] = f"{self.domain}{download_link}"
else:
self.torrents_info['enclosure'] = download_link
def __get_imdbid(self, torrent):
# imdbid
if "imdbid" not in self.fields:
return
selector = self.fields.get('imdbid', {})
imdbid = torrent(selector.get('selector', '')).clone()
self.__remove(imdbid, selector)
items = self.__attribute_or_text(imdbid, selector)
item = self.__index(items, selector)
self.torrents_info['imdbid'] = item
self.torrents_info['imdbid'] = self.__filter_text(self.torrents_info.get('imdbid'),
selector.get('filters'))
def __get_size(self, torrent):
# torrent size int
if 'size' not in self.fields:
return
selector = self.fields.get('size', {})
size = torrent(selector.get('selector', selector.get("selectors", ''))).clone()
self.__remove(size, selector)
items = self.__attribute_or_text(size, selector)
item = self.__index(items, selector)
if item:
size_val = item.replace("\n", "").strip()
size_val = self.__filter_text(size_val,
selector.get('filters'))
self.torrents_info['size'] = StringUtils.num_filesize(size_val)
else:
self.torrents_info['size'] = 0
def __get_leechers(self, torrent):
# torrent leechers int
if 'leechers' not in self.fields:
return
selector = self.fields.get('leechers', {})
leechers = torrent(selector.get('selector', '')).clone()
self.__remove(leechers, selector)
items = self.__attribute_or_text(leechers, selector)
item = self.__index(items, selector)
if item:
peers_val = item.split("/")[0]
peers_val = peers_val.replace(",", "")
peers_val = self.__filter_text(peers_val,
selector.get('filters'))
self.torrents_info['peers'] = int(peers_val) if peers_val and peers_val.isdigit() else 0
else:
self.torrents_info['peers'] = 0
def __get_seeders(self, torrent):
# torrent leechers int
if 'seeders' not in self.fields:
return
selector = self.fields.get('seeders', {})
seeders = torrent(selector.get('selector', '')).clone()
self.__remove(seeders, selector)
items = self.__attribute_or_text(seeders, selector)
item = self.__index(items, selector)
if item:
seeders_val = item.split("/")[0]
seeders_val = seeders_val.replace(",", "")
seeders_val = self.__filter_text(seeders_val,
selector.get('filters'))
self.torrents_info['seeders'] = int(seeders_val) if seeders_val and seeders_val.isdigit() else 0
else:
self.torrents_info['seeders'] = 0
def __get_grabs(self, torrent):
# torrent grabs int
if 'grabs' not in self.fields:
return
selector = self.fields.get('grabs', {})
grabs = torrent(selector.get('selector', '')).clone()
self.__remove(grabs, selector)
items = self.__attribute_or_text(grabs, selector)
item = self.__index(items, selector)
if item:
grabs_val = item.split("/")[0]
grabs_val = grabs_val.replace(",", "")
grabs_val = self.__filter_text(grabs_val,
selector.get('filters'))
self.torrents_info['grabs'] = int(grabs_val) if grabs_val and grabs_val.isdigit() else 0
else:
self.torrents_info['grabs'] = 0
def __get_pubdate(self, torrent):
# torrent pubdate yyyy-mm-dd hh:mm:ss
if 'date_added' not in self.fields:
return
selector = self.fields.get('date_added', {})
pubdate = torrent(selector.get('selector', '')).clone()
self.__remove(pubdate, selector)
items = self.__attribute_or_text(pubdate, selector)
pubdate_str = self.__index(items, selector)
if pubdate_str:
pubdate_str = pubdate_str.replace('\n', ' ').strip()
self.torrents_info['pubdate'] = self.__filter_text(pubdate_str,
selector.get('filters'))
def __get_date_elapsed(self, torrent):
# torrent data elaspsed text
if 'date_elapsed' not in self.fields:
return
selector = self.fields.get('date_elapsed', {})
date_elapsed = torrent(selector.get('selector', '')).clone()
self.__remove(date_elapsed, selector)
items = self.__attribute_or_text(date_elapsed, selector)
self.torrents_info['date_elapsed'] = self.__index(items, selector)
self.torrents_info['date_elapsed'] = self.__filter_text(self.torrents_info.get('date_elapsed'),
selector.get('filters'))
def __get_downloadvolumefactor(self, torrent):
# downloadvolumefactor int
selector = self.fields.get('downloadvolumefactor', {})
if not selector:
return
self.torrents_info['downloadvolumefactor'] = 1
if 'case' in selector:
for downloadvolumefactorselector in list(selector.get('case', {}).keys()):
downloadvolumefactor = torrent(downloadvolumefactorselector)
if len(downloadvolumefactor) > 0:
self.torrents_info['downloadvolumefactor'] = selector.get('case', {}).get(
downloadvolumefactorselector)
break
elif "selector" in selector:
downloadvolume = torrent(selector.get('selector', '')).clone()
self.__remove(downloadvolume, selector)
items = self.__attribute_or_text(downloadvolume, selector)
item = self.__index(items, selector)
if item:
downloadvolumefactor = re.search(r'(\d+\.?\d*)', item)
if downloadvolumefactor:
self.torrents_info['downloadvolumefactor'] = int(downloadvolumefactor.group(1))
def __get_uploadvolumefactor(self, torrent):
# uploadvolumefactor int
selector = self.fields.get('uploadvolumefactor', {})
if not selector:
return
self.torrents_info['uploadvolumefactor'] = 1
if 'case' in selector:
for uploadvolumefactorselector in list(selector.get('case', {}).keys()):
uploadvolumefactor = torrent(uploadvolumefactorselector)
if len(uploadvolumefactor) > 0:
self.torrents_info['uploadvolumefactor'] = selector.get('case', {}).get(
uploadvolumefactorselector)
break
elif "selector" in selector:
uploadvolume = torrent(selector.get('selector', '')).clone()
self.__remove(uploadvolume, selector)
items = self.__attribute_or_text(uploadvolume, selector)
item = self.__index(items, selector)
if item:
uploadvolumefactor = re.search(r'(\d+\.?\d*)', item)
if uploadvolumefactor:
self.torrents_info['uploadvolumefactor'] = int(uploadvolumefactor.group(1))
def __get_labels(self, torrent):
# labels ['label1', 'label2']
if 'labels' not in self.fields:
return
selector = self.fields.get('labels', {})
labels = torrent(selector.get("selector", "")).clone()
self.__remove(labels, selector)
items = self.__attribute_or_text(labels, selector)
if items:
self.torrents_info['labels'] = [item for item in items if item]
else:
self.torrents_info['labels'] = []
def __get_free_date(self, torrent):
# free date yyyy-mm-dd hh:mm:ss
if 'freedate' not in self.fields:
return
selector = self.fields.get('freedate', {})
freedate = torrent(selector.get('selector', '')).clone()
self.__remove(freedate, selector)
items = self.__attribute_or_text(freedate, selector)
self.torrents_info['freedate'] = self.__index(items, selector)
self.torrents_info['freedate'] = self.__filter_text(self.torrents_info.get('freedate'),
selector.get('filters'))
def __get_hit_and_run(self, torrent):
# hitandrun True/False
if 'hr' not in self.fields:
return
selector = self.fields.get('hr', {})
hit_and_run = torrent(selector.get('selector', ''))
if hit_and_run:
self.torrents_info['hit_and_run'] = True
else:
self.torrents_info['hit_and_run'] = False
def __get_category(self, torrent):
# category 电影/电视剧
if 'category' not in self.fields:
return
selector = self.fields.get('category', {})
category = torrent(selector.get('selector', '')).clone()
self.__remove(category, selector)
items = self.__attribute_or_text(category, selector)
category_value = self.__index(items, selector)
category_value = self.__filter_text(category_value,
selector.get('filters'))
if category_value and self.category:
tv_cats = [str(cat.get("id")) for cat in self.category.get("tv") or []]
movie_cats = [str(cat.get("id")) for cat in self.category.get("movie") or []]
if category_value in tv_cats \
and category_value not in movie_cats:
self.torrents_info['category'] = MediaType.TV.value
elif category_value in movie_cats:
self.torrents_info['category'] = MediaType.MOVIE.value
else:
self.torrents_info['category'] = MediaType.UNKNOWN.value
else:
self.torrents_info['category'] = MediaType.UNKNOWN.value
def get_info(self, torrent) -> dict:
"""
解析单条种子数据
"""
self.torrents_info = {}
try:
# 标题
self.__get_title(torrent)
# 描述
self.__get_description(torrent)
# 详情页面
self.__get_detail(torrent)
# 下载链接
self.__get_download(torrent)
# 完成数
self.__get_grabs(torrent)
# 下载数
self.__get_leechers(torrent)
# 做种数
self.__get_seeders(torrent)
# 大小
self.__get_size(torrent)
# IMDBID
self.__get_imdbid(torrent)
# 下载系数
self.__get_downloadvolumefactor(torrent)
# 上传系数
self.__get_uploadvolumefactor(torrent)
# 发布时间
self.__get_pubdate(torrent)
# 已发布时间
self.__get_date_elapsed(torrent)
# 免费载止时间
self.__get_free_date(torrent)
# 标签
self.__get_labels(torrent)
# HR
self.__get_hit_and_run(torrent)
# 分类
self.__get_category(torrent)
except Exception as err:
logger.error("%s 搜索出现错误:%s" % (self.indexername, str(err)))
return self.torrents_info
@staticmethod
def __filter_text(text: str, filters: list):
"""
对文件进行处理
"""
if not text or not filters or not isinstance(filters, list):
return text
if not isinstance(text, str):
text = str(text)
for filter_item in filters:
if not text:
break
method_name = filter_item.get("name")
try:
args = filter_item.get("args")
if method_name == "re_search" and isinstance(args, list):
rematch = re.search(r"%s" % args[0], text)
if rematch:
text = rematch.group(args[-1])
elif method_name == "split" and isinstance(args, list):
text = text.split(r"%s" % args[0])[args[-1]]
elif method_name == "replace" and isinstance(args, list):
text = text.replace(r"%s" % args[0], r"%s" % args[-1])
elif method_name == "dateparse" and isinstance(args, str):
text = text.replace("\n", " ").strip()
text = datetime.datetime.strptime(text, r"%s" % args)
elif method_name == "strip":
text = text.strip()
elif method_name == "appendleft":
text = f"{args}{text}"
elif method_name == "querystring":
parsed_url = urlparse(str(text))
query_params = parse_qs(parsed_url.query)
param_value = query_params.get(args)
text = param_value[0] if param_value else ''
except Exception as err:
logger.debug(f'过滤器 {method_name} 处理失败:{str(err)} - {traceback.format_exc()}')
return text.strip()
@staticmethod
def __remove(item, selector):
"""
移除元素
"""
if selector and "remove" in selector:
removelist = selector.get('remove', '').split(', ')
for v in removelist:
item.remove(v)
@staticmethod
def __attribute_or_text(item, selector: dict):
if not selector:
return item
if not item:
return []
if 'attribute' in selector:
items = [i.attr(selector.get('attribute')) for i in item.items() if i]
else:
items = [i.text() for i in item.items() if i]
return items
@staticmethod
def __index(items: list, selector: dict):
if not items:
return None
if selector:
if "contents" in selector \
and len(items) > int(selector.get("contents")):
items = items[0].split("\n")[selector.get("contents")]
elif "index" in selector \
and len(items) > int(selector.get("index")):
items = items[int(selector.get("index"))]
if isinstance(items, list):
items = items[0]
return items
def parse(self, html_text: str) -> List[dict]:
"""
解析整个页面
"""
if not html_text:
self.is_error = True
return []
# 清空旧结果
self.torrents_info_array = []
try:
# 解析站点文本对象
html_doc = PyQuery(html_text)
# 种子筛选器
torrents_selector = self.list.get('selector', '')
# 遍历种子html列表
for torn in html_doc(torrents_selector):
self.torrents_info_array.append(copy.deepcopy(self.get_info(PyQuery(torn))))
if len(self.torrents_info_array) >= int(self.result_num):
break
return self.torrents_info_array
except Exception as err:
self.is_error = True
logger.warn(f"错误:{self.indexername} {str(err)}")

View File

@@ -1,8 +1,6 @@
import urllib.parse
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -51,7 +49,7 @@ class HaiDanSpider:
"7": 1
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -3,8 +3,6 @@ import json
import re
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -51,7 +49,7 @@ class MTorrentSpider:
"7": "DIY 国配 中字"
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -1,8 +1,6 @@
import re
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils
@@ -23,7 +21,7 @@ class TNodeSpider:
_downloadurl = "%sapi/torrent/download/%s"
_pageurl = "%storrent/info/%s"
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
if indexer:
self._indexerid = indexer.get('id')
self._domain = indexer.get('domain')

View File

@@ -1,8 +1,6 @@
from typing import List, Tuple
from urllib.parse import quote
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils
@@ -19,7 +17,7 @@ class TorrentLeech:
_pageurl = "%storrent/%s"
_timeout = 15
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self._indexer = indexer
if indexer.get('proxy'):
self._proxy = settings.PROXY

View File

@@ -1,7 +1,5 @@
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -46,7 +44,7 @@ class YemaSpider:
"12": "完结",
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -19,3 +19,4 @@ from .file import *
from .exception import *
from .system import *
from .event import *
from .workflow import *

View File

@@ -244,6 +244,7 @@ class DiscoverMediaSource(BaseModel):
api_path: str = Field(..., description="媒体数据源API地址")
filter_params: Optional[Dict[str, Any]] = Field(default=None, description="过滤参数")
filter_ui: Optional[List[dict]] = Field(default=[], description="过滤参数UI配置")
depends: Optional[Dict[str, list]] = Field(default=None, description="UI依赖关系字典")
class DiscoverSourceEventData(ChainEventData):

35
app/schemas/workflow.py Normal file
View File

@@ -0,0 +1,35 @@
from abc import ABC, abstractmethod
from typing import Optional
from pydantic import BaseModel, Field
class Workflow(BaseModel):
"""
工作流信息
"""
name: Optional[str] = Field(None, description="工作流名称")
description: Optional[str] = Field(None, description="工作流描述")
timer: Optional[str] = Field(None, description="定时器")
state: Optional[str] = Field(None, description="状态")
current_action: Optional[str] = Field(None, description="当前执行动作")
result: Optional[str] = Field(None, description="任务执行结果")
run_count: Optional[int] = Field(0, description="已执行次数")
actions: Optional[list] = Field([], description="任务列表")
add_time: Optional[str] = Field(None, description="创建时间")
last_time: Optional[str] = Field(None, description="最后执行时间")
class Action(BaseModel):
"""
动作信息
"""
name: Optional[str] = Field(None, description="动作名称")
description: Optional[str] = Field(None, description="动作描述")
class ActionContext(BaseModel, ABC):
"""
动作上下文
"""
pass

View File

@@ -42,7 +42,7 @@ class SecurityUtils:
@staticmethod
def is_safe_url(url: str, allowed_domains: Union[Set[str], List[str]], strict: bool = False) -> bool:
"""
验证URL是否在允许的域名列表中包括带有端口的域名
验证URL是否在允许的域名列表中包括带有端口的域名
:param url: 需要验证的 URL
:param allowed_domains: 允许的域名集合,域名可以包含端口
@@ -65,7 +65,6 @@ class SecurityUtils:
netloc = parsed_url.netloc.lower()
if not netloc:
return False
netloc_no_port = netloc.split(":")[0]
# 检查每个允许的域名
allowed_domains = {d.lower() for d in allowed_domains}
@@ -79,7 +78,7 @@ class SecurityUtils:
return True
else:
# 非严格模式下,允许子域名匹配
if netloc_no_port == allowed_netloc or netloc_no_port.endswith('.' + allowed_netloc):
if netloc == allowed_netloc or netloc.endswith('.' + allowed_netloc):
return True
return False

View File

@@ -0,0 +1,24 @@
"""2.1.1
Revision ID: 279a949d81b6
Revises: ca5461f314f2
Create Date: 2025-02-14 19:02:24.989349
"""
from app.chain.torrents import TorrentsChain
# revision identifiers, used by Alembic.
revision = '279a949d81b6'
down_revision = 'ca5461f314f2'
branch_labels = None
depends_on = None
def upgrade() -> None:
# 清理一次缓存
TorrentsChain().clear_torrents()
def downgrade() -> None:
pass

View File

@@ -32,6 +32,7 @@ func_timeout==4.3.5
bs4~=0.0.1
beautifulsoup4~=4.12.2
pillow~=10.4.0
pillow-avif-plugin~=1.4.6
pyTelegramBotAPI~=4.12.0
playwright~=1.37.0
cf-clearance~=0.31.0

View File

@@ -1,20 +1,22 @@
from distutils.core import setup
from Cython.Build import cythonize
module_list = ['app/helper/sites.py']
setup(
name="",
author="",
zip_safe=False,
include_package_data=True,
ext_modules=cythonize(
module_list=module_list,
nthreads=0,
compiler_directives={"language_level": "3"},
),
script_args=["build_ext", "-j", '2', "--inplace"],
)
name="MoviePilot",
author="jxxghp",
zip_safe=False,
include_package_data=True,
ext_modules=cythonize(
module_list=module_list,
nthreads=0,
compiler_directives={
"language_level": "3",
"binding": False,
"nonecheck": False
},
),
script_args=["build_ext", "-j", '2', "--inplace"],
)

View File

@@ -1,2 +1,2 @@
APP_VERSION = 'v2.2.8'
FRONTEND_VERSION = 'v2.2.8'
APP_VERSION = 'v2.2.9'
FRONTEND_VERSION = 'v2.2.9'