Compare commits

...

30 Commits

Author SHA1 Message Date
jxxghp
d9ed7b09c7 v2.3.0
- 站点资源浏览支持关键字和分类搜索,优化了界面,修改了站点卡片点击时的交互行为
- 优化了APP模式下更多菜单、滚动条等多处UI细节
2025-02-18 17:05:24 +08:00
jxxghp
4dcb18f00e fix: site browse api 2025-02-18 16:32:10 +08:00
jxxghp
0a52fe0a7a refactor: site browse api 2025-02-17 19:01:05 +08:00
jxxghp
e5a4d11cf9 fix workflow 2025-02-17 15:08:24 +08:00
jxxghp
6c233f13de fix workflow chain 2025-02-17 12:38:29 +08:00
jxxghp
00aee3496c add workflow oper 2025-02-17 11:54:11 +08:00
jxxghp
77ae40e3d6 fix workflow 2025-02-17 11:40:32 +08:00
jxxghp
68cba44476 fix modules load 2025-02-16 17:24:17 +08:00
jxxghp
b86d06f632 add workflow lifecycle 2025-02-16 16:53:38 +08:00
jxxghp
0b7cf305a0 add action templates 2025-02-16 13:45:15 +08:00
jxxghp
21ae36bc3a add action templates 2025-02-16 12:52:29 +08:00
jxxghp
4e2d9e9165 Merge pull request #3899 from Mister-album/v2-sync 2025-02-15 08:10:15 +08:00
Mister-album
6cee308894 添加为指定字幕添加.default后缀设置为默认字幕功能 2025-02-14 19:58:29 +08:00
jxxghp
b8f4cd5fea v2.2.9
- 资源包升级以提升安全性
- 优化了页面数据刷新机制

注意:本次升级后会默认清理一次种子识别缓存
2025-02-14 19:35:49 +08:00
jxxghp
aa1557ad9e fix setup 2025-02-14 17:37:10 +08:00
jxxghp
f03da6daca fix setup 2025-02-14 17:17:16 +08:00
jxxghp
30eb4385d4 fix sites 2025-02-14 13:44:18 +08:00
jxxghp
4c9afcc1a8 fix 2025-02-14 13:32:20 +08:00
jxxghp
dd47432a45 fix 2025-02-14 12:55:32 +08:00
jxxghp
0ba6974bd6 fix #3843
fix #3829
2025-02-13 08:08:13 +08:00
jxxghp
827d8f6d84 add workflow framework 2025-02-12 17:49:01 +08:00
jxxghp
943a462c69 Merge pull request #3885 from InfinityPacer/feature/security 2025-02-11 17:21:04 +08:00
InfinityPacer
a1bc773fb5 feat(security): add AVIF support 2025-02-11 17:10:50 +08:00
InfinityPacer
ac169b7d22 feat(security): add cache default extension for files without suffix 2025-02-11 17:09:43 +08:00
jxxghp
eecbbfea3a 更新 version.py 2025-02-10 22:28:06 +08:00
jxxghp
635ddb044e add depends for DiscoverMediaSource 2025-02-10 22:05:56 +08:00
jxxghp
1a6123489d 更新 config.py 2025-02-10 07:52:40 +08:00
jxxghp
4e69195a8d Merge pull request #3876 from InfinityPacer/feature/security 2025-02-10 07:11:28 +08:00
InfinityPacer
e48c8ee652 Revert "fix is_safe_url"
This reverts commit 5e2ad34864.
2025-02-10 02:22:53 +08:00
InfinityPacer
7df07b86b9 feat(security): add cmvideo image for http with port 2025-02-10 02:19:08 +08:00
49 changed files with 781 additions and 855 deletions

44
app/actions/__init__.py Normal file
View File

@@ -0,0 +1,44 @@
from abc import ABC, abstractmethod
from pydantic.main import BaseModel
from app.schemas import ActionContext, ActionParams
class BaseAction(BaseModel, ABC):
"""
工作流动作基类
"""
@property
@abstractmethod
def name(self) -> str:
pass
@property
@abstractmethod
def description(self) -> str:
pass
@abstractmethod
def execute(self, params: ActionParams, context: ActionContext) -> ActionContext:
"""
执行动作
"""
raise NotImplementedError
@property
@abstractmethod
def done(self) -> bool:
"""
判断动作是否完成
"""
pass
@property
@abstractmethod
def success(self) -> bool:
"""
判断动作是否成功
"""
pass

View File

View File

View File

View File

42
app/actions/fetch_rss.py Normal file
View File

@@ -0,0 +1,42 @@
from typing import Optional
from pydantic import Field
from app.actions import BaseAction
from app.schemas import ActionParams, ActionContext
class FetchRssParams(ActionParams):
"""
获取RSS资源列表参数
"""
url: str = Field(None, description="RSS地址")
proxy: Optional[bool] = Field(False, description="是否使用代理")
timeout: Optional[int] = Field(15, description="超时时间")
headers: Optional[dict] = Field(None, description="请求头")
recognize: Optional[bool] = Field(False, description="是否识别")
class FetchRssAction(BaseAction):
"""
获取RSS资源列表
"""
@property
def name(self) -> str:
return "获取RSS资源列表"
@property
def description(self) -> str:
return "请求RSS地址获取数据并解析为资源列表"
async def execute(self, params: FetchRssParams, context: ActionContext) -> ActionContext:
pass
@property
def done(self) -> bool:
return True
@property
def success(self) -> bool:
return True

View File

View File

View File

@@ -0,0 +1,42 @@
from typing import Optional
from pydantic import Field
from app.actions import BaseAction
from app.schemas import ActionParams, ActionContext
class SearchTorrentsParams(ActionParams):
"""
搜索站点资源参数
"""
name: str = Field(None, description="资源名称")
year: Optional[int] = Field(None, description="年份")
type: Optional[str] = Field(None, description="资源类型 (电影/电视剧)")
season: Optional[int] = Field(None, description="季度")
recognize: Optional[bool] = Field(False, description="是否识别")
class SearchTorrentsAction(BaseAction):
"""
搜索站点资源
"""
@property
def name(self) -> str:
return "搜索站点资源"
@property
def description(self) -> str:
return "根据关键字搜索站点种子资源"
@property
def done(self) -> bool:
return True
@property
def success(self) -> bool:
return True
async def execute(self, params: SearchTorrentsParams, context: ActionContext) -> ActionContext:
pass

View File

View File

View File

View File

@@ -2,7 +2,7 @@ from fastapi import APIRouter
from app.api.endpoints import login, user, site, message, webhook, subscribe, \
media, douban, search, plugin, tmdb, history, system, download, dashboard, \
transfer, mediaserver, bangumi, storage, discover, recommend
transfer, mediaserver, bangumi, storage, discover, recommend, workflow
api_router = APIRouter()
api_router.include_router(login.router, prefix="/login", tags=["login"])
@@ -26,3 +26,4 @@ api_router.include_router(mediaserver.router, prefix="/mediaserver", tags=["medi
api_router.include_router(bangumi.router, prefix="/bangumi", tags=["bangumi"])
api_router.include_router(discover.router, prefix="/discover", tags=["discover"])
api_router.include_router(recommend.router, prefix="/recommend", tags=["recommend"])
api_router.include_router(workflow.router, prefix="/workflow", tags=["workflow"])

View File

@@ -1,4 +1,4 @@
from typing import List, Any
from typing import List, Any, Dict
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
@@ -259,8 +259,41 @@ def site_icon(site_id: int,
})
@router.get("/category/{site_id}", summary="站点分类", response_model=List[schemas.SiteCategory])
def site_category(site_id: int,
db: Session = Depends(get_db),
_: schemas.TokenPayload = Depends(verify_token)) -> Any:
"""
获取站点分类
"""
site = Site.get(db, site_id)
if not site:
raise HTTPException(
status_code=404,
detail=f"站点 {site_id} 不存在",
)
indexer = SitesHelper().get_indexer(site.domain)
if not indexer:
raise HTTPException(
status_code=404,
detail=f"站点 {site.domain} 不支持",
)
category: Dict[str, List[dict]] = indexer.get('category') or []
if not category:
return []
result = []
for cats in category.values():
for cat in cats:
if cat not in result:
result.append(cat)
return result
@router.get("/resource/{site_id}", summary="站点资源", response_model=List[schemas.TorrentInfo])
def site_resource(site_id: int,
keyword: str = None,
cat: str = None,
page: int = 0,
db: Session = Depends(get_db),
_: schemas.TokenPayload = Depends(get_current_active_superuser)) -> Any:
"""
@@ -272,7 +305,7 @@ def site_resource(site_id: int,
status_code=404,
detail=f"站点 {site_id} 不存在",
)
torrents = TorrentsChain().browse(domain=site.domain)
torrents = TorrentsChain().browse(domain=site.domain, keyword=keyword, cat=cat, page=page)
if not torrents:
return []
return [torrent.to_dict() for torrent in torrents]

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Optional, Union
import aiofiles
import pillow_avif # noqa 用于自动注册AVIF支持
from PIL import Image
from fastapi import APIRouter, Depends, HTTPException, Header, Request, Response
from fastapi.responses import StreamingResponse
@@ -50,7 +51,6 @@ def fetch_image(
"""
处理图片缓存逻辑支持HTTP缓存和磁盘缓存
"""
if not url:
raise HTTPException(status_code=404, detail="URL not provided")
@@ -68,6 +68,10 @@ def fetch_image(
sanitized_path = SecurityUtils.sanitize_url_path(url)
cache_path = settings.CACHE_PATH / "images" / sanitized_path
# 没有文件类型,则添加后缀,在恶意文件类型和实际需求下的折衷选择
if not cache_path.suffix:
cache_path = cache_path.with_suffix(".jpg")
# 确保缓存路径和文件类型合法
if not SecurityUtils.is_safe_path(settings.CACHE_PATH, cache_path, settings.SECURITY_IMAGE_SUFFIXES):
raise HTTPException(status_code=400, detail="Invalid cache path or file type")
@@ -88,7 +92,8 @@ def fetch_image(
# 请求远程图片
referer = "https://movie.douban.com/" if "doubanio.com" in url else None
proxies = settings.PROXY if proxy else None
response = RequestUtils(ua=settings.USER_AGENT, proxies=proxies, referer=referer).get_res(url=url)
response = RequestUtils(ua=settings.USER_AGENT, proxies=proxies, referer=referer,
accept_type="image/avif,image/webp,image/apng,*/*").get_res(url=url)
if not response:
raise HTTPException(status_code=502, detail="Failed to fetch the image from the remote server")

View File

@@ -0,0 +1,3 @@
from fastapi import APIRouter
router = APIRouter()

View File

@@ -7,7 +7,6 @@ from pathlib import Path
from typing import Optional, Any, Tuple, List, Set, Union, Dict
from qbittorrentapi import TorrentFilesList
from ruamel.yaml import CommentedMap
from transmission_rpc import File
from app.core.config import settings
@@ -77,7 +76,7 @@ class ChainBase(metaclass=ABCMeta):
"""
cache_path = settings.TEMP_PATH / filename
if cache_path.exists():
Path(cache_path).unlink()
cache_path.unlink()
def run_module(self, method: str, *args, **kwargs) -> Any:
"""
@@ -308,7 +307,7 @@ class ChainBase(metaclass=ABCMeta):
"""
return self.run_module("search_collections", name=name)
def search_torrents(self, site: CommentedMap,
def search_torrents(self, site: dict,
keywords: List[str],
mtype: MediaType = None,
page: int = 0) -> List[TorrentInfo]:
@@ -323,13 +322,16 @@ class ChainBase(metaclass=ABCMeta):
return self.run_module("search_torrents", site=site, keywords=keywords,
mtype=mtype, page=page)
def refresh_torrents(self, site: CommentedMap) -> List[TorrentInfo]:
def refresh_torrents(self, site: dict, keyword: str = None, cat: str = None, page: int = 0) -> List[TorrentInfo]:
"""
获取站点最新一页的种子,多个站点需要多线程处理
:param site: 站点
:param keyword: 标题
:param cat: 分类
:param page: 页码
:reutrn: 种子资源列表
"""
return self.run_module("refresh_torrents", site=site)
return self.run_module("refresh_torrents", site=site, keyword=keyword, cat=cat, page=page)
def filter_torrents(self, rule_groups: List[str],
torrent_list: List[TorrentInfo],

View File

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path
from typing import List
import pillow_avif # noqa 用于自动注册AVIF支持
from PIL import Image
from app.chain import ChainBase
@@ -116,6 +117,10 @@ class RecommendChain(ChainBase, metaclass=Singleton):
sanitized_path = SecurityUtils.sanitize_url_path(url)
cache_path = settings.CACHE_PATH / "images" / sanitized_path
# 没有文件类型,则添加后缀,在恶意文件类型和实际需求下的折衷选择
if not cache_path.suffix:
cache_path = cache_path.with_suffix(".jpg")
# 确保缓存路径和文件类型合法
if not SecurityUtils.is_safe_path(settings.CACHE_PATH, cache_path, settings.SECURITY_IMAGE_SUFFIXES):
logger.debug(f"Invalid cache path or file type for URL: {url}, sanitized path: {sanitized_path}")

View File

@@ -6,7 +6,6 @@ from typing import Optional, Tuple, Union, Dict
from urllib.parse import urljoin
from lxml import etree
from ruamel.yaml import CommentedMap
from app.chain import ChainBase
from app.core.config import global_vars, settings
@@ -55,7 +54,7 @@ class SiteChain(ChainBase):
"yemapt.org": self.__yema_test,
}
def refresh_userdata(self, site: CommentedMap = None) -> Optional[SiteUserData]:
def refresh_userdata(self, site: dict = None) -> Optional[SiteUserData]:
"""
刷新站点的用户数据
:param site: 站点

View File

@@ -73,17 +73,20 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
logger.info(f'种子缓存数据清理完成')
@cached(cache=TTLCache(maxsize=128, ttl=595))
def browse(self, domain: str) -> List[TorrentInfo]:
def browse(self, domain: str, keyword: str = None, cat: str = None, page: int = 0) -> List[TorrentInfo]:
"""
浏览站点首页内容返回种子清单TTL缓存10分钟
:param domain: 站点域名
:param keyword: 搜索标题
:param cat: 搜索分类
:param page: 页码
"""
logger.info(f'开始获取站点 {domain} 最新种子 ...')
site = self.siteshelper.get_indexer(domain)
if not site:
logger.error(f'站点 {domain} 不存在!')
return []
return self.refresh_torrents(site=site)
return self.refresh_torrents(site=site, keyword=keyword, cat=cat, page=page)
@cached(cache=TTLCache(maxsize=128, ttl=295))
def rss(self, domain: str) -> List[TorrentInfo]:

View File

@@ -670,13 +670,10 @@ class TransferChain(ChainBase, metaclass=Singleton):
self.jobview.add_task(task, state=curr_task.state if curr_task else "waiting")
# 获取集数据
if not task.episodes_info and task.mediainfo.type == MediaType.TV:
if task.meta.begin_season is None:
task.meta.begin_season = 1
task.mediainfo.season = task.mediainfo.season or task.meta.begin_season
if task.mediainfo.type == MediaType.TV and not task.episodes_info:
task.episodes_info = self.tmdbchain.tmdb_episodes(
tmdbid=task.mediainfo.tmdb_id,
season=task.mediainfo.season
season=task.mediainfo.season or task.meta.begin_season or 1
)
# 查询整理目标目录

51
app/chain/workflow.py Normal file
View File

@@ -0,0 +1,51 @@
from typing import List
from app.chain import ChainBase
from app.core.workflow import WorkFlowManager
from app.db.workflow_oper import WorkflowOper
from app.log import logger
from app.schemas import Workflow, ActionContext, Action
class WorkflowChain(ChainBase):
"""
工作流链
"""
def __init__(self):
super().__init__()
self.workflowoper = WorkflowOper()
self.workflowmanager = WorkFlowManager()
def process(self, workflow_id: int) -> bool:
"""
处理工作流
"""
workflow = self.workflowoper.get(workflow_id)
if not workflow:
logger.warn(f"工作流 {workflow_id} 不存在")
return False
if not workflow.actions:
logger.warn(f"工作流 {workflow.name} 无动作")
return False
logger.info(f"开始处理 {workflow.name},共 {len(workflow.actions)} 个动作 ...")
# 启用上下文
context = ActionContext()
self.workflowoper.start(workflow_id)
for act in workflow.actions:
action = Action(**act)
state, context = self.workflowmanager.excute(action, context)
self.workflowoper.step(workflow_id, action=action.name, context=context.dict())
if not state:
logger.error(f"动作 {action.name} 执行失败,工作流失败")
self.workflowoper.fail(workflow_id, result=f"动作 {action.name} 执行失败")
return False
logger.info(f"工作流 {workflow.name} 执行完成")
self.workflowoper.success(workflow_id)
return True
def get_workflows(self) -> List[Workflow]:
"""
获取工作流列表
"""
return self.workflowoper.list_enabled()

View File

@@ -247,7 +247,7 @@ class ConfigModel(BaseModel):
)
# 允许的图片文件后缀格式
SECURITY_IMAGE_SUFFIXES: List[str] = Field(
default_factory=lambda: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".svg"]
default_factory=lambda: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".svg", ".avif"]
)
# 重命名时支持的S0别名
RENAME_FORMAT_S0_NAMES: List[str] = Field(
@@ -255,6 +255,8 @@ class ConfigModel(BaseModel):
)
# 启用分词搜索
TOKENIZED_SEARCH: bool = False
# 为指定默认字幕添加.default后缀
DEFAULT_SUB: Optional[str] = "zh-cn"
class Settings(BaseSettings, ConfigModel, LogConfigModel):

77
app/core/workflow.py Normal file
View File

@@ -0,0 +1,77 @@
from time import sleep
from typing import Dict, Any, Tuple
from app.actions import BaseAction
from app.helper.module import ModuleHelper
from app.log import logger
from app.schemas import Action, ActionContext
from app.utils.singleton import Singleton
class WorkFlowManager(metaclass=Singleton):
"""
工作流管理器
"""
# 所有动作定义
_actions: Dict[str, BaseAction] = {}
def __init__(self):
self.init()
def init(self):
"""
初始化
"""
def filter_func(obj: Any):
"""
过滤函数,确保只加载新定义的类
"""
if not isinstance(obj, type):
return False
if not hasattr(obj, 'execute') or not hasattr(obj, "name"):
return False
if obj.__name__ == "BaseAction":
return False
return obj.__module__.startswith("app.actions")
# 加载所有动作
self._actions = {}
actions = ModuleHelper.load(
"app.actions",
filter_func=lambda _, obj: filter_func(obj)
)
for action in actions:
logger.debug(f"加载动作: {action.__name__}")
self._actions[action.__name__] = action
def stop(self):
"""
停止
"""
pass
def excute(self, action: Action, context: ActionContext = None) -> Tuple[bool, ActionContext]:
"""
执行工作流动作
"""
if not context:
context = ActionContext()
if action.id in self._actions:
action_obj = self._actions[action.id]
logger.info(f"执行动作: {action.id} - {action.name}")
result_context = action_obj.execute(action.params, context)
logger.info(f"{action.name} 执行结果: {action_obj.success}")
if action.loop and action.loop_interval:
while not action_obj.done:
logger.info(f"{action.name} 等待 {action.loop_interval} 秒后继续执行")
sleep(action.loop_interval)
logger.info(f"继续执行动作: {action.id} - {action.name}")
result_context = action_obj.execute(action.params, result_context)
logger.info(f"{action.name} 执行结果: {action_obj.success}")
logger.info(f"{action.name} 执行完成")
return action_obj.success, result_context
else:
logger.error(f"未找到动作: {action.id} - {action.name}")
return False, context

87
app/db/models/workflow.py Normal file
View File

@@ -0,0 +1,87 @@
from datetime import datetime
from sqlalchemy import Column, Integer, JSON, Sequence, String
from app.db import Base, db_query, db_update
class Workflow(Base):
"""
工作流表
"""
# ID
id = Column(Integer, Sequence('id'), primary_key=True, index=True)
# 名称
name = Column(String, index=True, nullable=False)
# 描述
description = Column(String)
# 定时器
timer = Column(String)
# 状态W-等待 R-运行中 P-暂停 S-成功 F-失败
state = Column(String, nullable=False, index=True, default='W')
# 当前执行动作
current_action = Column(String)
# 任务执行结果
result = Column(String)
# 已执行次数
run_count = Column(Integer, default=0)
# 任务列表
actions = Column(JSON, default=list)
# 执行上下文
context = Column(JSON, default=dict)
# 创建时间
add_time = Column(String, default=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# 最后执行时间
last_time = Column(String)
@staticmethod
@db_query
def get_enabled_workflows(db):
return db.query(Workflow).filter(Workflow.state != 'P').all()
@staticmethod
@db_query
def get_by_name(db, name: str):
return db.query(Workflow).filter(Workflow.name == name).first()
@staticmethod
@db_update
def update_state(db, wid: int, state: str):
db.query(Workflow).filter(Workflow.id == wid).update({"state": state})
return True
@staticmethod
@db_update
def start(db, wid: int):
db.query(Workflow).filter(Workflow.id == wid).update({
"state": 'R'
})
return True
@staticmethod
@db_update
def fail(db, wid: int, result: str):
db.query(Workflow).filter(Workflow.id == wid).update({
"state": 'F',
"result": result,
"run_count": Workflow.run_count + 1,
"last_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
})
return True
@staticmethod
@db_update
def success(db, wid: int, result: str = None):
db.query(Workflow).filter(Workflow.id == wid).update({
"state": 'S',
"result": result,
"run_count": Workflow.run_count + 1,
"last_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
})
return True
@staticmethod
@db_update
def update_current_action(db, wid: int, action: str, context: dict):
db.query(Workflow).filter(Workflow.id == wid).update({"current_action": action, "context": context})
return True

62
app/db/workflow_oper.py Normal file
View File

@@ -0,0 +1,62 @@
from typing import List, Tuple
from app.db import DbOper
from app.db.models.workflow import Workflow
class WorkflowOper(DbOper):
"""
工作流管理
"""
def add(self, **kwargs) -> Tuple[bool, str]:
"""
新增工作流
"""
wf = Workflow(**kwargs)
if not wf.get_by_name(self._db, kwargs.get("name")):
wf.create(self._db)
return True, "新增工作流成功"
return False, "工作流已存在"
def get(self, wid: int) -> Workflow:
"""
查询单个工作流
"""
return Workflow.get(self._db, wid)
def list_enabled(self) -> List[Workflow]:
"""
获取启用的工作流列表
"""
return Workflow.get_enabled_workflows(self._db)
def get_by_name(self, name: str) -> Workflow:
"""
按名称获取工作流
"""
return Workflow.get_by_name(self._db, name)
def start(self, wid: int) -> bool:
"""
启动
"""
return Workflow.start(self._db, wid)
def success(self, wid: int, result: str = None) -> bool:
"""
成功
"""
return Workflow.success(self._db, wid, result)
def fail(self, wid: int, result: str) -> bool:
"""
失败
"""
return Workflow.fail(self._db, wid, result)
def step(self, wid: int, action: str, context: dict) -> bool:
"""
步进
"""
return Workflow.update_current_action(self._db, wid, action, context)

View File

@@ -23,6 +23,7 @@ class ModuleHelper:
"""
submodules: list = []
loaded_modules = set()
packages = importlib.import_module(package_path)
for importer, package_name, _ in pkgutil.iter_modules(packages.__path__):
try:
@@ -35,6 +36,9 @@ class ModuleHelper:
if name.startswith('_'):
continue
if isinstance(obj, type) and filter_func(name, obj):
if name in loaded_modules:
continue
loaded_modules.add(name)
submodules.append(obj)
except Exception as err:
logger.debug(f'加载模块 {package_name} 失败:{str(err)} - {traceback.format_exc()}')

View File

@@ -676,11 +676,15 @@ class FileManagerModule(_ModuleBase):
".zh-tw": ".繁体中文"
}
new_sub_tag_list = [
new_file_type if t == 0 else "%s%s(%s)" % (new_file_type,
new_sub_tag_dict.get(
new_file_type, ""
),
t) for t in range(6)
(".default" + new_file_type if (
(settings.DEFAULT_SUB == "zh-cn" and new_file_type == ".chi.zh-cn") or
(settings.DEFAULT_SUB == "zh-tw" and new_file_type == ".zh-tw") or
(settings.DEFAULT_SUB == "eng" and new_file_type == ".eng")
) else new_file_type) if t == 0 else "%s%s(%s)" % (new_file_type,
new_sub_tag_dict.get(
new_file_type, ""
),
t) for t in range(6)
]
for new_sub_tag in new_sub_tag_list:
new_file: Path = target_file.with_name(target_file.stem + new_sub_tag + file_ext)

View File

@@ -1,17 +1,14 @@
from datetime import datetime
from typing import List, Optional, Tuple, Union
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.core.context import TorrentInfo
from app.db.site_oper import SiteOper
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper
from app.helper.sites import SitesHelper, SiteSpider
from app.log import logger
from app.modules import _ModuleBase
from app.modules.indexer.parser import SiteParserBase
from app.modules.indexer.spider import TorrentSpider
from app.modules.indexer.spider.haidan import HaiDanSpider
from app.modules.indexer.spider.mtorrent import MTorrentSpider
from app.modules.indexer.spider.tnode import TNodeSpider
@@ -76,15 +73,17 @@ class IndexerModule(_ModuleBase):
def init_setting(self) -> Tuple[str, Union[str, bool]]:
pass
def search_torrents(self, site: CommentedMap,
def search_torrents(self, site: dict,
keywords: List[str] = None,
mtype: MediaType = None,
cat: str = None,
page: int = 0) -> List[TorrentInfo]:
"""
搜索一个站点
:param site: 站点
:param keywords: 搜索关键词列表
:param mtype: 媒体类型
:param cat: 分类
:param page: 页码
:return: 资源列表
"""
@@ -159,6 +158,7 @@ class IndexerModule(_ModuleBase):
search_word=search_word,
indexer=site,
mtype=mtype,
cat=cat,
page=page
)
if error_flag:
@@ -204,35 +204,42 @@ class IndexerModule(_ModuleBase):
return __remove_duplicate(torrents)
@staticmethod
def __spider_search(indexer: CommentedMap,
def __spider_search(indexer: dict,
search_word: str = None,
mtype: MediaType = None,
cat: str = None,
page: int = 0) -> Tuple[bool, List[dict]]:
"""
根据关键字搜索单个站点
:param: indexer: 站点配置
:param: search_word: 关键字
:param: cat: 分类
:param: page: 页码
:param: mtype: 媒体类型
:param: timeout: 超时时间
:return: 是否发生错误, 种子列表
"""
_spider = TorrentSpider(indexer=indexer,
mtype=mtype,
keyword=search_word,
page=page)
_spider = SiteSpider(indexer=indexer,
keyword=search_word,
mtype=mtype,
cat=cat,
page=page)
return _spider.is_error, _spider.get_torrents()
def refresh_torrents(self, site: CommentedMap) -> Optional[List[TorrentInfo]]:
def refresh_torrents(self, site: dict,
keyword: str = None, cat: str = None, page: int = 0) -> Optional[List[TorrentInfo]]:
"""
获取站点最新一页的种子,多个站点需要多线程处理
:param site: 站点
:param keyword: 关键字
:param cat: 分类
:param page: 页码
:reutrn: 种子资源列表
"""
return self.search_torrents(site=site)
return self.search_torrents(site=site, keywords=[keyword], cat=cat, page=page)
def refresh_userdata(self, site: CommentedMap) -> Optional[SiteUserData]:
def refresh_userdata(self, site: dict) -> Optional[SiteUserData]:
"""
刷新站点的用户数据
:param site: 站点

View File

@@ -1,742 +0,0 @@
import copy
import datetime
import re
import traceback
from typing import List
from urllib.parse import quote, urlencode, urlparse, parse_qs
from jinja2 import Template
from pyquery import PyQuery
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.log import logger
from app.schemas.types import MediaType
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
class TorrentSpider:
# 是否出现错误
is_error: bool = False
# 索引器ID
indexerid: int = None
# 索引器名称
indexername: str = None
# 站点域名
domain: str = None
# 站点Cookie
cookie: str = None
# 站点UA
ua: str = None
# Requests 代理
proxies: dict = None
# playwright 代理
proxy_server: dict = None
# 是否渲染
render: bool = False
# Referer
referer: str = None
# 搜索关键字
keyword: str = None
# 媒体类型
mtype: MediaType = None
# 搜索路径、方式配置
search: dict = {}
# 批量搜索配置
batch: dict = {}
# 浏览配置
browse: dict = {}
# 站点分类配置
category: dict = {}
# 站点种子列表配置
list: dict = {}
# 站点种子字段配置
fields: dict = {}
# 页码
page: int = 0
# 搜索条数, 默认: 100条
result_num: int = 100
# 单个种子信息
torrents_info: dict = {}
# 种子列表
torrents_info_array: list = []
# 搜索超时, 默认: 15秒
_timeout = 15
def __init__(self,
indexer: CommentedMap,
keyword: [str, list] = None,
page: int = 0,
referer: str = None,
mtype: MediaType = None):
"""
设置查询参数
:param indexer: 索引器
:param keyword: 搜索关键字,如果数组则为批量搜索
:param page: 页码
:param referer: Referer
:param mtype: 媒体类型
"""
if not indexer:
return
self.keyword = keyword
self.mtype = mtype
self.indexerid = indexer.get('id')
self.indexername = indexer.get('name')
self.search = indexer.get('search')
self.batch = indexer.get('batch')
self.browse = indexer.get('browse')
self.category = indexer.get('category')
self.list = indexer.get('torrents').get('list', {})
self.fields = indexer.get('torrents').get('fields')
self.render = indexer.get('render')
self.domain = indexer.get('domain')
self.result_num = int(indexer.get('result_num') or 100)
self._timeout = int(indexer.get('timeout') or 15)
self.page = page
if self.domain and not str(self.domain).endswith("/"):
self.domain = self.domain + "/"
if indexer.get('ua'):
self.ua = indexer.get('ua') or settings.USER_AGENT
else:
self.ua = settings.USER_AGENT
if indexer.get('proxy'):
self.proxies = settings.PROXY
self.proxy_server = settings.PROXY_SERVER
if indexer.get('cookie'):
self.cookie = indexer.get('cookie')
if referer:
self.referer = referer
self.torrents_info_array = []
def get_torrents(self) -> List[dict]:
"""
开始请求
"""
if not self.search or not self.domain:
return []
# 种子搜索相对路径
paths = self.search.get('paths', [])
torrentspath = ""
if len(paths) == 1:
torrentspath = paths[0].get('path', '')
else:
for path in paths:
if path.get("type") == "all" and not self.mtype:
torrentspath = path.get('path')
break
elif path.get("type") == "movie" and self.mtype == MediaType.MOVIE:
torrentspath = path.get('path')
break
elif path.get("type") == "tv" and self.mtype == MediaType.TV:
torrentspath = path.get('path')
break
# 精确搜索
if self.keyword:
if isinstance(self.keyword, list):
# 批量查询
if self.batch:
delimiter = self.batch.get('delimiter') or ' '
space_replace = self.batch.get('space_replace') or ' '
search_word = delimiter.join([str(k).replace(' ',
space_replace) for k in self.keyword])
else:
search_word = " ".join(self.keyword)
# 查询模式:或
search_mode = "1"
else:
# 单个查询
search_word = self.keyword
# 查询模式与
search_mode = "0"
# 搜索URL
indexer_params = self.search.get("params", {}).copy()
if indexer_params:
search_area = indexer_params.get('search_area')
# search_area非0表示支持imdbid搜索
if (search_area and
(not self.keyword or not self.keyword.startswith('tt'))):
# 支持imdbid搜索但关键字不是imdbid时不启用imdbid搜索
indexer_params.pop('search_area')
# 变量字典
inputs_dict = {
"keyword": search_word
}
# 查询参数,默认查询标题
params = {
"search_mode": search_mode,
"search_area": 0,
"page": self.page or 0,
"notnewword": 1
}
# 额外参数
for key, value in indexer_params.items():
params.update({
"%s" % key: str(value).format(**inputs_dict)
})
# 分类条件
if self.category:
if self.mtype == MediaType.TV:
cats = self.category.get("tv") or []
elif self.mtype == MediaType.MOVIE:
cats = self.category.get("movie") or []
else:
cats = (self.category.get("movie") or []) + (self.category.get("tv") or [])
for cat in cats:
if self.category.get("field"):
value = params.get(self.category.get("field"), "")
params.update({
"%s" % self.category.get("field"): value + self.category.get("delimiter",
' ') + cat.get("id")
})
else:
params.update({
"cat%s" % cat.get("id"): 1
})
searchurl = self.domain + torrentspath + "?" + urlencode(params)
else:
# 变量字典
inputs_dict = {
"keyword": quote(search_word),
"page": self.page or 0
}
# 无额外参数
searchurl = self.domain + str(torrentspath).format(**inputs_dict)
# 列表浏览
else:
# 变量字典
inputs_dict = {
"page": self.page or 0,
"keyword": ""
}
# 有单独浏览路径
if self.browse:
torrentspath = self.browse.get("path")
if self.browse.get("start"):
start_page = int(self.browse.get("start")) + int(self.page or 0)
inputs_dict.update({
"page": start_page
})
elif self.page:
torrentspath = torrentspath + f"?page={self.page}"
# 搜索Url
searchurl = self.domain + str(torrentspath).format(**inputs_dict)
logger.info(f"开始请求:{searchurl}")
if self.render:
# 浏览器仿真
page_source = PlaywrightHelper().get_page_source(
url=searchurl,
cookies=self.cookie,
ua=self.ua,
proxies=self.proxy_server,
timeout=self._timeout
)
else:
# requests请求
ret = RequestUtils(
ua=self.ua,
cookies=self.cookie,
timeout=self._timeout,
referer=self.referer,
proxies=self.proxies
).get_res(searchurl, allow_redirects=True)
page_source = RequestUtils.get_decoded_html_content(ret,
settings.ENCODING_DETECTION_PERFORMANCE_MODE,
settings.ENCODING_DETECTION_MIN_CONFIDENCE)
# 解析
return self.parse(page_source)
def __get_title(self, torrent):
# title default text
if 'title' not in self.fields:
return
selector = self.fields.get('title', {})
if 'selector' in selector:
title = torrent(selector.get('selector', '')).clone()
self.__remove(title, selector)
items = self.__attribute_or_text(title, selector)
self.torrents_info['title'] = self.__index(items, selector)
elif 'text' in selector:
render_dict = {}
if "title_default" in self.fields:
title_default_selector = self.fields.get('title_default', {})
title_default_item = torrent(title_default_selector.get('selector', '')).clone()
self.__remove(title_default_item, title_default_selector)
items = self.__attribute_or_text(title_default_item, selector)
title_default = self.__index(items, title_default_selector)
render_dict.update({'title_default': title_default})
if "title_optional" in self.fields:
title_optional_selector = self.fields.get('title_optional', {})
title_optional_item = torrent(title_optional_selector.get('selector', '')).clone()
self.__remove(title_optional_item, title_optional_selector)
items = self.__attribute_or_text(title_optional_item, title_optional_selector)
title_optional = self.__index(items, title_optional_selector)
render_dict.update({'title_optional': title_optional})
self.torrents_info['title'] = Template(selector.get('text')).render(fields=render_dict)
self.torrents_info['title'] = self.__filter_text(self.torrents_info.get('title'),
selector.get('filters'))
def __get_description(self, torrent):
# title optional text
if 'description' not in self.fields:
return
selector = self.fields.get('description', {})
if "selector" in selector \
or "selectors" in selector:
description = torrent(selector.get('selector', selector.get('selectors', ''))).clone()
if description:
self.__remove(description, selector)
items = self.__attribute_or_text(description, selector)
self.torrents_info['description'] = self.__index(items, selector)
elif "text" in selector:
render_dict = {}
if "tags" in self.fields:
tags_selector = self.fields.get('tags', {})
tags_item = torrent(tags_selector.get('selector', '')).clone()
self.__remove(tags_item, tags_selector)
items = self.__attribute_or_text(tags_item, tags_selector)
tag = self.__index(items, tags_selector)
render_dict.update({'tags': tag})
if "subject" in self.fields:
subject_selector = self.fields.get('subject', {})
subject_item = torrent(subject_selector.get('selector', '')).clone()
self.__remove(subject_item, subject_selector)
items = self.__attribute_or_text(subject_item, subject_selector)
subject = self.__index(items, subject_selector)
render_dict.update({'subject': subject})
if "description_free_forever" in self.fields:
description_free_forever_selector = self.fields.get("description_free_forever", {})
description_free_forever_item = torrent(description_free_forever_selector.get("selector", '')).clone()
self.__remove(description_free_forever_item, description_free_forever_selector)
items = self.__attribute_or_text(description_free_forever_item, description_free_forever_selector)
description_free_forever = self.__index(items, description_free_forever_selector)
render_dict.update({"description_free_forever": description_free_forever})
if "description_normal" in self.fields:
description_normal_selector = self.fields.get("description_normal", {})
description_normal_item = torrent(description_normal_selector.get("selector", '')).clone()
self.__remove(description_normal_item, description_normal_selector)
items = self.__attribute_or_text(description_normal_item, description_normal_selector)
description_normal = self.__index(items, description_normal_selector)
render_dict.update({"description_normal": description_normal})
self.torrents_info['description'] = Template(selector.get('text')).render(fields=render_dict)
self.torrents_info['description'] = self.__filter_text(self.torrents_info.get('description'),
selector.get('filters'))
def __get_detail(self, torrent):
# details page text
if 'details' not in self.fields:
return
selector = self.fields.get('details', {})
details = torrent(selector.get('selector', '')).clone()
self.__remove(details, selector)
items = self.__attribute_or_text(details, selector)
item = self.__index(items, selector)
detail_link = self.__filter_text(item, selector.get('filters'))
if detail_link:
if not detail_link.startswith("http"):
if detail_link.startswith("//"):
self.torrents_info['page_url'] = self.domain.split(":")[0] + ":" + detail_link
elif detail_link.startswith("/"):
self.torrents_info['page_url'] = self.domain + detail_link[1:]
else:
self.torrents_info['page_url'] = self.domain + detail_link
else:
self.torrents_info['page_url'] = detail_link
def __get_download(self, torrent):
# download link text
if 'download' not in self.fields:
return
selector = self.fields.get('download', {})
download = torrent(selector.get('selector', '')).clone()
self.__remove(download, selector)
items = self.__attribute_or_text(download, selector)
item = self.__index(items, selector)
download_link = self.__filter_text(item, selector.get('filters'))
if download_link:
if not download_link.startswith("http") \
and not download_link.startswith("magnet"):
_scheme, _domain = StringUtils.get_url_netloc(self.domain)
if _domain in download_link:
if download_link.startswith("/"):
self.torrents_info['enclosure'] = f"{_scheme}:{download_link}"
else:
self.torrents_info['enclosure'] = f"{_scheme}://{download_link}"
else:
if download_link.startswith("/"):
self.torrents_info['enclosure'] = f"{self.domain}{download_link[1:]}"
else:
self.torrents_info['enclosure'] = f"{self.domain}{download_link}"
else:
self.torrents_info['enclosure'] = download_link
def __get_imdbid(self, torrent):
# imdbid
if "imdbid" not in self.fields:
return
selector = self.fields.get('imdbid', {})
imdbid = torrent(selector.get('selector', '')).clone()
self.__remove(imdbid, selector)
items = self.__attribute_or_text(imdbid, selector)
item = self.__index(items, selector)
self.torrents_info['imdbid'] = item
self.torrents_info['imdbid'] = self.__filter_text(self.torrents_info.get('imdbid'),
selector.get('filters'))
def __get_size(self, torrent):
# torrent size int
if 'size' not in self.fields:
return
selector = self.fields.get('size', {})
size = torrent(selector.get('selector', selector.get("selectors", ''))).clone()
self.__remove(size, selector)
items = self.__attribute_or_text(size, selector)
item = self.__index(items, selector)
if item:
size_val = item.replace("\n", "").strip()
size_val = self.__filter_text(size_val,
selector.get('filters'))
self.torrents_info['size'] = StringUtils.num_filesize(size_val)
else:
self.torrents_info['size'] = 0
def __get_leechers(self, torrent):
# torrent leechers int
if 'leechers' not in self.fields:
return
selector = self.fields.get('leechers', {})
leechers = torrent(selector.get('selector', '')).clone()
self.__remove(leechers, selector)
items = self.__attribute_or_text(leechers, selector)
item = self.__index(items, selector)
if item:
peers_val = item.split("/")[0]
peers_val = peers_val.replace(",", "")
peers_val = self.__filter_text(peers_val,
selector.get('filters'))
self.torrents_info['peers'] = int(peers_val) if peers_val and peers_val.isdigit() else 0
else:
self.torrents_info['peers'] = 0
def __get_seeders(self, torrent):
# torrent leechers int
if 'seeders' not in self.fields:
return
selector = self.fields.get('seeders', {})
seeders = torrent(selector.get('selector', '')).clone()
self.__remove(seeders, selector)
items = self.__attribute_or_text(seeders, selector)
item = self.__index(items, selector)
if item:
seeders_val = item.split("/")[0]
seeders_val = seeders_val.replace(",", "")
seeders_val = self.__filter_text(seeders_val,
selector.get('filters'))
self.torrents_info['seeders'] = int(seeders_val) if seeders_val and seeders_val.isdigit() else 0
else:
self.torrents_info['seeders'] = 0
def __get_grabs(self, torrent):
# torrent grabs int
if 'grabs' not in self.fields:
return
selector = self.fields.get('grabs', {})
grabs = torrent(selector.get('selector', '')).clone()
self.__remove(grabs, selector)
items = self.__attribute_or_text(grabs, selector)
item = self.__index(items, selector)
if item:
grabs_val = item.split("/")[0]
grabs_val = grabs_val.replace(",", "")
grabs_val = self.__filter_text(grabs_val,
selector.get('filters'))
self.torrents_info['grabs'] = int(grabs_val) if grabs_val and grabs_val.isdigit() else 0
else:
self.torrents_info['grabs'] = 0
def __get_pubdate(self, torrent):
# torrent pubdate yyyy-mm-dd hh:mm:ss
if 'date_added' not in self.fields:
return
selector = self.fields.get('date_added', {})
pubdate = torrent(selector.get('selector', '')).clone()
self.__remove(pubdate, selector)
items = self.__attribute_or_text(pubdate, selector)
pubdate_str = self.__index(items, selector)
if pubdate_str:
pubdate_str = pubdate_str.replace('\n', ' ').strip()
self.torrents_info['pubdate'] = self.__filter_text(pubdate_str,
selector.get('filters'))
def __get_date_elapsed(self, torrent):
# torrent data elaspsed text
if 'date_elapsed' not in self.fields:
return
selector = self.fields.get('date_elapsed', {})
date_elapsed = torrent(selector.get('selector', '')).clone()
self.__remove(date_elapsed, selector)
items = self.__attribute_or_text(date_elapsed, selector)
self.torrents_info['date_elapsed'] = self.__index(items, selector)
self.torrents_info['date_elapsed'] = self.__filter_text(self.torrents_info.get('date_elapsed'),
selector.get('filters'))
def __get_downloadvolumefactor(self, torrent):
# downloadvolumefactor int
selector = self.fields.get('downloadvolumefactor', {})
if not selector:
return
self.torrents_info['downloadvolumefactor'] = 1
if 'case' in selector:
for downloadvolumefactorselector in list(selector.get('case', {}).keys()):
downloadvolumefactor = torrent(downloadvolumefactorselector)
if len(downloadvolumefactor) > 0:
self.torrents_info['downloadvolumefactor'] = selector.get('case', {}).get(
downloadvolumefactorselector)
break
elif "selector" in selector:
downloadvolume = torrent(selector.get('selector', '')).clone()
self.__remove(downloadvolume, selector)
items = self.__attribute_or_text(downloadvolume, selector)
item = self.__index(items, selector)
if item:
downloadvolumefactor = re.search(r'(\d+\.?\d*)', item)
if downloadvolumefactor:
self.torrents_info['downloadvolumefactor'] = int(downloadvolumefactor.group(1))
def __get_uploadvolumefactor(self, torrent):
# uploadvolumefactor int
selector = self.fields.get('uploadvolumefactor', {})
if not selector:
return
self.torrents_info['uploadvolumefactor'] = 1
if 'case' in selector:
for uploadvolumefactorselector in list(selector.get('case', {}).keys()):
uploadvolumefactor = torrent(uploadvolumefactorselector)
if len(uploadvolumefactor) > 0:
self.torrents_info['uploadvolumefactor'] = selector.get('case', {}).get(
uploadvolumefactorselector)
break
elif "selector" in selector:
uploadvolume = torrent(selector.get('selector', '')).clone()
self.__remove(uploadvolume, selector)
items = self.__attribute_or_text(uploadvolume, selector)
item = self.__index(items, selector)
if item:
uploadvolumefactor = re.search(r'(\d+\.?\d*)', item)
if uploadvolumefactor:
self.torrents_info['uploadvolumefactor'] = int(uploadvolumefactor.group(1))
def __get_labels(self, torrent):
# labels ['label1', 'label2']
if 'labels' not in self.fields:
return
selector = self.fields.get('labels', {})
labels = torrent(selector.get("selector", "")).clone()
self.__remove(labels, selector)
items = self.__attribute_or_text(labels, selector)
if items:
self.torrents_info['labels'] = [item for item in items if item]
else:
self.torrents_info['labels'] = []
def __get_free_date(self, torrent):
# free date yyyy-mm-dd hh:mm:ss
if 'freedate' not in self.fields:
return
selector = self.fields.get('freedate', {})
freedate = torrent(selector.get('selector', '')).clone()
self.__remove(freedate, selector)
items = self.__attribute_or_text(freedate, selector)
self.torrents_info['freedate'] = self.__index(items, selector)
self.torrents_info['freedate'] = self.__filter_text(self.torrents_info.get('freedate'),
selector.get('filters'))
def __get_hit_and_run(self, torrent):
# hitandrun True/False
if 'hr' not in self.fields:
return
selector = self.fields.get('hr', {})
hit_and_run = torrent(selector.get('selector', ''))
if hit_and_run:
self.torrents_info['hit_and_run'] = True
else:
self.torrents_info['hit_and_run'] = False
def __get_category(self, torrent):
# category 电影/电视剧
if 'category' not in self.fields:
return
selector = self.fields.get('category', {})
category = torrent(selector.get('selector', '')).clone()
self.__remove(category, selector)
items = self.__attribute_or_text(category, selector)
category_value = self.__index(items, selector)
category_value = self.__filter_text(category_value,
selector.get('filters'))
if category_value and self.category:
tv_cats = [str(cat.get("id")) for cat in self.category.get("tv") or []]
movie_cats = [str(cat.get("id")) for cat in self.category.get("movie") or []]
if category_value in tv_cats \
and category_value not in movie_cats:
self.torrents_info['category'] = MediaType.TV.value
elif category_value in movie_cats:
self.torrents_info['category'] = MediaType.MOVIE.value
else:
self.torrents_info['category'] = MediaType.UNKNOWN.value
else:
self.torrents_info['category'] = MediaType.UNKNOWN.value
def get_info(self, torrent) -> dict:
"""
解析单条种子数据
"""
self.torrents_info = {}
try:
# 标题
self.__get_title(torrent)
# 描述
self.__get_description(torrent)
# 详情页面
self.__get_detail(torrent)
# 下载链接
self.__get_download(torrent)
# 完成数
self.__get_grabs(torrent)
# 下载数
self.__get_leechers(torrent)
# 做种数
self.__get_seeders(torrent)
# 大小
self.__get_size(torrent)
# IMDBID
self.__get_imdbid(torrent)
# 下载系数
self.__get_downloadvolumefactor(torrent)
# 上传系数
self.__get_uploadvolumefactor(torrent)
# 发布时间
self.__get_pubdate(torrent)
# 已发布时间
self.__get_date_elapsed(torrent)
# 免费载止时间
self.__get_free_date(torrent)
# 标签
self.__get_labels(torrent)
# HR
self.__get_hit_and_run(torrent)
# 分类
self.__get_category(torrent)
except Exception as err:
logger.error("%s 搜索出现错误:%s" % (self.indexername, str(err)))
return self.torrents_info
@staticmethod
def __filter_text(text: str, filters: list):
"""
对文件进行处理
"""
if not text or not filters or not isinstance(filters, list):
return text
if not isinstance(text, str):
text = str(text)
for filter_item in filters:
if not text:
break
method_name = filter_item.get("name")
try:
args = filter_item.get("args")
if method_name == "re_search" and isinstance(args, list):
rematch = re.search(r"%s" % args[0], text)
if rematch:
text = rematch.group(args[-1])
elif method_name == "split" and isinstance(args, list):
text = text.split(r"%s" % args[0])[args[-1]]
elif method_name == "replace" and isinstance(args, list):
text = text.replace(r"%s" % args[0], r"%s" % args[-1])
elif method_name == "dateparse" and isinstance(args, str):
text = text.replace("\n", " ").strip()
text = datetime.datetime.strptime(text, r"%s" % args)
elif method_name == "strip":
text = text.strip()
elif method_name == "appendleft":
text = f"{args}{text}"
elif method_name == "querystring":
parsed_url = urlparse(str(text))
query_params = parse_qs(parsed_url.query)
param_value = query_params.get(args)
text = param_value[0] if param_value else ''
except Exception as err:
logger.debug(f'过滤器 {method_name} 处理失败:{str(err)} - {traceback.format_exc()}')
return text.strip()
@staticmethod
def __remove(item, selector):
"""
移除元素
"""
if selector and "remove" in selector:
removelist = selector.get('remove', '').split(', ')
for v in removelist:
item.remove(v)
@staticmethod
def __attribute_or_text(item, selector: dict):
if not selector:
return item
if not item:
return []
if 'attribute' in selector:
items = [i.attr(selector.get('attribute')) for i in item.items() if i]
else:
items = [i.text() for i in item.items() if i]
return items
@staticmethod
def __index(items: list, selector: dict):
if not items:
return None
if selector:
if "contents" in selector \
and len(items) > int(selector.get("contents")):
items = items[0].split("\n")[selector.get("contents")]
elif "index" in selector \
and len(items) > int(selector.get("index")):
items = items[int(selector.get("index"))]
if isinstance(items, list):
items = items[0]
return items
def parse(self, html_text: str) -> List[dict]:
"""
解析整个页面
"""
if not html_text:
self.is_error = True
return []
# 清空旧结果
self.torrents_info_array = []
try:
# 解析站点文本对象
html_doc = PyQuery(html_text)
# 种子筛选器
torrents_selector = self.list.get('selector', '')
# 遍历种子html列表
for torn in html_doc(torrents_selector):
self.torrents_info_array.append(copy.deepcopy(self.get_info(PyQuery(torn))))
if len(self.torrents_info_array) >= int(self.result_num):
break
return self.torrents_info_array
except Exception as err:
self.is_error = True
logger.warn(f"错误:{self.indexername} {str(err)}")

View File

@@ -1,8 +1,6 @@
import urllib.parse
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -51,7 +49,7 @@ class HaiDanSpider:
"7": 1
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -3,8 +3,6 @@ import json
import re
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -51,7 +49,7 @@ class MTorrentSpider:
"7": "DIY 国配 中字"
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -1,8 +1,6 @@
import re
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils
@@ -23,7 +21,7 @@ class TNodeSpider:
_downloadurl = "%sapi/torrent/download/%s"
_pageurl = "%storrent/info/%s"
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
if indexer:
self._indexerid = indexer.get('id')
self._domain = indexer.get('domain')

View File

@@ -1,8 +1,6 @@
from typing import List, Tuple
from urllib.parse import quote
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils
@@ -19,7 +17,7 @@ class TorrentLeech:
_pageurl = "%storrent/%s"
_timeout = 15
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self._indexer = indexer
if indexer.get('proxy'):
self._proxy = settings.PROXY

View File

@@ -1,7 +1,5 @@
from typing import Tuple, List
from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
@@ -46,7 +44,7 @@ class YemaSpider:
"12": "完结",
}
def __init__(self, indexer: CommentedMap):
def __init__(self, indexer: dict):
self.systemconfig = SystemConfigOper()
if indexer:
self._indexerid = indexer.get('id')

View File

@@ -7,6 +7,7 @@ import pytz
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.jobstores.base import JobLookupError
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from app import schemas
from app.chain import ChainBase
@@ -16,13 +17,14 @@ from app.chain.site import SiteChain
from app.chain.subscribe import SubscribeChain
from app.chain.tmdb import TmdbChain
from app.chain.transfer import TransferChain
from app.chain.workflow import WorkflowChain
from app.core.config import settings
from app.core.event import EventManager
from app.core.plugin import PluginManager
from app.db.systemconfig_oper import SystemConfigOper
from app.helper.sites import SitesHelper
from app.log import logger
from app.schemas import Notification, NotificationType
from app.schemas import Notification, NotificationType, Workflow
from app.schemas.types import EventType, SystemConfigKey
from app.utils.singleton import Singleton
from app.utils.timer import TimerUtils
@@ -345,6 +347,10 @@ class Scheduler(metaclass=Singleton):
}
)
# 初始化工作流服务
self.init_workflow_jobs()
# 初始化插件服务
self.init_plugin_jobs()
# 打印服务
@@ -401,52 +407,42 @@ class Scheduler(metaclass=Singleton):
for pid in PluginManager().get_running_plugin_ids():
self.update_plugin_job(pid)
def update_plugin_job(self, pid: str):
def init_workflow_jobs(self):
"""
更新插件定时服务
初始化工作流定时服务
"""
if not self._scheduler or not pid:
for workflow in WorkflowChain().get_workflows() or []:
self.update_workflow_job(workflow)
def remove_workflow_job(self, workflow: Workflow):
"""
移除工作流服务
"""
if not self._scheduler:
return
# 移除该插件的全部服务
self.remove_plugin_job(pid)
# 获取插件服务列表
with self._lock:
try:
plugin_services = PluginManager().get_plugin_services(pid=pid)
except Exception as e:
logger.error(f"运行插件 {pid} 服务失败:{str(e)} - {traceback.format_exc()}")
job_id = f"workflow-{workflow.id}"
service = self._jobs.pop(job_id, None)
if not service:
return
# 获取插件名称
plugin_name = PluginManager().get_plugin_attr(pid, "plugin_name")
# 开始注册插件服务
for service in plugin_services:
try:
sid = f"{service['id']}"
job_id = sid.split("|")[0]
self.remove_plugin_job(pid, job_id)
self._jobs[job_id] = {
"func": service["func"],
"name": service["name"],
"pid": pid,
"plugin_name": plugin_name,
"kwargs": service.get("func_kwargs") or {},
"running": False,
}
self._scheduler.add_job(
self.start,
service["trigger"],
id=sid,
name=service["name"],
**(service.get("kwargs") or {}),
kwargs={"job_id": job_id},
replace_existing=True
)
logger.info(f"注册插件{plugin_name}服务:{service['name']} - {service['trigger']}")
except Exception as e:
logger.error(f"注册插件{plugin_name}服务失败:{str(e)} - {service}")
SchedulerChain().messagehelper.put(title=f"插件 {plugin_name} 服务注册失败",
message=str(e),
role="system")
try:
# 在调度器中查找并移除对应的 job
job_removed = False
for job in list(self._scheduler.get_jobs()):
if job_id == job.id:
try:
self._scheduler.remove_job(job.id)
job_removed = True
except JobLookupError:
pass
break
if job_removed:
logger.info(f"移除工作流服务:{service.get('name')}")
except Exception as e:
logger.error(f"移除工作流服务失败:{str(e)} - {job_id}: {service}")
SchedulerChain().messagehelper.put(title=f"工作流 {workflow.name} 服务移除失败",
message=str(e),
role="system")
def remove_plugin_job(self, pid: str, job_id: str = None):
"""
@@ -494,6 +490,83 @@ class Scheduler(metaclass=Singleton):
message=str(e),
role="system")
def update_workflow_job(self, workflow: Workflow):
"""
更新工作流定时服务
"""
# 移除该工作流的全部服务
self.remove_workflow_job(workflow)
# 添加工作流服务
with self._lock:
try:
job_id = f"workflow-{workflow.id}"
self._jobs[job_id] = {
"func": WorkflowChain().process,
"name": workflow.name,
"running": False,
}
self._scheduler.add_job(
self.start,
trigger=CronTrigger.from_crontab(workflow.timer),
id=job_id,
name=workflow.name,
kwargs={"job_id": job_id, "workflow_id": job_id},
replace_existing=True
)
logger.info(f"注册工作流服务:{workflow.name} - {workflow.timer}")
except Exception as e:
logger.error(f"注册工作流服务失败:{workflow.name} - {str(e)}")
SchedulerChain().messagehelper.put(title=f"工作流 {workflow.name} 服务注册失败",
message=str(e),
role="system")
def update_plugin_job(self, pid: str):
"""
更新插件定时服务
"""
if not self._scheduler or not pid:
return
# 移除该插件的全部服务
self.remove_plugin_job(pid)
# 获取插件服务列表
with self._lock:
try:
plugin_services = PluginManager().get_plugin_services(pid=pid)
except Exception as e:
logger.error(f"运行插件 {pid} 服务失败:{str(e)} - {traceback.format_exc()}")
return
# 获取插件名称
plugin_name = PluginManager().get_plugin_attr(pid, "plugin_name")
# 开始注册插件服务
for service in plugin_services:
try:
sid = f"{service['id']}"
job_id = sid.split("|")[0]
self.remove_plugin_job(pid, job_id)
self._jobs[job_id] = {
"func": service["func"],
"name": service["name"],
"pid": pid,
"plugin_name": plugin_name,
"kwargs": service.get("func_kwargs") or {},
"running": False,
}
self._scheduler.add_job(
self.start,
service["trigger"],
id=sid,
name=service["name"],
**(service.get("kwargs") or {}),
kwargs={"job_id": job_id},
replace_existing=True
)
logger.info(f"注册插件{plugin_name}服务:{service['name']} - {service['trigger']}")
except Exception as e:
logger.error(f"注册插件{plugin_name}服务失败:{str(e)} - {service}")
SchedulerChain().messagehelper.put(title=f"插件 {plugin_name} 服务注册失败",
message=str(e),
role="system")
def list(self) -> List[schemas.ScheduleInfo]:
"""
当前所有任务

View File

@@ -19,3 +19,5 @@ from .file import *
from .exception import *
from .system import *
from .event import *
from .workflow import *
from .download import *

12
app/schemas/download.py Normal file
View File

@@ -0,0 +1,12 @@
from typing import Optional
from pydantic import BaseModel, Field
class DownloadTask(BaseModel):
"""
下载任务
"""
download_id: Optional[str] = Field(None, description="任务ID")
downloader: Optional[str] = Field(None, description="下载器")
completed: Optional[bool] = Field(False, description="是否完成")

View File

@@ -244,6 +244,7 @@ class DiscoverMediaSource(BaseModel):
api_path: str = Field(..., description="媒体数据源API地址")
filter_params: Optional[Dict[str, Any]] = Field(default=None, description="过滤参数")
filter_ui: Optional[List[dict]] = Field(default=[], description="过滤参数UI配置")
depends: Optional[Dict[str, list]] = Field(default=None, description="UI依赖关系字典")
class DiscoverSourceEventData(ChainEventData):

View File

@@ -115,3 +115,9 @@ class SiteUserData(BaseModel):
class SiteAuth(BaseModel):
site: Optional[str] = None
params: Optional[Dict[str, Union[int, str]]] = Field(default_factory=dict)
class SiteCategory(BaseModel):
id: Optional[int] = None
cat: Optional[str] = None
desc: Optional[str] = None

63
app/schemas/workflow.py Normal file
View File

@@ -0,0 +1,63 @@
from typing import Optional, List
from pydantic import BaseModel, Field
from app.schemas.context import Context, MediaInfo
from app.schemas.file import FileItem
from app.schemas.download import DownloadTask
from app.schemas.site import Site
from app.schemas.subscribe import Subscribe
from app.schemas.message import Notification
class Workflow(BaseModel):
"""
工作流信息
"""
id: Optional[str] = Field(None, description="工作流ID")
name: Optional[str] = Field(None, description="工作流名称")
description: Optional[str] = Field(None, description="工作流描述")
timer: Optional[str] = Field(None, description="定时器")
state: Optional[str] = Field(None, description="状态")
current_action: Optional[str] = Field(None, description="当前执行动作")
result: Optional[str] = Field(None, description="任务执行结果")
run_count: Optional[int] = Field(0, description="已执行次数")
actions: Optional[list] = Field([], description="任务列表")
add_time: Optional[str] = Field(None, description="创建时间")
last_time: Optional[str] = Field(None, description="最后执行时间")
class Config:
orm_mode = True
class ActionParams(BaseModel):
"""
动作基础参数
"""
pass
class Action(BaseModel):
"""
动作信息
"""
id: Optional[str] = Field(None, description="动作ID (类名)")
name: Optional[str] = Field(None, description="动作名称")
description: Optional[str] = Field(None, description="动作描述")
loop: Optional[bool] = Field(False, description="是否需要循环")
loop_interval: Optional[int] = Field(0, description="循环间隔 (秒)")
params: Optional[ActionParams] = Field({}, description="参数")
class ActionContext(BaseModel):
"""
动作基础上下文,各动作通用数据
"""
content: Optional[str] = Field(None, description="文本类内容")
torrents: Optional[List[Context]] = Field([], description="资源列表")
medias: Optional[List[MediaInfo]] = Field([], description="媒体列表")
fileitems: Optional[List[FileItem]] = Field([], description="文件列表")
downloads: Optional[List[DownloadTask]] = Field([], description="下载任务列表")
sites: Optional[List[Site]] = Field([], description="站点列表")
subscribes: Optional[List[Subscribe]] = Field([], description="订阅列表")
messages: Optional[List[Notification]] = Field([], description="消息列表")

View File

@@ -3,6 +3,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.startup.workflow_initializer import init_workflow, stop_workflow
from app.startup.modules_initializer import shutdown_modules, start_modules
from app.startup.plugins_initializer import init_plugins_async
from app.startup.routers_initializer import init_routers
@@ -16,6 +17,8 @@ async def lifespan(app: FastAPI):
print("Starting up...")
# 启动模块
start_modules(app)
# 初始化工作流动作
init_workflow(app)
# 初始化路由
init_routers(app)
# 初始化插件
@@ -35,3 +38,6 @@ async def lifespan(app: FastAPI):
print(f"Error during plugin installation shutdown: {e}")
# 清理模块
shutdown_modules(app)
# 关闭工作流
stop_workflow(app)

View File

@@ -0,0 +1,17 @@
from fastapi import FastAPI
from app.core.workflow import WorkFlowManager
def init_workflow(_: FastAPI):
"""
初始化动作
"""
WorkFlowManager()
def stop_workflow(_: FastAPI):
"""
停止动作
"""
WorkFlowManager().stop()

View File

@@ -42,7 +42,7 @@ class SecurityUtils:
@staticmethod
def is_safe_url(url: str, allowed_domains: Union[Set[str], List[str]], strict: bool = False) -> bool:
"""
验证URL是否在允许的域名列表中包括带有端口的域名
验证URL是否在允许的域名列表中包括带有端口的域名
:param url: 需要验证的 URL
:param allowed_domains: 允许的域名集合,域名可以包含端口
@@ -65,7 +65,6 @@ class SecurityUtils:
netloc = parsed_url.netloc.lower()
if not netloc:
return False
netloc_no_port = netloc.split(":")[0]
# 检查每个允许的域名
allowed_domains = {d.lower() for d in allowed_domains}
@@ -79,7 +78,7 @@ class SecurityUtils:
return True
else:
# 非严格模式下,允许子域名匹配
if netloc_no_port == allowed_netloc or netloc_no_port.endswith('.' + allowed_netloc):
if netloc == allowed_netloc or netloc.endswith('.' + allowed_netloc):
return True
return False

View File

@@ -63,3 +63,5 @@ OCR_HOST=https://movie-pilot.org
PLUGIN_MARKET=https://github.com/jxxghp/MoviePilot-Plugins,https://github.com/thsrite/MoviePilot-Plugins,https://github.com/InfinityPacer/MoviePilot-Plugins,https://github.com/honue/MoviePilot-Plugins
# 搜索多个名称true/false为true时搜索时会同时搜索中英文及原始名称搜索结果会更全面但会增加搜索时间为false时其中一个名称搜索到结果或全部名称搜索完毕即停止
SEARCH_MULTIPLE_NAME=true
# 为指定字幕添加.default后缀设置为默认字幕支持为'zh-cn''zh-tw''eng'添加默认字幕未定义或设置为None则不添加
DEFAULT_SUB=None

View File

@@ -0,0 +1,24 @@
"""2.1.1
Revision ID: 279a949d81b6
Revises: ca5461f314f2
Create Date: 2025-02-14 19:02:24.989349
"""
from app.chain.torrents import TorrentsChain
# revision identifiers, used by Alembic.
revision = '279a949d81b6'
down_revision = 'ca5461f314f2'
branch_labels = None
depends_on = None
def upgrade() -> None:
# 清理一次缓存
TorrentsChain().clear_torrents()
def downgrade() -> None:
pass

View File

@@ -32,6 +32,7 @@ func_timeout==4.3.5
bs4~=0.0.1
beautifulsoup4~=4.12.2
pillow~=10.4.0
pillow-avif-plugin~=1.4.6
pyTelegramBotAPI~=4.12.0
playwright~=1.37.0
cf-clearance~=0.31.0

View File

@@ -1,20 +1,22 @@
from distutils.core import setup
from Cython.Build import cythonize
module_list = ['app/helper/sites.py']
setup(
name="",
author="",
zip_safe=False,
include_package_data=True,
ext_modules=cythonize(
module_list=module_list,
nthreads=0,
compiler_directives={"language_level": "3"},
),
script_args=["build_ext", "-j", '2', "--inplace"],
)
name="MoviePilot",
author="jxxghp",
zip_safe=False,
include_package_data=True,
ext_modules=cythonize(
module_list=module_list,
nthreads=0,
compiler_directives={
"language_level": "3",
"binding": False,
"nonecheck": False
},
),
script_args=["build_ext", "-j", '2', "--inplace"],
)

View File

@@ -1,2 +1,2 @@
APP_VERSION = 'v2.2.8'
FRONTEND_VERSION = 'v2.2.8'
APP_VERSION = 'v2.3.0'
FRONTEND_VERSION = 'v2.3.0'