Compare commits

...

48 Commits

Author SHA1 Message Date
jxxghp
6156b9a481 Merge pull request #4561 from jxxghp/cursor/move-media-files-to-season-directory-6ee0 2025-07-08 18:00:50 +08:00
Cursor Agent
8c516c5691 Fix: Ensure parent item exists before saving NFO file
Co-authored-by: jxxghp <jxxghp@163.com>
2025-07-08 09:51:43 +00:00
Cursor Agent
bf9a149898 Fix TV show metadata scraping to use correct parent directory
Co-authored-by: jxxghp <jxxghp@163.com>
2025-07-08 09:31:35 +00:00
jxxghp
277cde8db2 更新 version.py 2025-07-08 12:17:57 +08:00
jxxghp
e06bdaf53e fix:资源包升级失败时一直重启的问题 2025-07-08 12:06:30 +08:00
jxxghp
da367bd138 fix spider 2025-07-08 11:25:36 +08:00
jxxghp
d336bcbf1f fix etree 2025-07-08 11:00:38 +08:00
jxxghp
a8aedba6ff fix https://github.com/jxxghp/MoviePilot/issues/4552 2025-07-08 09:34:24 +08:00
jxxghp
9ede86c6a3 Merge pull request #4555 from cddjr/fix_local_exists 2025-07-07 23:30:51 +08:00
景大侠
1468f2b082 fix 本地媒体文件检查时首选含影视标题的目录
避免了以年份、分辨率等作为重命名第一层目录时的误判问题
2025-07-07 23:24:04 +08:00
jxxghp
e04ae70f89 Merge pull request #4553 from cddjr/fix_trim_task 2025-07-07 22:15:12 +08:00
景大侠
7f7d2c9ba8 fix 飞牛刷新媒体库报错Task duplicate 2025-07-07 21:46:17 +08:00
jxxghp
d73deef8dc Merge pull request #4549 from cddjr/fix_tr 2025-07-07 17:28:28 +08:00
景大侠
f93a1540af fix TR模块报错找不到_protocol属性
v2.5.9引入的bug
2025-07-07 17:05:28 +08:00
jxxghp
c8bd9cb716 Merge pull request #4548 from cddjr/set_lock_timeout 2025-07-07 12:04:46 +08:00
景大侠
2ed13c7e5b fix 订阅匹配锁增加超时,避免罕见的长时间卡任务问题 2025-07-07 11:51:58 +08:00
jxxghp
647c0929c5 v2.6.2 2025-07-06 08:28:33 +08:00
jxxghp
a61533a131 Merge pull request #4536 from cddjr/fix_local_exists 2025-07-05 22:02:16 +08:00
景大侠
bc5e682308 fix 本地媒体检查潜在的额外扫盘问题 2025-07-05 21:46:21 +08:00
jxxghp
25a481df12 Merge pull request #4534 from jxxghp/cursor/bc-55af1137-dea1-4191-9033-64ea5fcaa43a-d338
修复文件整理快照处理问题
2025-07-05 15:44:51 +08:00
Cursor Agent
764c10fae4 Fix snapshot handling logic to correctly process files during monitoring
Co-authored-by: jxxghp <jxxghp@163.com>
2025-07-05 07:22:44 +00:00
Cursor Agent
d8249d4e38 Fix snapshot handling logic to correctly process files during monitoring
Co-authored-by: jxxghp <jxxghp@163.com>
2025-07-05 07:19:53 +00:00
jxxghp
0e3e42b398 Merge pull request #4531 from Aqr-K/feat-process 2025-07-05 06:33:57 +08:00
Aqr-K
7d3b64dcf9 Update requirements.in 2025-07-05 03:16:49 +08:00
Aqr-K
2c8d525796 feat: 增加进程名设置 2025-07-05 03:14:54 +08:00
jxxghp
4869f071ab fix error message 2025-07-04 21:34:31 +08:00
jxxghp
3029eeaf6f fix error message 2025-07-04 21:33:32 +08:00
jxxghp
33fb692aee 更新 plugin.py 2025-07-03 22:20:04 +08:00
jxxghp
6a075d144f 更新 version.py 2025-07-03 20:19:36 +08:00
jxxghp
aa23315599 rollback transmission-rpc 2025-07-03 19:16:36 +08:00
jxxghp
8d0bb35505 add 网络流量API 2025-07-03 19:05:43 +08:00
jxxghp
32e76bc6ce Merge pull request #4529 from cddjr/add_ctx_mgr_proto 2025-07-03 18:47:08 +08:00
景大侠
6c02766000 AutoCloseResponse支持上下文管理协议,避免部分插件报错 2025-07-03 18:38:48 +08:00
jxxghp
52ef390464 图片代理Api增加cache参数 2025-07-03 17:07:54 +08:00
jxxghp
43a557601e fix local usage 2025-07-03 16:48:35 +08:00
jxxghp
82ff7fc090 fix SMB Usage 2025-07-03 15:21:41 +08:00
jxxghp
db40b5105b 修正目录监控模式匹配 2025-07-03 13:55:54 +08:00
jxxghp
b2a379b84b fix SMB Storage 2025-07-03 12:41:44 +08:00
jxxghp
97cbd816fe add SMB Storage 2025-07-03 12:31:59 +08:00
jxxghp
7de3bb2a91 v2.6.0 2025-07-02 21:36:02 +08:00
jxxghp
3a8a2bcab4 Merge pull request #4519 from Aqr-K/patch-2 2025-07-01 19:46:12 +08:00
Aqr-K
eb1adbe992 fix: 错误文案修复,统一文案格式 2025-07-01 19:26:11 +08:00
jxxghp
b55966d42b Merge pull request #4516 from Aqr-K/feat-command
feat(command): 增加 `show` ,用来判断是否注册进菜单里显示
2025-07-01 17:20:59 +08:00
Aqr-K
451ca9cb5a feat(command): 增加 show ,用来判断是否注册进菜单里显示 2025-07-01 17:19:01 +08:00
jxxghp
1e2c607ced fix #4515 流平台不合并到现有标签中,如有需要通过命名模块配置 2025-07-01 17:02:29 +08:00
jxxghp
5ff7da0d19 fix #4515 流平台不合并到现有标签中,如有需要通过命名模块配置 2025-07-01 16:57:45 +08:00
jxxghp
8e06c6f8e6 remove openai 2025-07-01 14:48:16 +08:00
jxxghp
4497cd3904 add site stat api 2025-07-01 11:23:20 +08:00
46 changed files with 2481 additions and 833 deletions

View File

@@ -166,3 +166,19 @@ def memory2(_: Annotated[str, Depends(verify_apitoken)]) -> Any:
获取当前内存使用率 API_TOKEN认证?token=xxx
"""
return memory()
@router.get("/network", summary="获取当前网络流量", response_model=List[int])
def network(_: schemas.TokenPayload = Depends(verify_token)) -> Any:
    """
    Return the current network traffic (upload and download), in bytes/s.

    The value is whatever SystemUtils.network_usage() reports; the route
    declares it as a list of ints.
    """
    return SystemUtils.network_usage()
@router.get("/network2", summary="获取当前网络流量API_TOKEN", response_model=List[int])
def network2(_: Annotated[str, Depends(verify_apitoken)]) -> Any:
    """
    Return the current network traffic, authenticated via API_TOKEN (?token=xxx).

    Delegates to network() once the API-token dependency has been resolved.
    """
    return network()

View File

@@ -1,5 +1,6 @@
from typing import List, Any, Dict, Optional
from app.helper.sites import SitesHelper
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks
@@ -21,7 +22,6 @@ from app.db.models.siteuserdata import SiteUserData
from app.db.site_oper import SiteOper
from app.db.systemconfig_oper import SystemConfigOper
from app.db.user_oper import get_current_active_superuser
from app.helper.sites import SitesHelper
from app.scheduler import Scheduler
from app.schemas.types import SystemConfigKey, EventType
from app.utils.string import StringUtils
@@ -333,8 +333,8 @@ def read_site_by_domain(
return site
@router.get("/statistic/{site_url}", summary="站点统计信息", response_model=schemas.SiteStatistic)
def read_site_by_domain(
@router.get("/statistic/{site_url}", summary="特定站点统计信息", response_model=schemas.SiteStatistic)
def read_statistic_by_domain(
site_url: str,
db: Session = Depends(get_db),
_: schemas.TokenPayload = Depends(verify_token)
@@ -349,6 +349,17 @@ def read_site_by_domain(
return schemas.SiteStatistic(domain=domain)
@router.get("/statistic", summary="所有站点统计信息", response_model=List[schemas.SiteStatistic])
def read_statistics(
        db: Session = Depends(get_db),
        _: schemas.TokenPayload = Depends(verify_token)
) -> Any:
    """
    Return the statistics of all sites.

    :param db: database session injected by FastAPI
    :return: the result of SiteStatistic.list(db)
    """
    return SiteStatistic.list(db)
@router.get("/rss", summary="所有订阅站点", response_model=List[schemas.Site])
def read_rss_sites(db: Session = Depends(get_db),
_: schemas.TokenPayload = Depends(verify_token)) -> List[dict]:

View File

@@ -144,6 +144,7 @@ def fetch_image(
def proxy_img(
imgurl: str,
proxy: bool = False,
cache: bool = False,
if_none_match: Annotated[str | None, Header()] = None,
_: schemas.TokenPayload = Depends(verify_resource_token)
) -> Response:
@@ -154,7 +155,7 @@ def proxy_img(
hosts = [config.config.get("host") for config in MediaServerHelper().get_configs().values() if
config and config.config and config.config.get("host")]
allowed_domains = set(settings.SECURITY_IMAGE_DOMAINS) | set(hosts)
return fetch_image(url=imgurl, proxy=proxy, use_disk_cache=False,
return fetch_image(url=imgurl, proxy=proxy, use_disk_cache=cache,
if_none_match=if_none_match, allowed_domains=allowed_domains)

View File

@@ -19,7 +19,6 @@ from app.utils.string import StringUtils
recognize_lock = Lock()
scraping_lock = Lock()
scraping_files = []
class MediaChain(ChainBase):
@@ -35,25 +34,25 @@ class MediaChain(ChainBase):
switchs = SystemConfigOper().get(SystemConfigKey.ScrapingSwitchs) or {}
# 默认配置
default_switchs = {
'movie_nfo': True, # 电影NFO
'movie_poster': True, # 电影海报
'movie_backdrop': True, # 电影背景图
'movie_logo': True, # 电影Logo
'movie_disc': True, # 电影光盘图
'movie_banner': True, # 电影横幅图
'movie_thumb': True, # 电影缩略图
'tv_nfo': True, # 电视剧NFO
'tv_poster': True, # 电视剧海报
'tv_backdrop': True, # 电视剧背景图
'tv_banner': True, # 电视剧横幅图
'tv_logo': True, # 电视剧Logo
'tv_thumb': True, # 电视剧缩略图
'season_nfo': True, # 季NFO
'season_poster': True, # 季海报
'season_banner': True, # 季横幅图
'season_thumb': True, # 季缩略图
'episode_nfo': True, # 集NFO
'episode_thumb': True # 集缩略图
'movie_nfo': True, # 电影NFO
'movie_poster': True, # 电影海报
'movie_backdrop': True, # 电影背景图
'movie_logo': True, # 电影Logo
'movie_disc': True, # 电影光盘图
'movie_banner': True, # 电影横幅图
'movie_thumb': True, # 电影缩略图
'tv_nfo': True, # 电视剧NFO
'tv_poster': True, # 电视剧海报
'tv_backdrop': True, # 电视剧背景图
'tv_banner': True, # 电视剧横幅图
'tv_logo': True, # 电视剧Logo
'tv_thumb': True, # 电视剧缩略图
'season_nfo': True, # 季NFO
'season_poster': True, # 季海报
'season_banner': True, # 季横幅图
'season_thumb': True, # 季缩略图
'episode_nfo': True, # 集NFO
'episode_thumb': True # 集缩略图
}
# 合并用户配置和默认配置
for key, default_value in default_switchs.items():
@@ -344,23 +343,49 @@ class MediaChain(ChainBase):
return
event_data = event.event_data or {}
fileitem: FileItem = event_data.get("fileitem")
file_list: List[str] = event_data.get("file_list", [])
meta: MetaBase = event_data.get("meta")
mediainfo: MediaInfo = event_data.get("mediainfo")
overwrite = event_data.get("overwrite", False)
if not fileitem:
return
# 刮削锁
with scraping_lock:
if fileitem.path in scraping_files:
# 检查文件项是否存在
storagechain = StorageChain()
if not storagechain.get_item(fileitem):
logger.warn(f"文件项不存在:{fileitem.path}")
return
scraping_files.append(fileitem.path)
try:
# 执行刮削
self.scrape_metadata(fileitem=fileitem, meta=meta, mediainfo=mediainfo, overwrite=overwrite)
finally:
# 释放锁
with scraping_lock:
scraping_files.remove(fileitem.path)
# 检查是否为目录
if fileitem.type == "file":
# 单个文件刮削
self.scrape_metadata(fileitem=fileitem,
mediainfo=mediainfo,
init_folder=False,
parent=storagechain.get_parent_item(fileitem),
overwrite=overwrite)
else:
# 检查目的目录下是否已经有nfo刮削文件
sub_files = storagechain.list_files(fileitem)
if any(f.name.endswith('.nfo') for f in sub_files):
logger.info(f"目录 {fileitem.path} 已有NFO文件开始增量刮削...")
for file_path in file_list:
file_item = storagechain.get_file_item(storage=fileitem.storage,
path=Path(file_path))
if file_item:
# 对于电视剧文件,应该保存到与视频文件相同的目录
# 而不是电视剧根目录
self.scrape_metadata(fileitem=file_item,
mediainfo=mediainfo,
init_folder=False,
parent=None, # 让函数内部自动获取正确的父目录
overwrite=overwrite)
else:
# 执行全量刮削
logger.info(f"开始全量刮削目录 {fileitem.path} ...")
self.scrape_metadata(fileitem=fileitem, meta=meta, init_folder=True,
mediainfo=mediainfo, overwrite=overwrite)
def scrape_metadata(self, fileitem: schemas.FileItem,
meta: MetaBase = None, mediainfo: MediaInfo = None,
@@ -436,6 +461,9 @@ class MediaChain(ChainBase):
logger.error(f"{_url} 图片下载失败:{str(err)}")
return None
if not fileitem:
return
# 当前文件路径
filepath = Path(fileitem.path)
if fileitem.type == "file" \
@@ -464,6 +492,8 @@ class MediaChain(ChainBase):
movie_nfo = self.metadata_nfo(meta=meta, mediainfo=mediainfo)
if movie_nfo:
# 保存或上传nfo文件到上级目录
if not parent:
parent = storagechain.get_parent_item(fileitem)
__save_file(_fileitem=parent, _path=nfo_path, _content=movie_nfo)
else:
logger.warn(f"{filepath.name} nfo文件生成失败")
@@ -494,8 +524,9 @@ class MediaChain(ChainBase):
files = __list_files(_fileitem=fileitem)
for file in files:
self.scrape_metadata(fileitem=file,
meta=meta, mediainfo=mediainfo,
init_folder=False, parent=fileitem,
mediainfo=mediainfo,
init_folder=False,
parent=fileitem,
overwrite=overwrite)
# 生成目录内图片文件
if init_folder:
@@ -587,11 +618,11 @@ class MediaChain(ChainBase):
else:
logger.info("集缩略图刮削已关闭,跳过")
else:
# 当前为目录,处理目录内的文件
# 当前为电视剧目录,处理目录内的文件
files = __list_files(_fileitem=fileitem)
for file in files:
self.scrape_metadata(fileitem=file,
meta=meta, mediainfo=mediainfo,
mediainfo=mediainfo,
parent=fileitem if file.type == "file" else None,
init_folder=True if file.type == "dir" else False,
overwrite=overwrite)
@@ -659,7 +690,8 @@ class MediaChain(ChainBase):
# 只下载当前刮削季的图片
image_season = "00" if "specials" in image_name else image_name[6:8]
if image_season != str(season_meta.begin_season).rjust(2, '0'):
logger.info(f"当前刮削季为:{season_meta.begin_season},跳过文件:{image_path}")
logger.info(
f"当前刮削季为:{season_meta.begin_season},跳过文件:{image_path}")
continue
if overwrite or not storagechain.get_file_item(storage=fileitem.storage,
path=image_path):

View File

@@ -271,16 +271,20 @@ class SiteChain(ChainBase):
logger.error(f"获取站点页面失败:{url}")
return favicon_url, None
html = etree.HTML(html_text)
if StringUtils.is_valid_html_element(html):
fav_link = html.xpath('//head/link[contains(@rel, "icon")]/@href')
if fav_link:
favicon_url = urljoin(url, fav_link[0])
try:
if StringUtils.is_valid_html_element(html):
fav_link = html.xpath('//head/link[contains(@rel, "icon")]/@href')
if fav_link:
favicon_url = urljoin(url, fav_link[0])
res = RequestUtils(cookies=cookie, timeout=15, ua=ua).get_res(url=favicon_url)
if res:
return favicon_url, base64.b64encode(res.content).decode()
else:
logger.error(f"获取站点图标失败:{favicon_url}")
res = RequestUtils(cookies=cookie, timeout=15, ua=ua).get_res(url=favicon_url)
if res:
return favicon_url, base64.b64encode(res.content).decode()
else:
logger.error(f"获取站点图标失败:{favicon_url}")
finally:
if html is not None:
del html
return favicon_url, None
def sync_cookies(self, manual=False) -> Tuple[bool, str]:

View File

@@ -39,6 +39,8 @@ class SubscribeChain(ChainBase):
"""
_rlock = threading.RLock()
# 避免莫名原因导致长时间持有锁
_LOCK_TIMOUT = 3600 * 2
def add(self, title: str, year: str,
mtype: MediaType = None,
@@ -279,8 +281,15 @@ class SubscribeChain(ChainBase):
:param manual: 是否手动搜索
:return: 更新订阅状态为R或删除订阅
"""
with self._rlock:
logger.debug(f"search lock acquired at {datetime.now()}")
lock_acquired = False
try:
if lock_acquired := self._rlock.acquire(
blocking=True, timeout=self._LOCK_TIMOUT
):
logger.debug(f"search lock acquired at {datetime.now()}")
else:
logger.warn("search上锁超时")
subscribeoper = SubscribeOper()
if sid:
subscribe = subscribeoper.get(sid)
@@ -434,14 +443,17 @@ class SubscribeChain(ChainBase):
else:
self.messagehelper.put('没有找到订阅!', title="订阅搜索", role="system")
logger.debug(f"search Lock released at {datetime.now()}")
finally:
subscribes.clear()
del subscribes
finally:
if lock_acquired:
self._rlock.release()
logger.debug(f"search Lock released at {datetime.now()}")
# 如果不是大内存模式,进行垃圾回收
if not settings.BIG_MEMORY_MODE:
gc.collect()
# 如果不是大内存模式,进行垃圾回收
if not settings.BIG_MEMORY_MODE:
gc.collect()
def update_subscribe_priority(self, subscribe: Subscribe, meta: MetaBase,
mediainfo: MediaInfo, downloads: Optional[List[Context]]):
@@ -564,8 +576,14 @@ class SubscribeChain(ChainBase):
logger.warn('没有缓存资源,无法匹配订阅')
return
with self._rlock:
logger.debug(f"match lock acquired at {datetime.now()}")
lock_acquired = False
try:
if lock_acquired := self._rlock.acquire(
blocking=True, timeout=self._LOCK_TIMOUT
):
logger.debug(f"match lock acquired at {datetime.now()}")
else:
logger.warn("match上锁超时")
# 预识别所有未识别的种子
processed_torrents: Dict[str, List[Context]] = {}
@@ -821,8 +839,10 @@ class SubscribeChain(ChainBase):
del processed_torrents
subscribes.clear()
del subscribes
logger.debug(f"match Lock released at {datetime.now()}")
finally:
if lock_acquired:
self._rlock.release()
logger.debug(f"match Lock released at {datetime.now()}")
def check(self):
"""

View File

@@ -488,7 +488,9 @@ class TransferChain(ChainBase, metaclass=Singleton):
self.eventmanager.send_event(EventType.MetadataScrape, {
'meta': task.meta,
'mediainfo': task.mediainfo,
'fileitem': transferinfo.target_diritem
'fileitem': transferinfo.target_diritem,
'file_list': transferinfo.file_list_new,
'overwrite': False
})
# 移除已完成的任务
@@ -1204,7 +1206,8 @@ class TransferChain(ChainBase, metaclass=Singleton):
key=ProgressKey.FileTransfer)
progress.end(ProgressKey.FileTransfer)
return all_success, "".join(err_msgs)
error_msg = "".join(err_msgs[:2]) + (f",等{len(err_msgs)}个文件错误!" if len(err_msgs) > 2 else "")
return all_success, error_msg
def remote_transfer(self, arg_str: str, channel: MessageChannel,
userid: Union[str, int] = None, source: Optional[str] = None):

View File

@@ -225,6 +225,9 @@ class Command(metaclass=Singleton):
添加命令集合
"""
for cmd, command in source.items():
if not command.get("show", True):
continue
command_data = {
"type": command_type,
"description": command.get("description"),
@@ -261,6 +264,7 @@ class Command(metaclass=Singleton):
"func": self.send_plugin_event,
"description": command.get("desc"),
"category": command.get("category"),
"show": command.get("show", True),
"data": {
"etype": command.get("event"),
"data": command.get("data")
@@ -335,7 +339,8 @@ class Command(metaclass=Singleton):
return self._commands.get(cmd, {})
def register(self, cmd: str, func: Any, data: Optional[dict] = None,
desc: Optional[str] = None, category: Optional[str] = None) -> None:
desc: Optional[str] = None, category: Optional[str] = None,
show: bool = True) -> None:
"""
注册单个命令
"""
@@ -344,7 +349,8 @@ class Command(metaclass=Singleton):
"func": func,
"description": desc,
"category": category,
"data": data or {}
"data": data or {},
"show": show
}
def execute(self, cmd: str, data_str: Optional[str] = "",

View File

@@ -404,8 +404,6 @@ class MetaBase(object):
返回资源类型字符串,含分辨率
"""
ret_string = ""
if self.web_source:
ret_string = f"{ret_string} {self.web_source}"
if self.resource_type:
ret_string = f"{ret_string} {self.resource_type}"
if self.resource_effect:
@@ -420,8 +418,6 @@ class MetaBase(object):
返回资源类型字符串,不含分辨率
"""
ret_string = ""
if self.web_source:
ret_string = f"{ret_string} {self.web_source}"
if self.resource_type:
ret_string = f"{ret_string} {self.resource_type}"
if self.resource_effect:

View File

@@ -312,4 +312,3 @@ class StreamingPlatforms(metaclass=Singleton):
if name is None:
return False
return name.upper() in self._lookup_cache

View File

@@ -96,53 +96,58 @@ class CookieHelper:
return None, None, "获取源码失败"
# 查找用户名输入框
html = etree.HTML(html_text)
username_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("username"):
if html.xpath(xpath):
username_xpath = xpath
break
if not username_xpath:
return None, None, "未找到用户名输入框"
# 查找密码输入框
password_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("password"):
if html.xpath(xpath):
password_xpath = xpath
break
if not password_xpath:
return None, None, "未找到密码输入框"
# 处理二步验证码
otp_code = TwoFactorAuth(two_step_code).get_code()
# 查找二步验证码输入框
twostep_xpath = None
if otp_code:
for xpath in self._SITE_LOGIN_XPATH.get("twostep"):
try:
username_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("username"):
if html.xpath(xpath):
twostep_xpath = xpath
username_xpath = xpath
break
# 查找验证码输入框
captcha_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("captcha"):
if html.xpath(xpath):
captcha_xpath = xpath
break
# 查找验证码图片
captcha_img_url = None
if captcha_xpath:
for xpath in self._SITE_LOGIN_XPATH.get("captcha_img"):
if not username_xpath:
return None, None, "未找到用户名输入框"
# 查找密码输入框
password_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("password"):
if html.xpath(xpath):
captcha_img_url = html.xpath(xpath)[0]
password_xpath = xpath
break
if not captcha_img_url:
return None, None, "未找到验证码图片"
# 查找登录按钮
submit_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("submit"):
if html.xpath(xpath):
submit_xpath = xpath
break
if not submit_xpath:
return None, None, "未找到登录按钮"
if not password_xpath:
return None, None, "未找到密码输入框"
# 处理二步验证码
otp_code = TwoFactorAuth(two_step_code).get_code()
# 查找二步验证码输入框
twostep_xpath = None
if otp_code:
for xpath in self._SITE_LOGIN_XPATH.get("twostep"):
if html.xpath(xpath):
twostep_xpath = xpath
break
# 查找验证码输入框
captcha_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("captcha"):
if html.xpath(xpath):
captcha_xpath = xpath
break
# 查找验证码图片
captcha_img_url = None
if captcha_xpath:
for xpath in self._SITE_LOGIN_XPATH.get("captcha_img"):
if html.xpath(xpath):
captcha_img_url = html.xpath(xpath)[0]
break
if not captcha_img_url:
return None, None, "未找到验证码图片"
# 查找登录按钮
submit_xpath = None
for xpath in self._SITE_LOGIN_XPATH.get("submit"):
if html.xpath(xpath):
submit_xpath = xpath
break
if not submit_xpath:
return None, None, "未找到登录按钮"
finally:
if html is not None:
del html
# 点击登录按钮
try:
# 等待登录按钮准备好
@@ -185,19 +190,23 @@ class CookieHelper:
if not otp_code:
return None, None, "需要二次验证码"
html = etree.HTML(page.content())
for xpath in self._SITE_LOGIN_XPATH.get("twostep"):
if html.xpath(xpath):
try:
# 刷新一下 2fa code
otp_code = TwoFactorAuth(two_step_code).get_code()
page.fill(xpath, otp_code)
# 登录按钮 xpath 理论上相同,不再重复查找
page.click(submit_xpath)
page.wait_for_load_state("networkidle", timeout=30 * 1000)
except Exception as e:
logger.error(f"二次验证码输入失败:{str(e)}")
return None, None, f"二次验证码输入失败:{str(e)}"
break
try:
for xpath in self._SITE_LOGIN_XPATH.get("twostep"):
if html.xpath(xpath):
try:
# 刷新一下 2fa code
otp_code = TwoFactorAuth(two_step_code).get_code()
page.fill(xpath, otp_code)
# 登录按钮 xpath 理论上相同,不再重复查找
page.click(submit_xpath)
page.wait_for_load_state("networkidle", timeout=30 * 1000)
except Exception as e:
logger.error(f"二次验证码输入失败:{str(e)}")
return None, None, f"二次验证码输入失败:{str(e)}"
break
finally:
if html is not None:
del html
# 登录后的源码
html_text = page.content()
if not html_text:

View File

@@ -308,7 +308,7 @@ class PluginHelper(metaclass=Singleton):
return None, "连接仓库失败"
elif res.status_code != 200:
return None, f"连接仓库失败:{res.status_code} - " \
f"{'超出速率限制,请置GITHUB_TOKEN环境变量或稍后重试' if res.status_code == 403 else res.reason}"
f"{'超出速率限制,请置Github Token或稍后重试' if res.status_code == 403 else res.reason}"
try:
ret = res.json()

View File

@@ -8,6 +8,7 @@ from app.log import logger
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
from app.utils.system import SystemUtils
from version import APP_VERSION
class ResourceHelper:
@@ -58,15 +59,15 @@ class ResourceHelper:
if rtype == "auth":
# 站点认证资源
local_version = SitesHelper().auth_version
# 阻断v2.3.0以下的版本直接更新,避免无限重启
# 阻断站点认证资源v2.3.0以下的版本直接更新,避免无限重启
if StringUtils.compare_version(local_version, "<", "2.3.0"):
continue
# 阻断主程序版本v2.6.3以下的版本直接更新,避免搜索异常
if StringUtils.compare_version(APP_VERSION, "<", "2.6.3"):
continue
elif rtype == "sites":
# 站点索引资源
local_version = SitesHelper().indexer_version
# 阻断v2.0.0以下的版本直接更新,避免无限重启
if StringUtils.compare_version(local_version, "<", "2.0.0"):
continue
else:
continue
if StringUtils.compare_version(version, ">", local_version):
@@ -84,6 +85,8 @@ class ResourceHelper:
elif not r:
return None, "连接仓库失败"
files_info = r.json()
# 下载资源文件
success = True
for item in files_info:
save_path = need_updates.get(item.get("name"))
if not save_path:
@@ -96,16 +99,23 @@ class ResourceHelper:
timeout=180).get_res(download_url)
if not res:
logger.error(f"文件 {item.get('name')} 下载失败!")
success = False
break
elif res.status_code != 200:
logger.error(f"下载文件 {item.get('name')} 失败:{res.status_code} - {res.reason}")
success = False
break
# 创建插件文件夹
file_path = self._base_dir / save_path / item.get("name")
if not file_path.parent.exists():
file_path.parent.mkdir(parents=True, exist_ok=True)
# 写入文件
file_path.write_bytes(res.content)
logger.info("资源包更新完成,开始重启服务...")
SystemHelper.restart()
if success:
logger.info("资源包更新完成,开始重启服务...")
SystemHelper.restart()
else:
logger.warn("资源包更新失败,跳过升级!")
else:
logger.info("所有资源已最新,无需更新")
except json.JSONDecodeError:

View File

@@ -1,5 +1,6 @@
import multiprocessing
import os
import setproctitle
import signal
import sys
import threading
@@ -19,6 +20,9 @@ if SystemUtils.is_frozen():
from app.core.config import settings
from app.db.init import init_db, update_db
# 设置进程名
setproctitle.setproctitle(settings.PROJECT_NAME)
# uvicorn服务
Server = uvicorn.Server(Config(app, host=settings.HOST, port=settings.PORT,
reload=settings.DEV, workers=multiprocessing.cpu_count(),

View File

@@ -512,17 +512,38 @@ class FileManagerModule(_ModuleBase):
# 重命名格式
rename_format = settings.TV_RENAME_FORMAT \
if mediainfo.type == MediaType.TV else settings.MOVIE_RENAME_FORMAT
# 元数据补上常用属性,尽可能确保重命名后的路径不出现空白
meta = MetaInfo(mediainfo.title)
if meta.type == MediaType.UNKNOWN and mediainfo.type is not None:
meta.type = mediainfo.type
if meta.year is None:
meta.year = mediainfo.year
if meta.begin_season is None:
meta.begin_season = 1
if meta.begin_episode is None:
meta.begin_episode = 1
# 获取路径(重命名路径)
target_path = handler.get_rename_path(
path=dir_path,
template_string=rename_format,
rename_dict=handler.get_naming_dict(meta=MetaInfo(mediainfo.title),
rename_dict=handler.get_naming_dict(meta=meta,
mediainfo=mediainfo)
)
# 计算重命名中的文件夹层数
rename_format_level = len(rename_format.split("/")) - 1
rename_list = rename_format.split("/")
rename_format_level = len(rename_list) - 1
for level, name in enumerate(rename_list):
# 处理特例,有的人重命名第一层是年份、分辨率
if "{{title}}" in name:
# 找出含标题的这一层作为扫描路径
rename_format_level -= level
break
# 取相对路径的第1层目录
media_path = target_path.parents[rename_format_level - 1]
if dir_path.is_relative_to(media_path):
# 兜底检查,避免不必要的扫盘
logger.warn(f"{media_path} 是媒体库目录 {dir_path} 的父目录,忽略获取媒体文件列表,请检查重命名格式!")
continue
# 检索媒体文件
fileitem = storage_oper.get_item(media_path)
if not fileitem:
@@ -548,9 +569,12 @@ class FileManagerModule(_ModuleBase):
if not settings.LOCAL_EXISTS_SEARCH:
return None
logger.debug(f"正在本地媒体库中查找 {mediainfo.title_year}...")
# 检查媒体库
fileitems = self.media_files(mediainfo)
if not fileitems:
logger.debug(f"{mediainfo.title_year} 不在本地媒体库中")
return None
if mediainfo.type == MediaType.MOVIE:

View File

@@ -38,7 +38,7 @@ class Alist(StorageBase, metaclass=Singleton):
"""
初始化
"""
pass
self.__generate_token.clear_cache()
@property
def __get_base_url(self) -> str:
@@ -127,7 +127,7 @@ class Alist(StorageBase, metaclass=Singleton):
"""
检查存储是否可用
"""
pass
return True if self.__generate_token else False
def list(
self,

View File

@@ -191,7 +191,8 @@ class LocalStorage(StorageBase):
"""
return Path(fileitem.path)
def upload(self, fileitem: schemas.FileItem, path: Path, new_name: Optional[str] = None) -> Optional[schemas.FileItem]:
def upload(self, fileitem: schemas.FileItem, path: Path,
new_name: Optional[str] = None) -> Optional[schemas.FileItem]:
"""
上传文件
:param fileitem: 上传目录项
@@ -260,8 +261,11 @@ class LocalStorage(StorageBase):
"""
存储使用情况
"""
library_dirs = DirectoryHelper().get_local_library_dirs()
total_storage, free_storage = SystemUtils.space_usage([Path(d.library_path) for d in library_dirs])
directory_helper = DirectoryHelper()
total_storage, free_storage = SystemUtils.space_usage(
[Path(d.download_path) for d in directory_helper.get_local_download_dirs() if d.download_path] +
[Path(d.library_path) for d in directory_helper.get_local_library_dirs() if d.library_path]
)
return schemas.StorageUsage(
total=total_storage,
available=free_storage

View File

@@ -0,0 +1,549 @@
import threading
import time
from pathlib import Path
from typing import List, Optional, Union
import smbclient
from smbclient import ClientConfig, register_session, reset_connection_cache
from smbprotocol.exceptions import SMBException, SMBResponseException, SMBAuthenticationError
from app import schemas
from app.core.config import settings
from app.log import logger
from app.modules.filemanager import StorageBase
from app.schemas.types import StorageSchema
from app.utils.singleton import Singleton
# Module-level lock.
# NOTE(review): no use of this lock is visible in this part of the file -
# confirm what it is meant to guard, or remove it if it is unused.
lock = threading.Lock()
class SMBConnectionError(Exception):
    """Raised when the SMB connection is missing, broken, or fails its test."""
class SMB(StorageBase, metaclass=Singleton):
"""
SMB网络挂载存储相关操作 - 使用 smbclient 高级接口
"""
# 存储类型
schema = StorageSchema.SMB
# 支持的整理方式
transtype = {
"move": "移动",
"copy": "复制",
}
def __init__(self):
    """Reset all connection state, then attempt to connect from the stored configuration."""
    super().__init__()
    # Nothing is known about the target until _init_connection() reads the config.
    self._host = self._username = self._password = None
    self._server_path = None
    self._connected = False
    self._init_connection()
def _init_connection(self):
    """
    Read the SMB settings and set up the connection.

    Reads host, username, password, domain, share and port (default 445) from
    get_conf(). Returns silently when there is no configuration, and logs an
    error and returns when host or share is missing. Otherwise it builds the
    UNC server path, configures the smbclient client, registers a session for
    the host and runs a connection test.

    Any exception is logged and leaves self._connected set to False; nothing
    is propagated to the caller.
    """
    try:
        conf = self.get_conf()
        if not conf:
            return

        self._host = conf.get("host")
        self._username = conf.get("username")
        self._password = conf.get("password")
        domain = conf.get("domain", "")
        share = conf.get("share", "")
        port = conf.get("port", 445)

        if not all([self._host, share]):
            logger.error("【SMB】缺少必要的连接参数host 和 share")
            return

        # Build the server path in UNC form: \\host\share
        self._server_path = f"\\\\{self._host}\\{share}"

        # Configure the global smbclient client settings
        ClientConfig(
            username=self._username,
            password=self._password,
            domain=domain if domain else None,
            connection_timeout=60,
            port=port,
            auth_protocol="negotiate",  # use negotiated authentication
            require_secure_negotiate=False  # may need to be off for anonymous access
        )

        # Register a session for the host (the original note says this enables
        # connection pooling)
        register_session(
            self._host,
            username=self._username,
            password=self._password,
            port=port,
            encrypt=False,  # enable encryption if required
            connection_timeout=60
        )

        # Test the connection; raises on failure, so the flag below is only
        # set after a successful root listing
        self._test_connection()
        self._connected = True

        # Log differently for anonymous and authenticated access
        if self._is_anonymous_access():
            logger.info(f"【SMB】匿名连接成功{self._server_path}")
        else:
            logger.info(f"【SMB】认证连接成功{self._server_path} (用户:{self._username})")

    except Exception as e:
        logger.error(f"【SMB】连接初始化失败{e}")
        self._connected = False
def _test_connection(self):
    """
    Test the SMB connection by listing the root of the configured share.

    :raises SMBConnectionError: when the listing fails for any reason. The
        original exception is attached explicitly as ``__cause__`` so the
        traceback reads as a direct cause rather than an error raised while
        handling another error.
    """
    try:
        # Listing the share root is the probe for a working connection
        smbclient.listdir(self._server_path)
    except SMBAuthenticationError as e:
        raise SMBConnectionError(f"SMB认证失败{e}") from e
    except SMBResponseException as e:
        raise SMBConnectionError(f"SMB响应错误{e}") from e
    except SMBException as e:
        raise SMBConnectionError(f"SMB连接错误{e}") from e
    except Exception as e:
        raise SMBConnectionError(f"连接测试失败:{e}") from e
def _is_anonymous_access(self) -> bool:
"""
检查是否为匿名访问
"""
return not self._username and not self._password
def _check_connection(self):
"""
检查SMB连接状态
"""
if not self._connected or not self._server_path:
raise SMBConnectionError("【SMB】连接未建立或已断开请检查配置")
def _normalize_path(self, path: Union[str, Path]) -> str:
"""
标准化路径格式为SMB路径
"""
path_str = str(path)
# 处理根路径
if path_str in ["/", "\\"]:
return self._server_path
# 去除前导斜杠
if path_str.startswith("/"):
path_str = path_str[1:]
# 构建完整的SMB路径
if path_str:
return f"{self._server_path}\\{path_str.replace('/', '\\')}"
else:
return self._server_path
def _create_fileitem(self, stat_result, file_path: str, name: str) -> schemas.FileItem:
    """
    Build a FileItem for a single SMB entry.

    :param stat_result: stat object of the entry; st_mtime and st_size are read when available
    :param file_path: full SMB path of the entry
    :param name: name of the entry
    :return: a "dir" or "file" FileItem; if anything fails, the error is
        logged and a minimal "file" item is returned instead
    """
    try:
        # Directory detection is asked of the server rather than derived from stat_result
        is_directory = smbclient.path.isdir(file_path)

        # Turn the SMB path back into a storage path relative to the share
        relative_path = file_path.replace(self._server_path, "").replace("\\", "/")
        if not relative_path.startswith("/"):
            relative_path = f"/{relative_path}"
        if is_directory and not relative_path.endswith("/"):
            relative_path = f"{relative_path}/"

        # Fall back to the current time when no usable mtime is present
        try:
            modify_time = int(stat_result.st_mtime)
        except (AttributeError, TypeError):
            modify_time = int(time.time())

        if not is_directory:
            suffix = Path(name).suffix
            return schemas.FileItem(
                storage=self.schema.value,
                type="file",
                path=relative_path,
                name=name,
                basename=Path(name).stem,
                extension=suffix[1:] if suffix else None,
                size=getattr(stat_result, 'st_size', 0),
                modify_time=modify_time
            )

        return schemas.FileItem(
            storage=self.schema.value,
            type="dir",
            path=relative_path,
            name=name,
            basename=name,
            modify_time=modify_time
        )
    except Exception as e:
        logger.error(f"【SMB】创建文件项失败{e}")
        # Return a basic file item so callers still receive an entry
        return schemas.FileItem(
            storage=self.schema.value,
            type="file",
            path=file_path.replace(self._server_path, "").replace("\\", "/"),
            name=name,
            basename=Path(name).stem,
            modify_time=int(time.time())
        )
def init_storage(self):
    """
    Initialise the storage.

    Resets the smbclient connection cache, then runs the connection setup again.
    """
    # Reset the connection cache before reconnecting
    reset_connection_cache()
    self._init_connection()
def check(self) -> bool:
    """
    Report whether the storage is usable.

    :return: False when not connected or when the connection test fails (the
        connected flag is then cleared); True when the test passes
    """
    if self._connected:
        try:
            self._test_connection()
        except Exception as e:
            logger.debug(f"【SMB】连接检查失败{e}")
            self._connected = False
        else:
            return True
    return False
def list(self, fileitem: schemas.FileItem) -> List[schemas.FileItem]:
    """
    List the contents of a directory item.

    :param fileitem: item to browse; a "file" item yields just its own detail
    :return: the entries found; an empty list on any failure
    """
    try:
        self._check_connection()

        # A file has no children: answer with the file itself, if it resolves
        if fileitem.type == "file":
            single = self.detail(fileitem)
            return [single] if single else []

        smb_path = self._normalize_path(fileitem.path.rstrip("/"))

        try:
            names = smbclient.listdir(smb_path)
        except (SMBResponseException, SMBException) as e:
            logger.error(f"【SMB】列出目录失败: {smb_path} - {e}")
            return []

        found = []
        for entry_name in names:
            if entry_name in (".", ".."):
                continue
            entry_path = f"{smb_path}\\{entry_name}"
            try:
                entry_stat = smbclient.stat(entry_path)
                found.append(self._create_fileitem(entry_stat, entry_path, entry_name))
            except Exception as e:
                # An entry that cannot be inspected is skipped, not fatal
                logger.debug(f"【SMB】获取文件信息失败: {entry_path} - {e}")
        return found

    except Exception as e:
        logger.error(f"【SMB】列出文件失败: {e}")
        return []
def create_folder(self, fileitem: schemas.FileItem, name: str) -> Optional[schemas.FileItem]:
    """
    Create a directory named ``name`` inside the directory ``fileitem``.

    :param fileitem: parent directory item
    :param name: name of the directory to create
    :return: a "dir" FileItem describing the new directory, or None on failure
    """
    try:
        self._check_connection()

        parent_rel = fileitem.path.rstrip("/")
        parent_smb = self._normalize_path(parent_rel)
        smbclient.mkdir(f"{parent_smb}\\{name}")

        # Describe the new directory without asking the server again
        return schemas.FileItem(
            storage=self.schema.value,
            type="dir",
            path=f"{parent_rel}/{name}/",
            name=name,
            basename=name,
            modify_time=int(time.time())
        )
    except Exception as e:
        logger.error(f"【SMB】创建目录失败: {e}")
        return None
def get_folder(self, path: Path) -> Optional[schemas.FileItem]:
    """
    Get a directory, creating it (and missing ancestors) when absent.

    :param path: directory path
    :return: the directory item, or None when a level could not be created
    """
    # Fast path: the directory is already there
    folder = self.get_item(path)
    if folder:
        return folder

    # Walk down from the root, creating each missing level
    current_path = Path("/")
    for part in path.parts[1:]:  # the first part is the root itself
        current_path = current_path / part
        folder = self.get_item(current_path)
        if folder:
            continue

        parent_folder = self.get_item(current_path.parent)
        if not parent_folder:
            logger.error(f"【SMB】父目录不存在: {current_path.parent}")
            return None

        folder = self.create_folder(parent_folder, part)
        if not folder:
            return None

    return folder
def get_item(self, path: Path) -> Optional[schemas.FileItem]:
    """
    Get a file or directory; returns None when it does not exist.

    :param path: path of the file or directory
    :return: the matching item, a synthetic item for the root "/", or None
             when the path is missing or the lookup fails
    """
    try:
        self._check_connection()

        if str(path) != "/":
            smb_path = self._normalize_path(str(path).rstrip("/"))

            # Bail out when the path does not exist
            if not smbclient.path.exists(smb_path):
                return None

            return self._create_fileitem(
                smbclient.stat(smb_path), smb_path, Path(path).name
            )

        # The root directory is described without querying the server
        return schemas.FileItem(
            storage=self.schema.value,
            type="dir",
            path="/",
            name="",
            basename="",
            modify_time=int(time.time())
        )
    except Exception as e:
        logger.debug(f"【SMB】获取文件项失败: {e}")
        return None
def detail(self, fileitem: schemas.FileItem) -> Optional[schemas.FileItem]:
    """
    Get file details.

    :param fileitem: item whose ``path`` is looked up again via ``get_item``
    :return: whatever ``get_item`` returns for that path
    """
    item_path = Path(fileitem.path)
    return self.get_item(item_path)
def delete(self, fileitem: schemas.FileItem) -> bool:
    """
    Delete a file or directory.

    :param fileitem: item to delete; ``type == "dir"`` selects directory removal
    :return: True on success, False when any step raised
    """
    try:
        self._check_connection()
        smb_path = self._normalize_path(fileitem.path.rstrip("/"))

        # Directories and files use different smbclient calls
        # NOTE(review): smbclient.rmdir presumably fails on non-empty directories — confirm
        remover = smbclient.rmdir if fileitem.type == "dir" else smbclient.remove
        remover(smb_path)

        logger.info(f"【SMB】删除成功: {fileitem.path}")
        return True
    except Exception as e:
        logger.error(f"【SMB】删除失败: {e}")
        return False
def rename(self, fileitem: schemas.FileItem, name: str) -> bool:
    """
    Rename a file.

    :param fileitem: item to rename
    :param name: new name; the item stays in its current parent directory
    :return: True on success, False when any step raised
    """
    try:
        self._check_connection()

        source = self._normalize_path(fileitem.path.rstrip("/"))
        # The destination is the same parent directory with the new name
        destination = self._normalize_path(str(Path(fileitem.path).parent / name))

        smbclient.rename(source, destination)

        logger.info(f"【SMB】重命名成功: {fileitem.path} -> {name}")
        return True
    except Exception as e:
        logger.error(f"【SMB】重命名失败: {e}")
        return False
def download(self, fileitem: schemas.FileItem, path: Path = None) -> Optional[Path]:
    """
    Download a file.

    :param fileitem: remote file to download
    :param path: local destination; when falsy, the file is written to
                 ``settings.TEMP_PATH`` under the item's name
    :return: local path of the downloaded file, or None on failure
    """
    try:
        self._check_connection()

        smb_path = self._normalize_path(fileitem.path)
        if path:
            local_path = path
        else:
            local_path = settings.TEMP_PATH / fileitem.name

        # Make sure the local directory exists
        local_path.parent.mkdir(parents=True, exist_ok=True)

        # Stream the content in 1MB chunks
        buffer_size = 1024 * 1024
        with smbclient.open_file(smb_path, mode="rb") as src_file, \
                open(local_path, "wb") as dst_file:
            chunk = src_file.read(buffer_size)
            while chunk:
                dst_file.write(chunk)
                chunk = src_file.read(buffer_size)

        logger.info(f"【SMB】下载成功: {fileitem.path} -> {local_path}")
        return local_path
    except Exception as e:
        logger.error(f"【SMB】下载失败: {e}")
        return None
def upload(self, fileitem: schemas.FileItem, path: Path,
           new_name: Optional[str] = None) -> Optional[schemas.FileItem]:
    """
    Upload a file.

    :param fileitem: target directory
    :param path: local file to upload
    :param new_name: name to store the file under; defaults to the local file name
    :return: item for the uploaded file as reported by ``get_item``, or None on failure
    """
    try:
        self._check_connection()

        if new_name:
            target_name = new_name
        else:
            target_name = path.name
        target_path = Path(fileitem.path) / target_name
        smb_path = self._normalize_path(str(target_path))

        # Stream the content in 1MB chunks
        buffer_size = 1024 * 1024
        with open(path, "rb") as src_file, \
                smbclient.open_file(smb_path, mode="wb") as dst_file:
            chunk = src_file.read(buffer_size)
            while chunk:
                dst_file.write(chunk)
                chunk = src_file.read(buffer_size)

        logger.info(f"【SMB】上传成功: {path} -> {target_path}")

        # Look the uploaded file up again to describe it
        return self.get_item(target_path)
    except Exception as e:
        logger.error(f"【SMB】上传失败: {e}")
        return None
def copy(self, fileitem: schemas.FileItem, path: Path, new_name: str) -> bool:
    """
    Copy a file.

    The file is downloaded to a temporary local file and uploaded again to
    the target directory. The temporary file is removed on every exit path,
    including when the target directory cannot be found.

    :param fileitem: file to copy
    :param path: target directory
    :param new_name: name of the copy
    :return: True when the upload produced an item, False otherwise
    """
    temp_file = None
    try:
        # Download to a temporary file
        temp_file = self.download(fileitem)
        if not temp_file:
            return False

        # Resolve the target directory
        target_folder = self.get_item(path)
        if not target_folder:
            return False

        # Upload to the target location
        return self.upload(target_folder, temp_file, new_name) is not None
    except Exception as e:
        logger.error(f"【SMB】复制失败: {e}")
        return False
    finally:
        # Cleanup lives in ``finally`` so early returns do not leak the temp
        # file; a cleanup failure must not turn a finished copy into a failure.
        if temp_file:
            try:
                if temp_file.exists():
                    temp_file.unlink()
            except OSError as e:
                logger.debug(f"【SMB】清理临时文件失败: {e}")
def move(self, fileitem: schemas.FileItem, path: Path, new_name: str) -> bool:
    """
    Move a file.

    Implemented as a copy followed by deleting the source.

    :param fileitem: file to move
    :param path: target directory
    :param new_name: name of the file at the target
    :return: True only when both the copy and the delete succeeded
    """
    try:
        # Copy first; the source is only touched after a successful copy
        if self.copy(fileitem, path, new_name):
            # Then remove the original
            if self.delete(fileitem):
                return True
            logger.warn(f"【SMB】删除原文件失败: {fileitem.path}")
        return False
    except Exception as e:
        logger.error(f"【SMB】移动失败: {e}")
        return False
def link(self, fileitem: schemas.FileItem, target_file: Path) -> bool:
    """
    Hard-link a file.

    Not implemented for this storage: nothing is linked and the call
    reports failure.

    :param fileitem: source file (unused)
    :param target_file: link destination (unused)
    :return: always False
    """
    # Return an explicit bool to honor the annotation instead of falling
    # through to an implicit None
    return False
def softlink(self, fileitem: schemas.FileItem, target_file: Path) -> bool:
    """
    Soft-link a file.

    Not implemented for this storage: nothing is linked and the call
    reports failure.

    :param fileitem: source file (unused)
    :param target_file: link destination (unused)
    :return: always False
    """
    # Return an explicit bool to honor the annotation instead of falling
    # through to an implicit None
    return False
def usage(self) -> Optional[schemas.StorageUsage]:
    """
    Storage usage.

    :return: total and available size taken from ``smbclient.stat_volume``
             on the server path, or None when the query fails
    """
    try:
        self._check_connection()
        volume_stat = smbclient.stat_volume(self._server_path)
        total_size = volume_stat.total_size
        free_size = volume_stat.caller_available_size
        return schemas.StorageUsage(total=total_size, available=free_size)
    except Exception as e:
        logger.error(f"【SMB】获取存储使用情况失败: {e}")
        return None
def __del__(self):
    """
    Destructor: clean up the connection.
    """
    try:
        # smbclient manages its own connection pool; we only reset the cache.
        # The attribute may be missing on a partially built instance, hence
        # the default.
        if getattr(self, '_connected', False):
            reset_connection_cache()
    except Exception as e:
        logger.debug(f"【SMB】清理连接失败: {e}")

View File

@@ -219,8 +219,11 @@ class U115Pan(StorageBase, metaclass=Singleton):
# 处理速率限制
if resp.status_code == 429:
reset_time = int(resp.headers.get("X-RateLimit-Reset", 60))
time.sleep(reset_time + 5)
reset_time = 5 + int(resp.headers.get("X-RateLimit-Reset", 60))
logger.debug(
f"【115】{method} 请求 {endpoint} 限流,等待{reset_time}秒后重试"
)
time.sleep(reset_time)
return self._request_api(method, endpoint, result_key, **kwargs)
# 处理请求错误

View File

@@ -5,10 +5,11 @@ from app.core.config import settings
from app.core.context import TorrentInfo
from app.db.site_oper import SiteOper
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper, SiteSpider
from app.helper.sites import SitesHelper
from app.log import logger
from app.modules import _ModuleBase
from app.modules.indexer.parser import SiteParserBase
from app.modules.indexer.spider import SiteSpider
from app.modules.indexer.spider.haidan import HaiDanSpider
from app.modules.indexer.spider.hddolby import HddolbySpider
from app.modules.indexer.spider.mtorrent import MTorrentSpider

View File

@@ -14,15 +14,18 @@ class DiscuzUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
user_info = html.xpath('//a[contains(@href, "&uid=")]')
if user_info:
user_id_match = re.search(r"&uid=(\d+)", user_info[0].attrib['href'])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._torrent_seeding_page = f"forum.php?&mod=torrents&cat_5up=on"
self._user_detail_page = user_info[0].attrib['href']
self.username = user_info[0].text.strip()
try:
user_info = html.xpath('//a[contains(@href, "&uid=")]')
if user_info:
user_id_match = re.search(r"&uid=(\d+)", user_info[0].attrib['href'])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._torrent_seeding_page = f"forum.php?&mod=torrents&cat_5up=on"
self._user_detail_page = user_info[0].attrib['href']
self.username = user_info[0].text.strip()
finally:
if html is not None:
del html
def _parse_site_page(self, html_text: str):
pass
@@ -34,40 +37,44 @@ class DiscuzUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
user_levels_text = html.xpath('//a[contains(@href, "usergroup")]/text()')
if user_levels_text:
self.user_level = user_levels_text[-1].strip()
# 用户等级
user_levels_text = html.xpath('//a[contains(@href, "usergroup")]/text()')
if user_levels_text:
self.user_level = user_levels_text[-1].strip()
# 加入日期
join_at_text = html.xpath('//li[em[text()="注册时间"]]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip())
# 加入日期
join_at_text = html.xpath('//li[em[text()="注册时间"]]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip())
# 分享率
ratio_text = html.xpath('//li[contains(.//text(), "分享率")]//text()')
if ratio_text:
ratio_match = re.search(r"\(([\d,.]+)\)", ratio_text[0])
if ratio_match and ratio_match.group(1).strip():
self.bonus = StringUtils.str_float(ratio_match.group(1))
# 分享率
ratio_text = html.xpath('//li[contains(.//text(), "分享率")]//text()')
if ratio_text:
ratio_match = re.search(r"\(([\d,.]+)\)", ratio_text[0])
if ratio_match and ratio_match.group(1).strip():
self.bonus = StringUtils.str_float(ratio_match.group(1))
# 积分
bouns_text = html.xpath('//li[em[text()="积分"]]/text()')
if bouns_text:
self.bonus = StringUtils.str_float(bouns_text[0].strip())
# 积分
bouns_text = html.xpath('//li[em[text()="积分"]]/text()')
if bouns_text:
self.bonus = StringUtils.str_float(bouns_text[0].strip())
# 上传
upload_text = html.xpath('//li[em[contains(text(),"上传量")]]/text()')
if upload_text:
self.upload = StringUtils.num_filesize(upload_text[0].strip().split('/')[-1])
# 上传
upload_text = html.xpath('//li[em[contains(text(),"上传量")]]/text()')
if upload_text:
self.upload = StringUtils.num_filesize(upload_text[0].strip().split('/')[-1])
# 下载
download_text = html.xpath('//li[em[contains(text(),"下载量")]]/text()')
if download_text:
self.download = StringUtils.num_filesize(download_text[0].strip().split('/')[-1])
# 下载
download_text = html.xpath('//li[em[contains(text(),"下载量")]]/text()')
if download_text:
self.download = StringUtils.num_filesize(download_text[0].strip().split('/')[-1])
finally:
if html is not None:
del html
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]:
"""
@@ -77,44 +84,48 @@ class DiscuzUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 3
seeders_col = 4
# 搜索size列
if html.xpath('//tr[position()=1]/td[.//img[@class="size"] and .//img[@alt="size"]]'):
size_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="size"] '
'and .//img[@alt="size"]]/preceding-sibling::td')) + 1
# 搜索seeders列
if html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] and .//img[@alt="seeders"]]'):
seeders_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] '
'and .//img[@alt="seeders"]]/preceding-sibling::td')) + 1
size_col = 3
seeders_col = 4
# 搜索size列
if html.xpath('//tr[position()=1]/td[.//img[@class="size"] and .//img[@alt="size"]]'):
size_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="size"] '
'and .//img[@alt="size"]]/preceding-sibling::td')) + 1
# 搜索seeders列
if html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] and .//img[@alt="seeders"]]'):
seeders_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] '
'and .//img[@alt="seeders"]]/preceding-sibling::td')) + 1
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//tr[position()>1]/td[{seeders_col}]//text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//tr[position()>1]/td[{seeders_col}]//text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href')
if next_page_text:
next_page = next_page_text[-1].strip()
# 是否存在下页数据
next_page = None
next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href')
if next_page_text:
next_page = next_page_text[-1].strip()
finally:
if html is not None:
del html
return next_page

View File

@@ -24,10 +24,13 @@ class FileListSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()')
if ret:
self.username = str(ret[0])
try:
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()')
if ret:
self.username = str(ret[0])
finally:
if html is not None:
del html
def _parse_user_traffic_info(self, html_text: str):
"""
@@ -40,39 +43,41 @@ class FileListSiteUserInfo(SiteParserBase):
def _parse_user_detail_info(self, html_text: str):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
try:
upload_html = html.xpath('//table//tr/td[text()="Uploaded"]/following-sibling::td//text()')
if upload_html:
self.upload = StringUtils.num_filesize(upload_html[0])
download_html = html.xpath('//table//tr/td[text()="Downloaded"]/following-sibling::td//text()')
if download_html:
self.download = StringUtils.num_filesize(download_html[0])
upload_html = html.xpath('//table//tr/td[text()="Uploaded"]/following-sibling::td//text()')
if upload_html:
self.upload = StringUtils.num_filesize(upload_html[0])
download_html = html.xpath('//table//tr/td[text()="Downloaded"]/following-sibling::td//text()')
if download_html:
self.download = StringUtils.num_filesize(download_html[0])
ratio_html = html.xpath('//table//tr/td[text()="Share ratio"]/following-sibling::td//text()')
if ratio_html:
share_ratio = StringUtils.str_float(ratio_html[0])
else:
share_ratio = 0
self.ratio = 0 if self.download == 0 else share_ratio
ratio_html = html.xpath('//table//tr/td[text()="Share ratio"]/following-sibling::td//text()')
if ratio_html:
share_ratio = StringUtils.str_float(ratio_html[0])
else:
share_ratio = 0
self.ratio = 0 if self.download == 0 else share_ratio
seed_html = html.xpath('//table//tr/td[text()="Seed bonus"]/following-sibling::td//text()')
if seed_html:
self.seeding = StringUtils.str_int(seed_html[1])
self.seeding_size = StringUtils.num_filesize(seed_html[3])
seed_html = html.xpath('//table//tr/td[text()="Seed bonus"]/following-sibling::td//text()')
if seed_html:
self.seeding = StringUtils.str_int(seed_html[1])
self.seeding_size = StringUtils.num_filesize(seed_html[3])
user_level_html = html.xpath('//table//tr/td[text()="Class"]/following-sibling::td//text()')
if user_level_html:
self.user_level = user_level_html[0].strip()
user_level_html = html.xpath('//table//tr/td[text()="Class"]/following-sibling::td//text()')
if user_level_html:
self.user_level = user_level_html[0].strip()
join_at_html = html.xpath('//table//tr/td[contains(text(), "Join")]/following-sibling::td//text()')
if join_at_html:
join_at = (join_at_html[0].split("("))[0].strip()
self.join_at = StringUtils.unify_datetime_str(join_at)
join_at_html = html.xpath('//table//tr/td[contains(text(), "Join")]/following-sibling::td//text()')
if join_at_html:
join_at = (join_at_html[0].split("("))[0].strip()
self.join_at = StringUtils.unify_datetime_str(join_at)
bonus_html = html.xpath('//a[contains(@href, "shop.php")]')
if bonus_html:
self.bonus = StringUtils.str_float(bonus_html[0].xpath("string(.)").strip())
pass
bonus_html = html.xpath('//a[contains(@href, "shop.php")]')
if bonus_html:
self.bonus = StringUtils.str_float(bonus_html[0].xpath("string(.)").strip())
finally:
if html is not None:
del html
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: Optional[bool] = False) -> Optional[str]:
"""
@@ -82,28 +87,32 @@ class FileListSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 6
seeders_col = 7
size_col = 6
seeders_col = 7
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table/tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table/tr[position()>1]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table/tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table/tr[position()>1]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding_info.extend(page_seeding_info)
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
# 是否存在下页数据
next_page = None
finally:
if html is not None:
del html
return next_page

View File

@@ -14,46 +14,49 @@ class GazelleSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
try:
tmps = html.xpath('//a[contains(@href, "user.php?id=")]')
if tmps:
user_id_match = re.search(r"user.php\?id=(\d+)", tmps[0].attrib['href'])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._torrent_seeding_page = f"torrents.php?type=seeding&userid={self.userid}"
self._user_detail_page = f"user.php?id={self.userid}"
self.username = tmps[0].text.strip()
tmps = html.xpath('//a[contains(@href, "user.php?id=")]')
if tmps:
user_id_match = re.search(r"user.php\?id=(\d+)", tmps[0].attrib['href'])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._torrent_seeding_page = f"torrents.php?type=seeding&userid={self.userid}"
self._user_detail_page = f"user.php?id={self.userid}"
self.username = tmps[0].text.strip()
tmps = html.xpath('//*[@id="header-uploaded-value"]/@data-value')
if tmps:
self.upload = StringUtils.num_filesize(tmps[0])
else:
tmps = html.xpath('//li[@id="stats_seeding"]/span/text()')
tmps = html.xpath('//*[@id="header-uploaded-value"]/@data-value')
if tmps:
self.upload = StringUtils.num_filesize(tmps[0])
else:
tmps = html.xpath('//li[@id="stats_seeding"]/span/text()')
if tmps:
self.upload = StringUtils.num_filesize(tmps[0])
tmps = html.xpath('//*[@id="header-downloaded-value"]/@data-value')
if tmps:
self.download = StringUtils.num_filesize(tmps[0])
else:
tmps = html.xpath('//li[@id="stats_leeching"]/span/text()')
tmps = html.xpath('//*[@id="header-downloaded-value"]/@data-value')
if tmps:
self.download = StringUtils.num_filesize(tmps[0])
else:
tmps = html.xpath('//li[@id="stats_leeching"]/span/text()')
if tmps:
self.download = StringUtils.num_filesize(tmps[0])
self.ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
self.ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
tmps = html.xpath('//a[contains(@href, "bonus.php")]/@data-tooltip')
if tmps:
bonus_match = re.search(r"([\d,.]+)", tmps[0])
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
else:
tmps = html.xpath('//a[contains(@href, "bonus.php")]')
tmps = html.xpath('//a[contains(@href, "bonus.php")]/@data-tooltip')
if tmps:
bonus_text = tmps[0].xpath("string(.)")
bonus_match = re.search(r"([\d,.]+)", bonus_text)
bonus_match = re.search(r"([\d,.]+)", tmps[0])
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
else:
tmps = html.xpath('//a[contains(@href, "bonus.php")]')
if tmps:
bonus_text = tmps[0].xpath("string(.)")
bonus_match = re.search(r"([\d,.]+)", bonus_text)
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
finally:
if html is not None:
del html
def _parse_site_page(self, html_text: str):
pass
@@ -65,27 +68,31 @@ class GazelleSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
user_levels_text = html.xpath('//*[@id="class-value"]/@data-value')
if user_levels_text:
self.user_level = user_levels_text[0].strip()
else:
user_levels_text = html.xpath('//li[contains(text(), "用户等级")]/text()')
# 用户等级
user_levels_text = html.xpath('//*[@id="class-value"]/@data-value')
if user_levels_text:
self.user_level = user_levels_text[0].split(':')[1].strip()
self.user_level = user_levels_text[0].strip()
else:
user_levels_text = html.xpath('//li[contains(text(), "用户等级")]/text()')
if user_levels_text:
self.user_level = user_levels_text[0].split(':')[1].strip()
# 加入日期
join_at_text = html.xpath('//*[@id="join-date-value"]/@data-value')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip())
else:
join_at_text = html.xpath(
'//div[contains(@class, "box_userinfo_stats")]//li[contains(text(), "加入时间")]/span/text()')
# 加入日期
join_at_text = html.xpath('//*[@id="join-date-value"]/@data-value')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip())
else:
join_at_text = html.xpath(
'//div[contains(@class, "box_userinfo_stats")]//li[contains(text(), "加入时间")]/span/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip())
finally:
if html is not None:
del html
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: Optional[bool] = False) -> Optional[str]:
"""
@@ -95,48 +102,52 @@ class GazelleSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 3
# 搜索size列
if html.xpath('//table[contains(@id, "torrent")]//tr[1]/td'):
size_col = len(html.xpath('//table[contains(@id, "torrent")]//tr[1]/td')) - 3
# 搜索seeders列
seeders_col = size_col + 2
size_col = 3
# 搜索size列
if html.xpath('//table[contains(@id, "torrent")]//tr[1]/td'):
size_col = len(html.xpath('//table[contains(@id, "torrent")]//tr[1]/td')) - 3
# 搜索seeders列
seeders_col = size_col + 2
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{seeders_col}]/text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{seeders_col}]/text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = int(seeding_seeders[i])
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = int(seeding_seeders[i])
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
if multi_page:
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
else:
if not self.seeding:
self.seeding = page_seeding
if not self.seeding_size:
self.seeding_size = page_seeding_size
if not self.seeding_info:
self.seeding_info = page_seeding_info
if multi_page:
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
else:
if not self.seeding:
self.seeding = page_seeding
if not self.seeding_size:
self.seeding_size = page_seeding_size
if not self.seeding_info:
self.seeding_info = page_seeding_info
# 是否存在下页数据
next_page = None
next_page_text = html.xpath('//a[contains(.//text(), "Next") or contains(.//text(), "下一页")]/@href')
if next_page_text:
next_page = next_page_text[-1].strip()
# 是否存在下页数据
next_page = None
next_page_text = html.xpath('//a[contains(.//text(), "Next") or contains(.//text(), "下一页")]/@href')
if next_page_text:
next_page = next_page_text[-1].strip()
finally:
if html is not None:
del html
return next_page

View File

@@ -14,67 +14,79 @@ class IptSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
tmps = html.xpath('//a[contains(@href, "/u/")]//text()')
tmps_id = html.xpath('//a[contains(@href, "/u/")]/@href')
if tmps:
self.username = str(tmps[-1])
if tmps_id:
user_id_match = re.search(r"/u/(\d+)", tmps_id[0])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._user_detail_page = f"user.php?u={self.userid}"
self._torrent_seeding_page = f"peers?u={self.userid}"
try:
tmps = html.xpath('//a[contains(@href, "/u/")]//text()')
tmps_id = html.xpath('//a[contains(@href, "/u/")]/@href')
if tmps:
self.username = str(tmps[-1])
if tmps_id:
user_id_match = re.search(r"/u/(\d+)", tmps_id[0])
if user_id_match and user_id_match.group().strip():
self.userid = user_id_match.group(1)
self._user_detail_page = f"user.php?u={self.userid}"
self._torrent_seeding_page = f"peers?u={self.userid}"
tmps = html.xpath('//div[@class = "stats"]/div/div')
if tmps:
self.upload = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[1]).strip())
self.download = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[2]).strip())
self.seeding = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[0])
self.leeching = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[1])
self.ratio = StringUtils.str_float(str(tmps[0].xpath('span/text()')[0]).strip().replace('-', '0'))
self.bonus = StringUtils.str_float(tmps[0].xpath('a')[3].xpath('text()')[0])
tmps = html.xpath('//div[@class = "stats"]/div/div')
if tmps:
self.upload = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[1]).strip())
self.download = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[2]).strip())
self.seeding = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[0])
self.leeching = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[1])
self.ratio = StringUtils.str_float(str(tmps[0].xpath('span/text()')[0]).strip().replace('-', '0'))
self.bonus = StringUtils.str_float(tmps[0].xpath('a')[3].xpath('text()')[0])
finally:
if html is not None:
del html
def _parse_site_page(self, html_text: str):
pass
def _parse_user_detail_info(self, html_text: str):
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
try:
if not StringUtils.is_valid_html_element(html):
return
user_levels_text = html.xpath('//tr/th[text()="Class"]/following-sibling::td[1]/text()')
if user_levels_text:
self.user_level = user_levels_text[0].strip()
user_levels_text = html.xpath('//tr/th[text()="Class"]/following-sibling::td[1]/text()')
if user_levels_text:
self.user_level = user_levels_text[0].strip()
# 加入日期
join_at_text = html.xpath('//tr/th[text()="Join date"]/following-sibling::td[1]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0])
# 加入日期
join_at_text = html.xpath('//tr/th[text()="Join date"]/following-sibling::td[1]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0])
finally:
if html is not None:
del html
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]:
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
# seeding start
seeding_end_pos = 3
if html.xpath('//tr/td[text() = "Leechers"]'):
seeding_end_pos = len(html.xpath('//tr/td[text() = "Leechers"]/../preceding-sibling::tr')) + 1
seeding_end_pos = seeding_end_pos - 3
try:
if not StringUtils.is_valid_html_element(html):
return None
# seeding start
seeding_end_pos = 3
if html.xpath('//tr/td[text() = "Leechers"]'):
seeding_end_pos = len(html.xpath('//tr/td[text() = "Leechers"]/../preceding-sibling::tr')) + 1
seeding_end_pos = seeding_end_pos - 3
page_seeding = 0
page_seeding_size = 0
seeding_torrents = html.xpath('//tr/td[text() = "Seeders"]/../following-sibling::tr/td[position()=6]/text()')
if seeding_torrents:
page_seeding = seeding_end_pos
for per_size in seeding_torrents[:seeding_end_pos]:
if '(' in per_size and ')' in per_size:
per_size = per_size.split('(')[-1]
per_size = per_size.split(')')[0]
page_seeding = 0
page_seeding_size = 0
seeding_torrents = html.xpath('//tr/td[text() = "Seeders"]/../following-sibling::tr/td[position()=6]/text()')
if seeding_torrents:
page_seeding = seeding_end_pos
for per_size in seeding_torrents[:seeding_end_pos]:
if '(' in per_size and ')' in per_size:
per_size = per_size.split('(')[-1]
per_size = per_size.split(')')[0]
page_seeding_size += StringUtils.num_filesize(per_size)
page_seeding_size += StringUtils.num_filesize(per_size)
self.seeding = page_seeding
self.seeding_size = page_seeding_size
self.seeding = page_seeding
self.seeding_size = page_seeding_size
finally:
if html is not None:
del html
def _parse_user_traffic_info(self, html_text: str):
pass

View File

@@ -23,12 +23,16 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
if not html_text:
return
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
total_row = html.xpath('//table[@class="table table-bordered"]//tr[td[1][normalize-space()="Total"]]')
if not total_row:
return
seeding_count = total_row[0].xpath('./td[2]/text()')
seeding_size = total_row[0].xpath('./td[3]/text()')
self.seeding = StringUtils.str_int(seeding_count[0]) if seeding_count else 0
self.seeding_size = StringUtils.num_filesize(seeding_size[0].strip()) if seeding_size else 0
try:
if not StringUtils.is_valid_html_element(html):
return
total_row = html.xpath('//table[@class="table table-bordered"]//tr[td[1][normalize-space()="Total"]]')
if not total_row:
return
seeding_count = total_row[0].xpath('./td[2]/text()')
seeding_size = total_row[0].xpath('./td[3]/text()')
self.seeding = StringUtils.str_int(seeding_count[0]) if seeding_count else 0
self.seeding_size = StringUtils.num_filesize(seeding_size[0].strip()) if seeding_size else 0
finally:
if html is not None:
del html

View File

@@ -17,21 +17,25 @@ class NexusHhanclubSiteUserInfo(NexusPhpSiteUserInfo):
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
# 上传、下载、分享率
upload_match = re.search(r"[_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[2]/div[4]/text()')[0])
download_match = re.search(r"[_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[2]/div[5]/text()')[0])
ratio_match = re.search(r"分享率][:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[1]/div[1]/div/text()')[0])
try:
# 上传、下载、分享率
upload_match = re.search(r"[_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[2]/div[4]/text()')[0])
download_match = re.search(r"[_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[2]/div[5]/text()')[0])
ratio_match = re.search(r"分享率][:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)",
html.xpath('//*[@id="user-info-panel"]/div[2]/div[1]/div[1]/div/text()')[0])
# 计算分享率
self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0
self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0
# 优先使用页面上的分享率
calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
self.ratio = StringUtils.str_float(ratio_match.group(1)) if (
ratio_match and ratio_match.group(1).strip()) else calc_ratio
# 计算分享率
self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0
self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0
# 优先使用页面上的分享率
calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
self.ratio = StringUtils.str_float(ratio_match.group(1)) if (
ratio_match and ratio_match.group(1).strip()) else calc_ratio
finally:
if html is not None:
del html
def _parse_user_detail_info(self, html_text: str):
"""
@@ -42,12 +46,16 @@ class NexusHhanclubSiteUserInfo(NexusPhpSiteUserInfo):
super()._parse_user_detail_info(html_text)
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
# 加入时间
join_at_text = html.xpath('//*[@id="mainContent"]/div/div[2]/div[4]/div[3]/span[2]/text()[1]')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip())
try:
if not StringUtils.is_valid_html_element(html):
return
# 加入时间
join_at_text = html.xpath('//*[@id="mainContent"]/div/div[2]/div[4]/div[3]/span[2]/text()[1]')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip())
finally:
if html is not None:
del html
def _get_user_level(self, html):
super()._get_user_level(html)

View File

@@ -34,21 +34,25 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
try:
if not StringUtils.is_valid_html_element(html):
return
message_labels = html.xpath('//a[@href="messages.php"]/..')
message_labels.extend(html.xpath('//a[contains(@href, "messages.php")]/..'))
if message_labels:
message_text = message_labels[0].xpath("string(.)")
message_labels = html.xpath('//a[@href="messages.php"]/..')
message_labels.extend(html.xpath('//a[contains(@href, "messages.php")]/..'))
if message_labels:
message_text = message_labels[0].xpath("string(.)")
logger.debug(f"{self._site_name} 消息原始信息 {message_text}")
message_unread_match = re.findall(r"[^Date](信息箱\s*|\((?![^)]*:)|你有\xa0)(\d+)", message_text)
logger.debug(f"{self._site_name} 消息原始信息 {message_text}")
message_unread_match = re.findall(r"[^Date](信息箱\s*|\((?![^)]*:)|你有\xa0)(\d+)", message_text)
if message_unread_match and len(message_unread_match[-1]) == 2:
self.message_unread = StringUtils.str_int(message_unread_match[-1][1])
elif message_text.isdigit():
self.message_unread = StringUtils.str_int(message_text)
if message_unread_match and len(message_unread_match[-1]) == 2:
self.message_unread = StringUtils.str_int(message_unread_match[-1][1])
elif message_text.isdigit():
self.message_unread = StringUtils.str_int(message_text)
finally:
if html is not None:
del html
def _parse_user_base_info(self, html_text: str):
"""
@@ -61,18 +65,23 @@ class NexusPhpSiteUserInfo(SiteParserBase):
self._parse_message_unread(html_text)
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
try:
if not StringUtils.is_valid_html_element(html):
return
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//b//text()')
if ret:
self.username = str(ret[0])
return
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()')
if ret:
self.username = str(ret[0])
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//b//text()')
if ret:
self.username = str(ret[0])
return
ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()')
if ret:
self.username = str(ret[0])
ret = html.xpath('//a[contains(@href, "userdetails")]//strong//text()')
finally:
if html is not None:
del html
ret = html.xpath('//a[contains(@href, "userdetails")]//strong//text()')
if ret:
self.username = str(ret[0])
return
@@ -98,28 +107,32 @@ class NexusPhpSiteUserInfo(SiteParserBase):
self.leeching = StringUtils.str_int(leeching_match.group(2)) if leeching_match and leeching_match.group(
2).strip() else 0
html = etree.HTML(html_text)
has_ucoin, self.bonus = self._parse_ucoin(html)
if has_ucoin:
return
tmps = html.xpath('//a[contains(@href,"mybonus")]/text()') if html else None
if tmps:
bonus_text = str(tmps[0]).strip()
bonus_match = re.search(r"([\d,.]+)", bonus_text)
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
return
bonus_match = re.search(r"mybonus.[\[\]:<>/a-zA-Z_\-=\"'\s#;.(使用魔力值豆]+\s*([\d,.]+)[<()&\s]", html_text)
try:
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
has_ucoin, self.bonus = self._parse_ucoin(html)
if has_ucoin:
return
bonus_match = re.search(r"[魔力值|\]][\[\]:<>/a-zA-Z_\-=\"'\s#;]+\s*([\d,.]+|\"[\d,.]+\")[<>()&\s]",
html_text,
flags=re.S)
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1).strip('"'))
except Exception as err:
logger.error(f"{self._site_name} 解析魔力值出错, 错误信息: {str(err)}")
tmps = html.xpath('//a[contains(@href,"mybonus")]/text()') if html else None
if tmps:
bonus_text = str(tmps[0]).strip()
bonus_match = re.search(r"([\d,.]+)", bonus_text)
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
return
bonus_match = re.search(r"mybonus.[\[\]:<>/a-zA-Z_\-=\"'\s#;.(使用魔力值豆]+\s*([\d,.]+)[<()&\s]", html_text)
try:
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1))
return
bonus_match = re.search(r"[魔力值|\]][\[\]:<>/a-zA-Z_\-=\"'\s#;]+\s*([\d,.]+|\"[\d,.]+\")[<>()&\s]",
html_text,
flags=re.S)
if bonus_match and bonus_match.group(1).strip():
self.bonus = StringUtils.str_float(bonus_match.group(1).strip('"'))
except Exception as err:
logger.error(f"{self._site_name} 解析魔力值出错, 错误信息: {str(err)}")
finally:
if html is not None:
del html
@staticmethod
def _parse_ucoin(html):
@@ -155,72 +168,76 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(str(html_text).replace(r'\/', '/'))
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
# 首页存在扩展链接,使用扩展链接
seeding_url_text = html.xpath('//a[contains(@href,"torrents.php") '
'and contains(@href,"seeding")]/@href')
if multi_page is False and seeding_url_text and seeding_url_text[0].strip():
self._torrent_seeding_page = seeding_url_text[0].strip()
return self._torrent_seeding_page
# 首页存在扩展链接,使用扩展链接
seeding_url_text = html.xpath('//a[contains(@href,"torrents.php") '
'and contains(@href,"seeding")]/@href')
if multi_page is False and seeding_url_text and seeding_url_text[0].strip():
self._torrent_seeding_page = seeding_url_text[0].strip()
return self._torrent_seeding_page
size_col = 3
seeders_col = 4
# 搜索size列
size_col_xpath = '//tr[position()=1]/' \
'td[(img[@class="size"] and img[@alt="size"])' \
' or (text() = "大小")' \
' or (a/img[@class="size" and @alt="size"])]'
if html.xpath(size_col_xpath):
size_col = len(html.xpath(f'{size_col_xpath}/preceding-sibling::td')) + 1
# 搜索seeders列
seeders_col_xpath = '//tr[position()=1]/' \
'td[(img[@class="seeders"] and img[@alt="seeders"])' \
' or (text() = "在做种")' \
' or (a/img[@class="seeders" and @alt="seeders"])]'
if html.xpath(seeders_col_xpath):
seeders_col = len(html.xpath(f'{seeders_col_xpath}/preceding-sibling::td')) + 1
size_col = 3
seeders_col = 4
# 搜索size列
size_col_xpath = '//tr[position()=1]/' \
'td[(img[@class="size"] and img[@alt="size"])' \
' or (text() = "大小")' \
' or (a/img[@class="size" and @alt="size"])]'
if html.xpath(size_col_xpath):
size_col = len(html.xpath(f'{size_col_xpath}/preceding-sibling::td')) + 1
# 搜索seeders列
seeders_col_xpath = '//tr[position()=1]/' \
'td[(img[@class="seeders"] and img[@alt="seeders"])' \
' or (text() = "在做种")' \
' or (a/img[@class="seeders" and @alt="seeders"])]'
if html.xpath(seeders_col_xpath):
seeders_col = len(html.xpath(f'{seeders_col_xpath}/preceding-sibling::td')) + 1
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
# 如果 table class="torrents"则增加table[@class="torrents"]
table_class = '//table[@class="torrents"]' if html.xpath('//table[@class="torrents"]') else ''
seeding_sizes = html.xpath(f'{table_class}//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]/b/a/text()')
if not seeding_seeders:
seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]//text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
# 如果 table class="torrents"则增加table[@class="torrents"]
table_class = '//table[@class="torrents"]' if html.xpath('//table[@class="torrents"]') else ''
seeding_sizes = html.xpath(f'{table_class}//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]/b/a/text()')
if not seeding_seeders:
seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]//text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
next_page_text = html.xpath(
'//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁") or contains(.//text(), ">")]/@href')
# 防止识别到详情页
while next_page_text:
next_page = next_page_text.pop().strip()
if not next_page.startswith('details.php'):
break
# 是否存在下页数据
next_page = None
next_page_text = html.xpath(
'//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁") or contains(.//text(), ">")]/@href')
# fix up page url
if next_page:
if self.userid not in next_page:
next_page = f'{next_page}&userid={self.userid}&type=seeding'
# 防止识别到详情页
while next_page_text:
next_page = next_page_text.pop().strip()
if not next_page.startswith('details.php'):
break
next_page = None
# fix up page url
if next_page:
if self.userid not in next_page:
next_page = f'{next_page}&userid={self.userid}&type=seeding'
finally:
if html is not None:
del html
return next_page
@@ -231,57 +248,61 @@ class NexusPhpSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return
try:
if not StringUtils.is_valid_html_element(html):
return
self._get_user_level(html)
self._get_user_level(html)
self._fixup_traffic_info(html)
self._fixup_traffic_info(html)
# 加入日期
join_at_text = html.xpath(
'//tr/td[text()="加入日期" or text()="注册日期" or *[text()="加入日期"]]/following-sibling::td[1]//text()'
'|//div/b[text()="加入日期"]/../text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip())
# 加入日期
join_at_text = html.xpath(
'//tr/td[text()="加入日期" or text()="注册日期" or *[text()="加入日期"]]/following-sibling::td[1]//text()'
'|//div/b[text()="加入日期"]/../text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip())
# 做种体积 & 做种数
# seeding 页面获取不到的话,此处再获取一次
seeding_sizes = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//'
'table[tr[1][td[4 and text()="尺寸"]]]//tr[position()>1]/td[4]')
seeding_seeders = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//'
'table[tr[1][td[5 and text()="做种者"]]]//tr[position()>1]/td[5]//text()')
tmp_seeding = len(seeding_sizes)
tmp_seeding_size = 0
tmp_seeding_info = []
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
# 做种体积 & 做种数
# seeding 页面获取不到的话,此处再获取一次
seeding_sizes = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//'
'table[tr[1][td[4 and text()="尺寸"]]]//tr[position()>1]/td[4]')
seeding_seeders = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//'
'table[tr[1][td[5 and text()="做种者"]]]//tr[position()>1]/td[5]//text()')
tmp_seeding = len(seeding_sizes)
tmp_seeding_size = 0
tmp_seeding_info = []
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
tmp_seeding_size += size
tmp_seeding_info.append([seeders, size])
tmp_seeding_size += size
tmp_seeding_info.append([seeders, size])
if not self.seeding_size:
self.seeding_size = tmp_seeding_size
if not self.seeding:
self.seeding = tmp_seeding
if not self.seeding_info:
self.seeding_info = tmp_seeding_info
if not self.seeding_size:
self.seeding_size = tmp_seeding_size
if not self.seeding:
self.seeding = tmp_seeding
if not self.seeding_info:
self.seeding_info = tmp_seeding_info
seeding_sizes = html.xpath('//tr/td[text()="做种统计"]/following-sibling::td[1]//text()')
if seeding_sizes:
seeding_match = re.search(r"总做种数:\s+(\d+)", seeding_sizes[0], re.IGNORECASE)
seeding_size_match = re.search(r"总做种体积:\s+([\d,.\s]+[KMGTPI]*B)", seeding_sizes[0], re.IGNORECASE)
tmp_seeding = StringUtils.str_int(seeding_match.group(1)) if (
seeding_match and seeding_match.group(1)) else 0
tmp_seeding_size = StringUtils.num_filesize(
seeding_size_match.group(1).strip()) if seeding_size_match else 0
if not self.seeding_size:
self.seeding_size = tmp_seeding_size
if not self.seeding:
self.seeding = tmp_seeding
seeding_sizes = html.xpath('//tr/td[text()="做种统计"]/following-sibling::td[1]//text()')
if seeding_sizes:
seeding_match = re.search(r"总做种数:\s+(\d+)", seeding_sizes[0], re.IGNORECASE)
seeding_size_match = re.search(r"总做种体积:\s+([\d,.\s]+[KMGTPI]*B)", seeding_sizes[0], re.IGNORECASE)
tmp_seeding = StringUtils.str_int(seeding_match.group(1)) if (
seeding_match and seeding_match.group(1)) else 0
tmp_seeding_size = StringUtils.num_filesize(
seeding_size_match.group(1).strip()) if seeding_size_match else 0
if not self.seeding_size:
self.seeding_size = tmp_seeding_size
if not self.seeding:
self.seeding = tmp_seeding
self._fixup_torrent_seeding_page(html)
self._fixup_torrent_seeding_page(html)
finally:
if html is not None:
del html
def _fixup_torrent_seeding_page(self, html):
"""
@@ -348,43 +369,51 @@ class NexusPhpSiteUserInfo(SiteParserBase):
def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]:
    """
    Collect message links from one page of the message list.

    :param html_text: HTML text of a message-list page
    :param msg_links: list extended in place with the ``viewmessage`` hrefs found
    :return: href of the next page, or None when no next-page link exists
             or the page cannot be parsed
    """
    html = etree.HTML(html_text)
    try:
        if not StringUtils.is_valid_html_element(html):
            return None

        # Only rows that do not contain an <img alt="Read"> are taken
        message_links = html.xpath('//tr[not(./td/img[@alt="Read"])]/td/a[contains(@href, "viewmessage")]/@href')
        msg_links.extend(message_links)
        # Check whether a next page exists
        next_page = None
        next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href')
        if next_page_text:
            next_page = next_page_text[-1].strip()
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html

    return next_page
def _parse_message_content(self, html_text):
    """
    Extract title, date and body text from a single message page.

    :param html_text: HTML text of the message page
    :return: tuple ``(title, date, content)``; each item is None when the
             matching node is not found, and all three are None when the
             page cannot be parsed
    """
    html = etree.HTML(html_text)
    try:
        if not StringUtils.is_valid_html_element(html):
            return None, None, None
        # Title
        message_head_text = None
        message_head = html.xpath('//h1/text()'
                                  '|//div[@class="layui-card-header"]/span[1]/text()')
        if message_head:
            message_head_text = message_head[-1].strip()

        # Message time
        message_date_text = None
        message_date = html.xpath('//h1/following-sibling::table[.//tr/td[@class="colhead"]]//tr[2]/td[2]'
                                  '|//div[@class="layui-card-header"]/span[2]/span[2]')
        if message_date:
            message_date_text = message_date[0].xpath("string(.)").strip()

        # Message content
        message_content_text = None
        message_content = html.xpath('//h1/following-sibling::table[.//tr/td[@class="colhead"]]//tr[3]/td'
                                     '|//div[contains(@class,"layui-card-body")]')
        if message_content:
            message_content_text = message_content[0].xpath("string(.)").strip()
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html

    return message_head_text, message_date_text, message_content_text

View File

@@ -114,48 +114,56 @@ class NexusRabbitSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
    """
    Read the bonus balance from the base page.

    Only the "奶糖余额" balance has to be taken from the base page; everything
    else can be obtained from the detail page.

    :param html_text: HTML text of the base page
    """
    html = etree.HTML(html_text)
    try:
        if not StringUtils.is_valid_html_element(html):
            return
        # The balance is the text of the div right after the "奶糖余额" label
        bonus = html.xpath(
            '//div[contains(text(), "奶糖余额")]/following-sibling::div[1]/text()'
        )
        if bonus:
            self.bonus = StringUtils.str_float(bonus[0].strip())
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html
def _parse_user_detail_info(self, html_text: str):
    """
    Parse username, level, join date, upload, download and ratio from the
    user detail page.

    Each field is set only when its node is found; nothing is changed when
    the page cannot be parsed or the user-info container is missing.

    :param html_text: HTML text of the user detail page
    """
    html = etree.HTML(html_text)
    try:
        if not StringUtils.is_valid_html_element(html):
            return
        # Narrow the search scope: all the information lives in this div
        user_info = html.xpath('//div[contains(@class, "layui-hares-user-info-right")]')
        if not user_info:
            return
        user_info = user_info[0]
        # Username
        if username := user_info.xpath(
            './/span[contains(text(), "用户名")]/a/span/text()'
        ):
            self.username = username[0].strip()
        # User level
        if user_level := user_info.xpath('.//span[contains(text(), "等级")]/b/text()'):
            self.user_level = user_level[0].strip()
        # Join date: keep the text before the first "\r" and strip the label
        if join_date := user_info.xpath('.//span[contains(text(), "注册日期")]/text()'):
            join_date = join_date[0].strip().split("\r")[0].removeprefix("注册日期:")
            self.join_at = StringUtils.unify_datetime_str(join_date)
        # Uploaded amount
        if upload := user_info.xpath('.//span[contains(text(), "上传量")]/text()'):
            self.upload = StringUtils.num_filesize(
                upload[0].strip().removeprefix("上传量:")
            )
        # Downloaded amount
        if download := user_info.xpath('.//span[contains(text(), "下载量")]/text()'):
            self.download = StringUtils.num_filesize(
                download[0].strip().removeprefix("下载量:")
            )
        # Share ratio
        if ratio := user_info.xpath('.//span[contains(text(), "分享率")]/em/text()'):
            self.ratio = StringUtils.str_float(ratio[0].strip())
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html
def _parse_message_content(self, html_text):
"""

View File

@@ -24,9 +24,13 @@ class SmallHorseSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
    """
    Read the username from the first link whose href contains ``user.php``.

    :param html_text: raw HTML text of the page; passed through
                      ``self._prepare_html_text`` before parsing
    """
    html_text = self._prepare_html_text(html_text)
    html = etree.HTML(html_text)
    try:
        # Same validity guard the other parsers in this file apply before
        # querying the tree; without it an unusable parse result would fail
        # on ``html.xpath``
        if not StringUtils.is_valid_html_element(html):
            return
        ret = html.xpath('//a[contains(@href, "user.php")]//text()')
        if ret:
            self.username = str(ret[0])
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html
def _parse_user_traffic_info(self, html_text: str):
"""
@@ -36,21 +40,25 @@ class SmallHorseSiteUserInfo(SiteParserBase):
"""
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
tmps = html.xpath('//ul[@class = "stats nobullet"]')
if tmps:
if tmps[1].xpath("li") and tmps[1].xpath("li")[0].xpath("span//text()"):
self.join_at = StringUtils.unify_datetime_str(tmps[1].xpath("li")[0].xpath("span//text()")[0])
self.upload = StringUtils.num_filesize(str(tmps[1].xpath("li")[2].xpath("text()")[0]).split(":")[1].strip())
self.download = StringUtils.num_filesize(
str(tmps[1].xpath("li")[3].xpath("text()")[0]).split(":")[1].strip())
if tmps[1].xpath("li")[4].xpath("span//text()"):
self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[4].xpath("span//text()")[0]).replace('', '0'))
else:
self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1])
self.bonus = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1])
self.user_level = str(tmps[3].xpath("li")[0].xpath("text()")[0]).split(":")[1].strip()
self.leeching = StringUtils.str_int(
(tmps[4].xpath("li")[6].xpath("text()")[0]).split(":")[1].replace("[", ""))
try:
tmps = html.xpath('//ul[@class = "stats nobullet"]')
if tmps:
if tmps[1].xpath("li") and tmps[1].xpath("li")[0].xpath("span//text()"):
self.join_at = StringUtils.unify_datetime_str(tmps[1].xpath("li")[0].xpath("span//text()")[0])
self.upload = StringUtils.num_filesize(str(tmps[1].xpath("li")[2].xpath("text()")[0]).split(":")[1].strip())
self.download = StringUtils.num_filesize(
str(tmps[1].xpath("li")[3].xpath("text()")[0]).split(":")[1].strip())
if tmps[1].xpath("li")[4].xpath("span//text()"):
self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[4].xpath("span//text()")[0]).replace('', '0'))
else:
self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1])
self.bonus = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1])
self.user_level = str(tmps[3].xpath("li")[0].xpath("text()")[0]).split(":")[1].strip()
self.leeching = StringUtils.str_int(
(tmps[4].xpath("li")[6].xpath("text()")[0]).split(":")[1].replace("[", ""))
finally:
if html is not None:
del html
def _parse_user_detail_info(self, html_text: str):
pass
@@ -63,39 +71,42 @@ class SmallHorseSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 6
seeders_col = 8
size_col = 6
seeders_col = 8
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{size_col}]')
seeding_seeders = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
if next_pages and len(next_pages) > 1:
page_num = next_pages[0].xpath("string(.)").strip()
if page_num.isdigit():
next_page = f"{self._torrent_seeding_page}&page={page_num}"
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
if next_pages and len(next_pages) > 1:
page_num = next_pages[0].xpath("string(.)").strip()
if page_num.isdigit():
next_page = f"{self._torrent_seeding_page}&page={page_num}"
finally:
if html is not None:
del html
return next_page
def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]:

View File

@@ -32,29 +32,33 @@ class TorrentLeechSiteUserInfo(SiteParserBase):
"""
html_text = self._prepare_html_text(html_text)
html = etree.HTML(html_text)
upload_html = html.xpath('//div[contains(@class,"profile-uploaded")]//span/text()')
if upload_html:
self.upload = StringUtils.num_filesize(upload_html[0])
download_html = html.xpath('//div[contains(@class,"profile-downloaded")]//span/text()')
if download_html:
self.download = StringUtils.num_filesize(download_html[0])
ratio_html = html.xpath('//div[contains(@class,"profile-ratio")]//span/text()')
if ratio_html:
self.ratio = StringUtils.str_float(ratio_html[0].replace('', '0'))
try:
upload_html = html.xpath('//div[contains(@class,"profile-uploaded")]//span/text()')
if upload_html:
self.upload = StringUtils.num_filesize(upload_html[0])
download_html = html.xpath('//div[contains(@class,"profile-downloaded")]//span/text()')
if download_html:
self.download = StringUtils.num_filesize(download_html[0])
ratio_html = html.xpath('//div[contains(@class,"profile-ratio")]//span/text()')
if ratio_html:
self.ratio = StringUtils.str_float(ratio_html[0].replace('', '0'))
user_level_html = html.xpath('//table[contains(@class, "profileViewTable")]'
'//tr/td[text()="Class"]/following-sibling::td/text()')
if user_level_html:
self.user_level = user_level_html[0].strip()
user_level_html = html.xpath('//table[contains(@class, "profileViewTable")]'
'//tr/td[text()="Class"]/following-sibling::td/text()')
if user_level_html:
self.user_level = user_level_html[0].strip()
join_at_html = html.xpath('//table[contains(@class, "profileViewTable")]'
'//tr/td[text()="Registration date"]/following-sibling::td/text()')
if join_at_html:
self.join_at = StringUtils.unify_datetime_str(join_at_html[0].strip())
join_at_html = html.xpath('//table[contains(@class, "profileViewTable")]'
'//tr/td[text()="Registration date"]/following-sibling::td/text()')
if join_at_html:
self.join_at = StringUtils.unify_datetime_str(join_at_html[0].strip())
bonus_html = html.xpath('//span[contains(@class, "total-TL-points")]/text()')
if bonus_html:
self.bonus = StringUtils.str_float(bonus_html[0].strip())
bonus_html = html.xpath('//span[contains(@class, "total-TL-points")]/text()')
if bonus_html:
self.bonus = StringUtils.str_float(bonus_html[0].strip())
finally:
if html is not None:
del html
def _parse_user_detail_info(self, html_text: str):
pass
@@ -67,33 +71,37 @@ class TorrentLeechSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 2
seeders_col = 7
size_col = 2
seeders_col = 7
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tbody/tr/td[{size_col}]')
seeding_seeders = html.xpath(f'//tbody/tr/td[{seeders_col}]/text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tbody/tr/td[{size_col}]')
seeding_seeders = html.xpath(f'//tbody/tr/td[{seeders_col}]/text()')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i])
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
# 是否存在下页数据
next_page = None
finally:
if html is not None:
del html
return next_page

View File

@@ -14,21 +14,24 @@ class Unit3dSiteUserInfo(SiteParserBase):
def _parse_user_base_info(self, html_text: str):
    """
    Parse the username and bonus from the base page.

    When a ``/users/<name>/settings`` link is found, the username is taken
    from it and the seeding-page and detail-page paths are derived from it.

    :param html_text: raw HTML text of the page; passed through
                      ``self._prepare_html_text`` before parsing
    """
    html_text = self._prepare_html_text(html_text)
    html = etree.HTML(html_text)
    try:
        # Same validity guard the other parsers in this file apply before
        # querying the tree; without it an unusable parse result would fail
        # on ``html.xpath``
        if not StringUtils.is_valid_html_element(html):
            return

        tmps = html.xpath('//a[contains(@href, "/users/") and contains(@href, "settings")]/@href')
        if tmps:
            user_name_match = re.search(r"/users/(.+)/settings", tmps[0])
            if user_name_match and user_name_match.group().strip():
                self.username = user_name_match.group(1)
                self._torrent_seeding_page = f"/users/{self.username}/active?perPage=100&client=&seeding=include"
                self._user_detail_page = f"/users/{self.username}"

        # Bonus: first number found in the text of the "bonus/earnings" link
        tmps = html.xpath('//a[contains(@href, "bonus/earnings")]')
        if tmps:
            bonus_text = tmps[0].xpath("string(.)")
            bonus_match = re.search(r"([\d,.]+)", bonus_text)
            if bonus_match and bonus_match.group(1).strip():
                self.bonus = StringUtils.str_float(bonus_match.group(1))
    finally:
        # Drop the local reference to the parsed tree on every exit path
        if html is not None:
            del html
def _parse_site_page(self, html_text: str):
pass
@@ -40,21 +43,25 @@ class Unit3dSiteUserInfo(SiteParserBase):
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
# 用户等级
user_levels_text = html.xpath('//div[contains(@class, "content")]//span[contains(@class, "badge-user")]/text()')
if user_levels_text:
self.user_level = user_levels_text[0].strip()
# 用户等级
user_levels_text = html.xpath('//div[contains(@class, "content")]//span[contains(@class, "badge-user")]/text()')
if user_levels_text:
self.user_level = user_levels_text[0].strip()
# 加入日期
join_at_text = html.xpath('//div[contains(@class, "content")]//h4[contains(text(), "注册日期") '
'or contains(text(), "註冊日期") '
'or contains(text(), "Registration date")]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(
join_at_text[0].replace('注册日期', '').replace('註冊日期', '').replace('Registration date', ''))
# 加入日期
join_at_text = html.xpath('//div[contains(@class, "content")]//h4[contains(text(), "注册日期") '
'or contains(text(), "註冊日期") '
'or contains(text(), "Registration date")]/text()')
if join_at_text:
self.join_at = StringUtils.unify_datetime_str(
join_at_text[0].replace('注册日期', '').replace('註冊日期', '').replace('Registration date', ''))
finally:
if html is not None:
del html
def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: Optional[bool] = False) -> Optional[str]:
"""
@@ -64,44 +71,48 @@ class Unit3dSiteUserInfo(SiteParserBase):
:return: 下页地址
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return None
try:
if not StringUtils.is_valid_html_element(html):
return None
size_col = 9
seeders_col = 2
# 搜索size列
if html.xpath('//thead//th[contains(@class,"size")]'):
size_col = len(html.xpath('//thead//th[contains(@class,"size")][1]/preceding-sibling::th')) + 1
# 搜索seeders列
if html.xpath('//thead//th[contains(@class,"seeders")]'):
seeders_col = len(html.xpath('//thead//th[contains(@class,"seeders")]/preceding-sibling::th')) + 1
size_col = 9
seeders_col = 2
# 搜索size列
if html.xpath('//thead//th[contains(@class,"size")]'):
size_col = len(html.xpath('//thead//th[contains(@class,"size")][1]/preceding-sibling::th')) + 1
# 搜索seeders列
if html.xpath('//thead//th[contains(@class,"seeders")]'):
seeders_col = len(html.xpath('//thead//th[contains(@class,"seeders")]/preceding-sibling::th')) + 1
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tr[position()]/td[{size_col}]')
seeding_seeders = html.xpath(f'//tr[position()]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
page_seeding = 0
page_seeding_size = 0
page_seeding_info = []
seeding_sizes = html.xpath(f'//tr[position()]/td[{size_col}]')
seeding_seeders = html.xpath(f'//tr[position()]/td[{seeders_col}]')
if seeding_sizes and seeding_seeders:
page_seeding = len(seeding_sizes)
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
for i in range(0, len(seeding_sizes)):
size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
page_seeding_size += size
page_seeding_info.append([seeders, size])
page_seeding_size += size
page_seeding_info.append([seeders, size])
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
self.seeding += page_seeding
self.seeding_size += page_seeding_size
self.seeding_info.extend(page_seeding_info)
# 是否存在下页数据
next_page = None
next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
if next_pages and len(next_pages) > 1:
page_num = next_pages[0].xpath("string(.)").strip()
if page_num.isdigit():
next_page = f"{self._torrent_seeding_page}&page={page_num}"
# 是否存在下页数据
next_page = None
next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
if next_pages and len(next_pages) > 1:
page_num = next_pages[0].xpath("string(.)").strip()
if page_num.isdigit():
next_page = f"{self._torrent_seeding_page}&page={page_num}"
finally:
if html is not None:
del html
return next_page

View File

@@ -0,0 +1,704 @@
import datetime
import re
import traceback
from typing import Any, Optional
from typing import List
from urllib.parse import quote, urlencode, urlparse, parse_qs
from jinja2 import Template
from pyquery import PyQuery
from app.core.config import settings
from app.log import logger
from app.schemas.types import MediaType
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
class SiteSpider:
    """
    Site spider.

    Builds the search/browse URL of an indexer from its configuration, requests the
    page and extracts torrent records with the configured PyQuery selectors.
    """

    @property
    def __class__(self):
        # Deliberately reports `object` instead of the real class.
        return object

    @property
    def __dict__(self):
        # Deliberately hides the instance attributes from introspection.
        return {}

    @property
    def __dir__(self):
        raise AttributeError("Cannot read protected attribute!")

    def __init__(self,
                 indexer: dict,
                 keyword: Optional[str] = None,
                 mtype: MediaType = None,
                 cat: Optional[str] = None,
                 page: Optional[int] = 0,
                 referer: Optional[str] = None):
        """
        Set up the query parameters
        :param indexer: indexer configuration
        :param keyword: search keyword; a list means batch search
        :param mtype: media type
        :param cat: search category
        :param page: page number
        :param referer: Referer
        """
        # Parsing state and the two attributes get_torrents() checks first are always
        # initialised, so a spider built without an indexer simply returns no results.
        self.is_error = False
        self.torrents_info = {}
        self.torrents_info_array = []
        self.search = None
        self.domain = None
        if not indexer:
            return
        self.keyword = keyword
        self.cat = cat
        self.mtype = mtype
        self.indexerid = indexer.get('id')
        self.indexername = indexer.get('name')
        self.search = indexer.get('search')
        self.batch = indexer.get('batch')
        self.browse = indexer.get('browse')
        self.category = indexer.get('category')
        # Tolerate a missing/empty "torrents" section instead of raising
        torrents = indexer.get('torrents') or {}
        self.list = torrents.get('list') or {}
        self.fields = torrents.get('fields') or {}
        if not keyword and self.browse:
            # Browsing may use its own list/field selectors
            self.list = self.browse.get('list') or self.list
            self.fields = self.browse.get('fields') or self.fields
        self.domain = indexer.get('domain')
        self.result_num = int(indexer.get('result_num') or 100)
        self._timeout = int(indexer.get('timeout') or 15)
        self.page = page
        if self.domain and not str(self.domain).endswith("/"):
            self.domain = self.domain + "/"
        self.ua = indexer.get('ua') or settings.USER_AGENT
        self.proxies = settings.PROXY if indexer.get('proxy') else None
        self.proxy_server = settings.PROXY_SERVER if indexer.get('proxy') else None
        self.cookie = indexer.get('cookie')
        self.referer = referer

    def get_torrents(self) -> List[dict]:
        """
        Build the request URL, fetch the page and parse it
        :return: list of torrent info dicts (empty when search/domain are not configured)
        """
        if not self.search or not self.domain:
            return []

        # Relative path of the torrent search page
        paths = self.search.get('paths', [])
        torrentspath = ""
        if len(paths) == 1:
            torrentspath = paths[0].get('path', '')
        else:
            for path in paths:
                if path.get("type") == "all" and not self.mtype:
                    torrentspath = path.get('path')
                    break
                elif path.get("type") == "movie" and self.mtype == MediaType.MOVIE:
                    torrentspath = path.get('path')
                    break
                elif path.get("type") == "tv" and self.mtype == MediaType.TV:
                    torrentspath = path.get('path')
                    break

        # Keyword search
        if self.keyword:
            if isinstance(self.keyword, list):
                # Batch query
                if self.batch:
                    delimiter = self.batch.get('delimiter') or ' '
                    space_replace = self.batch.get('space_replace') or ' '
                    search_word = delimiter.join([str(k).replace(' ',
                                                                 space_replace) for k in self.keyword])
                else:
                    search_word = " ".join(str(k) for k in self.keyword)
                # Search mode: OR
                search_mode = "1"
            else:
                # Single query
                search_word = self.keyword
                # Search mode: AND
                search_mode = "0"

            # Search URL
            indexer_params = (self.search.get("params") or {}).copy()
            if indexer_params:
                search_area = indexer_params.get('search_area')
                # A non-zero search_area means the site supports imdbid search.
                # Only a plain string keyword can be an imdbid; a batch (list) keyword
                # has no startswith() and is never treated as one.
                is_imdbid = isinstance(self.keyword, str) and self.keyword.startswith('tt')
                if search_area and not is_imdbid:
                    # imdbid search is supported but the keyword is not an imdbid
                    indexer_params.pop('search_area')
                # Template variables
                inputs_dict = {
                    "keyword": search_word
                }
                # Query parameters, search by title by default
                params = {
                    "search_mode": search_mode,
                    "search_area": 0,
                    "page": self.page or 0,
                    "notnewword": 1
                }
                # Extra parameters
                for key, value in indexer_params.items():
                    params.update({
                        "%s" % key: str(value).format(**inputs_dict)
                    })
                # Category conditions
                if self.category:
                    if self.mtype == MediaType.TV:
                        cats = self.category.get("tv") or []
                    elif self.mtype == MediaType.MOVIE:
                        cats = self.category.get("movie") or []
                    else:
                        cats = (self.category.get("movie") or []) + (self.category.get("tv") or [])
                    for cat in cats:
                        if self.cat and str(cat.get("id")) not in self.cat:
                            continue
                        if self.category.get("field"):
                            value = params.get(self.category.get("field"), "")
                            # str() so that numeric category ids can be concatenated
                            params.update({
                                "%s" % self.category.get("field"): value + self.category.get("delimiter",
                                                                                             ' ') + str(cat.get("id"))
                            })
                        else:
                            params.update({
                                "cat%s" % cat.get("id"): 1
                            })
                searchurl = self.domain + torrentspath + "?" + urlencode(params)
            else:
                # Template variables
                inputs_dict = {
                    "keyword": quote(search_word),
                    "page": self.page or 0
                }
                # No extra parameters
                searchurl = self.domain + str(torrentspath).format(**inputs_dict)

        # List browsing
        else:
            # Template variables
            inputs_dict = {
                "page": self.page or 0,
                "keyword": ""
            }
            # Dedicated browse path
            if self.browse:
                torrentspath = self.browse.get("path")
                if self.browse.get("start"):
                    start_page = int(self.browse.get("start")) + int(self.page or 0)
                    inputs_dict.update({
                        "page": start_page
                    })
            elif self.page:
                torrentspath = torrentspath + f"?page={self.page}"
            # Search URL
            searchurl = self.domain + str(torrentspath).format(**inputs_dict)

        logger.info(f"开始请求:{searchurl}")

        # HTTP request
        ret = RequestUtils(
            ua=self.ua,
            cookies=self.cookie,
            timeout=self._timeout,
            referer=self.referer,
            proxies=self.proxies
        ).get_res(searchurl, allow_redirects=True)
        # Parse the response
        return self.parse(
            RequestUtils.get_decoded_html_content(
                ret,
                performance_mode=settings.ENCODING_DETECTION_PERFORMANCE_MODE,
                confidence_threshold=settings.ENCODING_DETECTION_MIN_CONFIDENCE
            )
        )

    def __get_title(self, torrent: Any):
        """
        Extract the title, either from a selector or by rendering the "text" template
        """
        if 'title' not in self.fields:
            return
        selector = self.fields.get('title', {})
        if 'selector' in selector:
            self.torrents_info['title'] = self._safe_query(torrent, selector)
        elif 'text' in selector:
            render_dict = {}
            if "title_default" in self.fields:
                title_default_selector = self.fields.get('title_default', {})
                title_default = self._safe_query(torrent, title_default_selector)
                render_dict.update({'title_default': title_default})
            if "title_optional" in self.fields:
                title_optional_selector = self.fields.get('title_optional', {})
                title_optional = self._safe_query(torrent, title_optional_selector)
                render_dict.update({'title_optional': title_optional})
            self.torrents_info['title'] = Template(selector.get('text')).render(fields=render_dict)
        self.torrents_info['title'] = self.__filter_text(self.torrents_info.get('title'),
                                                         selector.get('filters'))

    def __get_description(self, torrent: Any):
        """
        Extract the description, either from selector(s) or by rendering the "text" template
        """
        if 'description' not in self.fields:
            return
        selector = self.fields.get('description', {})
        if "selector" in selector or "selectors" in selector:
            # When only "selectors" is given, expose it under "selector" for _safe_query
            desc_selector = selector.copy()
            if "selectors" in selector and "selector" not in selector:
                desc_selector["selector"] = selector.get("selectors", "")
            self.torrents_info['description'] = self._safe_query(torrent, desc_selector)
        elif "text" in selector:
            render_dict = {}
            if "tags" in self.fields:
                tags_selector = self.fields.get('tags', {})
                tag = self._safe_query(torrent, tags_selector)
                render_dict.update({'tags': tag})
            if "subject" in self.fields:
                subject_selector = self.fields.get('subject', {})
                subject = self._safe_query(torrent, subject_selector)
                render_dict.update({'subject': subject})
            if "description_free_forever" in self.fields:
                description_free_forever_selector = self.fields.get("description_free_forever", {})
                description_free_forever = self._safe_query(torrent, description_free_forever_selector)
                render_dict.update({"description_free_forever": description_free_forever})
            if "description_normal" in self.fields:
                description_normal_selector = self.fields.get("description_normal", {})
                description_normal = self._safe_query(torrent, description_normal_selector)
                render_dict.update({"description_normal": description_normal})
            self.torrents_info['description'] = Template(selector.get('text')).render(fields=render_dict)
        self.torrents_info['description'] = self.__filter_text(self.torrents_info.get('description'),
                                                               selector.get('filters'))

    def __get_detail(self, torrent: Any):
        """
        Extract the detail page link and turn it into an absolute URL (page_url)
        """
        if 'details' not in self.fields:
            return
        selector = self.fields.get('details', {})
        item = self._safe_query(torrent, selector)
        detail_link = self.__filter_text(item, selector.get('filters'))
        if detail_link:
            if not detail_link.startswith("http"):
                if detail_link.startswith("//"):
                    # Protocol-relative link: reuse the scheme of the site domain
                    self.torrents_info['page_url'] = self.domain.split(":")[0] + ":" + detail_link
                elif detail_link.startswith("/"):
                    self.torrents_info['page_url'] = self.domain + detail_link[1:]
                else:
                    self.torrents_info['page_url'] = self.domain + detail_link
            else:
                self.torrents_info['page_url'] = detail_link

    def __get_download(self, torrent: Any):
        """
        Extract the download link and turn it into an absolute URL (enclosure)
        """
        if 'download' not in self.fields:
            return
        selector = self.fields.get('download', {})
        item = self._safe_query(torrent, selector)
        download_link = self.__filter_text(item, selector.get('filters'))
        if download_link:
            if not download_link.startswith("http") \
                    and not download_link.startswith("magnet"):
                _scheme, _domain = StringUtils.get_url_netloc(self.domain)
                if _domain in download_link:
                    # Link already contains the host, only the scheme is missing
                    if download_link.startswith("/"):
                        self.torrents_info['enclosure'] = f"{_scheme}:{download_link}"
                    else:
                        self.torrents_info['enclosure'] = f"{_scheme}://{download_link}"
                else:
                    if download_link.startswith("/"):
                        self.torrents_info['enclosure'] = f"{self.domain}{download_link[1:]}"
                    else:
                        self.torrents_info['enclosure'] = f"{self.domain}{download_link}"
            else:
                self.torrents_info['enclosure'] = download_link

    def __get_imdbid(self, torrent: Any):
        """
        Extract the IMDb id
        """
        if "imdbid" not in self.fields:
            return
        selector = self.fields.get('imdbid', {})
        item = self._safe_query(torrent, selector)
        self.torrents_info['imdbid'] = self.__filter_text(item, selector.get('filters'))

    def __get_size(self, torrent: Any):
        """
        Extract the torrent size and convert it with StringUtils.num_filesize (0 when missing)
        """
        if 'size' not in self.fields:
            return
        selector = self.fields.get('size', {})
        item = self._safe_query(torrent, selector)
        if item:
            size_val = item.replace("\n", "").strip()
            size_val = self.__filter_text(size_val,
                                          selector.get('filters'))
            self.torrents_info['size'] = StringUtils.num_filesize(size_val)
        else:
            self.torrents_info['size'] = 0

    def __get_count(self, torrent: Any, field: str, key: str):
        """
        Extract an integer counter shared by leechers/seeders/grabs
        :param torrent: PyQuery object of one torrent row
        :param field: name of the field in the indexer configuration
        :param key: key written to torrents_info
        """
        if field not in self.fields:
            return
        selector = self.fields.get(field, {})
        item = self._safe_query(torrent, selector)
        if item:
            # Keep the part before "/" and drop thousands separators
            value = item.split("/")[0].replace(",", "")
            value = self.__filter_text(value, selector.get('filters'))
            self.torrents_info[key] = int(value) if value and value.isdigit() else 0
        else:
            self.torrents_info[key] = 0

    def __get_leechers(self, torrent: Any):
        """
        Extract the number of leechers (stored as "peers")
        """
        self.__get_count(torrent, 'leechers', 'peers')

    def __get_seeders(self, torrent: Any):
        """
        Extract the number of seeders
        """
        self.__get_count(torrent, 'seeders', 'seeders')

    def __get_grabs(self, torrent: Any):
        """
        Extract the number of grabs (completed downloads)
        """
        self.__get_count(torrent, 'grabs', 'grabs')

    def __get_pubdate(self, torrent: Any):
        """
        Extract the publish date (yyyy-mm-dd hh:mm:ss)
        """
        if 'date_added' not in self.fields:
            return
        selector = self.fields.get('date_added', {})
        pubdate_str = self._safe_query(torrent, selector)
        if pubdate_str:
            pubdate_str = pubdate_str.replace('\n', ' ').strip()
        self.torrents_info['pubdate'] = self.__filter_text(pubdate_str, selector.get('filters'))

    def __get_date_elapsed(self, torrent: Any):
        """
        Extract the elapsed-time text
        """
        if 'date_elapsed' not in self.fields:
            return
        selector = self.fields.get('date_elapsed', {})
        date_elapsed = self._safe_query(torrent, selector)
        self.torrents_info['date_elapsed'] = self.__filter_text(date_elapsed, selector.get('filters'))

    @staticmethod
    def __to_number(text: str):
        """
        Convert a numeric string to int when it is integral, otherwise to float
        :param text: string matched by the numeric pattern, e.g. "1", "0.5", "2."
        :return: int or float
        """
        number = float(text)
        return int(number) if number.is_integer() else number

    def __get_volume_factor(self, torrent: Any, field: str):
        """
        Extract a download/upload volume factor, shared by both factor fields
        :param torrent: PyQuery object of one torrent row
        :param field: 'downloadvolumefactor' or 'uploadvolumefactor'
        """
        selector = self.fields.get(field, {})
        if not selector:
            return
        # Default factor
        self.torrents_info[field] = 1
        if 'case' in selector:
            # The first case selector that matches anything decides the factor
            for case_selector, factor in (selector.get('case') or {}).items():
                matched = torrent(case_selector)
                try:
                    if len(matched) > 0:
                        self.torrents_info[field] = factor
                        break
                finally:
                    matched.clear()
                    del matched
        elif "selector" in selector:
            item = self._safe_query(torrent, selector)
            if item:
                found = re.search(r'(\d+\.?\d*)', item)
                if found:
                    # The pattern can match fractions such as "0.5"; int() would raise on
                    # those and make get_info() discard the whole torrent.
                    self.torrents_info[field] = self.__to_number(found.group(1))

    def __get_downloadvolumefactor(self, torrent: Any):
        """
        Extract the download volume factor
        """
        self.__get_volume_factor(torrent, 'downloadvolumefactor')

    def __get_uploadvolumefactor(self, torrent: Any):
        """
        Extract the upload volume factor
        """
        self.__get_volume_factor(torrent, 'uploadvolumefactor')

    def __get_labels(self, torrent: Any):
        """
        Extract the labels as a list, e.g. ['label1', 'label2']
        """
        if 'labels' not in self.fields:
            return
        selector = self.fields.get('labels', {})
        if not selector.get('selector'):
            self.torrents_info['labels'] = []
            return
        # Labels are handled separately because the result is a list, not a single value
        labels = torrent(selector.get("selector", "")).clone()
        try:
            self.__remove(labels, selector)
            items = self.__attribute_or_text(labels, selector)
            if items:
                self.torrents_info['labels'] = [item for item in items if item]
            else:
                self.torrents_info['labels'] = []
        finally:
            labels.clear()
            del labels

    def __get_free_date(self, torrent: Any):
        """
        Extract the free-until date (yyyy-mm-dd hh:mm:ss)
        """
        if 'freedate' not in self.fields:
            return
        selector = self.fields.get('freedate', {})
        freedate = self._safe_query(torrent, selector)
        self.torrents_info['freedate'] = self.__filter_text(freedate, selector.get('filters'))

    def __get_hit_and_run(self, torrent: Any):
        """
        Set hit_and_run to True when the H&R selector matches, otherwise False
        """
        if 'hr' not in self.fields:
            return
        selector = self.fields.get('hr', {})
        hit_and_run = torrent(selector.get('selector', ''))
        try:
            if hit_and_run:
                self.torrents_info['hit_and_run'] = True
            else:
                self.torrents_info['hit_and_run'] = False
        finally:
            hit_and_run.clear()
            del hit_and_run

    def __get_category(self, torrent: Any):
        """
        Map the site category id to a media type value (movie / tv / unknown)
        """
        if 'category' not in self.fields:
            return
        selector = self.fields.get('category', {})
        category_value = self._safe_query(torrent, selector)
        category_value = self.__filter_text(category_value, selector.get('filters'))
        if category_value and self.category:
            tv_cats = [str(cat.get("id")) for cat in self.category.get("tv") or []]
            movie_cats = [str(cat.get("id")) for cat in self.category.get("movie") or []]
            if category_value in tv_cats \
                    and category_value not in movie_cats:
                self.torrents_info['category'] = MediaType.TV.value
            elif category_value in movie_cats:
                self.torrents_info['category'] = MediaType.MOVIE.value
            else:
                self.torrents_info['category'] = MediaType.UNKNOWN.value
        else:
            self.torrents_info['category'] = MediaType.UNKNOWN.value

    def _safe_query(self, torrent: Any, selector_config: Optional[dict]) -> Optional[str]:
        """
        Run a PyQuery query and always release the temporary query object
        :param torrent: PyQuery object
        :param selector_config: selector configuration
        :return: the selected value, None when there is no selector or no match
        """
        if not selector_config or not selector_config.get('selector'):
            return None

        query_obj = torrent(selector_config.get('selector', '')).clone()
        try:
            self.__remove(query_obj, selector_config)
            items = self.__attribute_or_text(query_obj, selector_config)
            return self.__index(items, selector_config)
        finally:
            query_obj.clear()
            del query_obj

    def get_info(self, torrent: Any) -> dict:
        """
        Parse a single torrent row
        :param torrent: PyQuery object of one torrent row
        :return: torrent info dict, {} when parsing fails
        """
        # Start from a clean dict on every call so data never accumulates
        self.torrents_info = {}
        try:
            # Title
            self.__get_title(torrent)
            # Description
            self.__get_description(torrent)
            # Detail page
            self.__get_detail(torrent)
            # Download link
            self.__get_download(torrent)
            # Grabs
            self.__get_grabs(torrent)
            # Leechers
            self.__get_leechers(torrent)
            # Seeders
            self.__get_seeders(torrent)
            # Size
            self.__get_size(torrent)
            # IMDBID
            self.__get_imdbid(torrent)
            # Download factor
            self.__get_downloadvolumefactor(torrent)
            # Upload factor
            self.__get_uploadvolumefactor(torrent)
            # Publish date
            self.__get_pubdate(torrent)
            # Elapsed time
            self.__get_date_elapsed(torrent)
            # Free-until date
            self.__get_free_date(torrent)
            # Labels
            self.__get_labels(torrent)
            # HR
            self.__get_hit_and_run(torrent)
            # Category
            self.__get_category(torrent)
            # Return a copy; the working dict is cleared in the finally clause
            return self.torrents_info.copy() if self.torrents_info else {}
        except Exception as err:
            logger.error("%s 搜索出现错误:%s" % (self.indexername, str(err)))
            return {}
        finally:
            self.torrents_info.clear()

    @staticmethod
    def __filter_text(text: Optional[str], filters: Optional[List[dict]]) -> Optional[str]:
        """
        Apply the configured filters to a text value
        :param text: value to process; returned unchanged when empty or when there are no filters
        :param filters: list of filter dicts with "name" and optional "args"
        :return: filtered and stripped text
        """
        if not text or not filters or not isinstance(filters, list):
            return text
        if not isinstance(text, str):
            text = str(text)
        for filter_item in filters:
            if not text:
                break
            method_name = filter_item.get("name")
            try:
                args = filter_item.get("args")
                if method_name == "re_search" and isinstance(args, list):
                    rematch = re.search(r"%s" % args[0], text)
                    if rematch:
                        text = rematch.group(args[-1])
                elif method_name == "split" and isinstance(args, list):
                    text = text.split(r"%s" % args[0])[args[-1]]
                elif method_name == "replace" and isinstance(args, list):
                    text = text.replace(r"%s" % args[0], r"%s" % args[-1])
                elif method_name == "dateparse" and isinstance(args, str):
                    text = text.replace("\n", " ").strip()
                    # Keep the value a string (yyyy-mm-dd hh:mm:ss): a datetime object would
                    # break the following filters and the final strip()
                    text = datetime.datetime.strptime(text, r"%s" % args).strftime("%Y-%m-%d %H:%M:%S")
                elif method_name == "strip":
                    text = text.strip()
                elif method_name == "appendleft":
                    text = f"{args}{text}"
                elif method_name == "querystring":
                    parsed_url = urlparse(str(text))
                    query_params = parse_qs(parsed_url.query)
                    param_value = query_params.get(args)
                    text = param_value[0] if param_value else ''
            except Exception as err:
                logger.debug(f'过滤器 {method_name} 处理失败:{str(err)} - {traceback.format_exc()}')
        # A filter may leave a non-string behind (e.g. None from an unmatched optional group)
        return text.strip() if isinstance(text, str) else text

    @staticmethod
    def __remove(item: Any, selector: Optional[dict]):
        """
        Remove the elements listed in the selector's "remove" entry
        """
        if selector and "remove" in selector:
            removelist = selector.get('remove', '').split(', ')
            for v in removelist:
                item.remove(v)

    @staticmethod
    def __attribute_or_text(item: Any, selector: Optional[dict]) -> list:
        """
        Collect the configured attribute (or the text) of every matched element
        """
        if not selector:
            return item
        if not item:
            return []
        if 'attribute' in selector:
            items = [i.attr(selector.get('attribute')) for i in item.items() if i]
        else:
            items = [i.text() for i in item.items() if i]
        return items

    @staticmethod
    def __index(items: Optional[list], selector: Optional[dict]) -> Optional[str]:
        """
        Pick one value out of the extracted items
        :param items: extracted values
        :param selector: selector configuration; "contents" picks a line of the first item,
                         "index" picks an item by position
        :return: the chosen value, None when there are no items
        """
        if not items:
            return None
        if selector:
            if "contents" in selector \
                    and len(items) > int(selector.get("contents")):
                lines = items[0].split("\n")
                line_no = int(selector.get("contents"))
                # Fall back to the whole first item when it has fewer lines than requested
                item = lines[line_no] if -len(lines) <= line_no < len(lines) else items[0]
            elif "index" in selector \
                    and len(items) > int(selector.get("index")):
                item = items[int(selector.get("index"))]
            else:
                item = items[0]
        else:
            item = items[0]
        return item

    def parse(self, html_text: str) -> List[dict]:
        """
        Parse the whole page
        :param html_text: HTML text of the result page
        :return: list of torrent info dicts; [] (with is_error set) on empty input or failure
        """
        if not html_text:
            self.is_error = True
            return []

        # Drop previous results
        self.torrents_info_array = []
        html_doc = None
        try:
            # Parse the page
            html_doc = PyQuery(html_text)
            # Torrent row selector
            torrents_selector = self.list.get('selector', '')
            # Walk the torrent rows
            for i, torn in enumerate(html_doc(torrents_selector)):
                if i >= int(self.result_num):
                    break
                # Temporary PyQuery object for this row
                torrent_query = PyQuery(torn)
                try:
                    torrent_info = self.get_info(torrent_query)
                    if torrent_info:
                        self.torrents_info_array.append(torrent_info)
                finally:
                    # Explicitly release the temporary PyQuery object
                    torrent_query.clear()
                    del torrent_query
            # Return a copy so the cleanup below does not affect the caller
            return self.torrents_info_array.copy()
        except Exception as err:
            self.is_error = True
            logger.warn(f"错误:{self.indexername} {str(err)}")
            return []
        finally:
            # Release the result cache
            self.torrents_info_array.clear()
            # Release the HTML document object
            if html_doc is not None:
                html_doc.clear()
                del html_doc
            # Release the html_text reference
            del html_text

View File

@@ -104,20 +104,24 @@ class SubtitleModule(_ModuleBase):
logger.warn(f"读取页面代码失败:{torrent.page_url}")
return
html = etree.HTML(res.text)
sublink_list = []
for xpath in self._SITE_SUBTITLE_XPATH:
sublinks = html.xpath(xpath)
if sublinks:
for sublink in sublinks:
if not sublink:
continue
if not sublink.startswith("http"):
base_url = StringUtils.get_base_url(torrent.page_url)
if sublink.startswith("/"):
sublink = "%s%s" % (base_url, sublink)
else:
sublink = "%s/%s" % (base_url, sublink)
sublink_list.append(sublink)
try:
sublink_list = []
for xpath in self._SITE_SUBTITLE_XPATH:
sublinks = html.xpath(xpath)
if sublinks:
for sublink in sublinks:
if not sublink:
continue
if not sublink.startswith("http"):
base_url = StringUtils.get_base_url(torrent.page_url)
if sublink.startswith("/"):
sublink = "%s%s" % (base_url, sublink)
else:
sublink = "%s/%s" % (base_url, sublink)
sublink_list.append(sublink)
finally:
if html is not None:
del html
# 下载所有字幕文件
for sublink in sublink_list:
logger.info(f"找到字幕下载链接:{sublink},开始下载...")

View File

@@ -563,6 +563,9 @@ class TmdbApi:
except Exception as err:
logger.error(f"从TheDbMovie网站查询出错{str(err)}")
return {}
finally:
if html is not None:
del html
return {}
def get_info(self,

View File

@@ -25,7 +25,7 @@ class Transmission:
若不设置参数,则创建配置文件设置的下载器
"""
if host and port:
self._protocol, self._host, self._port = kwargs.get("protocol", self._protocol), host, port
self._protocol, self._host, self._port = kwargs.get("protocol", "http"), host, port
elif host:
result = UrlUtils.parse_url_params(url=host)
if result:

View File

@@ -111,7 +111,7 @@ class Api:
"_api_path",
"_request_utils",
"_version",
"_session"
"_session",
)
@property
@@ -287,6 +287,18 @@ class Api:
return True
return False
def task_running(self):
    """
    Ask the "/task/running" endpoint whether any task is currently running
    :return: True when the request succeeded and returned task data, otherwise False
    """
    res = self.__request_api("/task/running")
    if not res or not res.success:
        return False
    # TODO report which tasks are actually running
    return True if res.data else False
def __build_item(self, info: dict) -> Item:
"""
构造媒体Item

View File

@@ -111,6 +111,8 @@ class TrimeMedia:
if self._userinfo is None:
return False
logger.debug(f"{self._username} 成功登录飞牛影视")
# 刷新媒体库列表
self.get_librarys()
return True
def disconnect(self):
@@ -311,6 +313,8 @@ class TrimeMedia:
logger.error("飞牛仅支持管理员账号刷新媒体库")
return False
# 必须调用 否则容易误报 -14 Task duplicate
self._api.task_running()
logger.info("刷新所有媒体库")
return self._api.mdb_scanall()
@@ -337,6 +341,8 @@ class TrimeMedia:
# 媒体库去重
libraries.add(lib.guid)
# 必须调用 否则容易误报 -14 Task duplicate
self._api.task_running()
for lib_guid in libraries:
# 逐个刷新
lib = self._libraries[lib_guid]

View File

@@ -120,6 +120,71 @@ class Monitor(metaclass=Singleton):
except Exception as e:
logger.error(f"保存快照失败: {e}")
def reset_snapshot(self, storage: str) -> bool:
    """
    Reset the snapshot of a storage by deleting its cached snapshot file
    :param storage: storage name
    :return: True when the file was removed or did not exist, False when an error occurred
    """
    try:
        snapshot_file = self._snapshot_cache_dir / f"{storage}_snapshot.json"
        if not snapshot_file.exists():
            # Nothing cached yet, so there is nothing to reset
            logger.debug(f"快照文件不存在,无需重置: {storage}")
        else:
            snapshot_file.unlink()
            logger.info(f"快照已重置: {storage}")
        return True
    except Exception as e:
        logger.error(f"重置快照失败: {storage} - {e}")
        return False
def force_full_scan(self, storage: str, mon_path: Path) -> bool:
    """
    Take a full snapshot of the monitored path and handle every file in it
    (including files that already existed), then save the snapshot
    :param storage: storage name
    :param mon_path: monitored path
    :return: True when the scan finished, False when the snapshot failed or an error occurred
    """
    try:
        logger.info(f"开始强制全量扫描: {storage}:{mon_path}")

        # last_snapshot_time=0 requests a full (non-incremental) snapshot
        new_snapshot = StorageChain().snapshot_storage(
            storage=storage,
            path=mon_path,
            last_snapshot_time=0
        )
        if new_snapshot is None:
            logger.warn(f"获取 {storage}:{mon_path} 快照失败")
            return False

        file_count = len(new_snapshot)
        logger.info(f"{storage}:{mon_path} 全量扫描完成,发现 {file_count} 个文件")

        # Handle every file; a failure on one file does not stop the others
        processed_count = 0
        for file_path, file_info in new_snapshot.items():
            try:
                logger.info(f"处理文件:{file_path}")
                # A snapshot entry is either a dict carrying "size" or the size itself
                if isinstance(file_info, dict):
                    file_size = file_info.get('size', 0)
                else:
                    file_size = file_info
                self.__handle_file(storage=storage, event_path=Path(file_path), file_size=file_size)
                processed_count += 1
            except Exception as e:
                logger.error(f"处理文件 {file_path} 失败: {e}")

        logger.info(f"{storage}:{mon_path} 全量扫描完成,共处理 {processed_count}/{file_count} 个文件")

        # Persist the snapshot
        self.save_snapshot(storage, new_snapshot, file_count)
        return True
    except Exception as e:
        logger.error(f"强制全量扫描失败: {storage}:{mon_path} - {e}")
        return False
def load_snapshot(self, storage: str) -> Optional[Dict]:
"""
从文件加载快照
@@ -131,7 +196,9 @@ class Monitor(metaclass=Singleton):
if cache_file.exists():
with open(cache_file, 'r', encoding='utf-8') as f:
data = json.load(f)
logger.debug(f"成功加载快照: {cache_file}, 包含 {len(data.get('snapshot', {}))} 个文件")
return data
logger.debug(f"快照文件不存在: {cache_file}")
return None
except Exception as e:
logger.error(f"加载快照失败: {e}")
@@ -300,7 +367,7 @@ class Monitor(metaclass=Singleton):
:param limits: 系统限制信息
:return: (是否使用轮询, 原因)
"""
if monitor_mode == "compatible":
if monitor_mode == "compatibility":
return True, "用户配置为兼容模式"
# 检查网络文件系统
@@ -553,6 +620,9 @@ class Monitor(metaclass=Singleton):
old_snapshot = old_snapshot_data.get('snapshot', {}) if old_snapshot_data else {}
last_snapshot_time = old_snapshot_data.get('timestamp', 0) if old_snapshot_data else 0
# 判断是否为首次快照:检查快照文件是否存在且有效
is_first_snapshot = old_snapshot_data is None
# 生成新快照(增量模式)
new_snapshot = StorageChain().snapshot_storage(
storage=storage,
@@ -567,7 +637,7 @@ class Monitor(metaclass=Singleton):
file_count = len(new_snapshot)
logger.info(f"{storage}:{mon_path} 快照完成,发现 {file_count} 个文件")
if old_snapshot:
if not is_first_snapshot:
# 比较快照找出变化
changes = self.compare_snapshots(old_snapshot, new_snapshot)

View File

@@ -57,6 +57,8 @@ class MetaInfo(BaseModel):
audio_encode: Optional[str] = None
# 资源类型
edition: Optional[str] = None
# 流媒体平台
web_source: Optional[str] = None
# 应用的识别词信息
apply_words: Optional[List[str]] = None

View File

@@ -290,6 +290,7 @@ class StorageSchema(Enum):
U115 = "u115"
Rclone = "rclone"
Alist = "alist"
SMB = "smb"
# 模块类型

View File

@@ -80,6 +80,12 @@ class AutoCloseResponse:
for name, value in state.items():
setattr(self, name, value)
def __enter__(self):
    """
    Enter the context manager
    :return: this same object
    """
    return self
def __exit__(self, *args):
    """
    Leave the context manager by calling close(); the exception arguments are ignored
    and nothing is returned, so an exception raised inside the block still propagates
    """
    self.close()
class RequestUtils:
def __init__(self,

View File

@@ -13,27 +13,31 @@ class SiteUtils:
:return:
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
try:
if not StringUtils.is_valid_html_element(html):
return False
# 存在明显的密码输入框,说明未登录
if html.xpath("//input[@type='password']"):
return False
# 是否存在登出和用户面板等链接
xpaths = [
'//a[contains(@href, "logout")'
' or contains(@data-url, "logout")'
' or contains(@href, "mybonus") '
' or contains(@onclick, "logout")'
' or contains(@href, "usercp")'
' or contains(@lay-on, "logout")]',
'//form[contains(@action, "logout")]',
'//div[@class="user-info-side"]',
'//a[@id="myitem"]'
]
for xpath in xpaths:
if html.xpath(xpath):
return True
return False
# 存在明显的密码输入框,说明未登录
if html.xpath("//input[@type='password']"):
return False
# 是否存在登出和用户面板等链接
xpaths = [
'//a[contains(@href, "logout")'
' or contains(@data-url, "logout")'
' or contains(@href, "mybonus") '
' or contains(@onclick, "logout")'
' or contains(@href, "usercp")'
' or contains(@lay-on, "logout")]',
'//form[contains(@action, "logout")]',
'//div[@class="user-info-side"]',
'//a[@id="myitem"]'
]
for xpath in xpaths:
if html.xpath(xpath):
return True
return False
finally:
if html is not None:
del html
@classmethod
def is_checkin(cls, html_text: str) -> bool:
@@ -42,24 +46,27 @@ class SiteUtils:
:return True已签到 False未签到
"""
html = etree.HTML(html_text)
if not StringUtils.is_valid_html_element(html):
return False
# 站点签到支持的识别XPATH
xpaths = [
'//a[@id="signed"]',
'//a[contains(@href, "attendance")]',
'//a[contains(text(), "签到")]',
'//a/b[contains(text(), "签 到")]',
'//span[@id="sign_in"]/a',
'//a[contains(@href, "addbonus")]',
'//input[@class="dt_button"][contains(@value, "打卡")]',
'//a[contains(@href, "sign_in")]',
'//a[contains(@onclick, "do_signin")]',
'//a[@id="do-attendance"]',
'//shark-icon-button[@href="attendance.php"]'
]
for xpath in xpaths:
if html.xpath(xpath):
try:
if not StringUtils.is_valid_html_element(html):
return False
return True
# 站点签到支持的识别XPATH
xpaths = [
'//a[@id="signed"]',
'//a[contains(@href, "attendance")]',
'//a[contains(text(), "签到")]',
'//a/b[contains(text(), "签 到")]',
'//span[@id="sign_in"]/a',
'//a[contains(@href, "addbonus")]',
'//input[@class="dt_button"][contains(@value, "打卡")]',
'//a[contains(@href, "sign_in")]',
'//a[contains(@onclick, "do_signin")]',
'//a[@id="do-attendance"]',
'//shark-icon-button[@href="attendance.php"]'
]
for xpath in xpaths:
if html.xpath(xpath):
return False
return True
finally:
if html is not None:
del html

View File

@@ -445,6 +445,24 @@ class SystemUtils:
process_memory_percent = (process_memory / system_memory) * 100
return [process_memory, int(process_memory_percent)]
@staticmethod
def network_usage() -> List[int]:
    """
    Measure the network traffic over a one-second window
    :return: [bytes sent, bytes received] during that second
    """
    import time

    # Two counter samples taken one second apart; their difference is the per-second rate
    before = psutil.net_io_counters()
    time.sleep(1)
    after = psutil.net_io_counters()
    return [after.bytes_sent - before.bytes_sent,
            after.bytes_recv - before.bytes_recv]
@staticmethod
def is_hardlink(src: Path, dest: Path) -> bool:
"""

View File

@@ -131,9 +131,9 @@ function load_config_from_app_env() {
# (例如 envsubst, mp_update.sh, cert.sh)
if declare -gx "${var_name}=${final_value}"; then
if [ -z "${final_value}" ]; then
INFO "变量 ${var_name}, 值为空, 来源: ${value_source})。"
INFO "变量 ${var_name}, 值为空 (来源: ${value_source})。"
else
INFO "变量 ${var_name}, 值: ${final_value} , (来源: ${value_source})。"
INFO "变量 ${var_name}, 值: ${final_value} (来源: ${value_source})。"
fi
# 如果变量不是来自初始环境变量,则记录下来以便稍后 unset
@@ -151,7 +151,7 @@ function load_config_from_app_env() {
fi
fi
else
ERROR "导出变量 ${var_name} (值: '${final_value}', 来源: ${value_source}) 失败。"
ERROR "导出变量 ${var_name}, 值: '${final_value}'失败 (来源: ${value_source}) "
fi
done

View File

@@ -25,7 +25,7 @@ pytz~=2025.2
pycryptodome~=3.23.0
qbittorrent-api==2025.5.0
plexapi~=4.17.0
transmission-rpc~=7.0.11
transmission-rpc~=4.3.0
Jinja2~=3.1.6
pyparsing~=3.2.3
func_timeout==4.3.5
@@ -46,7 +46,6 @@ psutil~=7.0.0
python-dotenv~=1.1.1
python-hosts~=1.1.2
watchdog~=6.0.0
openai~=1.92.2
cacheout~=0.16.0
click~=8.2.1
requests-cache~=1.2.1
@@ -70,3 +69,5 @@ oss2~=2.19.1
tqdm~=4.67.1
setuptools~=78.1.0
pympler~=1.1
smbprotocol~=1.15.0
setproctitle~=1.3.6

View File

@@ -1,2 +1,2 @@
APP_VERSION = 'v2.5.9-4'
FRONTEND_VERSION = 'v2.5.9-1'
APP_VERSION = 'v2.6.3'
FRONTEND_VERSION = 'v2.6.3'