mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-06 20:42:43 +08:00
fix: adapt audiences user data parser
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from lxml import etree
|
||||
@@ -11,6 +13,121 @@ from app.utils.string import StringUtils
|
||||
class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
|
||||
schema = SiteSchema.NexusAudiences
|
||||
|
||||
def _parse_user_traffic_info(self, html_text):
|
||||
"""
|
||||
解析用户流量信息
|
||||
"""
|
||||
super()._parse_user_traffic_info(html_text)
|
||||
self.__parse_userbar_info(html_text)
|
||||
|
||||
def _parse_user_detail_info(self, html_text: str):
|
||||
"""
|
||||
解析用户额外信息
|
||||
"""
|
||||
super()._parse_user_detail_info(html_text)
|
||||
self.__parse_userbar_info(html_text)
|
||||
|
||||
def __parse_userbar_info(self, html_text: str):
|
||||
"""
|
||||
解析 Audiences 新版顶部用户栏,覆盖 NexusPHP 通用正则的误判。
|
||||
"""
|
||||
html = etree.HTML(html_text)
|
||||
try:
|
||||
if not StringUtils.is_valid_html_element(html):
|
||||
return
|
||||
|
||||
for user_node in html.xpath('//*[@data-uploader-url or @data-uploader-stats]'):
|
||||
self.__parse_user_identity(user_node)
|
||||
self.__parse_uploader_stats(user_node.get("data-uploader-stats"))
|
||||
|
||||
# data-uploader-stats 不包含分享率,需从 compact metric 的 class 中读取。
|
||||
self.__parse_compact_metric(html, "ratio", "ratio")
|
||||
self.__parse_compact_metric(html, "uploaded", "upload")
|
||||
self.__parse_compact_metric(html, "downloaded", "download")
|
||||
self.__parse_compact_metric(html, "bonus", "bonus")
|
||||
self.__parse_compact_metric(html, "active", "active")
|
||||
finally:
|
||||
if html is not None:
|
||||
del html
|
||||
|
||||
def __parse_user_identity(self, user_node):
|
||||
"""
|
||||
从新版用户卡属性中提取用户 ID、用户名和等级。
|
||||
"""
|
||||
user_url = user_node.get("data-uploader-url") or ""
|
||||
user_detail = re.search(r"userdetails\.php\?id=(\d+)", user_url)
|
||||
if user_detail and user_detail.group(1).strip():
|
||||
self.userid = user_detail.group(1).strip()
|
||||
|
||||
username = user_node.get("data-uploader-label")
|
||||
if username and username.strip():
|
||||
self.username = username.strip()
|
||||
|
||||
user_level = user_node.get("data-uploader-badge")
|
||||
if user_level and user_level.strip():
|
||||
self.user_level = user_level.strip()
|
||||
|
||||
def __parse_uploader_stats(self, stats_text: str):
|
||||
"""
|
||||
解析 data-uploader-stats 中的结构化流量数据。
|
||||
"""
|
||||
if not stats_text:
|
||||
return
|
||||
|
||||
try:
|
||||
stats = json.loads(stats_text)
|
||||
except (TypeError, ValueError):
|
||||
return
|
||||
|
||||
if not isinstance(stats, list):
|
||||
return
|
||||
|
||||
for item in stats:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
label = str(item.get("label") or "").strip(" ::")
|
||||
tone = str(item.get("tone") or "").strip()
|
||||
value = str(item.get("value") or "").strip()
|
||||
self.__set_metric_value(label=label, tone=tone, value=value)
|
||||
|
||||
def __parse_compact_metric(self, html, metric: str, field: str):
|
||||
"""
|
||||
按 compact metric 的 class 读取新版用户栏中的单项数据。
|
||||
"""
|
||||
values = html.xpath(
|
||||
f'//*[contains(concat(" ", normalize-space(@class), " "), " site-userbar__compact-metric--{metric} ")]'
|
||||
'//span[normalize-space()][last()]/text()'
|
||||
)
|
||||
if not values:
|
||||
values = html.xpath(
|
||||
f'//*[contains(concat(" ", normalize-space(@class), " "), " site-userbar__compact-metric--{metric} ")]'
|
||||
'/text()'
|
||||
)
|
||||
if values:
|
||||
self.__set_metric_value(field=field, value=values[-1].strip())
|
||||
|
||||
def __set_metric_value(self, value: str, label: str = None, tone: str = None, field: str = None):
|
||||
"""
|
||||
将 Audiences 用户栏指标写入通用用户数据字段。
|
||||
"""
|
||||
if not value:
|
||||
return
|
||||
|
||||
metric_key = field or tone or label
|
||||
if metric_key in {"uploaded", "上传量", "upload"}:
|
||||
self.upload = StringUtils.num_filesize(value)
|
||||
elif metric_key in {"downloaded", "下载量", "download"}:
|
||||
self.download = StringUtils.num_filesize(value)
|
||||
elif metric_key in {"bonus", "爆米花"}:
|
||||
self.bonus = StringUtils.str_float(value)
|
||||
elif metric_key == "ratio":
|
||||
self.ratio = StringUtils.str_float(value)
|
||||
elif metric_key in {"active", "活跃"}:
|
||||
active_match = re.search(r"↑\s*(\d+)\s*/\s*↓\s*(\d+)", value)
|
||||
if active_match:
|
||||
self.seeding = StringUtils.str_int(active_match.group(1))
|
||||
self.leeching = StringUtils.str_int(active_match.group(2))
|
||||
|
||||
def _parse_seeding_pages(self):
|
||||
if not self._torrent_seeding_page:
|
||||
return
|
||||
|
||||
46
tests/test_nexus_audiences_parser.py
Normal file
46
tests/test_nexus_audiences_parser.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from app.modules.indexer.parser.nexus_audiences import NexusAudiencesSiteUserInfo
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
|
||||
def test_audiences_userbar_metrics_override_generic_nexus_regex():
|
||||
parser = NexusAudiencesSiteUserInfo(
|
||||
site_name="Audiences",
|
||||
url="https://audiences.me/",
|
||||
site_cookie="",
|
||||
apikey=None,
|
||||
token=None,
|
||||
)
|
||||
html_text = """
|
||||
<html>
|
||||
<body>
|
||||
<div
|
||||
data-uploader-label="jxxghp"
|
||||
data-uploader-url="userdetails.php?id=18978"
|
||||
data-uploader-badge="(江湖儿女)Elite User"
|
||||
data-uploader-stats='[
|
||||
{"label":"上传量:","value":"10.150 TB","tone":"uploaded"},
|
||||
{"label":"爆米花:","value":"1,973,896.2","tone":"bonus"},
|
||||
{"label":"下载量:","value":"3.624 TB","tone":"downloaded"},
|
||||
{"label":"活跃","value":"↑ 355 / ↓ 7","tone":"active"}
|
||||
]'>
|
||||
</div>
|
||||
<span class="site-userbar__compact-metric site-userbar__compact-metric--ratio">
|
||||
<i></i><span>2.801</span>
|
||||
</span>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
# Audiences 新版用户栏把流量数据放在 data 属性中,通用 NexusPHP 正则无法稳定识别。
|
||||
parser._parse_user_traffic_info(html_text)
|
||||
|
||||
assert parser.userid == "18978"
|
||||
assert parser.username == "jxxghp"
|
||||
assert parser.user_level == "(江湖儿女)Elite User"
|
||||
assert parser.upload == StringUtils.num_filesize("10.150 TB")
|
||||
assert parser.download == StringUtils.num_filesize("3.624 TB")
|
||||
assert parser.ratio == 2.801
|
||||
assert parser.bonus == 1973896.2
|
||||
assert parser.seeding == 355
|
||||
assert parser.leeching == 7
|
||||
Reference in New Issue
Block a user