fix site parser

This commit is contained in:
jxxghp
2025-06-27 12:26:17 +08:00
parent a9f18ea3ef
commit 23c9862eb3
3 changed files with 35 additions and 24 deletions

View File

@@ -286,26 +286,29 @@ class IndexerModule(_ModuleBase):
return None
# 获取用户数据
logger.info(f"站点 {site.get('name')} 开始以 {site.get('schema')} 模型解析数据...")
site_obj.parse()
logger.debug(f"站点 {site.get('name')} 数据解析完成")
return SiteUserData(
domain=StringUtils.get_url_domain(site.get("url")),
userid=site_obj.userid,
username=site_obj.username,
user_level=site_obj.user_level,
join_at=site_obj.join_at,
upload=site_obj.upload,
download=site_obj.download,
ratio=site_obj.ratio,
bonus=site_obj.bonus,
seeding=site_obj.seeding,
seeding_size=site_obj.seeding_size,
seeding_info=site_obj.seeding_info or [],
leeching=site_obj.leeching,
leeching_size=site_obj.leeching_size,
message_unread=site_obj.message_unread,
message_unread_contents=site_obj.message_unread_contents or [],
updated_day=datetime.now().strftime('%Y-%m-%d'),
err_msg=site_obj.err_msg
)
try:
logger.info(f"站点 {site.get('name')} 开始以 {site.get('schema')} 模型解析数据...")
site_obj.parse()
logger.debug(f"站点 {site.get('name')} 数据解析完成")
return SiteUserData(
domain=StringUtils.get_url_domain(site.get("url")),
userid=site_obj.userid,
username=site_obj.username,
user_level=site_obj.user_level,
join_at=site_obj.join_at,
upload=site_obj.upload,
download=site_obj.download,
ratio=site_obj.ratio,
bonus=site_obj.bonus,
seeding=site_obj.seeding,
seeding_size=site_obj.seeding_size,
seeding_info=site_obj.seeding_info or [],
leeching=site_obj.leeching,
leeching_size=site_obj.leeching_size,
message_unread=site_obj.message_unread,
message_unread_contents=site_obj.message_unread_contents or [],
updated_day=datetime.now().strftime('%Y-%m-%d'),
err_msg=site_obj.err_msg
)
finally:
site_obj.clear()

View File

@@ -442,6 +442,14 @@ class SiteParserBase(metaclass=ABCMeta):
self._session.close()
self._session = None
def clear(self):
    """
    Release the data held by this parser.

    Resets the cached index HTML to an empty string and drops the parser's
    references to the seeding and unread-message lists.

    The lists are rebound to fresh empty lists instead of being emptied in
    place. A caller may already have handed the very same list objects to
    another object before invoking ``clear()``; an in-place ``list.clear()``
    would empty that data too. Rebinding also works when an attribute is
    currently ``None``, where ``None.clear()`` would raise ``AttributeError``.
    """
    self._index_html = ""
    # Rebind rather than mutate: any list object shared with a consumer
    # stays intact, while this parser no longer keeps it alive.
    self.seeding_info = []
    self.message_unread_contents = []
def to_dict(self):
"""
转化为字典

View File

@@ -468,7 +468,7 @@ class RequestUtils:
return fallback_encoding or "utf-8"
@staticmethod
def get_decoded_html_content(response: Response,
def get_decoded_html_content(response: Union[Response, AutoCloseResponse],
performance_mode: bool = False, confidence_threshold: float = 0.8) -> str:
"""
获取HTML响应的解码文本内容