feat(encoding): replace the compatible-mode setting with an inverted performance-mode flag

This commit is contained in:
InfinityPacer
2024-11-27 13:52:17 +08:00
parent 916597047d
commit 2e7e74c803
4 changed files with 10 additions and 10 deletions

View File

@@ -219,8 +219,8 @@ class ConfigModel(BaseModel):
BIG_MEMORY_MODE: bool = False
# 全局图片缓存,将媒体图片缓存到本地
GLOBAL_IMAGE_CACHE: bool = False
# 是否启用编码探测的兼容模式
ENCODING_DETECTION_COMPATIBLE_MODE: bool = True
# 是否启用编码探测的性能模式
ENCODING_DETECTION_PERFORMANCE_MODE: bool = False
# 编码探测的最低置信度阈值
ENCODING_DETECTION_MIN_CONFIDENCE: float = 0.8
# 允许的图片缓存域名

View File

@@ -345,7 +345,7 @@ class SiteParserBase(metaclass=ABCMeta):
f"{self._site_name} 检测到Cloudflare请更新Cookie和UA")
return ""
return RequestUtils.get_decoded_html_content(res,
settings.ENCODING_DETECTION_COMPATIBLE_MODE,
settings.ENCODING_DETECTION_PERFORMANCE_MODE,
settings.ENCODING_DETECTION_MIN_CONFIDENCE)
return ""

View File

@@ -250,7 +250,7 @@ class TorrentSpider:
proxies=self.proxies
).get_res(searchurl, allow_redirects=True)
page_source = RequestUtils.get_decoded_html_content(ret,
settings.ENCODING_DETECTION_COMPATIBLE_MODE,
settings.ENCODING_DETECTION_PERFORMANCE_MODE,
settings.ENCODING_DETECTION_MIN_CONFIDENCE)
# 解析

View File

@@ -278,18 +278,18 @@ class RequestUtils:
@staticmethod
def detect_encoding_from_html_response(response: Response,
compatible_mode: bool = False, confidence_threshold: float = 0.8):
performance_mode: bool = False, confidence_threshold: float = 0.8):
"""
根据HTML响应内容探测编码信息
:param response: HTTP 响应对象
:param compatible_mode: 是否使用兼容模式,默认为 False (性能模式)
:param performance_mode: 是否使用性能模式,默认为 False (兼容模式)
:param confidence_threshold: chardet 检测置信度阈值,默认为 0.8
:return: 解析得到的字符编码
"""
fallback_encoding = None
try:
if compatible_mode:
if not performance_mode:
# 兼容模式使用chardet分析后再处理 BOM 和 meta 信息
# 1. 使用 chardet 库进一步分析内容
detection = chardet.detect(response.content)
@@ -349,12 +349,12 @@ class RequestUtils:
@staticmethod
def get_decoded_html_content(response: Response,
compatible_mode: bool = False, confidence_threshold: float = 0.8) -> str:
performance_mode: bool = False, confidence_threshold: float = 0.8) -> str:
"""
获取HTML响应的解码文本内容
:param response: HTTP 响应对象
:param compatible_mode: 是否使用兼容模式,默认为 False (性能模式)
:param performance_mode: 是否使用性能模式,默认为 False (兼容模式)
:param confidence_threshold: chardet 检测置信度阈值,默认为 0.8
:return: 解码后的响应文本内容
"""
@@ -363,7 +363,7 @@ class RequestUtils:
return ""
if response.content:
# 1. 获取编码信息
encoding = (RequestUtils.detect_encoding_from_html_response(response, compatible_mode,
encoding = (RequestUtils.detect_encoding_from_html_response(response, performance_mode,
confidence_threshold)
or response.apparent_encoding)
# 2. 根据解析得到的编码进行解码