mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-21 08:09:57 +08:00
feat(encoding): update configuration to performance mode
This commit is contained in:
@@ -219,8 +219,8 @@ class ConfigModel(BaseModel):
 BIG_MEMORY_MODE: bool = False
 # 全局图片缓存,将媒体图片缓存到本地
 GLOBAL_IMAGE_CACHE: bool = False
-# 是否启用编码探测的兼容模式
-ENCODING_DETECTION_COMPATIBLE_MODE: bool = True
+# 是否启用编码探测的性能模式
+ENCODING_DETECTION_PERFORMANCE_MODE: bool = False
 # 编码探测的最低置信度阈值
 ENCODING_DETECTION_MIN_CONFIDENCE: float = 0.8
 # 允许的图片缓存域名
@@ -345,7 +345,7 @@ class SiteParserBase(metaclass=ABCMeta):
 f"{self._site_name} 检测到Cloudflare,请更新Cookie和UA")
 return ""
 return RequestUtils.get_decoded_html_content(res,
-settings.ENCODING_DETECTION_COMPATIBLE_MODE,
+settings.ENCODING_DETECTION_PERFORMANCE_MODE,
 settings.ENCODING_DETECTION_MIN_CONFIDENCE)
 
 return ""
@@ -250,7 +250,7 @@ class TorrentSpider:
 proxies=self.proxies
 ).get_res(searchurl, allow_redirects=True)
 page_source = RequestUtils.get_decoded_html_content(ret,
-settings.ENCODING_DETECTION_COMPATIBLE_MODE,
+settings.ENCODING_DETECTION_PERFORMANCE_MODE,
 settings.ENCODING_DETECTION_MIN_CONFIDENCE)
 
 # 解析
@@ -278,18 +278,18 @@ class RequestUtils:
 
 @staticmethod
 def detect_encoding_from_html_response(response: Response,
-compatible_mode: bool = False, confidence_threshold: float = 0.8):
+performance_mode: bool = False, confidence_threshold: float = 0.8):
 """
 根据HTML响应内容探测编码信息
 
 :param response: HTTP 响应对象
-:param compatible_mode: 是否使用兼容模式,默认为 False (性能模式)
+:param performance_mode: 是否使用性能模式,默认为 False (兼容模式)
 :param confidence_threshold: chardet 检测置信度阈值,默认为 0.8
 :return: 解析得到的字符编码
 """
 fallback_encoding = None
 try:
-if compatible_mode:
+if not performance_mode:
 # 兼容模式:使用chardet分析后,再处理 BOM 和 meta 信息
 # 1. 使用 chardet 库进一步分析内容
 detection = chardet.detect(response.content)
@@ -349,12 +349,12 @@ class RequestUtils:
 
 @staticmethod
 def get_decoded_html_content(response: Response,
-compatible_mode: bool = False, confidence_threshold: float = 0.8) -> str:
+performance_mode: bool = False, confidence_threshold: float = 0.8) -> str:
 """
 获取HTML响应的解码文本内容
 
 :param response: HTTP 响应对象
-:param compatible_mode: 是否使用兼容模式,默认为 False (性能模式)
+:param performance_mode: 是否使用性能模式,默认为 False (兼容模式)
 :param confidence_threshold: chardet 检测置信度阈值,默认为 0.8
 :return: 解码后的响应文本内容
 """
@@ -363,7 +363,7 @@ class RequestUtils:
 return ""
 if response.content:
 # 1. 获取编码信息
-encoding = (RequestUtils.detect_encoding_from_html_response(response, compatible_mode,
+encoding = (RequestUtils.detect_encoding_from_html_response(response, performance_mode,
 confidence_threshold)
 or response.apparent_encoding)
 # 2. 根据解析得到的编码进行解码
Reference in New Issue
Block a user