diff --git a/app/core/config.py b/app/core/config.py index 0671eb31..ee8b08e0 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -219,8 +219,8 @@ class ConfigModel(BaseModel): BIG_MEMORY_MODE: bool = False # 全局图片缓存,将媒体图片缓存到本地 GLOBAL_IMAGE_CACHE: bool = False - # 是否启用编码探测的兼容模式 - ENCODING_DETECTION_COMPATIBLE_MODE: bool = True + # 是否启用编码探测的性能模式 + ENCODING_DETECTION_PERFORMANCE_MODE: bool = False # 编码探测的最低置信度阈值 ENCODING_DETECTION_MIN_CONFIDENCE: float = 0.8 # 允许的图片缓存域名 diff --git a/app/modules/indexer/parser/__init__.py b/app/modules/indexer/parser/__init__.py index 9feb1c1f..aeee3f95 100644 --- a/app/modules/indexer/parser/__init__.py +++ b/app/modules/indexer/parser/__init__.py @@ -345,7 +345,7 @@ class SiteParserBase(metaclass=ABCMeta): f"{self._site_name} 检测到Cloudflare,请更新Cookie和UA") return "" return RequestUtils.get_decoded_html_content(res, - settings.ENCODING_DETECTION_COMPATIBLE_MODE, + settings.ENCODING_DETECTION_PERFORMANCE_MODE, settings.ENCODING_DETECTION_MIN_CONFIDENCE) return "" diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index 34c23a6d..5f67d9c6 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -250,7 +250,7 @@ class TorrentSpider: proxies=self.proxies ).get_res(searchurl, allow_redirects=True) page_source = RequestUtils.get_decoded_html_content(ret, - settings.ENCODING_DETECTION_COMPATIBLE_MODE, + settings.ENCODING_DETECTION_PERFORMANCE_MODE, settings.ENCODING_DETECTION_MIN_CONFIDENCE) # 解析 diff --git a/app/utils/http.py b/app/utils/http.py index 98f75c56..61532f34 100644 --- a/app/utils/http.py +++ b/app/utils/http.py @@ -278,18 +278,18 @@ class RequestUtils: @staticmethod def detect_encoding_from_html_response(response: Response, - compatible_mode: bool = False, confidence_threshold: float = 0.8): + performance_mode: bool = False, confidence_threshold: float = 0.8): """ 根据HTML响应内容探测编码信息 :param response: HTTP 响应对象 - :param compatible_mode: 是否使用兼容模式,默认为 False (性能模式) + :param performance_mode: 是否使用性能模式,默认为 False (兼容模式) :param confidence_threshold: chardet 检测置信度阈值,默认为 0.8 :return: 解析得到的字符编码 """ fallback_encoding = None try: - if compatible_mode: + if not performance_mode: # 兼容模式:使用chardet分析后,再处理 BOM 和 meta 信息 # 1. 使用 chardet 库进一步分析内容 detection = chardet.detect(response.content) @@ -349,12 +349,12 @@ class RequestUtils: @staticmethod def get_decoded_html_content(response: Response, - compatible_mode: bool = False, confidence_threshold: float = 0.8) -> str: + performance_mode: bool = False, confidence_threshold: float = 0.8) -> str: """ 获取HTML响应的解码文本内容 :param response: HTTP 响应对象 - :param compatible_mode: 是否使用兼容模式,默认为 False (性能模式) + :param performance_mode: 是否使用性能模式,默认为 False (兼容模式) :param confidence_threshold: chardet 检测置信度阈值,默认为 0.8 :return: 解码后的响应文本内容 """ @@ -363,7 +363,7 @@ class RequestUtils: return "" if response.content: # 1. 获取编码信息 - encoding = (RequestUtils.detect_encoding_from_html_response(response, compatible_mode, + encoding = (RequestUtils.detect_encoding_from_html_response(response, performance_mode, confidence_threshold) or response.apparent_encoding) # 2. 根据解析得到的编码进行解码