feat(encoding): enhance encoding detection with confidence threshold

This commit is contained in:
InfinityPacer
2024-11-27 12:33:57 +08:00
parent 2086651dbe
commit 83fc474dbe
4 changed files with 117 additions and 27 deletions

View File

@@ -344,11 +344,9 @@ class SiteParserBase(metaclass=ABCMeta):
logger.warn(
f"{self._site_name} 检测到Cloudflare请更新Cookie和UA")
return ""
-if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
-res.encoding = "utf-8"
-else:
-res.encoding = res.apparent_encoding
-return res.text
+return RequestUtils.get_decoded_html_content(res,
+settings.ENCODING_DETECTION_COMPATIBLE_MODE,
+settings.ENCODING_DETECTION_MIN_CONFIDENCE)
return ""