diff --git a/app/modules/indexer/spider.py b/app/modules/indexer/spider.py index ee1b2ea9..0b4ae684 100644 --- a/app/modules/indexer/spider.py +++ b/app/modules/indexer/spider.py @@ -262,7 +262,12 @@ class TorrentSpider: # 解码为字符串 page_source = raw_data.decode(encoding) except Exception as e: - logger.error(f"chardet解码失败:{e}") + logger.debug(f"chardet解码失败:{e}") + # 探测utf-8解码 + if re.search(r"charset=\"?utf-8\"?", ret.text, re.IGNORECASE): + ret.encoding = "utf-8" + else: + ret.encoding = ret.apparent_encoding page_source = ret.text else: page_source = ret.text diff --git a/app/plugins/sitestatistic/__init__.py b/app/plugins/sitestatistic/__init__.py index 5d7426de..5ff3b870 100644 --- a/app/plugins/sitestatistic/__init__.py +++ b/app/plugins/sitestatistic/__init__.py @@ -1,3 +1,4 @@ +import re import warnings from datetime import datetime, timedelta from multiprocessing.dummy import Pool as ThreadPool @@ -853,8 +854,8 @@ class SiteStatistic(_PluginBase): proxies=proxies ).get_res(url=url) if res and res.status_code == 200: - if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: - res.encoding = "UTF-8" + if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE): + res.encoding = "utf-8" else: res.encoding = res.apparent_encoding html_text = res.text @@ -893,8 +894,8 @@ class SiteStatistic(_PluginBase): proxies=proxies ).get_res(url=url + "/index.php") if res and res.status_code == 200: - if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: - res.encoding = "UTF-8" + if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE): + res.encoding = "utf-8" else: res.encoding = res.apparent_encoding html_text = res.text diff --git a/app/plugins/sitestatistic/siteuserinfo/__init__.py b/app/plugins/sitestatistic/siteuserinfo/__init__.py index ada4c657..fbeff1ae 100644 --- a/app/plugins/sitestatistic/siteuserinfo/__init__.py +++ b/app/plugins/sitestatistic/siteuserinfo/__init__.py @@ -247,8 +247,8 @@ class ISiteUserInfo(metaclass=ABCMeta): logger.warn( f"{self.site_name} 检测到Cloudflare,请更新Cookie和UA") return "" - if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: - res.encoding = "UTF-8" + if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE): + res.encoding = "utf-8" else: res.encoding = res.apparent_encoding return res.text