fix #356 猫站数据统计问题

This commit is contained in:
jxxghp 2023-09-02 07:57:44 +08:00
parent 3c753686c6
commit ec8c9c996a
3 changed files with 13 additions and 7 deletions

View File

@@ -262,7 +262,12 @@ class TorrentSpider:
 # 解码为字符串
 page_source = raw_data.decode(encoding)
 except Exception as e:
-logger.error(f"chardet解码失败{e}")
+logger.debug(f"chardet解码失败{e}")
+# 探测utf-8解码
+if re.search(r"charset=\"?utf-8\"?", ret.text, re.IGNORECASE):
+ret.encoding = "utf-8"
+else:
+ret.encoding = ret.apparent_encoding
 page_source = ret.text
 else:
 page_source = ret.text

View File

@@ -1,3 +1,4 @@
+import re
 import warnings
 from datetime import datetime, timedelta
 from multiprocessing.dummy import Pool as ThreadPool
@@ -853,8 +854,8 @@ class SiteStatistic(_PluginBase):
 proxies=proxies
 ).get_res(url=url)
 if res and res.status_code == 200:
-if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
-res.encoding = "UTF-8"
+if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
+res.encoding = "utf-8"
 else:
 res.encoding = res.apparent_encoding
 html_text = res.text
@@ -893,8 +894,8 @@ class SiteStatistic(_PluginBase):
 proxies=proxies
 ).get_res(url=url + "/index.php")
 if res and res.status_code == 200:
-if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
-res.encoding = "UTF-8"
+if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
+res.encoding = "utf-8"
 else:
 res.encoding = res.apparent_encoding
 html_text = res.text

View File

@@ -247,8 +247,8 @@ class ISiteUserInfo(metaclass=ABCMeta):
 logger.warn(
 f"{self.site_name} 检测到Cloudflare请更新Cookie和UA")
 return ""
-if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
-res.encoding = "UTF-8"
+if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
+res.encoding = "utf-8"
 else:
 res.encoding = res.apparent_encoding
 return res.text