fix 索引乱码问题

This commit is contained in:
jxxghp
2023-06-11 18:58:29 +08:00
parent db4b9fcef8
commit 1e82ecc16c
3 changed files with 24 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ import re
from typing import List
from urllib.parse import quote, urlencode
import chardet
from jinja2 import Template
from pyquery import PyQuery
from ruamel.yaml import CommentedMap
@@ -216,6 +217,7 @@ class TorrentSpider:
logger.info(f"开始请求:{searchurl}")
if self.render:
# 浏览器仿真
page_source = PlaywrightHelper().get_page_source(
url=searchurl,
cookies=self.cookie,
@@ -223,6 +225,7 @@ class TorrentSpider:
proxy=self.proxies
)
else:
# requests请求
ret = RequestUtils(
ua=self.ua,
cookies=self.cookie,
@@ -230,8 +233,16 @@ class TorrentSpider:
referer=self.referer,
proxies=self.proxies
).get_res(searchurl, allow_redirects=True)
page_source = ret.text if ret else None
# 使用chardet检测字符编码
raw_data = ret.content
if raw_data:
result = chardet.detect(raw_data)
encoding = result['encoding']
# 解码为字符串
page_source = raw_data.decode(encoding)
else:
page_source = ""
# 解析
return self.parse(page_source)