fix 索引乱码问题

This commit is contained in:
jxxghp 2023-06-11 18:58:29 +08:00
parent db4b9fcef8
commit 1e82ecc16c
3 changed files with 24 additions and 10 deletions

View File

@ -4,6 +4,7 @@ import re
from typing import List
from urllib.parse import quote, urlencode
import chardet
from jinja2 import Template
from pyquery import PyQuery
from ruamel.yaml import CommentedMap
@ -216,6 +217,7 @@ class TorrentSpider:
logger.info(f"开始请求:{searchurl}")
if self.render:
# 浏览器仿真
page_source = PlaywrightHelper().get_page_source(
url=searchurl,
cookies=self.cookie,
@ -223,6 +225,7 @@ class TorrentSpider:
proxy=self.proxies
)
else:
# requests请求
ret = RequestUtils(
ua=self.ua,
cookies=self.cookie,
@ -231,7 +234,15 @@ class TorrentSpider:
proxies=self.proxies
).get_res(searchurl, allow_redirects=True)
page_source = ret.text if ret else None
# Detect the character encoding from the raw response bytes with chardet.
# get_res() is declared to return Optional[Response], so guard against None
# before touching .content. Compare with `is not None` rather than truthiness:
# a Response with an error status is falsy but still carries a body.
raw_data = ret.content if ret is not None else None
if raw_data:
    # chardet reports encoding=None when it cannot decide; fall back to UTF-8
    encoding = chardet.detect(raw_data).get("encoding") or "utf-8"
    try:
        # Decode the bytes into text
        page_source = raw_data.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # Wrong guess, or a codec name Python does not know: let requests decode
        page_source = ret.text
else:
    # No response at all, or an empty body
    page_source = ""
# 解析
return self.parse(page_source)

View File

@ -1,8 +1,8 @@
from typing import Union, Any
from typing import Union, Any, Optional
import requests
import urllib3
from requests import Session
from requests import Session, Response
from urllib3.exceptions import InsecureRequestWarning
urllib3.disable_warnings(InsecureRequestWarning)
@ -48,7 +48,7 @@ class RequestUtils:
if timeout:
self._timeout = timeout
def post(self, url: str, data: Any = None, json: dict = None):
def post(self, url: str, data: Any = None, json: dict = None) -> Optional[Response]:
if json is None:
json = {}
try:
@ -71,7 +71,7 @@ class RequestUtils:
except requests.exceptions.RequestException:
return None
def get(self, url: str, params: dict = None):
def get(self, url: str, params: dict = None) -> Optional[str]:
try:
if self._session:
r = self._session.get(url,
@ -91,7 +91,8 @@ class RequestUtils:
except requests.exceptions.RequestException:
return None
def get_res(self, url: str, params: dict = None, allow_redirects: bool = True, raise_exception: bool = False):
def get_res(self, url: str, params: dict = None,
allow_redirects: bool = True, raise_exception: bool = False) -> Optional[Response]:
try:
if self._session:
return self._session.get(url,
@ -116,9 +117,10 @@ class RequestUtils:
raise requests.exceptions.RequestException
return None
def post_res(self, url: str, data: Any = None, params: dict = None, allow_redirects: bool = True,
def post_res(self, url: str, data: Any = None, params: dict = None,
allow_redirects: bool = True,
files: Any = None,
json: dict = None):
json: dict = None) -> Optional[Response]:
try:
if self._session:
return self._session.post(url,
@ -148,7 +150,7 @@ class RequestUtils:
return None
@staticmethod
def cookie_parse(cookies_str: str, array: bool = False):
def cookie_parse(cookies_str: str, array: bool = False) -> dict:
"""
解析cookie转化为字典或者数组
:param cookies_str: cookie字符串

View File

@ -38,3 +38,4 @@ cf_clearance~=0.29.2
torrentool~=1.2.0
slack_bolt~=1.18.0
slack_sdk~=3.21.3
chardet~=4.0.0