fix 索引乱码问题(使用 chardet 检测响应内容的字符编码后再解码)
This commit is contained in:
parent
db4b9fcef8
commit
1e82ecc16c
@ -4,6 +4,7 @@ import re
|
||||
from typing import List
|
||||
from urllib.parse import quote, urlencode
|
||||
|
||||
import chardet
|
||||
from jinja2 import Template
|
||||
from pyquery import PyQuery
|
||||
from ruamel.yaml import CommentedMap
|
||||
@ -216,6 +217,7 @@ class TorrentSpider:
|
||||
logger.info(f"开始请求:{searchurl}")
|
||||
|
||||
if self.render:
|
||||
# 浏览器仿真
|
||||
page_source = PlaywrightHelper().get_page_source(
|
||||
url=searchurl,
|
||||
cookies=self.cookie,
|
||||
@ -223,6 +225,7 @@ class TorrentSpider:
|
||||
proxy=self.proxies
|
||||
)
|
||||
else:
|
||||
# requests请求
|
||||
ret = RequestUtils(
|
||||
ua=self.ua,
|
||||
cookies=self.cookie,
|
||||
@ -231,7 +234,15 @@ class TorrentSpider:
|
||||
proxies=self.proxies
|
||||
).get_res(searchurl, allow_redirects=True)
|
||||
|
||||
page_source = ret.text if ret else None
|
||||
# 使用chardet检测字符编码
|
||||
raw_data = ret.content
|
||||
if raw_data:
|
||||
result = chardet.detect(raw_data)
|
||||
encoding = result['encoding']
|
||||
# 解码为字符串
|
||||
page_source = raw_data.decode(encoding)
|
||||
else:
|
||||
page_source = ""
|
||||
|
||||
# 解析
|
||||
return self.parse(page_source)
|
||||
|
@ -1,8 +1,8 @@
|
||||
from typing import Union, Any
|
||||
from typing import Union, Any, Optional
|
||||
|
||||
import requests
|
||||
import urllib3
|
||||
from requests import Session
|
||||
from requests import Session, Response
|
||||
from urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
urllib3.disable_warnings(InsecureRequestWarning)
|
||||
@ -48,7 +48,7 @@ class RequestUtils:
|
||||
if timeout:
|
||||
self._timeout = timeout
|
||||
|
||||
def post(self, url: str, data: Any = None, json: dict = None):
|
||||
def post(self, url: str, data: Any = None, json: dict = None) -> Optional[Response]:
|
||||
if json is None:
|
||||
json = {}
|
||||
try:
|
||||
@ -71,7 +71,7 @@ class RequestUtils:
|
||||
except requests.exceptions.RequestException:
|
||||
return None
|
||||
|
||||
def get(self, url: str, params: dict = None):
|
||||
def get(self, url: str, params: dict = None) -> Optional[str]:
|
||||
try:
|
||||
if self._session:
|
||||
r = self._session.get(url,
|
||||
@ -91,7 +91,8 @@ class RequestUtils:
|
||||
except requests.exceptions.RequestException:
|
||||
return None
|
||||
|
||||
def get_res(self, url: str, params: dict = None, allow_redirects: bool = True, raise_exception: bool = False):
|
||||
def get_res(self, url: str, params: dict = None,
|
||||
allow_redirects: bool = True, raise_exception: bool = False) -> Optional[Response]:
|
||||
try:
|
||||
if self._session:
|
||||
return self._session.get(url,
|
||||
@ -116,9 +117,10 @@ class RequestUtils:
|
||||
raise requests.exceptions.RequestException
|
||||
return None
|
||||
|
||||
def post_res(self, url: str, data: Any = None, params: dict = None, allow_redirects: bool = True,
|
||||
def post_res(self, url: str, data: Any = None, params: dict = None,
|
||||
allow_redirects: bool = True,
|
||||
files: Any = None,
|
||||
json: dict = None):
|
||||
json: dict = None) -> Optional[Response]:
|
||||
try:
|
||||
if self._session:
|
||||
return self._session.post(url,
|
||||
@ -148,7 +150,7 @@ class RequestUtils:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def cookie_parse(cookies_str: str, array: bool = False):
|
||||
def cookie_parse(cookies_str: str, array: bool = False) -> dict:
|
||||
"""
|
||||
解析cookie,转化为字典或者数组
|
||||
:param cookies_str: cookie字符串
|
||||
|
@ -38,3 +38,4 @@ cf_clearance~=0.29.2
|
||||
torrentool~=1.2.0
|
||||
slack_bolt~=1.18.0
|
||||
slack_sdk~=3.21.3
|
||||
chardet~=4.0.0
|
Loading…
x
Reference in New Issue
Block a user