fix 索引乱码问题
This commit is contained in:
parent
db4b9fcef8
commit
1e82ecc16c
@ -4,6 +4,7 @@ import re
|
|||||||
from typing import List
|
from typing import List
|
||||||
from urllib.parse import quote, urlencode
|
from urllib.parse import quote, urlencode
|
||||||
|
|
||||||
|
import chardet
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from pyquery import PyQuery
|
from pyquery import PyQuery
|
||||||
from ruamel.yaml import CommentedMap
|
from ruamel.yaml import CommentedMap
|
||||||
@ -216,6 +217,7 @@ class TorrentSpider:
|
|||||||
logger.info(f"开始请求:{searchurl}")
|
logger.info(f"开始请求:{searchurl}")
|
||||||
|
|
||||||
if self.render:
|
if self.render:
|
||||||
|
# 浏览器仿真
|
||||||
page_source = PlaywrightHelper().get_page_source(
|
page_source = PlaywrightHelper().get_page_source(
|
||||||
url=searchurl,
|
url=searchurl,
|
||||||
cookies=self.cookie,
|
cookies=self.cookie,
|
||||||
@ -223,6 +225,7 @@ class TorrentSpider:
|
|||||||
proxy=self.proxies
|
proxy=self.proxies
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
# requests请求
|
||||||
ret = RequestUtils(
|
ret = RequestUtils(
|
||||||
ua=self.ua,
|
ua=self.ua,
|
||||||
cookies=self.cookie,
|
cookies=self.cookie,
|
||||||
@ -230,8 +233,16 @@ class TorrentSpider:
|
|||||||
referer=self.referer,
|
referer=self.referer,
|
||||||
proxies=self.proxies
|
proxies=self.proxies
|
||||||
).get_res(searchurl, allow_redirects=True)
|
).get_res(searchurl, allow_redirects=True)
|
||||||
|
|
||||||
page_source = ret.text if ret else None
|
# 使用chardet检测字符编码
|
||||||
|
raw_data = ret.content
|
||||||
|
if raw_data:
|
||||||
|
result = chardet.detect(raw_data)
|
||||||
|
encoding = result['encoding']
|
||||||
|
# 解码为字符串
|
||||||
|
page_source = raw_data.decode(encoding)
|
||||||
|
else:
|
||||||
|
page_source = ""
|
||||||
|
|
||||||
# 解析
|
# 解析
|
||||||
return self.parse(page_source)
|
return self.parse(page_source)
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
from typing import Union, Any
|
from typing import Union, Any, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import urllib3
|
import urllib3
|
||||||
from requests import Session
|
from requests import Session, Response
|
||||||
from urllib3.exceptions import InsecureRequestWarning
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
|
||||||
urllib3.disable_warnings(InsecureRequestWarning)
|
urllib3.disable_warnings(InsecureRequestWarning)
|
||||||
@ -48,7 +48,7 @@ class RequestUtils:
|
|||||||
if timeout:
|
if timeout:
|
||||||
self._timeout = timeout
|
self._timeout = timeout
|
||||||
|
|
||||||
def post(self, url: str, data: Any = None, json: dict = None):
|
def post(self, url: str, data: Any = None, json: dict = None) -> Optional[Response]:
|
||||||
if json is None:
|
if json is None:
|
||||||
json = {}
|
json = {}
|
||||||
try:
|
try:
|
||||||
@ -71,7 +71,7 @@ class RequestUtils:
|
|||||||
except requests.exceptions.RequestException:
|
except requests.exceptions.RequestException:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get(self, url: str, params: dict = None):
|
def get(self, url: str, params: dict = None) -> Optional[str]:
|
||||||
try:
|
try:
|
||||||
if self._session:
|
if self._session:
|
||||||
r = self._session.get(url,
|
r = self._session.get(url,
|
||||||
@ -91,7 +91,8 @@ class RequestUtils:
|
|||||||
except requests.exceptions.RequestException:
|
except requests.exceptions.RequestException:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_res(self, url: str, params: dict = None, allow_redirects: bool = True, raise_exception: bool = False):
|
def get_res(self, url: str, params: dict = None,
|
||||||
|
allow_redirects: bool = True, raise_exception: bool = False) -> Optional[Response]:
|
||||||
try:
|
try:
|
||||||
if self._session:
|
if self._session:
|
||||||
return self._session.get(url,
|
return self._session.get(url,
|
||||||
@ -116,9 +117,10 @@ class RequestUtils:
|
|||||||
raise requests.exceptions.RequestException
|
raise requests.exceptions.RequestException
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def post_res(self, url: str, data: Any = None, params: dict = None, allow_redirects: bool = True,
|
def post_res(self, url: str, data: Any = None, params: dict = None,
|
||||||
|
allow_redirects: bool = True,
|
||||||
files: Any = None,
|
files: Any = None,
|
||||||
json: dict = None):
|
json: dict = None) -> Optional[Response]:
|
||||||
try:
|
try:
|
||||||
if self._session:
|
if self._session:
|
||||||
return self._session.post(url,
|
return self._session.post(url,
|
||||||
@ -148,7 +150,7 @@ class RequestUtils:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def cookie_parse(cookies_str: str, array: bool = False):
|
def cookie_parse(cookies_str: str, array: bool = False) -> dict:
|
||||||
"""
|
"""
|
||||||
解析cookie,转化为字典或者数组
|
解析cookie,转化为字典或者数组
|
||||||
:param cookies_str: cookie字符串
|
:param cookies_str: cookie字符串
|
||||||
|
@ -38,3 +38,4 @@ cf_clearance~=0.29.2
|
|||||||
torrentool~=1.2.0
|
torrentool~=1.2.0
|
||||||
slack_bolt~=1.18.0
|
slack_bolt~=1.18.0
|
||||||
slack_sdk~=3.21.3
|
slack_sdk~=3.21.3
|
||||||
|
chardet~=4.0.0
|
Loading…
x
Reference in New Issue
Block a user