remove selenium
This commit is contained in:
44
app/helper/cloudflare.py
Normal file
44
app/helper/cloudflare.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from pyquery import PyQuery
|
||||||
|
|
||||||
|
from app.log import logger
|
||||||
|
|
||||||
|
CHALLENGE_TITLES = [
|
||||||
|
# Cloudflare
|
||||||
|
'Just a moment...',
|
||||||
|
'请稍候…',
|
||||||
|
# DDoS-GUARD
|
||||||
|
'DDOS-GUARD',
|
||||||
|
]
|
||||||
|
CHALLENGE_SELECTORS = [
|
||||||
|
# Cloudflare
|
||||||
|
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
|
||||||
|
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
|
||||||
|
'td.info #js_info',
|
||||||
|
# Fairlane / pararius.com
|
||||||
|
'div.vc div.text-box h2'
|
||||||
|
]
|
||||||
|
SHORT_TIMEOUT = 6
|
||||||
|
CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60"))
|
||||||
|
|
||||||
|
|
||||||
|
def under_challenge(html_text: str) -> bool:
    """
    Check whether a page is an anti-bot challenge page.

    :param html_text: raw HTML source of the page; may be empty or None
    :return: True when the page title equals one of CHALLENGE_TITLES
             (compared case-insensitively) or when any selector in
             CHALLENGE_SELECTORS matches an element of the page;
             False otherwise, including for empty input
    """
    # Nothing to inspect: empty input is never reported as a challenge
    if not html_text:
        return False
    # Parse the document once and reuse it for the title and every selector
    html_doc = PyQuery(html_text)
    page_title = html_doc('title').text()
    logger.debug("under_challenge page_title=" + page_title)
    # Lower-case the title once instead of once per candidate
    lowered_title = page_title.lower()
    if any(lowered_title == title.lower() for title in CHALLENGE_TITLES):
        return True
    # Title did not match: look for known challenge elements in the markup
    return any(html_doc(selector) for selector in CHALLENGE_SELECTORS)
|
@ -9,8 +9,9 @@ from ruamel.yaml import CommentedMap
|
|||||||
|
|
||||||
from app.core.event_manager import EventManager, eventmanager
|
from app.core.event_manager import EventManager, eventmanager
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.helper.module import ModuleHelper
|
from app.helper.browser import PlaywrightHelper
|
||||||
from app.helper.cloudflare import under_challenge
|
from app.helper.cloudflare import under_challenge
|
||||||
|
from app.helper.module import ModuleHelper
|
||||||
from app.helper.sites import SitesHelper
|
from app.helper.sites import SitesHelper
|
||||||
from app.log import logger
|
from app.log import logger
|
||||||
from app.plugins import _PluginBase
|
from app.plugins import _PluginBase
|
||||||
@ -136,6 +137,8 @@ class AutoSignIn(_PluginBase):
|
|||||||
site_url = site_info.get("url")
|
site_url = site_info.get("url")
|
||||||
site_cookie = site_info.get("cookie")
|
site_cookie = site_info.get("cookie")
|
||||||
ua = site_info.get("ua")
|
ua = site_info.get("ua")
|
||||||
|
render = site_info.get("render")
|
||||||
|
proxy = settings.PROXY if site_info.get("proxy") else None
|
||||||
if not site_url or not site_cookie:
|
if not site_url or not site_cookie:
|
||||||
logger.warn(f"未配置 {site} 的站点地址或Cookie,无法签到")
|
logger.warn(f"未配置 {site} 的站点地址或Cookie,无法签到")
|
||||||
return ""
|
return ""
|
||||||
@ -147,36 +150,46 @@ class AutoSignIn(_PluginBase):
|
|||||||
# 拼登签到地址
|
# 拼登签到地址
|
||||||
checkin_url = urljoin(site_url, "attendance.php")
|
checkin_url = urljoin(site_url, "attendance.php")
|
||||||
logger.info(f"开始站点签到:{site},地址:{checkin_url}...")
|
logger.info(f"开始站点签到:{site},地址:{checkin_url}...")
|
||||||
res = RequestUtils(cookies=site_cookie,
|
if render:
|
||||||
headers=ua,
|
page_source = PlaywrightHelper().get_page_source(url=checkin_url,
|
||||||
proxies=settings.PROXY if site_info.get("proxy") else None
|
cookies=site_cookie,
|
||||||
).get_res(url=checkin_url)
|
ua=ua,
|
||||||
if not res and site_url != checkin_url:
|
proxy=proxy)
|
||||||
logger.info(f"开始站点模拟登录:{site},地址:{site_url}...")
|
if not SiteUtils.is_logged_in(page_source):
|
||||||
|
if under_challenge(page_source):
|
||||||
|
return f"【{site}】无法通过Cloudflare!"
|
||||||
|
return f"【{site}】仿真登录失败,Cookie已失效!"
|
||||||
|
else:
|
||||||
res = RequestUtils(cookies=site_cookie,
|
res = RequestUtils(cookies=site_cookie,
|
||||||
headers=ua,
|
headers=ua,
|
||||||
proxies=settings.PROXY if site_info.get("proxy") else None
|
proxies=proxy
|
||||||
).get_res(url=site_url)
|
).get_res(url=checkin_url)
|
||||||
# 判断登录状态
|
if not res and site_url != checkin_url:
|
||||||
if res and res.status_code in [200, 500, 403]:
|
logger.info(f"开始站点模拟登录:{site},地址:{site_url}...")
|
||||||
if not SiteUtils.is_logged_in(res.text):
|
res = RequestUtils(cookies=site_cookie,
|
||||||
if under_challenge(res.text):
|
headers=ua,
|
||||||
msg = "站点被Cloudflare防护,请更换Cookie和UA!"
|
proxies=settings.PROXY if site_info.get("proxy") else None
|
||||||
elif res.status_code == 200:
|
).get_res(url=site_url)
|
||||||
msg = "Cookie已失效"
|
# 判断登录状态
|
||||||
|
if res and res.status_code in [200, 500, 403]:
|
||||||
|
if not SiteUtils.is_logged_in(res.text):
|
||||||
|
if under_challenge(res.text):
|
||||||
|
msg = "站点被Cloudflare防护,请打开站点浏览器仿真!"
|
||||||
|
elif res.status_code == 200:
|
||||||
|
msg = "Cookie已失效!"
|
||||||
|
else:
|
||||||
|
msg = f"状态码:{res.status_code}"
|
||||||
|
logger.warn(f"{site} 签到失败,{msg}")
|
||||||
|
return f"【{site}】签到失败,{msg}!"
|
||||||
else:
|
else:
|
||||||
msg = f"状态码:{res.status_code}"
|
logger.info(f"{site} 签到成功")
|
||||||
logger.warn(f"{site} 签到失败,{msg}")
|
return f"【{site}】签到成功"
|
||||||
return f"【{site}】签到失败,{msg}!"
|
elif res is not None:
|
||||||
|
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
|
||||||
|
return f"【{site}】签到失败,状态码:{res.status_code}!"
|
||||||
else:
|
else:
|
||||||
logger.info(f"{site} 签到成功")
|
logger.warn(f"{site} 签到失败,无法打开网站")
|
||||||
return f"【{site}】签到成功"
|
return f"【{site}】签到失败,无法打开网站!"
|
||||||
elif res is not None:
|
|
||||||
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
|
|
||||||
return f"【{site}】签到失败,状态码:{res.status_code}!"
|
|
||||||
else:
|
|
||||||
logger.warn(f"{site} 签到失败,无法打开网站")
|
|
||||||
return f"【{site}】签到失败,无法打开网站!"
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warn("%s 签到失败:%s" % (site, str(e)))
|
logger.warn("%s 签到失败:%s" % (site, str(e)))
|
||||||
return f"【{site}】签到失败:{str(e)}!"
|
return f"【{site}】签到失败:{str(e)}!"
|
||||||
|
@ -10,6 +10,7 @@ from ruamel.yaml import CommentedMap
|
|||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.event_manager import eventmanager
|
from app.core.event_manager import eventmanager
|
||||||
from app.core.event_manager import Event
|
from app.core.event_manager import Event
|
||||||
|
from app.helper.browser import PlaywrightHelper
|
||||||
from app.helper.module import ModuleHelper
|
from app.helper.module import ModuleHelper
|
||||||
from app.helper.sites import SitesHelper
|
from app.helper.sites import SitesHelper
|
||||||
from app.log import logger
|
from app.log import logger
|
||||||
@ -87,74 +88,89 @@ class SiteStatistic(_PluginBase):
|
|||||||
logger.error(f"站点匹配失败 {e}")
|
logger.error(f"站点匹配失败 {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def build(self, url: str, site_name: str,
|
def build(self, site_info: CommentedMap) -> Optional[ISiteUserInfo]:
|
||||||
site_cookie: str = None,
|
|
||||||
ua: str = None,
|
|
||||||
proxy: bool = False) -> Optional[ISiteUserInfo]:
|
|
||||||
"""
|
"""
|
||||||
构建站点信息
|
构建站点信息
|
||||||
"""
|
"""
|
||||||
|
site_cookie = site_info.get("cookie")
|
||||||
if not site_cookie:
|
if not site_cookie:
|
||||||
return None
|
return None
|
||||||
|
site_name = site_info.get("name")
|
||||||
|
url = site_info.get("url")
|
||||||
|
proxy = site_info.get("proxy")
|
||||||
|
ua = site_info.get("ua")
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
|
|
||||||
proxies = settings.PROXY if proxy else None
|
proxies = settings.PROXY if proxy else None
|
||||||
res = RequestUtils(cookies=site_cookie,
|
render = site_info.get("render")
|
||||||
session=session,
|
|
||||||
headers=ua,
|
|
||||||
proxies=proxies
|
|
||||||
).get_res(url=url)
|
|
||||||
if res and res.status_code == 200:
|
|
||||||
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
|
||||||
res.encoding = "UTF-8"
|
|
||||||
else:
|
|
||||||
res.encoding = res.apparent_encoding
|
|
||||||
html_text = res.text
|
|
||||||
# 第一次登录反爬
|
|
||||||
if html_text.find("title") == -1:
|
|
||||||
i = html_text.find("window.location")
|
|
||||||
if i == -1:
|
|
||||||
return None
|
|
||||||
tmp_url = url + html_text[i:html_text.find(";")] \
|
|
||||||
.replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "")
|
|
||||||
res = RequestUtils(cookies=site_cookie,
|
|
||||||
session=session,
|
|
||||||
headers=ua,
|
|
||||||
proxies=proxies
|
|
||||||
).get_res(url=tmp_url)
|
|
||||||
if res and res.status_code == 200:
|
|
||||||
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
|
||||||
res.encoding = "UTF-8"
|
|
||||||
else:
|
|
||||||
res.encoding = res.apparent_encoding
|
|
||||||
html_text = res.text
|
|
||||||
if not html_text:
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
|
|
||||||
return None
|
|
||||||
|
|
||||||
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
|
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
|
||||||
if '"search"' not in html_text and '"csrf-token"' not in html_text:
|
if render:
|
||||||
res = RequestUtils(cookies=site_cookie,
|
# 渲染模式
|
||||||
session=session,
|
html_text = PlaywrightHelper().get_page_source(url=url,
|
||||||
headers=ua,
|
cookies=site_cookie,
|
||||||
proxies=proxies
|
ua=ua,
|
||||||
).get_res(url=url + "/index.php")
|
proxy=proxies)
|
||||||
if res and res.status_code == 200:
|
|
||||||
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
|
||||||
res.encoding = "UTF-8"
|
|
||||||
else:
|
|
||||||
res.encoding = res.apparent_encoding
|
|
||||||
html_text = res.text
|
|
||||||
if not html_text:
|
|
||||||
return None
|
|
||||||
elif res is not None:
|
|
||||||
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
|
|
||||||
return None
|
|
||||||
else:
|
else:
|
||||||
logger.error(f"站点 {site_name} 无法访问:{url}")
|
# 普通模式
|
||||||
return None
|
res = RequestUtils(cookies=site_cookie,
|
||||||
|
session=session,
|
||||||
|
headers=ua,
|
||||||
|
proxies=proxies
|
||||||
|
).get_res(url=url)
|
||||||
|
if res and res.status_code == 200:
|
||||||
|
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
||||||
|
res.encoding = "UTF-8"
|
||||||
|
else:
|
||||||
|
res.encoding = res.apparent_encoding
|
||||||
|
html_text = res.text
|
||||||
|
# 第一次登录反爬
|
||||||
|
if html_text.find("title") == -1:
|
||||||
|
i = html_text.find("window.location")
|
||||||
|
if i == -1:
|
||||||
|
return None
|
||||||
|
tmp_url = url + html_text[i:html_text.find(";")]\
|
||||||
|
.replace("\"", "")\
|
||||||
|
.replace("+", "")\
|
||||||
|
.replace(" ", "")\
|
||||||
|
.replace("window.location=", "")
|
||||||
|
res = RequestUtils(cookies=site_cookie,
|
||||||
|
session=session,
|
||||||
|
headers=ua,
|
||||||
|
proxies=proxies
|
||||||
|
).get_res(url=tmp_url)
|
||||||
|
if res and res.status_code == 200:
|
||||||
|
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
||||||
|
res.encoding = "UTF-8"
|
||||||
|
else:
|
||||||
|
res.encoding = res.apparent_encoding
|
||||||
|
html_text = res.text
|
||||||
|
if not html_text:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
|
||||||
|
if '"search"' not in html_text and '"csrf-token"' not in html_text:
|
||||||
|
res = RequestUtils(cookies=site_cookie,
|
||||||
|
session=session,
|
||||||
|
headers=ua,
|
||||||
|
proxies=proxies
|
||||||
|
).get_res(url=url + "/index.php")
|
||||||
|
if res and res.status_code == 200:
|
||||||
|
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
|
||||||
|
res.encoding = "UTF-8"
|
||||||
|
else:
|
||||||
|
res.encoding = res.apparent_encoding
|
||||||
|
html_text = res.text
|
||||||
|
if not html_text:
|
||||||
|
return None
|
||||||
|
elif res is not None:
|
||||||
|
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
logger.error(f"站点 {site_name} 无法访问:{url}")
|
||||||
|
return None
|
||||||
# 解析站点类型
|
# 解析站点类型
|
||||||
site_schema = self.__build_class(html_text)
|
site_schema = self.__build_class(html_text)
|
||||||
if not site_schema:
|
if not site_schema:
|
||||||
@ -168,20 +184,13 @@ class SiteStatistic(_PluginBase):
|
|||||||
:param site_info:
|
:param site_info:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
site_name = site_info.get("name")
|
site_name = site_info.get('name')
|
||||||
site_url = site_info.get("url")
|
site_url = site_info.get('url')
|
||||||
if not site_url:
|
if not site_url:
|
||||||
return None
|
return None
|
||||||
site_cookie = site_info.get("cookie")
|
|
||||||
ua = site_info.get("ua")
|
|
||||||
unread_msg_notify = True
|
unread_msg_notify = True
|
||||||
proxy = site_info.get("proxy")
|
|
||||||
try:
|
try:
|
||||||
site_user_info: ISiteUserInfo = self.build(url=site_url,
|
site_user_info: ISiteUserInfo = self.build(site_info=site_info)
|
||||||
site_name=site_name,
|
|
||||||
site_cookie=site_cookie,
|
|
||||||
ua=ua,
|
|
||||||
proxy=proxy)
|
|
||||||
if site_user_info:
|
if site_user_info:
|
||||||
logger.debug(f"站点 {site_name} 开始以 {site_user_info.site_schema()} 模型解析")
|
logger.debug(f"站点 {site_name} 开始以 {site_user_info.site_schema()} 模型解析")
|
||||||
# 开始解析
|
# 开始解析
|
||||||
|
@ -31,7 +31,6 @@ transmission-rpc~=4.3.0
|
|||||||
Jinja2~=3.1.2
|
Jinja2~=3.1.2
|
||||||
pyparsing~=3.0.9
|
pyparsing~=3.0.9
|
||||||
func_timeout==4.3.5
|
func_timeout==4.3.5
|
||||||
selenium~=4.9.1
|
|
||||||
bs4~=0.0.1
|
bs4~=0.0.1
|
||||||
beautifulsoup4~=4.12.2
|
beautifulsoup4~=4.12.2
|
||||||
pillow~=9.5.0
|
pillow~=9.5.0
|
||||||
|
Reference in New Issue
Block a user