remove selenium

This commit is contained in:
jxxghp
2023-06-09 20:19:23 +08:00
parent bef1b6918a
commit 1558613772
4 changed files with 162 additions and 97 deletions

44
app/helper/cloudflare.py Normal file
View File

@ -0,0 +1,44 @@
import os
from pyquery import PyQuery
from app.log import logger
# Page titles shown while an anti-bot challenge is in progress.
# They are compared case-insensitively against the page's <title> text.
CHALLENGE_TITLES = [
    # Cloudflare (English and Chinese variants)
    'Just a moment...',
    '请稍候…',
    # DDoS-GUARD
    'DDOS-GUARD',
]

# CSS selectors whose presence in a page marks it as a challenge page.
CHALLENGE_SELECTORS = [
    # Cloudflare
    '#cf-challenge-running',
    '.ray_id',
    '.attack-box',
    '#cf-please-wait',
    '#challenge-spinner',
    '#trk_jschal_js',
    # Custom Cloudflare pages for EbookParadijs, Film-Paleis,
    # MuziekFabriek and Puur-Hollands
    'td.info #js_info',
    # Fairlane / pararius.com
    'div.vc div.text-box h2',
]

# Timeouts; the unit is not stated in this file (presumably seconds — confirm
# against the callers).
SHORT_TIMEOUT = 6
# Overridable through the NASTOOL_CF_TIMEOUT environment variable (default 60).
CF_TIMEOUT = int(os.environ.get("NASTOOL_CF_TIMEOUT", "60"))
def under_challenge(html_text: str) -> bool:
    """
    Check whether a page is an anti-bot challenge page.

    A page counts as "under challenge" when its <title> text equals one of
    CHALLENGE_TITLES (compared case-insensitively), or when any selector in
    CHALLENGE_SELECTORS matches at least one element in the document.

    :param html_text: HTML source of the page; may be empty or None
    :return: True if a challenge title or selector is found, False otherwise
             (including when html_text is empty)
    """
    if not html_text:
        return False
    # Parse the document once and reuse it for the title lookup and for every
    # selector test instead of re-parsing the HTML on each loop iteration.
    html_doc = PyQuery(html_text)
    page_title = html_doc('title').text()
    logger.debug("under_challenge page_title=" + page_title)
    lowered_title = page_title.lower()
    if any(lowered_title == title.lower() for title in CHALLENGE_TITLES):
        return True
    # A selector query is truthy only when it matched at least one element.
    return any(html_doc(selector) for selector in CHALLENGE_SELECTORS)

View File

@ -9,8 +9,9 @@ from ruamel.yaml import CommentedMap
from app.core.event_manager import EventManager, eventmanager from app.core.event_manager import EventManager, eventmanager
from app.core.config import settings from app.core.config import settings
from app.helper.module import ModuleHelper from app.helper.browser import PlaywrightHelper
from app.helper.cloudflare import under_challenge from app.helper.cloudflare import under_challenge
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper from app.helper.sites import SitesHelper
from app.log import logger from app.log import logger
from app.plugins import _PluginBase from app.plugins import _PluginBase
@ -136,6 +137,8 @@ class AutoSignIn(_PluginBase):
site_url = site_info.get("url") site_url = site_info.get("url")
site_cookie = site_info.get("cookie") site_cookie = site_info.get("cookie")
ua = site_info.get("ua") ua = site_info.get("ua")
render = site_info.get("render")
proxy = settings.PROXY if site_info.get("proxy") else None
if not site_url or not site_cookie: if not site_url or not site_cookie:
logger.warn(f"未配置 {site} 的站点地址或Cookie无法签到") logger.warn(f"未配置 {site} 的站点地址或Cookie无法签到")
return "" return ""
@ -147,36 +150,46 @@ class AutoSignIn(_PluginBase):
# 拼登签到地址 # 拼登签到地址
checkin_url = urljoin(site_url, "attendance.php") checkin_url = urljoin(site_url, "attendance.php")
logger.info(f"开始站点签到:{site},地址:{checkin_url}...") logger.info(f"开始站点签到:{site},地址:{checkin_url}...")
res = RequestUtils(cookies=site_cookie, if render:
headers=ua, page_source = PlaywrightHelper().get_page_source(url=checkin_url,
proxies=settings.PROXY if site_info.get("proxy") else None cookies=site_cookie,
).get_res(url=checkin_url) ua=ua,
if not res and site_url != checkin_url: proxy=proxy)
logger.info(f"开始站点模拟登录:{site},地址:{site_url}...") if not SiteUtils.is_logged_in(page_source):
if under_challenge(page_source):
return f"{site}】无法通过Cloudflare"
return f"{site}】仿真登录失败Cookie已失效"
else:
res = RequestUtils(cookies=site_cookie, res = RequestUtils(cookies=site_cookie,
headers=ua, headers=ua,
proxies=settings.PROXY if site_info.get("proxy") else None proxies=proxy
).get_res(url=site_url) ).get_res(url=checkin_url)
# 判断登录状态 if not res and site_url != checkin_url:
if res and res.status_code in [200, 500, 403]: logger.info(f"开始站点模拟登录:{site},地址:{site_url}...")
if not SiteUtils.is_logged_in(res.text): res = RequestUtils(cookies=site_cookie,
if under_challenge(res.text): headers=ua,
msg = "站点被Cloudflare防护请更换Cookie和UA" proxies=settings.PROXY if site_info.get("proxy") else None
elif res.status_code == 200: ).get_res(url=site_url)
msg = "Cookie已失效" # 判断登录状态
if res and res.status_code in [200, 500, 403]:
if not SiteUtils.is_logged_in(res.text):
if under_challenge(res.text):
msg = "站点被Cloudflare防护请打开站点浏览器仿真"
elif res.status_code == 200:
msg = "Cookie已失效"
else:
msg = f"状态码:{res.status_code}"
logger.warn(f"{site} 签到失败,{msg}")
return f"{site}】签到失败,{msg}"
else: else:
msg = f"状态码:{res.status_code}" logger.info(f"{site} 签到成功")
logger.warn(f"{site} 签到失败,{msg}") return f"{site}签到成功"
return f"{site}】签到失败,{msg}" elif res is not None:
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
return f"{site}】签到失败,状态码:{res.status_code}"
else: else:
logger.info(f"{site} 签到成功") logger.warn(f"{site} 签到失败,无法打开网站")
return f"{site}】签到成功" return f"{site}】签到失败,无法打开网站!"
elif res is not None:
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
return f"{site}】签到失败,状态码:{res.status_code}"
else:
logger.warn(f"{site} 签到失败,无法打开网站")
return f"{site}】签到失败,无法打开网站!"
except Exception as e: except Exception as e:
logger.warn("%s 签到失败:%s" % (site, str(e))) logger.warn("%s 签到失败:%s" % (site, str(e)))
return f"{site}】签到失败:{str(e)}" return f"{site}】签到失败:{str(e)}"

View File

@ -10,6 +10,7 @@ from ruamel.yaml import CommentedMap
from app.core.config import settings from app.core.config import settings
from app.core.event_manager import eventmanager from app.core.event_manager import eventmanager
from app.core.event_manager import Event from app.core.event_manager import Event
from app.helper.browser import PlaywrightHelper
from app.helper.module import ModuleHelper from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper from app.helper.sites import SitesHelper
from app.log import logger from app.log import logger
@ -87,74 +88,89 @@ class SiteStatistic(_PluginBase):
logger.error(f"站点匹配失败 {e}") logger.error(f"站点匹配失败 {e}")
return None return None
def build(self, url: str, site_name: str, def build(self, site_info: CommentedMap) -> Optional[ISiteUserInfo]:
site_cookie: str = None,
ua: str = None,
proxy: bool = False) -> Optional[ISiteUserInfo]:
""" """
构建站点信息 构建站点信息
""" """
site_cookie = site_info.get("cookie")
if not site_cookie: if not site_cookie:
return None return None
site_name = site_info.get("name")
url = site_info.get("url")
proxy = site_info.get("proxy")
ua = site_info.get("ua")
session = requests.Session() session = requests.Session()
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
proxies = settings.PROXY if proxy else None proxies = settings.PROXY if proxy else None
res = RequestUtils(cookies=site_cookie, render = site_info.get("render")
session=session,
headers=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性 logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
if '"search"' not in html_text and '"csrf-token"' not in html_text: if render:
res = RequestUtils(cookies=site_cookie, # 渲染模式
session=session, html_text = PlaywrightHelper().get_page_source(url=url,
headers=ua, cookies=site_cookie,
proxies=proxies ua=ua,
).get_res(url=url + "/index.php") proxy=proxies)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else: else:
logger.error(f"站点 {site_name} 无法访问:{url}") # 普通模式
return None res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")]\
.replace("\"", "")\
.replace("+", "")\
.replace(" ", "")\
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 解析站点类型 # 解析站点类型
site_schema = self.__build_class(html_text) site_schema = self.__build_class(html_text)
if not site_schema: if not site_schema:
@ -168,20 +184,13 @@ class SiteStatistic(_PluginBase):
:param site_info: :param site_info:
:return: :return:
""" """
site_name = site_info.get("name") site_name = site_info.get('name')
site_url = site_info.get("url") site_url = site_info.get('url')
if not site_url: if not site_url:
return None return None
site_cookie = site_info.get("cookie")
ua = site_info.get("ua")
unread_msg_notify = True unread_msg_notify = True
proxy = site_info.get("proxy")
try: try:
site_user_info: ISiteUserInfo = self.build(url=site_url, site_user_info: ISiteUserInfo = self.build(site_info=site_info)
site_name=site_name,
site_cookie=site_cookie,
ua=ua,
proxy=proxy)
if site_user_info: if site_user_info:
logger.debug(f"站点 {site_name} 开始以 {site_user_info.site_schema()} 模型解析") logger.debug(f"站点 {site_name} 开始以 {site_user_info.site_schema()} 模型解析")
# 开始解析 # 开始解析

View File

@ -31,7 +31,6 @@ transmission-rpc~=4.3.0
Jinja2~=3.1.2 Jinja2~=3.1.2
pyparsing~=3.0.9 pyparsing~=3.0.9
func_timeout==4.3.5 func_timeout==4.3.5
selenium~=4.9.1
bs4~=0.0.1 bs4~=0.0.1
beautifulsoup4~=4.12.2 beautifulsoup4~=4.12.2
pillow~=9.5.0 pillow~=9.5.0