From 1558613772985dbb8e026ec5c768a653d2f16329 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Fri, 9 Jun 2023 20:19:23 +0800 Subject: [PATCH] remove selenium --- app/helper/cloudflare.py | 44 ++++++++ app/plugins/autosignin/__init__.py | 67 +++++++----- app/plugins/sitestatistic/__init__.py | 147 ++++++++++++++------------ requirements.txt | 1 - 4 files changed, 162 insertions(+), 97 deletions(-) create mode 100644 app/helper/cloudflare.py diff --git a/app/helper/cloudflare.py b/app/helper/cloudflare.py new file mode 100644 index 00000000..dc4a57c7 --- /dev/null +++ b/app/helper/cloudflare.py @@ -0,0 +1,44 @@ +import os + +from pyquery import PyQuery + +from app.log import logger + +CHALLENGE_TITLES = [ + # Cloudflare + 'Just a moment...', + '请稍候…', + # DDoS-GUARD + 'DDOS-GUARD', +] +CHALLENGE_SELECTORS = [ + # Cloudflare + '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', + # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands + 'td.info #js_info', + # Fairlane / pararius.com + 'div.vc div.text-box h2' +] +SHORT_TIMEOUT = 6 +CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60")) + + +def under_challenge(html_text: str): + """ + Check if the page is under challenge + :param html_text: + :return: + """ + # get the page title + if not html_text: + return False + page_title = PyQuery(html_text)('title').text() + logger.debug("under_challenge page_title=" + page_title) + for title in CHALLENGE_TITLES: + if page_title.lower() == title.lower(): + return True + for selector in CHALLENGE_SELECTORS: + html_doc = PyQuery(html_text) + if html_doc(selector): + return True + return False diff --git a/app/plugins/autosignin/__init__.py b/app/plugins/autosignin/__init__.py index be038894..08b5c884 100644 --- a/app/plugins/autosignin/__init__.py +++ b/app/plugins/autosignin/__init__.py @@ -9,8 +9,9 @@ from ruamel.yaml import CommentedMap from app.core.event_manager import EventManager, eventmanager from app.core.config import settings -from app.helper.module import ModuleHelper +from app.helper.browser import PlaywrightHelper from app.helper.cloudflare import under_challenge +from app.helper.module import ModuleHelper from app.helper.sites import SitesHelper from app.log import logger from app.plugins import _PluginBase @@ -136,6 +137,8 @@ class AutoSignIn(_PluginBase): site_url = site_info.get("url") site_cookie = site_info.get("cookie") ua = site_info.get("ua") + render = site_info.get("render") + proxy = settings.PROXY if site_info.get("proxy") else None if not site_url or not site_cookie: logger.warn(f"未配置 {site} 的站点地址或Cookie,无法签到") return "" @@ -147,36 +150,46 @@ class AutoSignIn(_PluginBase): # 拼登签到地址 checkin_url = urljoin(site_url, "attendance.php") logger.info(f"开始站点签到:{site},地址:{checkin_url}...") - res = RequestUtils(cookies=site_cookie, - headers=ua, - proxies=settings.PROXY if site_info.get("proxy") else None - ).get_res(url=checkin_url) - if not res and site_url != checkin_url: - logger.info(f"开始站点模拟登录:{site},地址:{site_url}...") + if render: + page_source = PlaywrightHelper().get_page_source(url=checkin_url, + cookies=site_cookie, + ua=ua, + proxy=proxy) + if not SiteUtils.is_logged_in(page_source): + if under_challenge(page_source): + return f"【{site}】无法通过Cloudflare!" + return f"【{site}】仿真登录失败,Cookie已失效!" + else: res = RequestUtils(cookies=site_cookie, headers=ua, - proxies=settings.PROXY if site_info.get("proxy") else None - ).get_res(url=site_url) - # 判断登录状态 - if res and res.status_code in [200, 500, 403]: - if not SiteUtils.is_logged_in(res.text): - if under_challenge(res.text): - msg = "站点被Cloudflare防护,请更换Cookie和UA!" - elif res.status_code == 200: - msg = "Cookie已失效" + proxies=proxy + ).get_res(url=checkin_url) + if not res and site_url != checkin_url: + logger.info(f"开始站点模拟登录:{site},地址:{site_url}...") + res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=settings.PROXY if site_info.get("proxy") else None + ).get_res(url=site_url) + # 判断登录状态 + if res and res.status_code in [200, 500, 403]: + if not SiteUtils.is_logged_in(res.text): + if under_challenge(res.text): + msg = "站点被Cloudflare防护,请打开站点浏览器仿真!" + elif res.status_code == 200: + msg = "Cookie已失效!" + else: + msg = f"状态码:{res.status_code}" + logger.warn(f"{site} 签到失败,{msg}") + return f"【{site}】签到失败,{msg}!" else: - msg = f"状态码:{res.status_code}" - logger.warn(f"{site} 签到失败,{msg}") - return f"【{site}】签到失败,{msg}!" + logger.info(f"{site} 签到成功") + return f"【{site}】签到成功" + elif res is not None: + logger.warn(f"{site} 签到失败,状态码:{res.status_code}") + return f"【{site}】签到失败,状态码:{res.status_code}!" else: - logger.info(f"{site} 签到成功") - return f"【{site}】签到成功" - elif res is not None: - logger.warn(f"{site} 签到失败,状态码:{res.status_code}") - return f"【{site}】签到失败,状态码:{res.status_code}!" - else: - logger.warn(f"{site} 签到失败,无法打开网站") - return f"【{site}】签到失败,无法打开网站!" + logger.warn(f"{site} 签到失败,无法打开网站") + return f"【{site}】签到失败,无法打开网站!" except Exception as e: logger.warn("%s 签到失败:%s" % (site, str(e))) return f"【{site}】签到失败:{str(e)}!" diff --git a/app/plugins/sitestatistic/__init__.py b/app/plugins/sitestatistic/__init__.py index 558fc9a2..8348345a 100644 --- a/app/plugins/sitestatistic/__init__.py +++ b/app/plugins/sitestatistic/__init__.py @@ -10,6 +10,7 @@ from ruamel.yaml import CommentedMap from app.core.config import settings from app.core.event_manager import eventmanager from app.core.event_manager import Event +from app.helper.browser import PlaywrightHelper from app.helper.module import ModuleHelper from app.helper.sites import SitesHelper from app.log import logger @@ -87,74 +88,89 @@ class SiteStatistic(_PluginBase): logger.error(f"站点匹配失败 {e}") return None - def build(self, url: str, site_name: str, - site_cookie: str = None, - ua: str = None, - proxy: bool = False) -> Optional[ISiteUserInfo]: + def build(self, site_info: CommentedMap) -> Optional[ISiteUserInfo]: """ 构建站点信息 """ + site_cookie = site_info.get("cookie") if not site_cookie: return None + site_name = site_info.get("name") + url = site_info.get("url") + proxy = site_info.get("proxy") + ua = site_info.get("ua") session = requests.Session() - logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}") proxies = settings.PROXY if proxy else None - res = RequestUtils(cookies=site_cookie, - session=session, - headers=ua, - proxies=proxies - ).get_res(url=url) - if res and res.status_code == 200: - if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: - res.encoding = "UTF-8" - else: - res.encoding = res.apparent_encoding - html_text = res.text - # 第一次登录反爬 - if html_text.find("title") == -1: - i = html_text.find("window.location") - if i == -1: - return None - tmp_url = url + html_text[i:html_text.find(";")] \ - .replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "") - res = RequestUtils(cookies=site_cookie, - session=session, - headers=ua, - proxies=proxies - ).get_res(url=tmp_url) - if res and res.status_code == 200: - if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: - res.encoding = "UTF-8" - else: - res.encoding = res.apparent_encoding - html_text = res.text - if not html_text: - return None - else: - logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code)) - return None + render = site_info.get("render") - # 兼容假首页情况,假首页通常没有