remove selenium

This commit is contained in:
jxxghp
2023-06-09 20:19:23 +08:00
parent bef1b6918a
commit 1558613772
4 changed files with 162 additions and 97 deletions

44
app/helper/cloudflare.py Normal file
View File

@ -0,0 +1,44 @@
import os
from pyquery import PyQuery
from app.log import logger
# Page titles that identify an anti-bot challenge page. under_challenge()
# compares each entry case-insensitively against the document's <title> text.
CHALLENGE_TITLES = [
# Cloudflare
'Just a moment...',
'请稍候…',
# DDoS-GUARD
'DDOS-GUARD',
]
# CSS selectors checked by under_challenge(); a page matching any one of them
# is treated as a challenge page.
CHALLENGE_SELECTORS = [
# Cloudflare
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
'td.info #js_info',
# Fairlane / pararius.com
'div.vc div.text-box h2'
]
# NOTE(review): not referenced in the visible code; presumably a short wait
# expressed in seconds -- confirm against the callers.
SHORT_TIMEOUT = 6
# Integer read from the NASTOOL_CF_TIMEOUT environment variable, defaulting to
# "60" when unset. NOTE(review): presumably the challenge wait limit in
# seconds -- confirm against the callers.
CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60"))
def under_challenge(html_text: str) -> bool:
    """
    Check whether a page is an anti-bot challenge page.

    The page counts as a challenge when its <title> equals one of
    CHALLENGE_TITLES (case-insensitive) or when any selector in
    CHALLENGE_SELECTORS matches an element of the document.

    :param html_text: raw HTML source of the page
    :return: True if the page looks like a challenge page; False otherwise,
             including when html_text is empty or None
    """
    if not html_text:
        return False

    # Parse once and reuse the document for the title and every selector,
    # instead of re-parsing the same HTML on each loop iteration.
    html_doc = PyQuery(html_text)

    # Fall back to "" so a missing <title> can never break the string
    # handling below (NOTE(review): some pyquery releases are believed to
    # return None from text() on an empty selection -- confirm).
    page_title = html_doc('title').text() or ""
    logger.debug(f"under_challenge page_title={page_title}")

    lowered_title = page_title.lower()
    if any(lowered_title == title.lower() for title in CHALLENGE_TITLES):
        return True

    # An empty PyQuery selection is falsy, so any() is True only when at
    # least one selector matched an element.
    return any(html_doc(selector) for selector in CHALLENGE_SELECTORS)

View File

@ -9,8 +9,9 @@ from ruamel.yaml import CommentedMap
from app.core.event_manager import EventManager, eventmanager
from app.core.config import settings
from app.helper.module import ModuleHelper
from app.helper.browser import PlaywrightHelper
from app.helper.cloudflare import under_challenge
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper
from app.log import logger
from app.plugins import _PluginBase
@ -136,6 +137,8 @@ class AutoSignIn(_PluginBase):
site_url = site_info.get("url")
site_cookie = site_info.get("cookie")
ua = site_info.get("ua")
render = site_info.get("render")
proxy = settings.PROXY if site_info.get("proxy") else None
if not site_url or not site_cookie:
logger.warn(f"未配置 {site} 的站点地址或Cookie无法签到")
return ""
@ -147,36 +150,46 @@ class AutoSignIn(_PluginBase):
# 拼登签到地址
checkin_url = urljoin(site_url, "attendance.php")
logger.info(f"开始站点签到:{site},地址:{checkin_url}...")
res = RequestUtils(cookies=site_cookie,
headers=ua,
proxies=settings.PROXY if site_info.get("proxy") else None
).get_res(url=checkin_url)
if not res and site_url != checkin_url:
logger.info(f"开始站点模拟登录:{site},地址:{site_url}...")
if render:
page_source = PlaywrightHelper().get_page_source(url=checkin_url,
cookies=site_cookie,
ua=ua,
proxy=proxy)
if not SiteUtils.is_logged_in(page_source):
if under_challenge(page_source):
return f"{site}】无法通过Cloudflare"
return f"{site}】仿真登录失败Cookie已失效"
else:
res = RequestUtils(cookies=site_cookie,
headers=ua,
proxies=settings.PROXY if site_info.get("proxy") else None
).get_res(url=site_url)
# 判断登录状态
if res and res.status_code in [200, 500, 403]:
if not SiteUtils.is_logged_in(res.text):
if under_challenge(res.text):
msg = "站点被Cloudflare防护请更换Cookie和UA"
elif res.status_code == 200:
msg = "Cookie已失效"
proxies=proxy
).get_res(url=checkin_url)
if not res and site_url != checkin_url:
logger.info(f"开始站点模拟登录:{site},地址:{site_url}...")
res = RequestUtils(cookies=site_cookie,
headers=ua,
proxies=settings.PROXY if site_info.get("proxy") else None
).get_res(url=site_url)
# 判断登录状态
if res and res.status_code in [200, 500, 403]:
if not SiteUtils.is_logged_in(res.text):
if under_challenge(res.text):
msg = "站点被Cloudflare防护请打开站点浏览器仿真"
elif res.status_code == 200:
msg = "Cookie已失效"
else:
msg = f"状态码:{res.status_code}"
logger.warn(f"{site} 签到失败,{msg}")
return f"{site}】签到失败,{msg}"
else:
msg = f"状态码:{res.status_code}"
logger.warn(f"{site} 签到失败,{msg}")
return f"{site}】签到失败,{msg}"
logger.info(f"{site} 签到成功")
return f"{site}签到成功"
elif res is not None:
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
return f"{site}】签到失败,状态码:{res.status_code}"
else:
logger.info(f"{site} 签到成功")
return f"{site}】签到成功"
elif res is not None:
logger.warn(f"{site} 签到失败,状态码:{res.status_code}")
return f"{site}】签到失败,状态码:{res.status_code}"
else:
logger.warn(f"{site} 签到失败,无法打开网站")
return f"{site}】签到失败,无法打开网站!"
logger.warn(f"{site} 签到失败,无法打开网站")
return f"{site}】签到失败,无法打开网站!"
except Exception as e:
logger.warn("%s 签到失败:%s" % (site, str(e)))
return f"{site}】签到失败:{str(e)}"

View File

@ -10,6 +10,7 @@ from ruamel.yaml import CommentedMap
from app.core.config import settings
from app.core.event_manager import eventmanager
from app.core.event_manager import Event
from app.helper.browser import PlaywrightHelper
from app.helper.module import ModuleHelper
from app.helper.sites import SitesHelper
from app.log import logger
@ -87,74 +88,89 @@ class SiteStatistic(_PluginBase):
logger.error(f"站点匹配失败 {e}")
return None
def build(self, url: str, site_name: str,
site_cookie: str = None,
ua: str = None,
proxy: bool = False) -> Optional[ISiteUserInfo]:
def build(self, site_info: CommentedMap) -> Optional[ISiteUserInfo]:
"""
构建站点信息
"""
site_cookie = site_info.get("cookie")
if not site_cookie:
return None
site_name = site_info.get("name")
url = site_info.get("url")
proxy = site_info.get("proxy")
ua = site_info.get("ua")
session = requests.Session()
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
proxies = settings.PROXY if proxy else None
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
render = site_info.get("render")
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
if render:
# 渲染模式
html_text = PlaywrightHelper().get_page_source(url=url,
cookies=site_cookie,
ua=ua,
proxy=proxies)
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 普通模式
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")]\
.replace("\"", "")\
.replace("+", "")\
.replace(" ", "")\
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
headers=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 解析站点类型
site_schema = self.__build_class(html_text)
if not site_schema:
@ -168,20 +184,13 @@ class SiteStatistic(_PluginBase):
:param site_info:
:return:
"""
site_name = site_info.get("name")
site_url = site_info.get("url")
site_name = site_info.get('name')
site_url = site_info.get('url')
if not site_url:
return None
site_cookie = site_info.get("cookie")
ua = site_info.get("ua")
unread_msg_notify = True
proxy = site_info.get("proxy")
try:
site_user_info: ISiteUserInfo = self.build(url=site_url,
site_name=site_name,
site_cookie=site_cookie,
ua=ua,
proxy=proxy)
site_user_info: ISiteUserInfo = self.build(site_info=site_info)
if site_user_info:
logger.debug(f"站点 {site_name} 开始以 {site_user_info.site_schema()} 模型解析")
# 开始解析

View File

@ -31,7 +31,6 @@ transmission-rpc~=4.3.0
Jinja2~=3.1.2
pyparsing~=3.0.9
func_timeout==4.3.5
selenium~=4.9.1
bs4~=0.0.1
beautifulsoup4~=4.12.2
pillow~=9.5.0