fix sites.so

This commit is contained in:
jxxghp 2023-06-09 19:44:21 +08:00
parent e6f51be105
commit e1da4cb67a
4 changed files with 1 additions and 201 deletions

View File

@ -29,6 +29,7 @@ class PlaywrightHelper:
if cookies: if cookies:
page.set_extra_http_headers({"cookie": cookies}) page.set_extra_http_headers({"cookie": cookies})
try: try:
page.goto(url) page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000) page.wait_for_load_state("networkidle", timeout=timeout * 1000)
source = page.content() source = page.content()

View File

@ -1,201 +0,0 @@
import time
import os
from func_timeout import func_timeout, FunctionTimedOut
from pyquery import PyQuery
from selenium.common import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from app.log import logger
ACCESS_DENIED_TITLES = [
# Cloudflare
'Access denied',
# Cloudflare http://bitturk.net/ Firefox
'Attention Required! | Cloudflare'
]
ACCESS_DENIED_SELECTORS = [
# Cloudflare
'div.cf-error-title span.cf-code-label span',
# Cloudflare http://bitturk.net/ Firefox
'#cf-error-details div.cf-error-overview h1'
]
CHALLENGE_TITLES = [
# Cloudflare
'Just a moment...',
'请稍候…',
# DDoS-GUARD
'DDOS-GUARD',
]
CHALLENGE_SELECTORS = [
# Cloudflare
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
'td.info #js_info',
# Fairlane / pararius.com
'div.vc div.text-box h2'
]
SHORT_TIMEOUT = 6
CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60"))
def resolve_challenge(driver: WebDriver, timeout=CF_TIMEOUT):
start_ts = time.time()
try:
func_timeout(timeout, _evil_logic, args=(driver,))
return True
except FunctionTimedOut:
logger.error(f'Error solving the challenge. Timeout {timeout} after {round(time.time() - start_ts, 1)} seconds.')
return False
except Exception as e:
logger.error('Error solving the challenge. ' + str(e))
return False
def under_challenge(html_text: str):
"""
Check if the page is under challenge
:param html_text:
:return:
"""
# get the page title
if not html_text:
return False
page_title = PyQuery(html_text)('title').text()
logger.debug("under_challenge page_title=" + page_title)
for title in CHALLENGE_TITLES:
if page_title.lower() == title.lower():
return True
for selector in CHALLENGE_SELECTORS:
html_doc = PyQuery(html_text)
if html_doc(selector):
return True
return False
def _until_title_changes(driver: WebDriver, titles):
WebDriverWait(driver, SHORT_TIMEOUT).until_not(lambda x: _any_match_titles(x, titles))
def _any_match_titles(driver: WebDriver, titles):
page_title = driver.title
for title in titles:
if page_title.lower() == title.lower():
return True
return False
def _until_selectors_disappear(driver: WebDriver, selectors):
WebDriverWait(driver, SHORT_TIMEOUT).until_not(lambda x: _any_match_selectors(x, selectors))
def _any_match_selectors(driver: WebDriver, selectors):
for selector in selectors:
html_doc = PyQuery(driver.page_source)
if html_doc(selector):
return True
return False
def _evil_logic(driver: WebDriver):
driver.implicitly_wait(SHORT_TIMEOUT)
# wait for the page
html_element = driver.find_element(By.TAG_NAME, "html")
# find access denied titles
if _any_match_titles(driver, ACCESS_DENIED_TITLES):
raise Exception('Cloudflare has blocked this request. '
'Probably your IP is banned for this site, check in your web browser.')
# find access denied selectors
if _any_match_selectors(driver, ACCESS_DENIED_SELECTORS):
raise Exception('Cloudflare has blocked this request. '
'Probably your IP is banned for this site, check in your web browser.')
# find challenge by title
challenge_found = False
if _any_match_titles(driver, CHALLENGE_TITLES):
challenge_found = True
logger.info("Challenge detected. Title found: " + driver.title)
if not challenge_found:
# find challenge by selectors
if _any_match_selectors(driver, CHALLENGE_SELECTORS):
challenge_found = True
logger.info("Challenge detected. Selector found")
attempt = 0
if challenge_found:
while True:
try:
attempt = attempt + 1
# wait until the title changes
_until_title_changes(driver, CHALLENGE_TITLES)
# then wait until all the selectors disappear
_until_selectors_disappear(driver, CHALLENGE_SELECTORS)
# all elements not found
break
except TimeoutException:
logger.debug("Timeout waiting for selector")
click_verify(driver)
# update the html (cloudflare reloads the page every 5 s)
html_element = driver.find_element(By.TAG_NAME, "html")
# waits until cloudflare redirection ends
logger.debug("Waiting for redirect")
# noinspection PyBroadException
try:
WebDriverWait(driver, SHORT_TIMEOUT).until(EC.staleness_of(html_element))
except Exception:
logger.debug("Timeout waiting for redirect")
logger.info("Challenge solved!")
else:
logger.info("Challenge not detected!")
def click_verify(driver: WebDriver):
try:
logger.debug("Try to find the Cloudflare verify checkbox")
iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
driver.switch_to.frame(iframe)
checkbox = driver.find_element(
by=By.XPATH,
value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input',
)
if checkbox:
actions = ActionChains(driver)
actions.move_to_element_with_offset(checkbox, 5, 7)
actions.click(checkbox)
actions.perform()
logger.debug("Cloudflare verify checkbox found and clicked")
except Exception as e:
logger.debug(f"Cloudflare verify checkbox not found on the page: {str(e)}")
# print(e)
finally:
driver.switch_to.default_content()
try:
logger.debug("Try to find the Cloudflare 'Verify you are human' button")
button = driver.find_element(
by=By.XPATH,
value="//input[@type='button' and @value='Verify you are human']",
)
if button:
actions = ActionChains(driver)
actions.move_to_element_with_offset(button, 5, 7)
actions.click(button)
actions.perform()
logger.debug("The Cloudflare 'Verify you are human' button found and clicked")
except Exception as e:
logger.debug(f"The Cloudflare 'Verify you are human' button not found on the page{str(e)}")
# print(e)
time.sleep(2)

Binary file not shown.