fix sites.so
This commit is contained in:
parent
e6f51be105
commit
e1da4cb67a
@ -29,6 +29,7 @@ class PlaywrightHelper:
|
|||||||
if cookies:
|
if cookies:
|
||||||
page.set_extra_http_headers({"cookie": cookies})
|
page.set_extra_http_headers({"cookie": cookies})
|
||||||
try:
|
try:
|
||||||
|
|
||||||
page.goto(url)
|
page.goto(url)
|
||||||
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
|
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
|
||||||
source = page.content()
|
source = page.content()
|
||||||
|
@ -1,201 +0,0 @@
|
|||||||
import time
|
|
||||||
import os
|
|
||||||
|
|
||||||
from func_timeout import func_timeout, FunctionTimedOut
|
|
||||||
from pyquery import PyQuery
|
|
||||||
from selenium.common import TimeoutException
|
|
||||||
from selenium.webdriver import ActionChains
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
from selenium.webdriver.remote.webdriver import WebDriver
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
|
||||||
|
|
||||||
from app.log import logger
|
|
||||||
|
|
||||||
# Page titles that mean the request was refused outright.
ACCESS_DENIED_TITLES = [
    # Cloudflare
    'Access denied',
    # Cloudflare http://bitturk.net/ Firefox
    'Attention Required! | Cloudflare',
]

# CSS selectors that mean the request was refused outright.
ACCESS_DENIED_SELECTORS = [
    # Cloudflare
    'div.cf-error-title span.cf-code-label span',
    # Cloudflare http://bitturk.net/ Firefox
    '#cf-error-details div.cf-error-overview h1',
]

# Page titles shown while a challenge is in progress.
CHALLENGE_TITLES = [
    # Cloudflare
    'Just a moment...',
    '请稍候…',
    # DDoS-GUARD
    'DDOS-GUARD',
]

# CSS selectors present while a challenge is in progress.
CHALLENGE_SELECTORS = [
    # Cloudflare
    '#cf-challenge-running',
    '.ray_id',
    '.attack-box',
    '#cf-please-wait',
    '#challenge-spinner',
    '#trk_jschal_js',
    # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
    'td.info #js_info',
    # Fairlane / pararius.com
    'div.vc div.text-box h2',
]

# Timeout used for the implicit wait and for each individual WebDriverWait.
SHORT_TIMEOUT = 6

# Default overall limit for resolve_challenge; overridable through the
# NASTOOL_CF_TIMEOUT environment variable.
CF_TIMEOUT = int(os.environ.get("NASTOOL_CF_TIMEOUT", "60"))
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_challenge(driver: WebDriver, timeout=CF_TIMEOUT):
    """
    Run the challenge-solving routine on ``driver`` under an overall time limit.

    :param driver: WebDriver whose current page may be showing a challenge
    :param timeout: limit, in seconds, passed to ``func_timeout``
    :return: True when the routine finished without raising, False when it
             timed out or raised (the failure is logged, never propagated)
    """
    started_at = time.time()
    try:
        func_timeout(timeout, _evil_logic, args=(driver,))
    except FunctionTimedOut:
        elapsed = round(time.time() - started_at, 1)
        logger.error(f'Error solving the challenge. Timeout {timeout} after {elapsed} seconds.')
        return False
    except Exception as err:
        logger.error('Error solving the challenge. ' + str(err))
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def under_challenge(html_text: str):
    """
    Check if the page is under challenge.

    The page counts as challenged when its ``<title>`` equals one of
    ``CHALLENGE_TITLES`` (compared case-insensitively) or when any of
    ``CHALLENGE_SELECTORS`` matches something in the document.

    :param html_text: HTML source of the page; an empty value yields False
    :return: True if a challenge title or selector is found, otherwise False
    """
    if not html_text:
        return False
    # Parse the document once and reuse it for the title and every selector,
    # instead of re-parsing the same HTML on each loop iteration.
    html_doc = PyQuery(html_text)
    page_title = html_doc('title').text()
    logger.debug("under_challenge page_title=" + page_title)
    lowered_title = page_title.lower()
    if any(lowered_title == title.lower() for title in CHALLENGE_TITLES):
        return True
    return any(html_doc(selector) for selector in CHALLENGE_SELECTORS)
|
|
||||||
|
|
||||||
|
|
||||||
def _until_title_changes(driver: WebDriver, titles):
    """
    Block until the page title no longer equals any entry of ``titles``.

    The wait is bounded by ``SHORT_TIMEOUT``; callers in this module handle
    the ``TimeoutException`` raised when the title has not changed by then.

    :param driver: WebDriver to poll
    :param titles: titles that must stop matching
    """
    def title_still_matches(polled_driver):
        return _any_match_titles(polled_driver, titles)

    waiter = WebDriverWait(driver, SHORT_TIMEOUT)
    waiter.until_not(title_still_matches)
|
|
||||||
|
|
||||||
|
|
||||||
def _any_match_titles(driver: WebDriver, titles):
    """
    Tell whether the current page title equals any of ``titles``.

    The comparison is case-insensitive and requires the whole title to match.

    :param driver: object exposing the current page title as ``.title``
    :param titles: iterable of candidate titles
    :return: True if one candidate equals the page title, otherwise False
    """
    # A missing title (None) is treated as empty rather than crashing on .lower()
    page_title = (driver.title or "").lower()
    return any(page_title == title.lower() for title in titles)
|
|
||||||
|
|
||||||
|
|
||||||
def _until_selectors_disappear(driver: WebDriver, selectors):
    """
    Block until none of ``selectors`` matches the current page any more.

    The wait is bounded by ``SHORT_TIMEOUT``; callers in this module handle
    the ``TimeoutException`` raised when a selector still matches by then.

    :param driver: WebDriver to poll
    :param selectors: CSS selectors that must stop matching
    """
    def selector_still_present(polled_driver):
        return _any_match_selectors(polled_driver, selectors)

    waiter = WebDriverWait(driver, SHORT_TIMEOUT)
    waiter.until_not(selector_still_present)
|
|
||||||
|
|
||||||
|
|
||||||
def _any_match_selectors(driver: WebDriver, selectors):
    """
    Tell whether any of ``selectors`` matches the current page source.

    :param driver: object exposing the current HTML as ``.page_source``
    :param selectors: CSS selectors to look for
    :return: True if at least one selector matches, otherwise False
    """
    # Nothing to look for: answer without touching the driver
    if not selectors:
        return False
    # Fetch and parse the page once so every selector is checked against the
    # same snapshot, instead of one driver round trip and parse per selector.
    html_doc = PyQuery(driver.page_source)
    return any(html_doc(selector) for selector in selectors)
|
|
||||||
|
|
||||||
|
|
||||||
def _evil_logic(driver: WebDriver):
    """
    Detect a challenge on the current page and wait until it clears.

    Steps:
      1. Wait for the ``<html>`` element to be present.
      2. Raise if the page matches an access-denied title or selector.
      3. If a challenge title or selector is present, wait for both to go
         away, calling ``click_verify`` after every timeout, then wait for
         the redirect that follows.

    This function has no overall time limit of its own; ``resolve_challenge``
    runs it under ``func_timeout``.

    :param driver: WebDriver already loaded with the page to inspect
    :raises Exception: when the page matches an access-denied title or selector
    """
    driver.implicitly_wait(SHORT_TIMEOUT)
    # wait for the page
    html_element = driver.find_element(By.TAG_NAME, "html")

    # find access denied titles first, then access denied selectors
    if (_any_match_titles(driver, ACCESS_DENIED_TITLES)
            or _any_match_selectors(driver, ACCESS_DENIED_SELECTORS)):
        raise Exception('Cloudflare has blocked this request. '
                        'Probably your IP is banned for this site, check in your web browser.')

    # find challenge by title, falling back to selectors
    if _any_match_titles(driver, CHALLENGE_TITLES):
        logger.info("Challenge detected. Title found: " + driver.title)
    elif _any_match_selectors(driver, CHALLENGE_SELECTORS):
        logger.info("Challenge detected. Selector found")
    else:
        logger.info("Challenge not detected!")
        return

    while True:
        try:
            # wait until the title changes
            _until_title_changes(driver, CHALLENGE_TITLES)

            # then wait until all the selectors disappear
            _until_selectors_disappear(driver, CHALLENGE_SELECTORS)

            # all elements not found
            break
        except TimeoutException:
            logger.debug("Timeout waiting for selector")

            click_verify(driver)

            # update the html (cloudflare reloads the page every 5 s)
            html_element = driver.find_element(By.TAG_NAME, "html")

    # waits until cloudflare redirection ends
    logger.debug("Waiting for redirect")
    # Best effort: failing to observe the redirect is logged, not raised
    # noinspection PyBroadException
    try:
        WebDriverWait(driver, SHORT_TIMEOUT).until(EC.staleness_of(html_element))
    except Exception:
        logger.debug("Timeout waiting for redirect")

    logger.info("Challenge solved!")
|
|
||||||
|
|
||||||
|
|
||||||
def click_verify(driver: WebDriver):
    """
    Try to click the Cloudflare verification controls on the current page.

    Two attempts are made, each best-effort (any failure is only logged at
    debug level): first the checkbox inside the Cloudflare challenge iframe,
    then the 'Verify you are human' button in the top-level document. The
    function then sleeps for two seconds before returning.

    :param driver: WebDriver showing the challenge page
    """
    iframe_xpath = "//iframe[@title='Widget containing a Cloudflare security challenge']"
    checkbox_xpath = '//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input'
    button_xpath = "//input[@type='button' and @value='Verify you are human']"

    try:
        logger.debug("Try to find the Cloudflare verify checkbox")
        challenge_frame = driver.find_element(By.XPATH, iframe_xpath)
        driver.switch_to.frame(challenge_frame)
        checkbox = driver.find_element(by=By.XPATH, value=checkbox_xpath)
        if _click_at_offset(driver, checkbox):
            logger.debug("Cloudflare verify checkbox found and clicked")
    except Exception as err:
        logger.debug(f"Cloudflare verify checkbox not found on the page: {str(err)}")
    finally:
        # Always leave the iframe so the next lookup runs in the main document
        driver.switch_to.default_content()

    try:
        logger.debug("Try to find the Cloudflare 'Verify you are human' button")
        button = driver.find_element(by=By.XPATH, value=button_xpath)
        if _click_at_offset(driver, button):
            logger.debug("The Cloudflare 'Verify you are human' button found and clicked")
    except Exception as err:
        logger.debug(f"The Cloudflare 'Verify you are human' button not found on the page:{str(err)}")

    time.sleep(2)


def _click_at_offset(driver, element):
    """Move to ``element`` with offset (5, 7) and click it; return whether a click was issued."""
    if not element:
        return False
    ActionChains(driver).move_to_element_with_offset(element, 5, 7).click(element).perform()
    return True
|
|
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user