import os from pyquery import PyQuery from app.log import logger CHALLENGE_TITLES = [ # Cloudflare 'Just a moment...', '请稍候…', # DDoS-GUARD 'DDOS-GUARD', ] CHALLENGE_SELECTORS = [ # Cloudflare '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands 'td.info #js_info', # Fairlane / pararius.com 'div.vc div.text-box h2' ] SHORT_TIMEOUT = 6 CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60")) def under_challenge(html_text: str): """ Check if the page is under challenge :param html_text: :return: """ # get the page title if not html_text: return False page_title = PyQuery(html_text)('title').text() logger.debug("under_challenge page_title=" + page_title) for title in CHALLENGE_TITLES: if page_title.lower() == title.lower(): return True for selector in CHALLENGE_SELECTORS: html_doc = PyQuery(html_text) if html_doc(selector): return True return False