add 自动更新Cookie

This commit is contained in:
jxxghp
2023-06-15 12:37:10 +08:00
parent 9155c44041
commit 9efb2007fc
10 changed files with 445 additions and 112 deletions

View File

@ -1,4 +1,6 @@
from playwright.sync_api import sync_playwright
from typing import Callable, Any
from playwright.sync_api import sync_playwright, Page
from cf_clearance import sync_cf_retry, sync_stealth
from app.log import logger
@ -7,6 +9,53 @@ class PlaywrightHelper:
def __init__(self, browser_type="chromium"):
self.browser_type = browser_type
@staticmethod
def __pass_cloudflare(url: str, page: Page) -> bool:
"""
尝试跳过cloudfare验证
"""
sync_stealth(page, pure=True)
page.goto(url)
return sync_cf_retry(page)
def action(self, url: str,
callback: Callable,
cookies: str = None,
ua: str = None,
proxies: dict = None,
headless: bool = False,
timeout: int = 30) -> Any:
"""
访问网页接收Page对象并执行操作
:param url: 网页地址
:param callback: 回调函数需要接收page对象
:param cookies: cookies
:param ua: user-agent
:param proxies: 代理
:param headless: 是否无头模式
:param timeout: 超时时间
"""
try:
with sync_playwright() as playwright:
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=ua, proxy=proxies)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
try:
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
# 回调函数
return callback(page)
except Exception as e:
logger.error(f"网页操作失败: {e}")
finally:
browser.close()
except Exception as e:
logger.error(f"网页操作失败: {e}")
return None
def get_page_source(self, url: str,
cookies: str = None,
ua: str = None,
@ -31,10 +80,7 @@ class PlaywrightHelper:
if cookies:
page.set_extra_http_headers({"cookie": cookies})
try:
sync_stealth(page, pure=True)
page.goto(url)
res = sync_cf_retry(page)
if not res:
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
source = page.content()