add 自动更新Cookie
This commit is contained in:
@ -1,4 +1,6 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
from typing import Callable, Any
|
||||
|
||||
from playwright.sync_api import sync_playwright, Page
|
||||
from cf_clearance import sync_cf_retry, sync_stealth
|
||||
from app.log import logger
|
||||
|
||||
@ -7,6 +9,53 @@ class PlaywrightHelper:
|
||||
def __init__(self, browser_type="chromium"):
|
||||
self.browser_type = browser_type
|
||||
|
||||
@staticmethod
|
||||
def __pass_cloudflare(url: str, page: Page) -> bool:
|
||||
"""
|
||||
尝试跳过cloudfare验证
|
||||
"""
|
||||
sync_stealth(page, pure=True)
|
||||
page.goto(url)
|
||||
return sync_cf_retry(page)
|
||||
|
||||
def action(self, url: str,
|
||||
callback: Callable,
|
||||
cookies: str = None,
|
||||
ua: str = None,
|
||||
proxies: dict = None,
|
||||
headless: bool = False,
|
||||
timeout: int = 30) -> Any:
|
||||
"""
|
||||
访问网页,接收Page对象并执行操作
|
||||
:param url: 网页地址
|
||||
:param callback: 回调函数,需要接收page对象
|
||||
:param cookies: cookies
|
||||
:param ua: user-agent
|
||||
:param proxies: 代理
|
||||
:param headless: 是否无头模式
|
||||
:param timeout: 超时时间
|
||||
"""
|
||||
try:
|
||||
with sync_playwright() as playwright:
|
||||
browser = playwright[self.browser_type].launch(headless=headless)
|
||||
context = browser.new_context(user_agent=ua, proxy=proxies)
|
||||
page = context.new_page()
|
||||
if cookies:
|
||||
page.set_extra_http_headers({"cookie": cookies})
|
||||
try:
|
||||
if not self.__pass_cloudflare(url, page):
|
||||
logger.warn("cloudflare challenge fail!")
|
||||
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
|
||||
# 回调函数
|
||||
return callback(page)
|
||||
except Exception as e:
|
||||
logger.error(f"网页操作失败: {e}")
|
||||
finally:
|
||||
browser.close()
|
||||
except Exception as e:
|
||||
logger.error(f"网页操作失败: {e}")
|
||||
return None
|
||||
|
||||
def get_page_source(self, url: str,
|
||||
cookies: str = None,
|
||||
ua: str = None,
|
||||
@ -31,10 +80,7 @@ class PlaywrightHelper:
|
||||
if cookies:
|
||||
page.set_extra_http_headers({"cookie": cookies})
|
||||
try:
|
||||
sync_stealth(page, pure=True)
|
||||
page.goto(url)
|
||||
res = sync_cf_retry(page)
|
||||
if not res:
|
||||
if not self.__pass_cloudflare(url, page):
|
||||
logger.warn("cloudflare challenge fail!")
|
||||
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
|
||||
source = page.content()
|
||||
|
Reference in New Issue
Block a user