56 lines
2.0 KiB
Python
56 lines
2.0 KiB
Python
from typing import Optional

from playwright.sync_api import sync_playwright
from cf_clearance import sync_cf_retry, sync_stealth

from app.log import logger
|
||
|
||
|
||
class PlaywrightHelper:
    """Fetch rendered page HTML through a Playwright-driven browser.

    Every call to ``get_page_source`` launches a fresh browser of the
    configured type and closes it again before returning.
    """

    def __init__(self, browser_type="chromium"):
        """
        :param browser_type: key used to look up the browser launcher on the
            Playwright object (defaults to "chromium")
        """
        self.browser_type = browser_type

    def get_page_source(self, url: str,
                        cookies: Optional[str] = None,
                        ua: Optional[str] = None,
                        proxy: Optional[dict] = None,
                        headless: bool = True,
                        timeout: int = 30) -> Optional[str]:
        """
        Load a page in a browser and return its HTML source.

        :param url: page address to open
        :param cookies: raw cookie string, sent as an extra "cookie" request header
        :param ua: user-agent for the browser context
        :param proxy: proxy settings passed through to the browser context
        :param headless: whether to launch the browser in headless mode
        :param timeout: timeout in seconds, applied to the navigation and to
            the wait for the "networkidle" load state
        :return: the page HTML, or None when loading failed (the error is logged)
        """
        # Playwright expects milliseconds; the public parameter is in seconds.
        timeout_ms = timeout * 1000
        with sync_playwright() as playwright:
            browser = playwright[self.browser_type].launch(headless=headless)
            try:
                # Context/page setup errors still propagate to the caller, but
                # the outer ``finally`` guarantees the browser is closed first.
                context = browser.new_context(user_agent=ua, proxy=proxy)
                page = context.new_page()
                if cookies:
                    page.set_extra_http_headers({"cookie": cookies})
                try:
                    sync_stealth(page, pure=True)
                    # Honor the caller's timeout for the navigation as well,
                    # instead of silently using Playwright's default.
                    page.goto(url, timeout=timeout_ms)
                    res = sync_cf_retry(page)
                    if not res:
                        # Best effort: a falsy retry result is only logged and
                        # the current page content is still returned.
                        logger.warn("cloudflare challenge fail!")
                    page.wait_for_load_state("networkidle", timeout=timeout_ms)
                    source = page.content()
                except Exception as e:
                    logger.error(f"获取网页源码失败: {e}")
                    source = None
            finally:
                browser.close()
        return source
|
||
|
||
|
||
# Example usage: fetch one page with a desktop Chrome user-agent and print its HTML.
if __name__ == "__main__":
    test_url = "https://piggo.me"
    test_cookies = ""
    # Adjacent literals are concatenated into a single user-agent string.
    test_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/93.0.4577.63 Safari/537.36"
    )
    utils = PlaywrightHelper()
    source_code = utils.get_page_source(
        test_url,
        cookies=test_cookies,
        ua=test_user_agent,
    )
    print(source_code)
|