MoviePilot/app/helper/browser.py
2023-06-09 19:58:05 +08:00

56 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from playwright.sync_api import sync_playwright
from cf_clearance import sync_cf_retry, sync_stealth
from app.log import logger
class PlaywrightHelper:
def __init__(self, browser_type="chromium"):
self.browser_type = browser_type
def get_page_source(self, url: str,
cookies: str = None,
ua: str = None,
proxy: dict = None,
headless: bool = True,
timeout: int = 30) -> str:
"""
获取网页源码
:param url: 网页地址
:param cookies: cookies
:param ua: user-agent
:param proxy: 代理
:param headless: 是否无头模式
:param timeout: 超时时间
"""
with sync_playwright() as playwright:
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=ua, proxy=proxy)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
try:
sync_stealth(page, pure=True)
page.goto(url)
res = sync_cf_retry(page)
if not res:
logger.warn("cloudflare challenge fail")
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
source = page.content()
except Exception as e:
logger.error(f"获取网页源码失败: {e}")
source = None
finally:
browser.close()
return source
# 示例用法
if __name__ == "__main__":
utils = PlaywrightHelper()
test_url = "https://piggo.me"
test_cookies = ""
test_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
source_code = utils.get_page_source(test_url, cookies=test_cookies, ua=test_user_agent)
print(source_code)