56 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			56 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from playwright.sync_api import sync_playwright
 | ||
| from cf_clearance import sync_cf_retry, sync_stealth
 | ||
| from app.log import logger
 | ||
| 
 | ||
| 
 | ||
class PlaywrightHelper:
    """Fetch the HTML of a page through a Playwright-driven browser.

    Every call to :meth:`get_page_source` starts its own Playwright session
    and browser, runs ``sync_stealth`` and ``sync_cf_retry`` from
    ``cf_clearance`` on the page, and closes the browser again before
    returning.
    """

    def __init__(self, browser_type="chromium"):
        """Remember which browser to launch.

        :param browser_type: key used to look up the browser launcher on the
            Playwright object (``"chromium"`` by default)
        """
        self.browser_type = browser_type

    def get_page_source(self, url: str,
                        cookies: str = None,
                        ua: str = None,
                        proxy: dict = None,
                        headless: bool = True,
                        timeout: int = 30) -> str:
        """
        Fetch the source of a web page.

        :param url: address of the page
        :param cookies: cookie string; when non-empty it is sent as the
            ``cookie`` extra HTTP header of the page
        :param ua: user-agent passed to the browser context
        :param proxy: proxy settings passed to the browser context
        :param headless: whether to launch the browser in headless mode
        :param timeout: timeout in seconds, applied both to the navigation
            and to the wait for the "networkidle" load state
        :return: the page content, or ``None`` when navigation/loading raised
            (the error is logged instead of propagated)
        """
        # Playwright expects milliseconds.
        timeout_ms = timeout * 1000
        source = None
        with sync_playwright() as playwright:
            browser = playwright[self.browser_type].launch(headless=headless)
            try:
                # Setup errors (e.g. a bad proxy dict) still propagate to the
                # caller, but the outer ``finally`` now closes the browser.
                context = browser.new_context(user_agent=ua, proxy=proxy)
                page = context.new_page()
                if cookies:
                    page.set_extra_http_headers({"cookie": cookies})
                try:
                    sync_stealth(page, pure=True)
                    # Honour the caller's timeout for the navigation as well,
                    # not only for the network-idle wait below.
                    page.goto(url, timeout=timeout_ms)
                    if not sync_cf_retry(page):
                        # Best effort: keep going and return whatever loaded.
                        logger.warn("cloudflare challenge fail!")
                    page.wait_for_load_state("networkidle", timeout=timeout_ms)
                    source = page.content()
                except Exception as e:
                    logger.error(f"获取网页源码失败: {e}")
                    source = None
            finally:
                browser.close()

        return source
 | ||
| 
 | ||
| 
 | ||
# Example usage
if __name__ == "__main__":
    # Sample inputs for a manual smoke run of the helper.
    demo_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
    demo_cookies = ""
    demo_url = "https://piggo.me"

    helper = PlaywrightHelper()
    # Print the fetched page source (None is printed if loading failed).
    print(helper.get_page_source(demo_url, cookies=demo_cookies, ua=demo_user_agent))
 |