add playwright && remove feapder
This commit is contained in:
42
app/helper/playwright.py
Normal file
42
app/helper/playwright.py
Normal file
@ -0,0 +1,42 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
|
||||
class PlaywrightHelper:
|
||||
def __init__(self, browser_type="chromium"):
|
||||
self.browser_type = browser_type
|
||||
|
||||
def get_page_source(self, url: str,
|
||||
cookie: str = None,
|
||||
ua: str = None,
|
||||
proxy: dict = None,
|
||||
headless: bool = True):
|
||||
"""
|
||||
获取网页源码
|
||||
:param url: 网页地址
|
||||
:param cookie: cookie
|
||||
:param ua: user-agent
|
||||
:param proxy: 代理
|
||||
:param headless: 是否无头模式
|
||||
"""
|
||||
with sync_playwright() as playwright:
|
||||
browser = playwright[self.browser_type].launch(headless=headless)
|
||||
context = browser.new_context(user_agent=ua, proxy=proxy)
|
||||
page = context.new_page()
|
||||
if cookie:
|
||||
page.set_extra_http_headers({"cookie": cookie})
|
||||
page.goto(url)
|
||||
page.wait_for_load_state("networkidle")
|
||||
source = page.content()
|
||||
browser.close()
|
||||
|
||||
return source
|
||||
|
||||
|
||||
# 示例用法
|
||||
if __name__ == "__main__":
|
||||
utils = PlaywrightHelper()
|
||||
test_url = "https://www.baidu.com"
|
||||
test_cookies = "cookie1=value1; cookie2=value2"
|
||||
test_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
|
||||
source_code = utils.get_page_source(test_url, cookie=test_cookies, ua=test_user_agent)
|
||||
print(source_code)
|
Reference in New Issue
Block a user