This commit is contained in:
jxxghp
2023-06-09 19:25:12 +08:00
parent 77b755da5f
commit 6925dde254
76 changed files with 137 additions and 102 deletions

View File

@ -1 +0,0 @@
from .module import ModuleHelper

View File

@ -1,12 +1,14 @@
from playwright.sync_api import sync_playwright
from app.log import logger
class PlaywrightHelper:
def __init__(self, browser_type="chromium"):
self.browser_type = browser_type
def get_page_source(self, url: str,
cookie: str = None,
cookies: str = None,
ua: str = None,
proxy: dict = None,
headless: bool = True,
@ -14,7 +16,7 @@ class PlaywrightHelper:
"""
获取网页源码
:param url: 网页地址
:param cookie: cookie
:param cookies: cookies
:param ua: user-agent
:param proxy: 代理
:param headless: 是否无头模式
@ -24,12 +26,17 @@ class PlaywrightHelper:
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=ua, proxy=proxy)
page = context.new_page()
if cookie:
page.set_extra_http_headers({"cookie": cookie})
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout)
source = page.content()
browser.close()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
try:
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
source = page.content()
except Exception as e:
logger.error(f"获取网页源码失败: {e}")
source = None
finally:
browser.close()
return source
@ -40,5 +47,5 @@ if __name__ == "__main__":
test_url = "https://www.baidu.com"
test_cookies = "cookie1=value1; cookie2=value2"
test_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
source_code = utils.get_page_source(test_url, cookie=test_cookies, ua=test_user_agent)
source_code = utils.get_page_source(test_url, cookies=test_cookies, ua=test_user_agent)
print(source_code)

View File

@ -1,7 +1,7 @@
import xml.dom.minidom
from typing import List
from app.core import settings
from app.core.config import settings
from app.utils.dom import DomUtils
from app.utils.http import RequestUtils
from app.utils.string import StringUtils

View File

@ -6,7 +6,9 @@ from urllib.parse import unquote
from bencode import bdecode
from app.core import settings, Context, MetaInfo
from app.core.config import settings
from app.core.context import Context
from app.core.meta_info import MetaInfo
from app.log import logger
from app.utils.http import RequestUtils
from app.utils.string import StringUtils