fix requests session

This commit is contained in:
jxxghp
2023-10-05 17:21:59 -07:00
parent 1a49c7c59e
commit 5bcd90c569
7 changed files with 190 additions and 134 deletions

View File

@ -2,7 +2,7 @@ from pathlib import Path
from typing import Any, List, Optional
from fastapi import APIRouter, Depends
from requests import Session
from sqlalchemy.orm import Session
from app import schemas
from app.chain.dashboard import DashboardChain

View File

@ -1,7 +1,7 @@
from typing import Any, List
from fastapi import APIRouter, HTTPException, Depends
from requests import Session
from sqlalchemy.orm import Session
from app import schemas
from app.chain.media import MediaChain

View File

@ -3,7 +3,7 @@ import re
from datetime import datetime
from typing import Dict, List, Optional, Union, Tuple
from requests import Session
from sqlalchemy.orm import Session
from app.chain import ChainBase
from app.chain.download import DownloadChain

View File

@ -146,10 +146,10 @@ class DoubanApi(metaclass=Singleton):
_api_secret_key = "bf7dddc7c9cfe6f7"
_api_key = "0dad551ec0f84ed02907ff5c42e8ec70"
_base_url = "https://frodo.douban.com/api/v2"
_session = requests.Session()
_session = None
def __init__(self):
    """
    Set up the instance with its own ``requests`` session.
    """
    # The session is created here, per instance, rather than once on the class.
    self._session = requests.Session()
@classmethod
def __sign(cls, url: str, ts: int, method='GET') -> str:
@ -163,60 +163,94 @@ class DoubanApi(metaclass=Singleton):
).digest()
).decode()
@lru_cache(maxsize=settings.CACHE_CONF.get('douban'))
def __invoke(self, url, **kwargs):
    """
    Issue a signed GET request against the API and return the decoded JSON.

    The call is wrapped in ``lru_cache``, so the result is memoised on the
    arguments it receives (including ``_ts`` when supplied).

    :param url: path that is appended to ``_base_url``
    :param kwargs: extra query parameters; an optional ``_ts`` entry is taken
                   out and used as the timestamp for the request signature
    :return: ``resp.json()`` when the response is truthy, otherwise ``{}``
    """
    req_url = self._base_url + url
    params = {'apiKey': self._api_key}
    if kwargs:
        params.update(kwargs)
    # When the caller gave no ``_ts``, fall back to today's date as an int
    # in YYYYMMDD form.
    ts = params.pop(
        '_ts',
        int(datetime.strftime(datetime.now(), '%Y%m%d'))
    )
    # The signature is derived from the full request URL and the timestamp.
    params.update({
        'os_rom': 'android',
        'apiKey': self._api_key,
        '_ts': ts,
        '_sig': self.__sign(url=req_url, ts=ts)
    })
    # A user agent is picked at random for each request; the instance
    # session is reused across requests.
    resp = RequestUtils(
        ua=choice(self._user_agents),
        session=self._session
    ).get_res(url=req_url, params=params)
    return resp.json() if resp else {}
def search(self, keyword, start=0, count=20, ts=None):
    """
    Call the "search" endpoint for ``keyword``.

    :param keyword: text forwarded as the ``q`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # A default written in the signature is evaluated once, when the
        # function is defined, and would pin every later call to that day.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["search"], q=keyword,
                         start=start, count=count, _ts=ts)
def movie_search(self, keyword, start=0, count=20, ts=None):
    """
    Call the "movie_search" endpoint for ``keyword``.

    :param keyword: text forwarded as the ``q`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_search"], q=keyword,
                         start=start, count=count, _ts=ts)
def tv_search(self, keyword, start=0, count=20, ts=None):
    """
    Call the "tv_search" endpoint for ``keyword``.

    :param keyword: text forwarded as the ``q`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_search"], q=keyword,
                         start=start, count=count, _ts=ts)
def book_search(self, keyword, start=0, count=20, ts=None):
    """
    Call the "book_search" endpoint for ``keyword``.

    :param keyword: text forwarded as the ``q`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["book_search"], q=keyword,
                         start=start, count=count, _ts=ts)
def group_search(self, keyword, start=0, count=20, ts=None):
    """
    Call the "group_search" endpoint for ``keyword``.

    :param keyword: text forwarded as the ``q`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["group_search"], q=keyword,
                         start=start, count=count, _ts=ts)
def movie_showing(self, start=0, count=20, ts=None):
    """
    Call the "movie_showing" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_showing"],
                         start=start, count=count, _ts=ts)
def movie_soon(self, start=0, count=20, ts=None):
    """
    Call the "movie_soon" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_soon"],
                         start=start, count=count, _ts=ts)
def movie_hot_gaia(self, start=0, count=20, ts=None):
    """
    Call the "movie_hot_gaia" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_hot_gaia"],
                         start=start, count=count, _ts=ts)
def tv_hot(self, start=0, count=20, ts=None):
    """
    Call the "tv_hot" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_hot"],
                         start=start, count=count, _ts=ts)
def tv_animation(self, start=0, count=20, ts=None):
    """
    Call the "tv_animation" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_animation"],
                         start=start, count=count, _ts=ts)
def tv_variety_show(self, start=0, count=20, ts=None):
    """
    Call the "tv_variety_show" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_variety_show"],
                         start=start, count=count, _ts=ts)
def tv_rank_list(self, start=0, count=20, ts=None):
    """
    Call the "tv_rank_list" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_rank_list"],
                         start=start, count=count, _ts=ts)
def show_hot(self, start=0, count=20, ts=None):
    """
    Call the "show_hot" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["show_hot"],
                         start=start, count=count, _ts=ts)
def movie_detail(self, subject_id):
    """
    Call the "movie_detail" endpoint for one subject.

    :param subject_id: identifier appended to the endpoint path (string
                       concatenation, so it must be a ``str``)
    :return: the result of ``__invoke`` for the resulting path
    """
    endpoint = self._urls["movie_detail"] + subject_id
    return self.__invoke(endpoint)
@ -233,20 +267,30 @@ class DoubanApi(metaclass=Singleton):
def book_detail(self, subject_id):
    """
    Call the "book_detail" endpoint for one subject.

    :param subject_id: identifier appended to the endpoint path (string
                       concatenation, so it must be a ``str``)
    :return: the result of ``__invoke`` for the resulting path
    """
    endpoint = self._urls["book_detail"] + subject_id
    return self.__invoke(endpoint)
def movie_top250(self, start=0, count=20, ts=None):
    """
    Call the "movie_top250" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_top250"],
                         start=start, count=count, _ts=ts)
def movie_recommend(self, tags='', sort='R', start=0, count=20, ts=None):
    """
    Call the "movie_recommend" endpoint.

    :param tags: forwarded as the ``tags`` request parameter
    :param sort: forwarded as the ``sort`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["movie_recommend"], tags=tags, sort=sort,
                         start=start, count=count, _ts=ts)
def tv_recommend(self, tags='', sort='R', start=0, count=20, ts=None):
    """
    Call the "tv_recommend" endpoint.

    :param tags: forwarded as the ``tags`` request parameter
    :param sort: forwarded as the ``sort`` request parameter
    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_recommend"], tags=tags, sort=sort,
                         start=start, count=count, _ts=ts)
def tv_chinese_best_weekly(self, start=0, count=20, ts=None):
    """
    Call the "tv_chinese_best_weekly" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_chinese_best_weekly"],
                         start=start, count=count, _ts=ts)
def tv_global_best_weekly(self, start=0, count=20, ts=None):
    """
    Call the "tv_global_best_weekly" endpoint.

    :param start: forwarded as the ``start`` request parameter
    :param count: forwarded as the ``count`` request parameter
    :param ts: date stamp (``%Y%m%d``) forwarded as ``_ts``; when omitted,
               the current date is resolved on each call
    :return: the result of ``__invoke`` for this endpoint
    """
    if ts is None:
        # Resolve the date per call; a signature default would be computed
        # only once, at definition time.
        ts = datetime.strftime(datetime.now(), '%Y%m%d')
    return self.__invoke(self._urls["tv_global_best_weekly"],
                         start=start, count=count, _ts=ts)
def doulist_detail(self, subject_id):
"""
@ -255,7 +299,8 @@ class DoubanApi(metaclass=Singleton):
"""
return self.__invoke(self._urls["doulist"] + subject_id)
def doulist_items(self, subject_id, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')):
def doulist_items(self, subject_id, start=0, count=20,
ts=datetime.strftime(datetime.now(), '%Y%m%d')):
"""
豆列列表
:param subject_id: 豆列id
@ -263,4 +308,9 @@ class DoubanApi(metaclass=Singleton):
:param count: 数量
:param ts: 时间戳
"""
return self.__invoke(self._urls["doulist_items"] % subject_id, start=start, count=count, _ts=ts)
return self.__invoke(self._urls["doulist_items"] % subject_id,
start=start, count=count, _ts=ts)
def __del__(self):
    """
    Close the session held in ``_session`` when the object is finalised.

    Does nothing when ``_session`` is falsy (for example still ``None``).
    """
    session = self._session
    if not session:
        return
    session.close()

View File

@ -26,12 +26,14 @@ class TMDb(object):
REQUEST_CACHE_MAXSIZE = None
_req = None
_session = None
def __init__(self, obj_cached=True, session=None):
if session is not None:
self._req = RequestUtils(session=session, proxies=self.proxies)
else:
self._req = RequestUtils(session=requests.Session(), proxies=self.proxies)
self._session = requests.Session()
self._req = RequestUtils(session=self._session, proxies=self.proxies)
self._remaining = 40
self._reset = None
self._timeout = 15
@ -208,3 +210,7 @@ class TMDb(object):
if key:
return json.get(key)
return json
def __del__(self):
    """
    Close the session held in ``_session`` when the object is finalised.

    Does nothing when ``_session`` is falsy (for example still ``None``).
    """
    session = self._session
    if not session:
        return
    session.close()

View File

@ -841,87 +841,88 @@ class SiteStatistic(_PluginBase):
url = site_info.get("url")
proxy = site_info.get("proxy")
ua = site_info.get("ua")
session = requests.Session()
proxies = settings.PROXY if proxy else None
proxy_server = settings.PROXY_SERVER if proxy else None
render = site_info.get("render")
# 会话管理
with requests.Session() as session:
proxies = settings.PROXY if proxy else None
proxy_server = settings.PROXY_SERVER if proxy else None
render = site_info.get("render")
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
if render:
# Render mode: fetch the page source through PlaywrightHelper
html_text = PlaywrightHelper().get_page_source(url=url,
cookies=site_cookie,
ua=ua,
proxies=proxy_server)
else:
# 普通模式
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "") \
.replace("+", "") \
.replace(" ", "") \
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
if render:
# Render mode: fetch the page source through PlaywrightHelper
html_text = PlaywrightHelper().get_page_source(url=url,
cookies=site_cookie,
ua=ua,
proxies=proxy_server)
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 解析站点类型
if html_text:
site_schema = self.__build_class(html_text)
if not site_schema:
logger.error("站点 %s 无法识别站点类型" % site_name)
return None
return site_schema(site_name, url, site_cookie, html_text, session=session, ua=ua, proxy=proxy)
return None
# 普通模式
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "") \
.replace("+", "") \
.replace(" ", "") \
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 解析站点类型
if html_text:
site_schema = self.__build_class(html_text)
if not site_schema:
logger.error("站点 %s 无法识别站点类型" % site_name)
return None
return site_schema(site_name, url, site_cookie, html_text, session=session, ua=ua, proxy=proxy)
return None
def refresh_by_domain(self, domain: str) -> schemas.Response:
"""

View File

@ -6,7 +6,6 @@ from enum import Enum
from typing import Optional
from urllib.parse import urljoin, urlsplit
import requests
from requests import Session
from app.core.config import settings
@ -107,7 +106,7 @@ class ISiteUserInfo(metaclass=ABCMeta):
self._base_url = f"{split_url.scheme}://{split_url.netloc}"
self._site_cookie = site_cookie
self._index_html = index_html
self._session = session if session else requests.Session()
self._session = session if session else None
self._ua = ua
self._emulate = emulate