fix requests session

This commit is contained in:
jxxghp
2023-10-05 17:21:59 -07:00
parent 1a49c7c59e
commit 5bcd90c569
7 changed files with 190 additions and 134 deletions

View File

@ -2,7 +2,7 @@ from pathlib import Path
from typing import Any, List, Optional from typing import Any, List, Optional
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from requests import Session from sqlalchemy.orm import Session
from app import schemas from app import schemas
from app.chain.dashboard import DashboardChain from app.chain.dashboard import DashboardChain

View File

@ -1,7 +1,7 @@
from typing import Any, List from typing import Any, List
from fastapi import APIRouter, HTTPException, Depends from fastapi import APIRouter, HTTPException, Depends
from requests import Session from sqlalchemy.orm import Session
from app import schemas from app import schemas
from app.chain.media import MediaChain from app.chain.media import MediaChain

View File

@ -3,7 +3,7 @@ import re
from datetime import datetime from datetime import datetime
from typing import Dict, List, Optional, Union, Tuple from typing import Dict, List, Optional, Union, Tuple
from requests import Session from sqlalchemy.orm import Session
from app.chain import ChainBase from app.chain import ChainBase
from app.chain.download import DownloadChain from app.chain.download import DownloadChain

View File

@ -146,10 +146,10 @@ class DoubanApi(metaclass=Singleton):
_api_secret_key = "bf7dddc7c9cfe6f7" _api_secret_key = "bf7dddc7c9cfe6f7"
_api_key = "0dad551ec0f84ed02907ff5c42e8ec70" _api_key = "0dad551ec0f84ed02907ff5c42e8ec70"
_base_url = "https://frodo.douban.com/api/v2" _base_url = "https://frodo.douban.com/api/v2"
_session = requests.Session() _session = None
def __init__(self): def __init__(self):
pass self._session = requests.Session()
@classmethod @classmethod
def __sign(cls, url: str, ts: int, method='GET') -> str: def __sign(cls, url: str, ts: int, method='GET') -> str:
@ -163,60 +163,94 @@ class DoubanApi(metaclass=Singleton):
).digest() ).digest()
).decode() ).decode()
@classmethod
@lru_cache(maxsize=settings.CACHE_CONF.get('douban')) @lru_cache(maxsize=settings.CACHE_CONF.get('douban'))
def __invoke(cls, url, **kwargs): def __invoke(self, url, **kwargs):
req_url = cls._base_url + url req_url = self._base_url + url
params = {'apiKey': cls._api_key} params = {'apiKey': self._api_key}
if kwargs: if kwargs:
params.update(kwargs) params.update(kwargs)
ts = params.pop('_ts', int(datetime.strftime(datetime.now(), '%Y%m%d'))) ts = params.pop(
params.update({'os_rom': 'android', 'apiKey': cls._api_key, '_ts': ts, '_sig': cls.__sign(url=req_url, ts=ts)}) '_ts',
int(datetime.strftime(datetime.now(), '%Y%m%d'))
resp = RequestUtils(ua=choice(cls._user_agents), session=cls._session).get_res(url=req_url, params=params) )
params.update({
'os_rom': 'android',
'apiKey': self._api_key,
'_ts': ts,
'_sig': self.__sign(url=req_url, ts=ts)
})
resp = RequestUtils(
ua=choice(self._user_agents),
session=self._session
).get_res(url=req_url, params=params)
return resp.json() if resp else {} return resp.json() if resp else {}
def search(self, keyword, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def search(self, keyword, start=0, count=20,
return self.__invoke(self._urls["search"], q=keyword, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["search"], q=keyword,
start=start, count=count, _ts=ts)
def movie_search(self, keyword, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_search(self, keyword, start=0, count=20,
return self.__invoke(self._urls["movie_search"], q=keyword, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_search"], q=keyword,
start=start, count=count, _ts=ts)
def tv_search(self, keyword, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_search(self, keyword, start=0, count=20,
return self.__invoke(self._urls["tv_search"], q=keyword, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_search"], q=keyword,
start=start, count=count, _ts=ts)
def book_search(self, keyword, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def book_search(self, keyword, start=0, count=20,
return self.__invoke(self._urls["book_search"], q=keyword, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["book_search"], q=keyword,
start=start, count=count, _ts=ts)
def group_search(self, keyword, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def group_search(self, keyword, start=0, count=20,
return self.__invoke(self._urls["group_search"], q=keyword, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["group_search"], q=keyword,
start=start, count=count, _ts=ts)
def movie_showing(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_showing(self, start=0, count=20,
return self.__invoke(self._urls["movie_showing"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_showing"],
start=start, count=count, _ts=ts)
def movie_soon(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_soon(self, start=0, count=20,
return self.__invoke(self._urls["movie_soon"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_soon"],
start=start, count=count, _ts=ts)
def movie_hot_gaia(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_hot_gaia(self, start=0, count=20,
return self.__invoke(self._urls["movie_hot_gaia"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_hot_gaia"],
start=start, count=count, _ts=ts)
def tv_hot(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_hot(self, start=0, count=20,
return self.__invoke(self._urls["tv_hot"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_hot"],
start=start, count=count, _ts=ts)
def tv_animation(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_animation(self, start=0, count=20,
return self.__invoke(self._urls["tv_animation"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_animation"],
start=start, count=count, _ts=ts)
def tv_variety_show(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_variety_show(self, start=0, count=20,
return self.__invoke(self._urls["tv_variety_show"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_variety_show"],
start=start, count=count, _ts=ts)
def tv_rank_list(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_rank_list(self, start=0, count=20,
return self.__invoke(self._urls["tv_rank_list"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_rank_list"],
start=start, count=count, _ts=ts)
def show_hot(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def show_hot(self, start=0, count=20,
return self.__invoke(self._urls["show_hot"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["show_hot"],
start=start, count=count, _ts=ts)
def movie_detail(self, subject_id): def movie_detail(self, subject_id):
return self.__invoke(self._urls["movie_detail"] + subject_id) return self.__invoke(self._urls["movie_detail"] + subject_id)
@ -233,20 +267,30 @@ class DoubanApi(metaclass=Singleton):
def book_detail(self, subject_id): def book_detail(self, subject_id):
return self.__invoke(self._urls["book_detail"] + subject_id) return self.__invoke(self._urls["book_detail"] + subject_id)
def movie_top250(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_top250(self, start=0, count=20,
return self.__invoke(self._urls["movie_top250"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_top250"],
start=start, count=count, _ts=ts)
def movie_recommend(self, tags='', sort='R', start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def movie_recommend(self, tags='', sort='R', start=0, count=20,
return self.__invoke(self._urls["movie_recommend"], tags=tags, sort=sort, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["movie_recommend"], tags=tags, sort=sort,
start=start, count=count, _ts=ts)
def tv_recommend(self, tags='', sort='R', start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_recommend(self, tags='', sort='R', start=0, count=20,
return self.__invoke(self._urls["tv_recommend"], tags=tags, sort=sort, start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_recommend"], tags=tags, sort=sort,
start=start, count=count, _ts=ts)
def tv_chinese_best_weekly(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_chinese_best_weekly(self, start=0, count=20,
return self.__invoke(self._urls["tv_chinese_best_weekly"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_chinese_best_weekly"],
start=start, count=count, _ts=ts)
def tv_global_best_weekly(self, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def tv_global_best_weekly(self, start=0, count=20,
return self.__invoke(self._urls["tv_global_best_weekly"], start=start, count=count, _ts=ts) ts=datetime.strftime(datetime.now(), '%Y%m%d')):
return self.__invoke(self._urls["tv_global_best_weekly"],
start=start, count=count, _ts=ts)
def doulist_detail(self, subject_id): def doulist_detail(self, subject_id):
""" """
@ -255,7 +299,8 @@ class DoubanApi(metaclass=Singleton):
""" """
return self.__invoke(self._urls["doulist"] + subject_id) return self.__invoke(self._urls["doulist"] + subject_id)
def doulist_items(self, subject_id, start=0, count=20, ts=datetime.strftime(datetime.now(), '%Y%m%d')): def doulist_items(self, subject_id, start=0, count=20,
ts=datetime.strftime(datetime.now(), '%Y%m%d')):
""" """
豆列列表 豆列列表
:param subject_id: 豆列id :param subject_id: 豆列id
@ -263,4 +308,9 @@ class DoubanApi(metaclass=Singleton):
:param count: 数量 :param count: 数量
:param ts: 时间戳 :param ts: 时间戳
""" """
return self.__invoke(self._urls["doulist_items"] % subject_id, start=start, count=count, _ts=ts) return self.__invoke(self._urls["doulist_items"] % subject_id,
start=start, count=count, _ts=ts)
def __del__(self):
if self._session:
self._session.close()

View File

@ -26,12 +26,14 @@ class TMDb(object):
REQUEST_CACHE_MAXSIZE = None REQUEST_CACHE_MAXSIZE = None
_req = None _req = None
_session = None
def __init__(self, obj_cached=True, session=None): def __init__(self, obj_cached=True, session=None):
if session is not None: if session is not None:
self._req = RequestUtils(session=session, proxies=self.proxies) self._req = RequestUtils(session=session, proxies=self.proxies)
else: else:
self._req = RequestUtils(session=requests.Session(), proxies=self.proxies) self._session = requests.Session()
self._req = RequestUtils(session=self._session, proxies=self.proxies)
self._remaining = 40 self._remaining = 40
self._reset = None self._reset = None
self._timeout = 15 self._timeout = 15
@ -208,3 +210,7 @@ class TMDb(object):
if key: if key:
return json.get(key) return json.get(key)
return json return json
def __del__(self):
if self._session:
self._session.close()

View File

@ -841,87 +841,88 @@ class SiteStatistic(_PluginBase):
url = site_info.get("url") url = site_info.get("url")
proxy = site_info.get("proxy") proxy = site_info.get("proxy")
ua = site_info.get("ua") ua = site_info.get("ua")
session = requests.Session() # 会话管理
proxies = settings.PROXY if proxy else None with requests.Session() as session:
proxy_server = settings.PROXY_SERVER if proxy else None proxies = settings.PROXY if proxy else None
render = site_info.get("render") proxy_server = settings.PROXY_SERVER if proxy else None
render = site_info.get("render")
logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}") logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}")
if render: if render:
# 渲染模式 # 渲染模式
html_text = PlaywrightHelper().get_page_source(url=url, html_text = PlaywrightHelper().get_page_source(url=url,
cookies=site_cookie, cookies=site_cookie,
ua=ua, ua=ua,
proxies=proxy_server) proxies=proxy_server)
else:
# 普通模式
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url)
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "") \
.replace("+", "") \
.replace(" ", "") \
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else: else:
logger.error(f"站点 {site_name} 无法访问:{url}") # 普通模式
return None res = RequestUtils(cookies=site_cookie,
# 解析站点类型 session=session,
if html_text: ua=ua,
site_schema = self.__build_class(html_text) proxies=proxies
if not site_schema: ).get_res(url=url)
logger.error("站点 %s 无法识别站点类型" % site_name) if res and res.status_code == 200:
return None if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
return site_schema(site_name, url, site_cookie, html_text, session=session, ua=ua, proxy=proxy) res.encoding = "utf-8"
return None else:
res.encoding = res.apparent_encoding
html_text = res.text
# 第一次登录反爬
if html_text.find("title") == -1:
i = html_text.find("window.location")
if i == -1:
return None
tmp_url = url + html_text[i:html_text.find(";")] \
.replace("\"", "") \
.replace("+", "") \
.replace(" ", "") \
.replace("window.location=", "")
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=tmp_url)
if res and res.status_code == 200:
if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
res.encoding = "UTF-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
else:
logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code))
return None
# 兼容假首页情况,假首页通常没有 <link rel="search" 属性
if '"search"' not in html_text and '"csrf-token"' not in html_text:
res = RequestUtils(cookies=site_cookie,
session=session,
ua=ua,
proxies=proxies
).get_res(url=url + "/index.php")
if res and res.status_code == 200:
if re.search(r"charset=\"?utf-8\"?", res.text, re.IGNORECASE):
res.encoding = "utf-8"
else:
res.encoding = res.apparent_encoding
html_text = res.text
if not html_text:
return None
elif res is not None:
logger.error(f"站点 {site_name} 连接失败,状态码:{res.status_code}")
return None
else:
logger.error(f"站点 {site_name} 无法访问:{url}")
return None
# 解析站点类型
if html_text:
site_schema = self.__build_class(html_text)
if not site_schema:
logger.error("站点 %s 无法识别站点类型" % site_name)
return None
return site_schema(site_name, url, site_cookie, html_text, session=session, ua=ua, proxy=proxy)
return None
def refresh_by_domain(self, domain: str) -> schemas.Response: def refresh_by_domain(self, domain: str) -> schemas.Response:
""" """

View File

@ -6,7 +6,6 @@ from enum import Enum
from typing import Optional from typing import Optional
from urllib.parse import urljoin, urlsplit from urllib.parse import urljoin, urlsplit
import requests
from requests import Session from requests import Session
from app.core.config import settings from app.core.config import settings
@ -107,7 +106,7 @@ class ISiteUserInfo(metaclass=ABCMeta):
self._base_url = f"{split_url.scheme}://{split_url.netloc}" self._base_url = f"{split_url.scheme}://{split_url.netloc}"
self._site_cookie = site_cookie self._site_cookie = site_cookie
self._index_html = index_html self._index_html = index_html
self._session = session if session else requests.Session() self._session = session if session else None
self._ua = ua self._ua = ua
self._emulate = emulate self._emulate = emulate