fix rsslink helper

This commit is contained in:
jxxghp 2023-09-11 17:13:26 +08:00
parent c45f5e6ac4
commit 08aef1f47f
2 changed files with 281 additions and 225 deletions

View File

@ -10,10 +10,10 @@ from app.chain.site import SiteChain
from app.core.config import settings from app.core.config import settings
from app.db.site_oper import SiteOper from app.db.site_oper import SiteOper
from app.db.siteicon_oper import SiteIconOper from app.db.siteicon_oper import SiteIconOper
from app.helper.browser import PlaywrightHelper
from app.helper.cloudflare import under_challenge from app.helper.cloudflare import under_challenge
from app.helper.cookiecloud import CookieCloudHelper from app.helper.cookiecloud import CookieCloudHelper
from app.helper.message import MessageHelper from app.helper.message import MessageHelper
from app.helper.rss import RssHelper
from app.helper.sites import SitesHelper from app.helper.sites import SitesHelper
from app.log import logger from app.log import logger
from app.schemas import Notification, NotificationType, MessageChannel from app.schemas import Notification, NotificationType, MessageChannel
@ -31,6 +31,7 @@ class CookieCloudChain(ChainBase):
self.siteoper = SiteOper(self._db) self.siteoper = SiteOper(self._db)
self.siteiconoper = SiteIconOper(self._db) self.siteiconoper = SiteIconOper(self._db)
self.siteshelper = SitesHelper() self.siteshelper = SitesHelper()
self.rsshelper = RssHelper()
self.sitechain = SiteChain(self._db) self.sitechain = SiteChain(self._db)
self.message = MessageHelper() self.message = MessageHelper()
self.cookiecloud = CookieCloudHelper( self.cookiecloud = CookieCloudHelper(
@ -82,10 +83,17 @@ class CookieCloudChain(ChainBase):
logger.info(f"站点【{site_info.name}】连通性正常不同步CookieCloud数据") logger.info(f"站点【{site_info.name}】连通性正常不同步CookieCloud数据")
if not site_info.public and not site_info.rss: if not site_info.public and not site_info.rss:
# 自动生成rss地址 # 自动生成rss地址
rss_url = self.__get_rss(url=site_info.url, cookie=cookie, ua=settings.USER_AGENT, rss_url, errmsg = self.rsshelper.get_rss_link(
proxy=site_info.proxy) url=site_info.url,
# 更新站点rss地址 cookie=cookie,
self.siteoper.update_rss(domain=domain, rss=rss_url) ua=settings.USER_AGENT,
proxy=True if site_info.proxy else False
)
if rss_url:
# 更新站点rss地址
self.siteoper.update_rss(domain=domain, rss=rss_url)
else:
logger.warn(errmsg)
continue continue
# 更新站点Cookie # 更新站点Cookie
self.siteoper.update_cookie(domain=domain, cookies=cookie) self.siteoper.update_cookie(domain=domain, cookies=cookie)
@ -115,7 +123,12 @@ class CookieCloudChain(ChainBase):
rss_url = None rss_url = None
if not indexer.get("public") and indexer.get("domain"): if not indexer.get("public") and indexer.get("domain"):
# 自动生成rss地址 # 自动生成rss地址
rss_url = self.__get_rss(url=indexer.get("domain"), cookie=cookie, ua=settings.USER_AGENT) rss_url, errmsg = self.rsshelper.get_rss_link(url=indexer.get("domain"),
cookie=cookie,
ua=settings.USER_AGENT)
if errmsg:
logger.warn(errmsg)
# 插入数据库
self.siteoper.add(name=indexer.get("name"), self.siteoper.add(name=indexer.get("name"),
url=indexer.get("domain"), url=indexer.get("domain"),
domain=domain, domain=domain,
@ -148,223 +161,6 @@ class CookieCloudChain(ChainBase):
logger.info(f"CookieCloud同步成功{ret_msg}") logger.info(f"CookieCloud同步成功{ret_msg}")
return True, ret_msg return True, ret_msg
def __get_rss(self, url: str, cookie: str, ua: str, proxy: int = 0) -> str:
    """
    Obtain the RSS address of a site.

    Sites that need a dedicated routine are dispatched by a substring match
    on ``url``; every other site goes through ``__get_rss_base`` with an
    XPath chosen for that site.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent sent with the request
    :param proxy: truthy to use the configured proxy; defaults to 0 so that
                  callers which do not pass it no longer raise TypeError
    :return: whatever the selected routine returns (an RSS address, or ""
             when none was found)
    """
    # Sites with their own retrieval routine.
    if "ourbits.club" in url:
        return self.__get_rss_ourbits(url=url, cookie=cookie, ua=ua, proxy=proxy)
    if "totheglory.im" in url:
        return self.__get_rss_ttg(url=url, cookie=cookie, ua=ua, proxy=proxy)
    if "monikadesign.uk" in url:
        return self.__get_rss_monika(url=url, cookie=cookie, ua=ua, proxy=proxy)
    if "zhuque.in" in url:
        return self.__get_rss_zhuque(url=url, cookie=cookie, ua=ua, proxy=proxy)

    # Remaining sites share one routine and differ only in where the link
    # sits in the returned page. The checks are independent, so the last
    # matching one decides the XPath.
    xpath = "//a[@class='faqlink']/@href"
    if "club.hares.top" in url:
        xpath = "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href"
    if "et8.org" in url:
        xpath = "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href"
    if "pttime.org" in url:
        xpath = "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]"
    return self.__get_rss_base(url=url, cookie=cookie, ua=ua, xpath=xpath, proxy=proxy)
def __get_rss_base(self, url: str, cookie: str, ua: str, xpath: str, proxy: int) -> str:
    """
    Default way of obtaining a site's RSS address.

    POSTs the form data built by ``__get_rss_data`` to ``getrss.php`` under
    ``url`` and returns the last node selected by ``xpath`` in the response.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent sent with the request
    :param xpath: XPath expression that selects the RSS link
    :param proxy: truthy to send the request through ``settings.PROXY``
    :return: the RSS address, or "" when the request or the lookup fails
    """
    try:
        get_rss_url = urljoin(url, "getrss.php")
        rss_data = self.__get_rss_data(url)
        res = RequestUtils(cookies=cookie,
                           timeout=60,
                           ua=ua,
                           proxies=settings.PROXY if proxy else None).post_res(
            url=get_rss_url, data=rss_data)
        if not res:
            logger.error(f"获取rss失败{url}")
            return ""
        html = etree.HTML(res.text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath(xpath)
            if rss_link:
                return str(rss_link[-1])
        return ""
    except Exception as e:
        # Best effort: report through the logger instead of stdout.
        logger.error(f"获取rss失败{url} {str(e)}")
        return ""
def __get_rss_ttg(self, url: str, cookie: str, ua: str, proxy: int) -> str:
    """
    Obtain the RSS address of totheglory.im.

    GETs the site's ``rsstools.php`` page with a fixed category query string
    and returns the text of the last ``<textarea>`` in the response.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent sent with the request
    :param proxy: truthy to send the request through ``settings.PROXY``
    :return: the RSS address, or "" when the request or the lookup fails
    """
    try:
        get_rss_url = urljoin(url,
                              "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60")
        res = RequestUtils(cookies=cookie,
                           timeout=60,
                           ua=ua,
                           proxies=settings.PROXY if proxy else None).get_res(url=get_rss_url)
        if not res:
            logger.error(f"获取rss失败{url}")
            return ""
        html = etree.HTML(res.text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath("//textarea/text()")
            if rss_link:
                return str(rss_link[-1])
        return ""
    except Exception as e:
        # Best effort: report through the logger instead of stdout.
        logger.error(f"获取rss失败{url} {str(e)}")
        return ""
def __get_rss_monika(self, url: str, cookie: str, ua: str, proxy: int) -> str:
    """
    Obtain the RSS address of monikadesign.uk.

    GETs the ``rss`` page under ``url`` and returns the first ``href`` found
    on it. The first match is taken here, not the last.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent sent with the request
    :param proxy: truthy to send the request through ``settings.PROXY``
    :return: the RSS address, or "" when the request or the lookup fails
    """
    try:
        get_rss_url = urljoin(url, "rss")
        res = RequestUtils(cookies=cookie,
                           timeout=60,
                           ua=ua,
                           proxies=settings.PROXY if proxy else None).get_res(url=get_rss_url)
        if not res:
            logger.error(f"获取rss失败{url}")
            return ""
        html = etree.HTML(res.text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath("//a/@href")
            if rss_link:
                return str(rss_link[0])
        return ""
    except Exception as e:
        # Best effort: report through the logger instead of stdout.
        logger.error(f"获取rss失败{url} {str(e)}")
        return ""
def __get_rss_ourbits(self, url: str, cookie: str, ua: str, proxy: int) -> str:
    """
    Obtain the RSS address of ourbits.club.

    Loads ``getrss.php`` through ``PlaywrightHelper().get_page_source`` and
    returns the last ``href`` of an ``<a class="gen_rsslink">`` element.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent used for the page load
    :param proxy: truthy to load the page through ``settings.PROXY``
    :return: the RSS address, or "" when the page or the link is missing
    """
    try:
        get_rss_url = urljoin(url, "getrss.php")
        html_text = PlaywrightHelper().get_page_source(url=get_rss_url,
                                                       cookies=cookie,
                                                       ua=ua,
                                                       proxies=settings.PROXY if proxy else None)
        if not html_text:
            return ""
        html = etree.HTML(html_text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath("//a[@class='gen_rsslink']/@href")
            if rss_link:
                return str(rss_link[-1])
        return ""
    except Exception as e:
        # Best effort: report through the logger instead of stdout.
        logger.error(f"获取rss失败{url} {str(e)}")
        return ""
def __get_rss_zhuque(self, url: str, cookie: str, ua: str, proxy: int) -> str:
    """
    Obtain the RSS address of zhuque.in.

    Loads ``user/rss`` through ``PlaywrightHelper().get_page_source`` and
    returns the last ``href`` found on the page.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent used for the page load
    :param proxy: truthy to load the page through ``settings.PROXY``
    :return: the RSS address, or "" when the page or the link is missing
    """
    try:
        get_rss_url = urljoin(url, "user/rss")
        html_text = PlaywrightHelper().get_page_source(url=get_rss_url,
                                                       cookies=cookie,
                                                       ua=ua,
                                                       proxies=settings.PROXY if proxy else None)
        if not html_text:
            return ""
        html = etree.HTML(html_text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath("//a/@href")
            if rss_link:
                return str(rss_link[-1])
        return ""
    except Exception as e:
        # Best effort: report through the logger instead of stdout.
        logger.error(f"获取rss失败{url} {str(e)}")
        return ""
@staticmethod
def __get_rss_data(url: str) -> dict:
    """
    Build the form parameters used when requesting a site's RSS link.

    Starts from a common parameter set and applies site-specific rules in a
    fixed order. A rule applies when its marker is a substring of ``url``;
    it either updates the current parameters or replaces them entirely.
    Some sites differ from the defaults, so the rule table is expected to
    grow over time.

    :param url: site address
    :return: a new dict of form parameters
    """
    params = {
        "inclbookmarked": 0,
        "itemsmalldescr": 1,
        "showrows": 50,
        "search_mode": 1,
    }
    # (marker, replace, values) - evaluated top to bottom, so later rules
    # win over earlier ones when several markers occur in the same url.
    rules = (
        # Show download box: 0 all, 1 download box only
        ("hdchina.org", False, {"rsscart": 0}),
        # Torrent type: 1 new plus re-pinned old torrents, 0 new only;
        # RSS link validity: 180 days
        ("audiences.me", False, {"torrent_type": 1, "exp": 180}),
        # Downloads that cost bonus points: 0 no, 1 yes, 2 all
        ("shadowflow.org", False, {"paid": 0, "search_mode": 0, "showrows": 30}),
        # RSS link validity: 180 days
        ("hddolby.com", False, {"exp": 180}),
        ("hdhome.org", False, {"exp": 180}),
        ("pthome.net", False, {"exp": 180}),
        ("ptsbao.club", False, {"size": 0}),
        # Downloads that cost bonus points: 0 no, 1 yes, 2 all
        ("leaves.red", False, {"paid": 2, "search_mode": 0}),
        ("hdtime.org", False, {"search_mode": 0}),
        ("kp.m-team.cc", True, {
            "showrows": 50,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "https": 1,
        }),
        # Auto-approved torrents: 0 hide, 1 all; tracker SSL: 0 off, 1 on
        ("u2.dmhy.org", False, {"inclautochecked": 1, "trackerssl": 1}),
        ("www.pttime.org", True, {
            "showrows": 10,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
        }),
    )
    for marker, replace, values in rules:
        if marker not in url:
            continue
        if replace:
            params = dict(values)
        else:
            params.update(values)
    return params
@staticmethod @staticmethod
def __parse_favicon(url: str, cookie: str, ua: str) -> Tuple[str, Optional[str]]: def __parse_favicon(url: str, cookie: str, ua: str) -> Tuple[str, Optional[str]]:
""" """

View File

@ -1,14 +1,225 @@
import xml.dom.minidom import xml.dom.minidom
from typing import List from typing import List, Tuple
from urllib.parse import urljoin
from lxml import etree
from app.core.config import settings from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.utils.dom import DomUtils from app.utils.dom import DomUtils
from app.utils.http import RequestUtils from app.utils.http import RequestUtils
from app.utils.string import StringUtils from app.utils.string import StringUtils
class RssHelper: class RssHelper:
"""
RSS帮助类解析RSS报文获取RSS地址等
"""
# Per-site settings for discovering a site's RSS link, keyed by site domain.
# Each entry holds:
#   "xpath"  - XPath evaluated against the fetched page to select the link
#   "url"    - page path, joined onto the site address
#   "params" - form data posted with the request
#   "render" - optional; when truthy the page is loaded through Playwright
# The "default" entry is used when a domain has no entry of its own.
_faqlink_xpath = "//a[@class='faqlink']/@href"
_getrss_page = "getrss.php"
_common_params = {
    "inclbookmarked": 0,
    "itemsmalldescr": 1,
    "showrows": 50,
    "search_mode": 1,
}
rss_link_conf = {
    "default": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params},
    },
    "hares.top": {
        "xpath": "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href",
        "url": _getrss_page,
        "params": {**_common_params},
    },
    "et8.org": {
        "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href",
        "url": _getrss_page,
        "params": {**_common_params},
    },
    "pttime.org": {
        "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]",
        "url": _getrss_page,
        "params": {
            "showrows": 10,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
        },
    },
    "ourbits.club": {
        "xpath": "//a[@class='gen_rsslink']/@href",
        "url": _getrss_page,
        "params": {**_common_params},
    },
    "totheglory.im": {
        "xpath": "//textarea/text()",
        "url": "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60",
        "params": {**_common_params},
    },
    "monikadesign.uk": {
        "xpath": "//a/@href",
        "url": "rss",
        "params": {**_common_params},
    },
    "zhuque.in": {
        "xpath": "//a/@href",
        "url": "user/rss",
        "render": True,
        "params": {**_common_params},
    },
    "hdchina.org": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "rsscart": 0},
    },
    "audiences.me": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "torrent_type": 1, "exp": 180},
    },
    "shadowflow.org": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "paid": 0,
            "search_mode": 0,
            "showrows": 30,
        },
    },
    "hddolby.com": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "exp": 180},
    },
    "hdhome.org": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "exp": 180},
    },
    "pthome.net": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "exp": 180},
    },
    "ptsbao.club": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "size": 0},
    },
    "leaves.red": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "search_mode": 0, "paid": 2},
    },
    "hdtime.org": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "search_mode": 0},
    },
    "m-team.io": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {
            "showrows": 50,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "https": 1,
        },
    },
    "u2.dmhy.org": {
        "xpath": _faqlink_xpath,
        "url": _getrss_page,
        "params": {**_common_params, "inclautochecked": 1, "trackerssl": 1},
    },
}
# The shared pieces only exist to build the table; drop them so that
# rss_link_conf stays the only name this section defines.
del _faqlink_xpath, _getrss_page, _common_params
@staticmethod @staticmethod
def parse(url, proxy: bool = False) -> List[dict]: def parse(url, proxy: bool = False) -> List[dict]:
""" """
@ -78,3 +289,52 @@ class RssHelper:
except Exception as e2: except Exception as e2:
print(str(e2)) print(str(e2))
return ret_array return ret_array
def get_rss_link(self, url: str, cookie: str, ua: str, proxy: bool = False) -> Tuple[str, str]:
    """
    Obtain the RSS address of a site.

    Looks up the site's entry in ``rss_link_conf`` (falling back to the
    "default" entry), fetches the configured page and extracts the link
    with the configured XPath.

    :param url: site address
    :param cookie: site cookie
    :param ua: User-Agent for the request
    :param proxy: whether to go through ``settings.PROXY``
    :return: ``(rss_address, error_message)``; on success the message is "",
             on failure the address is ""
    """
    try:
        # Site domain, used as the key into the configuration table.
        domain = StringUtils.get_url_domain(url)
        # NOTE(review): this is an exact-key lookup, so entries such as
        # "u2.dmhy.org" only apply if get_url_domain returns that exact
        # string - confirm against StringUtils.
        site_conf = self.rss_link_conf.get(domain) or self.rss_link_conf.get("default")
        # Page that exposes the RSS link, and the form data to send to it.
        rss_url = urljoin(url, site_conf.get("url"))
        rss_params = site_conf.get("params")
        proxies = settings.PROXY if proxy else None

        if site_conf.get("render"):
            # Page is loaded through Playwright instead of a plain request.
            html_text = PlaywrightHelper().get_page_source(
                url=rss_url,
                cookies=cookie,
                ua=ua,
                proxies=proxies
            )
        else:
            res = RequestUtils(
                cookies=cookie,
                timeout=60,
                ua=ua,
                proxies=proxies
            ).post_res(url=rss_url, data=rss_params)
            if res:
                html_text = res.text
            elif res is not None:
                # A response came back but is falsy: report its status.
                return "", f"获取 {url} RSS链接失败错误码{res.status_code},错误原因:{res.reason}"
            else:
                return "", f"获取RSS链接失败无法连接 {url} "

        # Nothing to parse: report the generic failure instead of letting
        # the parser raise on empty input.
        if not html_text:
            return "", f"获取RSS链接失败{url}"

        html = etree.HTML(html_text)
        # Compare with None explicitly: the truth value of an lxml element
        # depends on whether it has children, not on whether parsing worked.
        if html is not None:
            rss_link = html.xpath(site_conf.get("xpath"))
            if rss_link:
                return str(rss_link[-1]), ""
        return "", f"获取RSS链接失败{url}"
    except Exception as e:
        # Boundary of the helper: callers receive failures as a message.
        return "", f"获取 {url} RSS链接失败{str(e)}"