fix rsslink helper

This commit is contained in:
jxxghp
2023-09-11 17:13:26 +08:00
parent c45f5e6ac4
commit 08aef1f47f
2 changed files with 281 additions and 225 deletions

View File

@ -1,14 +1,225 @@
import xml.dom.minidom
from typing import List
from typing import List, Tuple
from urllib.parse import urljoin
from lxml import etree
from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.utils.dom import DomUtils
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
class RssHelper:
"""
RSS帮助类解析RSS报文、获取RSS地址等
"""
# 各站点RSS链接获取配置
rss_link_conf = {
"default": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"hares.top": {
"xpath": "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"et8.org": {
"xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"pttime.org": {
"xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]",
"url": "getrss.php",
"params": {
"showrows": 10,
"inclbookmarked": 0,
"itemsmalldescr": 1
}
},
"ourbits.club": {
"xpath": "//a[@class='gen_rsslink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"totheglory.im": {
"xpath": "//textarea/text()",
"url": "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"monikadesign.uk": {
"xpath": "//a/@href",
"url": "rss",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"zhuque.in": {
"xpath": "//a/@href",
"url": "user/rss",
"render": True,
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
}
},
"hdchina.org": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"rsscart": 0
}
},
"audiences.me": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"torrent_type": 1,
"exp": 180
}
},
"shadowflow.org": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"paid": 0,
"search_mode": 0,
"showrows": 30
}
},
"hddolby.com": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"exp": 180
}
},
"hdhome.org": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"exp": 180
}
},
"pthome.net": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"exp": 180
}
},
"ptsbao.club": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"size": 0
}
},
"leaves.red": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 0,
"paid": 2
}
},
"hdtime.org": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 0,
}
},
"m-team.io": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"showrows": 50,
"inclbookmarked": 0,
"itemsmalldescr": 1,
"https": 1
}
},
"u2.dmhy.org": {
"xpath": "//a[@class='faqlink']/@href",
"url": "getrss.php",
"params": {
"inclbookmarked": 0,
"itemsmalldescr": 1,
"showrows": 50,
"search_mode": 1,
"inclautochecked": 1,
"trackerssl": 1
}
},
}
@staticmethod
def parse(url, proxy: bool = False) -> List[dict]:
"""
@ -78,3 +289,52 @@ class RssHelper:
except Exception as e2:
print(str(e2))
return ret_array
def get_rss_link(self, url: str, cookie: str, ua: str, proxy: bool = False) -> Tuple[str, str]:
"""
获取站点rss地址
:param url: 站点地址
:param cookie: 站点cookie
:param ua: 站点ua
:param proxy: 是否使用代理
:return: rss地址、错误信息
"""
try:
# 获取站点域名
domain = StringUtils.get_url_domain(url)
# 获取配置
site_conf = self.rss_link_conf.get(domain) or self.rss_link_conf.get("default")
# RSS地址
rss_url = urljoin(url, site_conf.get("url"))
# RSS请求参数
rss_params = site_conf.get("params")
# 请求RSS页面
if site_conf.get("render"):
html_text = PlaywrightHelper().get_page_source(
url=rss_url,
cookies=cookie,
ua=ua,
proxies=settings.PROXY if proxy else None
)
else:
res = RequestUtils(
cookies=cookie,
timeout=60,
ua=ua,
proxies=settings.PROXY if proxy else None
).post_res(url=rss_url, data=rss_params)
if res:
html_text = res.text
elif res is not None:
return "", f"获取 {url} RSS链接失败错误码{res.status_code},错误原因:{res.reason}"
else:
return "", f"获取RSS链接失败无法连接 {url} "
# 解析HTML
html = etree.HTML(html_text)
if html:
rss_link = html.xpath(site_conf.get("xpath"))
if rss_link:
return str(rss_link[-1]), ""
return "", f"获取RSS链接失败{url}"
except Exception as e:
return "", f"获取 {url} RSS链接失败{str(e)}"