fix rsslink helper
This commit is contained in:
@ -1,14 +1,225 @@
|
||||
import xml.dom.minidom
|
||||
from typing import List
|
||||
from typing import List, Tuple
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from app.core.config import settings
|
||||
from app.helper.browser import PlaywrightHelper
|
||||
from app.utils.dom import DomUtils
|
||||
from app.utils.http import RequestUtils
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
|
||||
class RssHelper:
|
||||
|
||||
"""
|
||||
RSS帮助类,解析RSS报文、获取RSS地址等
|
||||
"""
|
||||
# Form fields shared by most sites; get_rss_link posts them to the site's RSS page.
# Every site entry below receives its own copy so entries never share a dict.
_common_rss_params = {
    "inclbookmarked": 0,
    "itemsmalldescr": 1,
    "showrows": 50,
    "search_mode": 1,
}

# Per-site configuration for locating the RSS link, keyed by site domain.
#   xpath  - expression evaluated on the fetched page to extract the link
#   url    - page address, joined onto the site address
#   render - when truthy, the page is loaded via PlaywrightHelper instead of a POST
#   params - form fields sent with the POST request
# The "default" entry is used for any domain without an entry of its own.
rss_link_conf = {
    "default": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params},
    },
    "hares.top": {
        "xpath": "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params},
    },
    "et8.org": {
        "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params},
    },
    "pttime.org": {
        "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]",
        "url": "getrss.php",
        # Field order differs from the shared set, so it is spelled out in full
        "params": {
            "showrows": 10,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
        },
    },
    "ourbits.club": {
        "xpath": "//a[@class='gen_rsslink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params},
    },
    "totheglory.im": {
        "xpath": "//textarea/text()",
        "url": "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60",
        "params": {**_common_rss_params},
    },
    "monikadesign.uk": {
        "xpath": "//a/@href",
        "url": "rss",
        "params": {**_common_rss_params},
    },
    "zhuque.in": {
        "xpath": "//a/@href",
        "url": "user/rss",
        "render": True,
        "params": {**_common_rss_params},
    },
    "hdchina.org": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "rsscart": 0},
    },
    "audiences.me": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "torrent_type": 1, "exp": 180},
    },
    "shadowflow.org": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        # Field order differs from the shared set, so it is spelled out in full
        "params": {
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "paid": 0,
            "search_mode": 0,
            "showrows": 30,
        },
    },
    "hddolby.com": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "exp": 180},
    },
    "hdhome.org": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "exp": 180},
    },
    "pthome.net": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "exp": 180},
    },
    "ptsbao.club": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "size": 0},
    },
    "leaves.red": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        # Overriding an existing key keeps its original position in the dict
        "params": {**_common_rss_params, "search_mode": 0, "paid": 2},
    },
    "hdtime.org": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "search_mode": 0},
    },
    "m-team.io": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        # Field order differs from the shared set, so it is spelled out in full
        "params": {
            "showrows": 50,
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "https": 1,
        },
    },
    "u2.dmhy.org": {
        "xpath": "//a[@class='faqlink']/@href",
        "url": "getrss.php",
        "params": {**_common_rss_params, "inclautochecked": 1, "trackerssl": 1},
    },
}
|
||||
|
||||
@staticmethod
|
||||
def parse(url, proxy: bool = False) -> List[dict]:
|
||||
"""
|
||||
@ -78,3 +289,52 @@ class RssHelper:
|
||||
except Exception as e2:
|
||||
print(str(e2))
|
||||
return ret_array
|
||||
|
||||
def get_rss_link(self, url: str, cookie: str, ua: str, proxy: bool = False) -> Tuple[str, str]:
    """
    Fetch the RSS subscription link of a site.

    The site's entry in ``rss_link_conf`` is looked up by domain, falling back
    to the ``"default"`` entry. The configured RSS page is then loaded and the
    link is extracted with the configured XPath expression.

    :param url: site address
    :param cookie: site cookie
    :param ua: site User-Agent
    :param proxy: whether to send the request through ``settings.PROXY``
    :return: ``(rss_link, error_message)``; the message is ``""`` on success
             and the link is ``""`` on failure. Exceptions are not propagated,
             they are reported through the error message.
    """
    try:
        # The site domain is the lookup key of the per-site configuration
        domain = StringUtils.get_url_domain(url)
        site_conf = self.rss_link_conf.get(domain) or self.rss_link_conf.get("default")
        # Address of the RSS page, resolved against the site address
        rss_url = urljoin(url, site_conf.get("url"))
        # Form fields submitted to the RSS page
        rss_params = site_conf.get("params")
        proxies = settings.PROXY if proxy else None
        if site_conf.get("render"):
            # Sites flagged with "render" are loaded through the browser helper
            html_text = PlaywrightHelper().get_page_source(
                url=rss_url,
                cookies=cookie,
                ua=ua,
                proxies=proxies
            )
        else:
            res = RequestUtils(
                cookies=cookie,
                timeout=60,
                ua=ua,
                proxies=proxies
            ).post_res(url=rss_url, data=rss_params)
            # NOTE(review): presumably a requests.Response, which is falsy for
            # HTTP error statuses - hence the separate "is not None" branch.
            if res:
                html_text = res.text
            elif res is not None:
                return "", f"获取 {url} RSS链接失败,错误码:{res.status_code},错误原因:{res.reason}"
            else:
                return "", f"获取RSS链接失败:无法连接 {url} "
        # Nothing to parse: report a plain failure instead of a parser exception
        if not html_text:
            return "", f"获取RSS链接失败:{url}"
        html = etree.HTML(html_text)
        # An lxml element is falsy when it has no children, so compare with
        # None rather than relying on its truth value.
        if html is not None:
            rss_link = html.xpath(site_conf.get("xpath"))
            if rss_link:
                # When several nodes match, the last one is used
                return str(rss_link[-1]), ""
        return "", f"获取RSS链接失败:{url}"
    except Exception as e:
        return "", f"获取 {url} RSS链接失败:{str(e)}"
|
||||
|
Reference in New Issue
Block a user