init
This commit is contained in:
81
app/helper/rss.py
Normal file
81
app/helper/rss.py
Normal file
@ -0,0 +1,81 @@
|
||||
import xml.dom.minidom
|
||||
from typing import List
|
||||
|
||||
from app.core import settings
|
||||
from app.utils.dom import DomUtils
|
||||
from app.utils.http import RequestUtils
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
|
||||
class RssHelper:
|
||||
|
||||
@staticmethod
|
||||
def parse(url, proxy: bool = False) -> List[dict]:
|
||||
"""
|
||||
解析RSS订阅URL,获取RSS中的种子信息
|
||||
:param url: RSS地址
|
||||
:param proxy: 是否使用代理
|
||||
:return: 种子信息列表,如为None代表Rss过期
|
||||
"""
|
||||
# 开始处理
|
||||
ret_array: list = []
|
||||
if not url:
|
||||
return []
|
||||
try:
|
||||
ret = RequestUtils(proxies=settings.PROXY if proxy else None).get_res(url)
|
||||
if not ret:
|
||||
return []
|
||||
ret.encoding = ret.apparent_encoding
|
||||
except Exception as err:
|
||||
print(str(err))
|
||||
return []
|
||||
if ret:
|
||||
ret_xml = ret.text
|
||||
try:
|
||||
# 解析XML
|
||||
dom_tree = xml.dom.minidom.parseString(ret_xml)
|
||||
rootNode = dom_tree.documentElement
|
||||
items = rootNode.getElementsByTagName("item")
|
||||
for item in items:
|
||||
try:
|
||||
# 标题
|
||||
title = DomUtils.tag_value(item, "title", default="")
|
||||
if not title:
|
||||
continue
|
||||
# 描述
|
||||
description = DomUtils.tag_value(item, "description", default="")
|
||||
# 种子页面
|
||||
link = DomUtils.tag_value(item, "link", default="")
|
||||
# 种子链接
|
||||
enclosure = DomUtils.tag_value(item, "enclosure", "url", default="")
|
||||
if not enclosure and not link:
|
||||
continue
|
||||
# 部分RSS只有link没有enclosure
|
||||
if not enclosure and link:
|
||||
enclosure = link
|
||||
link = None
|
||||
# 大小
|
||||
size = DomUtils.tag_value(item, "enclosure", "length", default=0)
|
||||
if size and str(size).isdigit():
|
||||
size = int(size)
|
||||
else:
|
||||
size = 0
|
||||
# 发布日期
|
||||
pubdate = DomUtils.tag_value(item, "pubDate", default="")
|
||||
if pubdate:
|
||||
# 转换为时间
|
||||
pubdate = StringUtils.get_time(pubdate)
|
||||
# 返回对象
|
||||
tmp_dict = {'title': title,
|
||||
'enclosure': enclosure,
|
||||
'size': size,
|
||||
'description': description,
|
||||
'link': link,
|
||||
'pubdate': pubdate}
|
||||
ret_array.append(tmp_dict)
|
||||
except Exception as e1:
|
||||
print(str(e1))
|
||||
continue
|
||||
except Exception as e2:
|
||||
print(str(e2))
|
||||
return ret_array
|
Reference in New Issue
Block a user