82 lines
3.2 KiB
Python
82 lines
3.2 KiB
Python
import xml.dom.minidom
|
||
from typing import List
|
||
|
||
from app.core import settings
|
||
from app.utils.dom import DomUtils
|
||
from app.utils.http import RequestUtils
|
||
from app.utils.string import StringUtils
|
||
|
||
|
||
class RssHelper:
    """Helper for downloading an RSS feed and extracting its torrent entries."""

    @staticmethod
    def parse(url, proxy: bool = False) -> List[dict]:
        """
        Fetch the RSS feed at ``url`` and return the torrent entries it contains.

        :param url: RSS feed address; a falsy value yields an empty list
        :param proxy: when True the request is sent with ``settings.PROXY``
        :return: list of dicts with the keys ``title``, ``enclosure``, ``size``,
                 ``description``, ``link`` and ``pubdate``. The list is empty
                 when the URL is missing, the request fails or returns a falsy
                 response, or the XML cannot be parsed. This method never
                 returns None.
        """
        if not url:
            return []

        # Download the feed. This is best-effort: any failure is reported and
        # treated as "no entries" rather than propagated to the caller.
        try:
            ret = RequestUtils(proxies=settings.PROXY if proxy else None).get_res(url)
            if not ret:
                return []
            # Let the response guess its own charset before reading .text.
            ret.encoding = ret.apparent_encoding
        except Exception as err:
            print(str(err))
            return []

        ret_xml = ret.text
        ret_array: List[dict] = []
        try:
            # NOTE(review): xml.dom.minidom is not hardened against maliciously
            # constructed XML; feeds are remote input, so consider a hardened
            # parser if untrusted sources are expected.
            dom_tree = xml.dom.minidom.parseString(ret_xml)
            items = dom_tree.documentElement.getElementsByTagName("item")
            for item in items:
                # A malformed entry must not abort the rest of the feed.
                try:
                    entry = RssHelper._parse_item(item)
                except Exception as e1:
                    print(str(e1))
                    continue
                if entry:
                    ret_array.append(entry)
        except Exception as e2:
            # Entries collected before the failure are still returned.
            print(str(e2))
        return ret_array

    @staticmethod
    def _parse_item(item):
        """
        Convert one ``<item>`` DOM node into an entry dict.

        :param item: DOM element for a single RSS ``<item>``
        :return: the entry dict, or None when the item has no title or has
                 neither an enclosure URL nor a link
        """
        # Title is mandatory
        title = DomUtils.tag_value(item, "title", default="")
        if not title:
            return None
        # Description
        description = DomUtils.tag_value(item, "description", default="")
        # Torrent detail page
        link = DomUtils.tag_value(item, "link", default="")
        # Torrent download URL
        enclosure = DomUtils.tag_value(item, "enclosure", "url", default="")
        if not enclosure and not link:
            return None
        # Some feeds only provide <link>; use it as the download URL and
        # leave the page link unset.
        if not enclosure:
            enclosure, link = link, None
        # Size: only plain decimal text is converted. isdecimal() is used
        # instead of isdigit() because isdigit() accepts characters such as
        # superscripts that int() rejects, which would discard the whole item.
        raw_size = DomUtils.tag_value(item, "enclosure", "length", default=0)
        size_text = str(raw_size).strip() if raw_size else ""
        size = int(size_text) if size_text.isdecimal() else 0
        # Publish date, converted via StringUtils.get_time when present
        pubdate = DomUtils.tag_value(item, "pubDate", default="")
        if pubdate:
            pubdate = StringUtils.get_time(pubdate)
        return {
            'title': title,
            'enclosure': enclosure,
            'size': size,
            'description': description,
            'link': link,
            'pubdate': pubdate,
        }
|