2023-06-09 19:25:12 +08:00

81 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import xml.dom.minidom
from typing import List
from app.core.config import settings
from app.utils.dom import DomUtils
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
class RssHelper:
@staticmethod
def parse(url, proxy: bool = False) -> List[dict]:
"""
解析RSS订阅URL获取RSS中的种子信息
:param url: RSS地址
:param proxy: 是否使用代理
:return: 种子信息列表如为None代表Rss过期
"""
# 开始处理
ret_array: list = []
if not url:
return []
try:
ret = RequestUtils(proxies=settings.PROXY if proxy else None).get_res(url)
if not ret:
return []
ret.encoding = ret.apparent_encoding
except Exception as err:
print(str(err))
return []
if ret:
ret_xml = ret.text
try:
# 解析XML
dom_tree = xml.dom.minidom.parseString(ret_xml)
rootNode = dom_tree.documentElement
items = rootNode.getElementsByTagName("item")
for item in items:
try:
# 标题
title = DomUtils.tag_value(item, "title", default="")
if not title:
continue
# 描述
description = DomUtils.tag_value(item, "description", default="")
# 种子页面
link = DomUtils.tag_value(item, "link", default="")
# 种子链接
enclosure = DomUtils.tag_value(item, "enclosure", "url", default="")
if not enclosure and not link:
continue
# 部分RSS只有link没有enclosure
if not enclosure and link:
enclosure = link
# 大小
size = DomUtils.tag_value(item, "enclosure", "length", default=0)
if size and str(size).isdigit():
size = int(size)
else:
size = 0
# 发布日期
pubdate = DomUtils.tag_value(item, "pubDate", default="")
if pubdate:
# 转换为时间
pubdate = StringUtils.get_time(pubdate)
# 返回对象
tmp_dict = {'title': title,
'enclosure': enclosure,
'size': size,
'description': description,
'link': link,
'pubdate': pubdate}
ret_array.append(tmp_dict)
except Exception as e1:
print(str(e1))
continue
except Exception as e2:
print(str(e2))
return ret_array