import xml.dom.minidom
from typing import List

from app.core.config import settings
from app.utils.dom import DomUtils
from app.utils.http import RequestUtils
from app.utils.string import StringUtils


class RssHelper:
    """Helper for downloading an RSS feed and extracting torrent entries."""

    @staticmethod
    def parse(url, proxy: bool = False) -> List[dict]:
        """
        Fetch an RSS subscription URL and extract the torrent entries it lists.

        :param url: RSS feed address
        :param proxy: when True, the request is sent through ``settings.PROXY``
        :return: list of dicts with the keys ``title``, ``enclosure``,
                 ``size``, ``description``, ``link`` and ``pubdate``. An empty
                 list is returned when ``url`` is empty, when the request
                 fails or yields a falsy response, or when the XML cannot be
                 parsed. This implementation never returns ``None``.
        """
        torrents: list = []
        if not url:
            return []

        # Download the feed; any request failure is printed and yields [].
        try:
            proxies = settings.PROXY if proxy else None
            response = RequestUtils(proxies=proxies).get_res(url)
            if not response:
                return []
            # Decode the body with the encoding detected from its content.
            response.encoding = response.apparent_encoding
        except Exception as fetch_err:
            print(str(fetch_err))
            return []

        # Defensive re-check; the response was already validated above.
        if not response:
            return torrents

        feed_xml = response.text
        try:
            # NOTE(review): xml.dom.minidom is not hardened against
            # maliciously constructed XML; feeds are remote input.
            document = xml.dom.minidom.parseString(feed_xml)
            root = document.documentElement
            for node in root.getElementsByTagName("item"):
                # A broken item is reported and skipped so that the
                # remaining items are still processed.
                try:
                    entry = RssHelper._parse_item(node)
                    if entry is not None:
                        torrents.append(entry)
                except Exception as item_err:
                    print(str(item_err))
                    continue
        except Exception as xml_err:
            print(str(xml_err))
        return torrents

    @staticmethod
    def _parse_item(node):
        """
        Convert a single ``<item>`` element into a torrent info dict.

        :param node: DOM element of one RSS ``<item>``
        :return: the torrent dict, or ``None`` when the item has no title or
                 has neither an enclosure URL nor a link
        """
        # Title is mandatory
        title = DomUtils.tag_value(node, "title", default="")
        if not title:
            return None

        # Description
        description = DomUtils.tag_value(node, "description", default="")
        # Torrent detail page
        link = DomUtils.tag_value(node, "link", default="")
        # Torrent download URL
        enclosure = DomUtils.tag_value(node, "enclosure", "url", default="")
        if not (enclosure or link):
            return None
        # Some feeds only provide <link> and no <enclosure>
        enclosure = enclosure or link

        # Size: only a purely numeric length attribute is accepted
        raw_size = DomUtils.tag_value(node, "enclosure", "length", default=0)
        size = int(raw_size) if raw_size and str(raw_size).isdigit() else 0

        # Publish date, converted by StringUtils.get_time when present
        pubdate = DomUtils.tag_value(node, "pubDate", default="")
        if pubdate:
            pubdate = StringUtils.get_time(pubdate)

        return {
            'title': title,
            'enclosure': enclosure,
            'size': size,
            'description': description,
            'link': link,
            'pubdate': pubdate,
        }