fix rsslink helper

2023-09-11 17:13:26 +08:00
parent c45f5e6ac4
commit 08aef1f47f
2 changed files with 281 additions and 225 deletions
--- a/app/chain/cookiecloud.py
+++ b/app/chain/cookiecloud.py
@ -10,10 +10,10 @@ from app.chain.site import SiteChain
 from app.core.config import settings
 from app.db.site_oper import SiteOper
 from app.db.siteicon_oper import SiteIconOper
 from app.helper.browser import PlaywrightHelper
 from app.helper.cloudflare import under_challenge
 from app.helper.cookiecloud import CookieCloudHelper
 from app.helper.message import MessageHelper
 from app.helper.rss import RssHelper
 from app.helper.sites import SitesHelper
 from app.log import logger
 from app.schemas import Notification, NotificationType, MessageChannel
@ -31,6 +31,7 @@ class CookieCloudChain(ChainBase):
        self.siteoper = SiteOper(self._db)
        self.siteiconoper = SiteIconOper(self._db)
        self.siteshelper = SitesHelper()
        self.rsshelper = RssHelper()
        self.sitechain = SiteChain(self._db)
        self.message = MessageHelper()
        self.cookiecloud = CookieCloudHelper(
@ -82,10 +83,17 @@ class CookieCloudChain(ChainBase):
                    logger.info(f"站点【{site_info.name}】连通性正常，不同步CookieCloud数据")
                    if not site_info.public and not site_info.rss:
                        # 自动生成rss地址
-                        rss_url = self.__get_rss(url=site_info.url, cookie=cookie, ua=settings.USER_AGENT,
+                        rss_url, errmsg = self.rsshelper.get_rss_link(
-                                                 proxy=site_info.proxy)
+                            url=site_info.url,
-                        # 更新站点rss地址
+                            cookie=cookie,
-                        self.siteoper.update_rss(domain=domain, rss=rss_url)
+                            ua=settings.USER_AGENT,
                            proxy=True if site_info.proxy else False
                        )
                        if rss_url:
                            # 更新站点rss地址
                            self.siteoper.update_rss(domain=domain, rss=rss_url)
                        else:
                            logger.warn(errmsg)
                    continue
                # 更新站点Cookie
                self.siteoper.update_cookie(domain=domain, cookies=cookie)
@ -115,7 +123,12 @@ class CookieCloudChain(ChainBase):
                rss_url = None
                if not indexer.get("public") and indexer.get("domain"):
                    # 自动生成rss地址
-                    rss_url = self.__get_rss(url=indexer.get("domain"), cookie=cookie, ua=settings.USER_AGENT)
+                    rss_url, errmsg = self.rsshelper.get_rss_link(url=indexer.get("domain"),
                                                                  cookie=cookie,
                                                                  ua=settings.USER_AGENT)
                    if errmsg:
                        logger.warn(errmsg)
                # 插入数据库
                self.siteoper.add(name=indexer.get("name"),
                                  url=indexer.get("domain"),
                                  domain=domain,
@ -148,223 +161,6 @@ class CookieCloudChain(ChainBase):
        logger.info(f"CookieCloud同步成功：{ret_msg}")
        return True, ret_msg
    def __get_rss(self, url: str, cookie: str, ua: str, proxy: int) -> str:
        """
        获取站点rss地址
        """
        if "ourbits.club" in url:
            return self.__get_rss_ourbits(url=url, cookie=cookie, ua=ua, proxy=proxy)
        if "totheglory.im" in url:
            return self.__get_rss_ttg(url=url, cookie=cookie, ua=ua, proxy=proxy)
        if "monikadesign.uk" in url:
            return self.__get_rss_monika(url=url, cookie=cookie, ua=ua, proxy=proxy)
        if "zhuque.in" in url:
            return self.__get_rss_zhuque(url=url, cookie=cookie, ua=ua, proxy=proxy)
        xpath = "//a[@class='faqlink']/@href"
        if "club.hares.top" in url:
            xpath = "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href"
        if "et8.org" in url:
            xpath = "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href"
        if "pttime.org" in url:
            xpath = "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]"
        return self.__get_rss_base(url=url, cookie=cookie, ua=ua, xpath=xpath, proxy=proxy)
    def __get_rss_base(self, url: str, cookie: str, ua: str, xpath: str, proxy: int) -> str:
        """
        默认获取站点rss地址
        """
        try:
            get_rss_url = urljoin(url, "getrss.php")
            rss_data = self.__get_rss_data(url)
            res = RequestUtils(cookies=cookie,
                               timeout=60,
                               ua=ua,
                               proxies=settings.PROXY if proxy else None).post_res(
                url=get_rss_url, data=rss_data)
            if res:
                html_text = res.text
            else:
                logger.error(f"获取rss失败：{url}")
                return ""
            html = etree.HTML(html_text)
            if html:
                rss_link = html.xpath(xpath)
                if rss_link:
                    return str(rss_link[-1])
            return ""
        except Exception as e:
            print(str(e))
            return ""
    def __get_rss_ttg(self, url: str, cookie: str, ua: str, proxy: int) -> str:
        """
        获取ttg rss地址
        """
        try:
            get_rss_url = urljoin(url,
                                  "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60")
            res = RequestUtils(cookies=cookie,
                               timeout=60,
                               ua=ua,
                               proxies=settings.PROXY if proxy else None).get_res(url=get_rss_url)
            if res:
                html_text = res.text
            else:
                logger.error(f"获取rss失败：{url}")
                return ""
            html = etree.HTML(html_text)
            if html:
                rss_link = html.xpath("//textarea/text()")
                if rss_link:
                    return str(rss_link[-1])
            return ""
        except Exception as e:
            print(str(e))
            return ""
    def __get_rss_monika(self, url: str, cookie: str, ua: str, proxy: int) -> str:
        """
        获取monikadesign rss地址
        """
        try:
            get_rss_url = urljoin(url, "rss")
            res = RequestUtils(cookies=cookie,
                               timeout=60,
                               ua=ua,
                               proxies=settings.PROXY if proxy else None).get_res(url=get_rss_url)
            if res:
                html_text = res.text
            else:
                logger.error(f"获取rss失败：{url}")
                return ""
            html = etree.HTML(html_text)
            if html:
                rss_link = html.xpath("//a/@href")
                if rss_link:
                    return str(rss_link[0])
            return ""
        except Exception as e:
            print(str(e))
            return ""
    def __get_rss_ourbits(self, url: str, cookie: str, ua: str, proxy: int) -> str:
        """
        获取我堡rss地址
        """
        try:
            get_rss_url = urljoin(url, "getrss.php")
            html_text = PlaywrightHelper().get_page_source(url=get_rss_url,
                                                           cookies=cookie,
                                                           ua=ua,
                                                           proxies=settings.PROXY if proxy else None)
            if html_text:
                html = etree.HTML(html_text)
                if html:
                    rss_link = html.xpath("//a[@class='gen_rsslink']/@href")
                    if rss_link:
                        return str(rss_link[-1])
            return ""
        except Exception as e:
            print(str(e))
            return ""
    def __get_rss_zhuque(self, url: str, cookie: str, ua: str, proxy: int) -> str:
        """
        获取zhuque rss地址
        """
        try:
            get_rss_url = urljoin(url, "user/rss")
            html_text = PlaywrightHelper().get_page_source(url=get_rss_url,
                                                           cookies=cookie,
                                                           ua=ua,
                                                           proxies=settings.PROXY if proxy else None)
            if html_text:
                html = etree.HTML(html_text)
                if html:
                    rss_link = html.xpath("//a/@href")
                    if rss_link:
                        return str(rss_link[-1])
            return ""
        except Exception as e:
            print(str(e))
            return ""
    @staticmethod
    def __get_rss_data(url: str) -> dict:
        """
        获取请求rss的参数，有的站不太一样，后续不断维护
        """
        _rss_data = {
            "inclbookmarked": 0,
            "itemsmalldescr": 1,
            "showrows": 50,
            "search_mode": 1,
        }
        if 'hdchina.org' in url:
            # 显示下载框	0全部 1仅下载框
            _rss_data['rsscart'] = 0
        if 'audiences.me' in url:
            # 种子类型 1新种与重置顶旧种 0只包含新种
            _rss_data['torrent_type'] = 1
            # RSS链接有效期： 180天
            _rss_data['exp'] = 180
        if 'shadowflow.org' in url:
            # 下载需扣除魔力 0不需要 1需要 2全部
            _rss_data['paid'] = 0
            _rss_data['search_mode'] = 0
            _rss_data['showrows'] = 30
        if 'hddolby.com' in url:
            # RSS链接有效期： 180天
            _rss_data['exp'] = 180
        if 'hdhome.org' in url:
            # RSS链接有效期： 180天
            _rss_data['exp'] = 180
        if 'pthome.net' in url:
            # RSS链接有效期： 180天
            _rss_data['exp'] = 180
        if 'ptsbao.club' in url:
            _rss_data['size'] = 0
        if 'leaves.red' in url:
            # 下载需扣除魔力 0不需要 1需要 2全部
            _rss_data['paid'] = 2
            _rss_data['search_mode'] = 0
        if 'hdtime.org' in url:
            _rss_data['search_mode'] = 0
        if 'kp.m-team.cc' in url:
            _rss_data = {
                "showrows": 50,
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "https": 1
            }
        if 'u2.dmhy.org' in url:
            # 显示自动通过的种子 0不显示自动通过的种子 1全部
            _rss_data['inclautochecked'] = 1
            # Tracker SSL 0不使用SSL 1使用SSL
            _rss_data['trackerssl'] = 1
        if 'www.pttime.org' in url:
            _rss_data = {
                "showrows": 10,
                "inclbookmarked": 0,
                "itemsmalldescr": 1
            }
        return _rss_data
    @staticmethod
    def __parse_favicon(url: str, cookie: str, ua: str) -> Tuple[str, Optional[str]]:
        """
--- a/app/helper/rss.py
+++ b/app/helper/rss.py
@ -1,14 +1,225 @@
 import xml.dom.minidom
-from typing import List
+from typing import List, Tuple
 from urllib.parse import urljoin
 from lxml import etree
 from app.core.config import settings
 from app.helper.browser import PlaywrightHelper
 from app.utils.dom import DomUtils
 from app.utils.http import RequestUtils
 from app.utils.string import StringUtils
 class RssHelper:
-    
+    """
    RSS帮助类，解析RSS报文、获取RSS地址等
    """
    # 各站点RSS链接获取配置
    rss_link_conf = {
        "default": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "hares.top": {
            "xpath": "//*[@id='layui-layer100001']/div[2]/div/p[4]/a/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "et8.org": {
            "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/a[2]/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "pttime.org": {
            "xpath": "//*[@id='outer']/table/tbody/tr/td/table/tbody/tr/td/text()[5]",
            "url": "getrss.php",
            "params": {
                "showrows": 10,
                "inclbookmarked": 0,
                "itemsmalldescr": 1
            }
        },
        "ourbits.club": {
            "xpath": "//a[@class='gen_rsslink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "totheglory.im": {
            "xpath": "//textarea/text()",
            "url": "rsstools.php?c51=51&c52=52&c53=53&c54=54&c108=108&c109=109&c62=62&c63=63&c67=67&c69=69&c70=70&c73=73&c76=76&c75=75&c74=74&c87=87&c88=88&c99=99&c90=90&c58=58&c103=103&c101=101&c60=60",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "monikadesign.uk": {
            "xpath": "//a/@href",
            "url": "rss",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "zhuque.in": {
            "xpath": "//a/@href",
            "url": "user/rss",
            "render": True,
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
            }
        },
        "hdchina.org": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "rsscart": 0
            }
        },
        "audiences.me": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "torrent_type": 1,
                "exp": 180
            }
        },
        "shadowflow.org": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "paid": 0,
                "search_mode": 0,
                "showrows": 30
            }
        },
        "hddolby.com": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "exp": 180
            }
        },
        "hdhome.org": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "exp": 180
            }
        },
        "pthome.net": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "exp": 180
            }
        },
        "ptsbao.club": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "size": 0
            }
        },
        "leaves.red": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 0,
                "paid": 2
            }
        },
        "hdtime.org": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 0,
            }
        },
        "m-team.io": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "showrows": 50,
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "https": 1
            }
        },
        "u2.dmhy.org": {
            "xpath": "//a[@class='faqlink']/@href",
            "url": "getrss.php",
            "params": {
                "inclbookmarked": 0,
                "itemsmalldescr": 1,
                "showrows": 50,
                "search_mode": 1,
                "inclautochecked": 1,
                "trackerssl": 1
            }
        },
    }
    @staticmethod
    def parse(url, proxy: bool = False) -> List[dict]:
        """
@ -78,3 +289,52 @@ class RssHelper:
            except Exception as e2:
                print(str(e2))
        return ret_array
    def get_rss_link(self, url: str, cookie: str, ua: str, proxy: bool = False) -> Tuple[str, str]:
        """
        获取站点rss地址
        :param url: 站点地址
        :param cookie: 站点cookie
        :param ua: 站点ua
        :param proxy: 是否使用代理
        :return: rss地址、错误信息
        """
        try:
            # 获取站点域名
            domain = StringUtils.get_url_domain(url)
            # 获取配置
            site_conf = self.rss_link_conf.get(domain) or self.rss_link_conf.get("default")
            # RSS地址
            rss_url = urljoin(url, site_conf.get("url"))
            # RSS请求参数
            rss_params = site_conf.get("params")
            # 请求RSS页面
            if site_conf.get("render"):
                html_text = PlaywrightHelper().get_page_source(
                    url=rss_url,
                    cookies=cookie,
                    ua=ua,
                    proxies=settings.PROXY if proxy else None
                )
            else:
                res = RequestUtils(
                    cookies=cookie,
                    timeout=60,
                    ua=ua,
                    proxies=settings.PROXY if proxy else None
                ).post_res(url=rss_url, data=rss_params)
                if res:
                    html_text = res.text
                elif res is not None:
                    return "", f"获取 {url} RSS链接失败，错误码：{res.status_code}，错误原因：{res.reason}"
                else:
                    return "", f"获取RSS链接失败：无法连接 {url} "
            # 解析HTML
            html = etree.HTML(html_text)
            if html:
                rss_link = html.xpath(site_conf.get("xpath"))
                if rss_link:
                    return str(rss_link[-1]), ""
            return "", f"获取RSS链接失败：{url}"
        except Exception as e:
            return "", f"获取 {url} RSS链接失败：{str(e)}"