init

2023-06-06 07:15:17 +08:00
commit 4d06f86e62
217 changed files with 13959 additions and 0 deletions
--- a/app/helper/init.py
+++ b/app/helper/init.py
@@ -0,0 +1 @@
+from .module import ModuleHelper
--- a/app/helper/pycache/init.cpython-310.pyc
+++ b/app/helper/pycache/init.cpython-310.pyc
--- a/app/helper/pycache/cookiecloud.cpython-310.pyc
+++ b/app/helper/pycache/cookiecloud.cpython-310.pyc
--- a/app/helper/pycache/module.cpython-310.pyc
+++ b/app/helper/pycache/module.cpython-310.pyc
--- a/app/helper/pycache/rss.cpython-310.pyc
+++ b/app/helper/pycache/rss.cpython-310.pyc
--- a/app/helper/pycache/torrent.cpython-310.pyc
+++ b/app/helper/pycache/torrent.cpython-310.pyc
--- a/app/helper/cookiecloud.py
+++ b/app/helper/cookiecloud.py
@@ -0,0 +1,68 @@
+from typing import Tuple, Optional
+
+from app.utils.http import RequestUtils
+from app.utils.string import StringUtils
+
+
+class CookieCloudHelper:
+    """
+    Client for a CookieCloud server: downloads the stored cookies and groups them per site domain
+    """
+
+    # Cookie names that are left out of the generated cookie strings
+    _ignore_cookies: list = ["CookieAutoDeleteBrowsingDataCleanup", "CookieAutoDeleteCleaningDiscarded"]
+
+    def __init__(self, server, key, password):
+        """
+        :param server: CookieCloud server address, used as the prefix of the request URL
+        :param key: user key, appended to the request URL
+        :param password: password sent in the JSON body of the request
+        """
+        self._server = server
+        self._key = key
+        self._password = password
+        self._req = RequestUtils(content_type="application/json")
+
+    def download(self) -> Tuple[Optional[dict], str]:
+        """
+        Download the cookie data from CookieCloud
+        :return: (cookies, error message). cookies maps a domain to a "name=value;name=value" string;
+                 it is None when the request failed and {} when the server returned no data.
+                 The error message is empty on success.
+        """
+        if not self._server or not self._key or not self._password:
+            return None, "CookieCloud参数不正确"
+        req_url = "%s/get/%s" % (self._server, self._key)
+        ret = self._req.post_res(url=req_url, json={"password": self._password})
+        # Compare with None instead of relying on truthiness: a requests-style response is falsy
+        # for 4xx/5xx, which would hide the status code behind the generic failure message.
+        # NOTE(review): assumes post_res returns None when the request itself fails, as
+        # TorrentHelper does for the same utility - confirm against RequestUtils.
+        if ret is None:
+            return None, "CookieCloud请求失败，请检查服务器地址、用户KEY及加密密码是否正确"
+        if ret.status_code != 200:
+            return None, f"同步CookieCloud失败，错误码：{ret.status_code}"
+        try:
+            result = ret.json()
+        except ValueError as err:
+            # A body that is not JSON is reported through the error message instead of raising
+            return None, f"CookieCloud返回数据格式不正确：{err}"
+        if not result:
+            return {}, "未下载到数据"
+        # The cookies are either under "cookie_data" or the payload itself
+        contents = result.get("cookie_data") or result
+        # Regroup the cookies by the domain derived from each cookie's own "domain" field
+        domain_groups = {}
+        for cookies in contents.values():
+            for cookie in cookies:
+                domain_key = StringUtils.get_url_domain(cookie.get("domain"))
+                domain_groups.setdefault(domain_key, []).append(cookie)
+        # Build one cookie string per domain
+        ret_cookies = {}
+        for domain, content_list in domain_groups.items():
+            # Skip domains that only carry the Cloudflare clearance cookie
+            if all(content.get("name") == "cf_clearance" for content in content_list):
+                continue
+            # Site cookie string, without unnamed and ignored cookies
+            ret_cookies[domain] = ";".join(
+                f"{content.get('name')}={content.get('value')}"
+                for content in content_list
+                if content.get("name") and content.get("name") not in self._ignore_cookies
+            )
+        return ret_cookies, ""
--- a/app/helper/module.py
+++ b/app/helper/module.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+import importlib
+import pkgutil
+
+
+class ModuleHelper:
+    """
+    Dynamic loader for the classes found in the submodules of a package
+    """
+
+    @classmethod
+    def load(cls, package_path, filter_func=lambda name, obj: True):
+        """
+        Import the submodules of a package and collect the classes they expose
+        :param package_path: dotted name of the parent package
+        :param filter_func: predicate called with the attribute name and the class object;
+                            the class is collected when it returns True
+        :return: list of the collected class objects
+        """
+
+        parent = importlib.import_module(package_path)
+        collected: list = []
+        for _, child_name, _ in pkgutil.iter_modules(parent.__path__):
+            # Submodules whose name starts with an underscore are not imported
+            if child_name.startswith('_'):
+                continue
+            child = importlib.import_module(f'{package_path}.{child_name}')
+            # Keep the non-underscore attributes that are classes and accepted by the filter
+            collected.extend(
+                member
+                for attr, member in child.__dict__.items()
+                if not attr.startswith('_') and isinstance(member, type) and filter_func(attr, member)
+            )
+
+        return collected
--- a/app/helper/rss.py
+++ b/app/helper/rss.py
@@ -0,0 +1,81 @@
+import xml.dom.minidom
+from typing import List
+
+from app.core import settings
+from app.utils.dom import DomUtils
+from app.utils.http import RequestUtils
+from app.utils.string import StringUtils
+
+
+class RssHelper:
+    """
+    Helper that reads torrent entries out of an RSS feed
+    """
+
+    @staticmethod
+    def parse(url, proxy: bool = False) -> List[dict]:
+        """
+        Download an RSS feed and convert its <item> elements into torrent info dicts
+        :param url: RSS feed address
+        :param proxy: whether to send the request through settings.PROXY
+        :return: list of dicts with the keys title, enclosure, size, description, link and pubdate;
+                 an empty list when the url is empty, the request fails or nothing could be parsed
+        """
+        torrents: list = []
+        if not url:
+            return []
+        try:
+            response = RequestUtils(proxies=settings.PROXY if proxy else None).get_res(url)
+            if not response:
+                return []
+            response.encoding = response.apparent_encoding
+        except Exception as err:
+            print(str(err))
+            return []
+        feed_text = response.text
+        try:
+            # Parse the XML document and walk over its <item> elements
+            document = xml.dom.minidom.parseString(feed_text)
+            for node in document.documentElement.getElementsByTagName("item"):
+                try:
+                    # Title, items without one are skipped
+                    title = DomUtils.tag_value(node, "title", default="")
+                    if not title:
+                        continue
+                    # Description
+                    description = DomUtils.tag_value(node, "description", default="")
+                    # Torrent page
+                    link = DomUtils.tag_value(node, "link", default="")
+                    # Torrent download link
+                    enclosure = DomUtils.tag_value(node, "enclosure", "url", default="")
+                    if not enclosure:
+                        if not link:
+                            continue
+                        # Some feeds only provide link, so it takes the place of the enclosure
+                        enclosure, link = link, None
+                    # Size, anything that is not a plain digit string counts as 0
+                    raw_size = DomUtils.tag_value(node, "enclosure", "length", default=0)
+                    size = int(raw_size) if raw_size and str(raw_size).isdigit() else 0
+                    # Publish date, converted by StringUtils.get_time when present
+                    pubdate = DomUtils.tag_value(node, "pubDate", default="")
+                    if pubdate:
+                        pubdate = StringUtils.get_time(pubdate)
+                    torrents.append({
+                        'title': title,
+                        'enclosure': enclosure,
+                        'size': size,
+                        'description': description,
+                        'link': link,
+                        'pubdate': pubdate,
+                    })
+                except Exception as item_err:
+                    # A broken item is reported and skipped, the rest of the feed is still parsed
+                    print(str(item_err))
+                    continue
+        except Exception as parse_err:
+            print(str(parse_err))
+        return torrents
--- a/app/helper/sites.cp310-win_amd64.pyd
+++ b/app/helper/sites.cp310-win_amd64.pyd
--- a/app/helper/torrent.py
+++ b/app/helper/torrent.py
@@ -0,0 +1,218 @@
+import datetime
+import re
+from pathlib import Path
+from typing import Tuple, Optional, List
+from urllib.parse import unquote
+
+from bencode import bdecode
+
+from app.core import settings, Context, MetaInfo
+from app.log import logger
+from app.utils.http import RequestUtils
+from app.utils.string import StringUtils
+from app.utils.types import MediaType
+
+
+class TorrentHelper:
+    """
+    Torrent helper: downloads torrent files, reads their file lists and ranks candidate torrents
+    """
+
+    # Redirects are followed by hand (to spot magnet links), so they are capped to survive redirect loops
+    _max_redirects: int = 10
+    # HTTP status codes that are followed as redirects
+    _redirect_codes: tuple = (301, 302, 303, 307, 308)
+
+    def download_torrent(self, url: str,
+                         cookie: str = None,
+                         ua: str = None,
+                         referer: str = None,
+                         proxy: bool = False) \
+            -> Tuple[Optional[Path], Optional[bytes], Optional[str], Optional[list], Optional[str]]:
+        """
+        Download a torrent file into settings.TEMP_PATH
+        :param url: torrent download link
+        :param cookie: cookie passed to the requests
+        :param ua: User-Agent passed to the requests
+        :param referer: Referer passed to the requests
+        :param proxy: whether to send the requests through settings.PROXY
+        :return: torrent save path, torrent content, torrent main folder, torrent file list, error message;
+                 on failure the path and content are None and the error message gives the reason
+        """
+        # Imported locally because the module only imports `unquote` from urllib.parse
+        from urllib.parse import urljoin
+
+        if not url:
+            return None, None, "", [], "无法打开链接：%s" % url
+        if url.startswith("magnet:"):
+            return None, None, "", [], f"{url} 为磁力链接"
+
+        def new_request():
+            # Every request is sent with the same headers and proxy settings
+            return RequestUtils(
+                ua=ua,
+                cookies=cookie,
+                referer=referer,
+                proxies=settings.PROXY if proxy else None
+            )
+
+        req = new_request().get_res(url=url, allow_redirects=False)
+        redirects = 0
+        while req and req.status_code in self._redirect_codes and redirects < self._max_redirects:
+            location = req.headers.get('Location')
+            if not location:
+                # Redirect without a target: reported below through its status code
+                break
+            if location.startswith("magnet:"):
+                return None, None, "", [], f"获取到磁力链接：{location}"
+            # Location may be relative, resolve it against the current address
+            url = urljoin(url, location)
+            redirects += 1
+            req = new_request().get_res(url=url, allow_redirects=False)
+
+        if req is None:
+            return None, None, "", [], "无法打开链接：%s" % url
+        if req.status_code == 429:
+            return None, None, "", [], "触发站点流控，请稍后重试"
+        if req.status_code != 200:
+            return None, None, "", [], "下载种子出错，状态码：%s" % req.status_code
+        if not req.content:
+            return None, None, "", [], "未下载到种子数据"
+
+        # Work out what kind of content was returned
+        page_text = req.text
+        if page_text and str(page_text).startswith("magnet:"):
+            # Magnet link
+            return None, None, "", [], f"获取到磁力链接：{page_text}"
+        elif page_text and "下载种子文件" in page_text:
+            # Confirmation page shown on the first download, try to get past it
+            req = self.__submit_first_download_form(page_text, url, new_request)
+            if req is None:
+                return None, None, "", [], "种子数据有误，请确认链接是否正确，如为PT站点则需手工在站点下载一次种子"
+        else:
+            # Make sure the content really is a torrent file
+            try:
+                bdecode(req.content)
+            except Exception as err:
+                logger.warn(f"种子数据解析失败：{err}，链接：{url}")
+                return None, None, "", [], "种子数据有误，请确认链接是否正确"
+        # Torrent file name
+        file_name = self.__get_url_torrent_filename(req, url)
+        # Torrent file path
+        file_path = Path(settings.TEMP_PATH) / file_name
+        # Torrent content
+        file_content: bytes = req.content
+        # Torrent file information
+        file_folder, file_names, ret_msg = self.__get_torrent_fileinfo(file_content)
+        # Write to disk
+        file_path.write_bytes(file_content)
+        return file_path, file_content, file_folder, file_names, ret_msg
+
+    @staticmethod
+    def __submit_first_download_form(page_text: str, url: str, new_request):
+        """
+        Try to get past a site's first-download confirmation page by submitting the form it contains
+        :param page_text: text of the confirmation page
+        :param url: address the page was downloaded from
+        :param new_request: callable returning a configured RequestUtils
+        :return: the response holding the torrent when the page was skipped, otherwise None
+        """
+        try:
+            form = re.findall(r'<form.*?action="(.*?)".*?>(.*?)</form>', page_text, re.S)
+            if not form:
+                return None
+            action = form[0][0]
+            if not action or action == "?":
+                # The form posts back to the page itself
+                action = url
+            elif not action.startswith('http'):
+                action = StringUtils.get_base_url(url) + action
+            inputs = re.findall(r'<input.*?name="(.*?)".*?value="(.*?)".*?>', form[0][1], re.S)
+            if not action or not inputs:
+                return None
+            # Submit every named input of the form with its value
+            res = new_request().post_res(url=action, data=dict(inputs))
+            if res and res.status_code == 200:
+                # Raises when the answer is not a torrent file
+                bdecode(res.content)
+                logger.info(f"触发了站点首次种子下载，已自动跳过：{url}")
+                return res
+            elif res is not None:
+                logger.warn(f"触发了站点首次种子下载，且无法自动跳过，"
+                            f"返回码：{res.status_code}，错误原因：{res.reason}")
+            else:
+                logger.warn(f"触发了站点首次种子下载，且无法自动跳过：{url}")
+        except Exception as err:
+            logger.warn(f"【Downloader】触发了站点首次种子下载，尝试自动跳过时出现错误：{err}，链接：{url}")
+        return None
+
+    @staticmethod
+    def __get_torrent_fileinfo(content: bytes) -> Tuple[str, list, str]:
+        """
+        Decode a torrent file and read its file list
+        :param content: raw torrent content
+        :return: main folder of the torrent, file list, error message (empty when decoding worked)
+        """
+        file_folder = ""
+        file_names = []
+        try:
+            info = bdecode(content).get("info")
+            if info:
+                files = info.get("files") or []
+                if files:
+                    # Multi-file torrent: "name" is the main folder
+                    # NOTE(review): only the first element of each "path" is kept, so a file in a
+                    # sub-folder is listed by that folder's name - confirm this is intended
+                    file_names.extend(item["path"][0] for item in files if item.get("path"))
+                    file_folder = info.get("name")
+                else:
+                    # Single-file torrent: "name" is the file itself
+                    file_names.append(info.get("name"))
+        except Exception as err:
+            return file_folder, file_names, "解析种子文件异常：%s" % str(err)
+        return file_folder, file_names, ""
+
+    @staticmethod
+    def __get_url_torrent_filename(req, url):
+        """
+        Work out the torrent file name from the download response or, failing that, from the url
+        :param req: download response, its content-disposition header is read
+        :param url: download address, used when it ends with ".torrent"
+        :return: a bare file name without directory parts; "" when there is no response
+        """
+        if not req:
+            return ""
+        disposition = req.headers.get('content-disposition') or ""
+        file_name = ""
+        matched = re.findall(r"filename=\"?(.+)\"?", disposition)
+        if matched:
+            raw_name = str(matched[0])
+            try:
+                # Undo the latin-1 decoding of a header that actually carries UTF-8 bytes
+                raw_name = raw_name.encode('ISO-8859-1').decode()
+            except (UnicodeEncodeError, UnicodeDecodeError):
+                # Not a latin-1/UTF-8 round trip, keep the header value as received
+                pass
+            file_name = unquote(raw_name.split(";")[0].strip())
+            if file_name.endswith('"'):
+                file_name = file_name[:-1]
+        elif url and url.endswith(".torrent"):
+            file_name = unquote(url.split("/")[-1])
+        # The name comes from the remote site: drop directory parts so that it cannot
+        # point outside of the temp folder once joined to it
+        file_name = re.split(r"[\\/]", file_name)[-1].strip()
+        if not file_name or file_name in (".", ".."):
+            # Timestamp fallback without ":" so that it is a valid file name on Windows too
+            file_name = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f") + ".torrent"
+        return file_name
+
+    @staticmethod
+    def sort_group_torrents(torrent_list: List[Context]):
+        """
+        Sort the torrents and keep only the best one for each media (and season/episode)
+        :param torrent_list: candidate contexts holding meta_info, media_info and torrent_info
+        :return: de-duplicated list of contexts, best candidates first
+        """
+        if not torrent_list:
+            return []
+
+        def get_sort_str(_context):
+            # Fixed-width sort key: title, priority order, seeders, season count and episode count
+            _meta = _context.meta_info
+            _torrent = _context.torrent_info
+            season_len = str(len(_meta.get_season_list())).rjust(2, '0')
+            episode_len = str(len(_meta.get_episode_list())).rjust(4, '0')
+            return "%s%s%s%s" % (str(_torrent.title).ljust(100, ' '),
+                                 str(_torrent.pri_order).rjust(3, '0'),
+                                 str(_torrent.seeders).rjust(10, '0'),
+                                 "%s%s" % (season_len, episode_len))
+
+        # Descending order, so the first context of every group is the one to download
+        torrent_list = sorted(torrent_list, key=get_sort_str, reverse=True)
+        result = []
+        _added = set()
+        # Keep the first context of every media name only
+        for context in torrent_list:
+            meta = context.meta_info
+            media = context.media_info
+            # The de-duplication key is the title string, plus season/episode for non-movies
+            if media.type != MediaType.MOVIE:
+                media_name = "%s%s" % (media.get_title_string(),
+                                       meta.get_season_episode_string())
+            else:
+                media_name = media.get_title_string()
+            if media_name not in _added:
+                _added.add(media_name)
+                result.append(context)
+
+        return result
+
+    @staticmethod
+    def get_torrent_episodes(files: list):
+        """
+        Collect the episodes found in the file list of a torrent
+        :param files: file names of the torrent
+        :return: list of the distinct episodes of all media files
+        """
+        episodes = set()
+        for file in files or []:
+            # The file list may hold an empty name when the torrent has none
+            if not file:
+                continue
+            # Only media files are looked at
+            if Path(file).suffix not in settings.RMT_MEDIAEXT:
+                continue
+            meta = MetaInfo(file)
+            if not meta.begin_episode:
+                continue
+            episodes.update(meta.get_episode_list())
+        return list(episodes)