From 2b9975b9b2ea18cd47cb358a211566c6a259f31a Mon Sep 17 00:00:00 2001
From: jxxghp
Date: Tue, 13 Jun 2023 12:34:38 +0800
Subject: [PATCH] =?UTF-8?q?add=20=E7=AB=99=E7=82=B9=E5=AD=97=E5=B9=95?=
 =?UTF-8?q?=E8=87=AA=E5=8A=A8=E4=B8=8B=E8=BD=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (commentary only; git ignores text between the "---" line
and the first diff header):

* The subject decodes to "add 站点字幕自动下载", i.e. add automatic
  download of subtitles from the site.
* This copy of the patch had lost its line breaks and indentation. The
  layout below was rebuilt from the hunk headers and Python syntax. The
  line counts of every hunk and the diffstat totals match, but leading
  whitespace, the nesting of `sublink_list.append(sublink)` and the
  position of one blank context line in the last hunk are inferred. Run
  `git apply --check` against the parent tree before relying on it.
* download_added returns early unless download_dir is an existing
  directory, so its later `if not download_dir.exists(): mkdir(...)`
  branch cannot run.
* get_torrent_info returns "" as the folder name when the torrent is
  missing or cannot be parsed; download_dir is then
  settings.DOWNLOAD_PATH itself and subtitles are copied there.
* Only the ZIP branch removes its temporary files; the plain-subtitle
  branch leaves its copy in settings.TEMP_PATH.
* torrent.py wraps the temp dir as Path(settings.TEMP_PATH) while the
  subtitle module uses settings.TEMP_PATH / file_name directly; the two
  should agree on whether TEMP_PATH is a str or a Path.
* file_name comes from get_url_filename (body not shown in this patch)
  and is joined to TEMP_PATH as-is; confirm it cannot contain path
  separators.

 app/helper/torrent.py            |  39 ++++++++----
 app/modules/subtitle/__init__.py | 106 ++++++++++++++++++++++++++++++-
 2 files changed, 132 insertions(+), 13 deletions(-)

diff --git a/app/helper/torrent.py b/app/helper/torrent.py
index 544591f3..87503016 100644
--- a/app/helper/torrent.py
+++ b/app/helper/torrent.py
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Tuple, Optional, List, Union
 from urllib.parse import unquote
 
+from requests import Response
 from torrentool.api import Torrent
 
 from app.core.config import settings
@@ -97,21 +98,13 @@ class TorrentHelper:
                 # 检查是不是种子文件,如果不是仍然抛出异常
                 try:
                     # 读取种子文件名
-                    file_name = self.__get_url_torrent_filename(req, url)
+                    file_name = self.get_url_filename(req, url)
                     # 种子文件路径
                     file_path = Path(settings.TEMP_PATH) / file_name
                     # 保存到文件
                     file_path.write_bytes(req.content)
                     # 获取种子目录和文件清单
-                    torrentinfo = Torrent.from_file(file_path)
-                    # 获取目录名
-                    folder_name = torrentinfo.name
-                    # 获取文件清单
-                    if len(torrentinfo.files) <= 1:
-                        # 单文件种子
-                        file_list = [torrentinfo.name]
-                    else:
-                        file_list = [fileinfo.name for fileinfo in torrentinfo.files]
+                    folder_name, file_list = self.get_torrent_info(file_path)
                     # 成功拿到种子数据
                     return file_path, req.content, folder_name, file_list, ""
                 except Exception as err:
@@ -128,7 +121,31 @@ class TorrentHelper:
             return None, None, "", [], f"下载种子出错,状态码:{req.status_code}"
 
     @staticmethod
-    def __get_url_torrent_filename(req, url: str) -> str:
+    def get_torrent_info(torrent_path: Path) -> Tuple[str, List[str]]:
+        """
+        获取种子文件的文件夹名和文件清单
+        :param torrent_path: 种子文件路径
+        :return: 文件夹名、文件清单
+        """
+        if not torrent_path or not torrent_path.exists():
+            return "", []
+        try:
+            torrentinfo = Torrent.from_file(torrent_path)
+            # 获取目录名
+            folder_name = torrentinfo.name
+            # 获取文件清单
+            if len(torrentinfo.files) <= 1:
+                # 单文件种子
+                file_list = [torrentinfo.name]
+            else:
+                file_list = [fileinfo.name for fileinfo in torrentinfo.files]
+            return folder_name, file_list
+        except Exception as err:
+            logger.error(f"种子文件解析失败:{err}")
+            return "", []
+
+    @staticmethod
+    def get_url_filename(req: Response, url: str) -> str:
         """
         从下载请求中获取种子文件名
         """
diff --git a/app/modules/subtitle/__init__.py b/app/modules/subtitle/__init__.py
index 2b3885aa..00ce7980 100644
--- a/app/modules/subtitle/__init__.py
+++ b/app/modules/subtitle/__init__.py
@@ -1,8 +1,17 @@
+import shutil
 from pathlib import Path
 from typing import Tuple, Union
 
+from lxml import etree
+
+from app.core.config import settings
 from app.core.context import Context
+from app.helper.torrent import TorrentHelper
+from app.log import logger
 from app.modules import _ModuleBase
+from app.utils.http import RequestUtils
+from app.utils.string import StringUtils
+from app.utils.system import SystemUtils
 
 
 class SubtitleModule(_ModuleBase):
@@ -10,6 +19,11 @@ class SubtitleModule(_ModuleBase):
     字幕下载模块
     """
 
+    # 站点详情页字幕下载链接识别XPATH
+    _SITE_SUBTITLE_XPATH = [
+        '//td[@class="rowhead"][text()="字幕"]/following-sibling::td//a/@href',
+    ]
+
     def init_module(self) -> None:
         pass
 
@@ -21,10 +35,98 @@ class SubtitleModule(_ModuleBase):
 
     def download_added(self, context: Context, torrent_path: Path) -> None:
         """
-        添加下载任务成功后,从站点下载字幕
+        添加下载任务成功后,从站点下载字幕,保存到下载目录
         :param context: 上下文,包括识别信息、媒体信息、种子信息
         :param torrent_path: 种子文件地址
         :return: None,该方法可被多个模块同时处理
         """
-        pass
+        # 种子信息
+        torrent = context.torrent_info
+        if not torrent.page_url:
+            return
+        # 字幕下载目录
+        logger.info("开始从站点下载字幕:%s" % torrent.page_url)
+        # 获取种子信息
+        folder_name, _ = TorrentHelper.get_torrent_info(torrent_path)
+        download_dir = Path(settings.DOWNLOAD_PATH) / folder_name
+        if not download_dir.is_dir():
+            logger.warn(f"下载目录不正确:{download_dir}")
+            return
+        # 读取网站代码
+        request = RequestUtils(cookies=torrent.site_cookie, headers=torrent.site_ua)
+        res = request.get_res(torrent.page_url)
+        if res and res.status_code == 200:
+            if not res.text:
+                logger.warn(f"读取页面代码失败:{torrent.page_url}")
+                return
+            html = etree.HTML(res.text)
+            sublink_list = []
+            for xpath in self._SITE_SUBTITLE_XPATH:
+                sublinks = html.xpath(xpath)
+                if sublinks:
+                    for sublink in sublinks:
+                        if not sublink:
+                            continue
+                        if not sublink.startswith("http"):
+                            base_url = StringUtils.get_base_url(torrent.page_url)
+                            if sublink.startswith("/"):
+                                sublink = "%s%s" % (base_url, sublink)
+                            else:
+                                sublink = "%s/%s" % (base_url, sublink)
+                        sublink_list.append(sublink)
+            # 下载所有字幕文件
+            for sublink in sublink_list:
+                logger.info(f"找到字幕下载链接:{sublink},开始下载...")
+                # 下载
+                ret = request.get_res(sublink)
+                if ret and ret.status_code == 200:
+                    # 创建目录
+                    if not download_dir.exists():
+                        download_dir.mkdir(parents=True, exist_ok=True)
+                    # 保存ZIP
+                    file_name = TorrentHelper.get_url_filename(ret, sublink)
+                    if not file_name:
+                        logger.warn(f"链接不是字幕文件:{sublink}")
+                        continue
+                    if file_name.lower().endswith(".zip"):
+                        # ZIP包
+                        zip_file = settings.TEMP_PATH / file_name
+                        # 保存
+                        zip_file.write_bytes(ret.content)
+                        # 解压路径
+                        zip_path = zip_file.with_name(zip_file.stem)
+                        # 解压文件
+                        shutil.unpack_archive(zip_file, zip_path, format='zip')
+                        # 遍历转移文件
+                        for sub_file in SystemUtils.list_files_with_extensions(zip_path, settings.RMT_SUBEXT):
+                            target_sub_file = download_dir / sub_file.name
+                            if target_sub_file.exists():
+                                logger.info(f"字幕文件已存在:{target_sub_file}")
+                                continue
+                            logger.info(f"转移字幕 {sub_file} 到 {target_sub_file} ...")
+                            SystemUtils.copy(sub_file, target_sub_file)
+                        # 删除临时文件
+                        try:
+                            shutil.rmtree(zip_path)
+                            zip_file.unlink()
+                        except Exception as err:
+                            logger.error(f"删除临时文件失败:{err}")
+                    else:
+                        sub_file = settings.TEMP_PATH / file_name
+                        # 保存
+                        sub_file.write_bytes(ret.content)
+                        target_sub_file = download_dir / sub_file.name
+                        logger.info(f"转移字幕 {sub_file} 到 {target_sub_file}")
+                        SystemUtils.copy(sub_file, target_sub_file)
+                else:
+                    logger.error(f"下载字幕文件失败:{sublink}")
+                    continue
+            if sublink_list:
+                logger.info(f"{torrent.page_url} 页面字幕下载完成")
+            else:
+                logger.warn(f"{torrent.page_url} 页面未找到字幕下载链接")
+        elif res is not None:
+            logger.warn(f"连接 {torrent.page_url} 失败,状态码:{res.status_code}")
+        else:
+            logger.warn(f"无法打开链接:{torrent.page_url}")
 