From 9efb2007fc3333bd98d8748007c8720602377b8d Mon Sep 17 00:00:00 2001 From: jxxghp Date: Thu, 15 Jun 2023 12:37:10 +0800 Subject: [PATCH] =?UTF-8?q?add=20=E8=87=AA=E5=8A=A8=E6=9B=B4=E6=96=B0Cooki?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/chain/download.py | 2 +- app/chain/site_manage.py | 77 ----------- app/chain/site_message.py | 134 +++++++++++++++++++ app/chain/subscribe.py | 7 +- app/command.py | 50 ++++--- app/helper/browser.py | 56 +++++++- app/helper/cookie.py | 223 +++++++++++++++++++++++++++++++ app/helper/torrent.py | 2 +- app/modules/slack/slack.py | 2 +- app/modules/telegram/telegram.py | 4 +- 10 files changed, 445 insertions(+), 112 deletions(-) delete mode 100644 app/chain/site_manage.py create mode 100644 app/chain/site_message.py create mode 100644 app/helper/cookie.py diff --git a/app/chain/download.py b/app/chain/download.py index 64965819..f20f0c59 100644 --- a/app/chain/download.py +++ b/app/chain/download.py @@ -121,7 +121,7 @@ class DownloadChain(ChainBase): f"{_torrent.title} - {_torrent.enclosure},{error_msg}") self.post_message( title="添加下载任务失败:%s %s" - % (_media.title_year, _meta.get_season_episode_string()), + % (_media.title_year, _meta.season_episode), text=f"站点:{_torrent.site_name}\n" f"种子名称:{_meta.org_string}\n" f"种子链接:{_torrent.enclosure}\n" diff --git a/app/chain/site_manage.py b/app/chain/site_manage.py deleted file mode 100644 index 19c95128..00000000 --- a/app/chain/site_manage.py +++ /dev/null @@ -1,77 +0,0 @@ -from app.chain import ChainBase -from app.db.site_oper import SiteOper - - -class SiteManageChain(ChainBase): - """ - 站点远程管理处理链 - """ - - _sites: SiteOper = None - - def __init__(self): - super().__init__() - self._siteoper = SiteOper() - - def process(self): - """ - 查询所有站点,发送消息 - """ - site_list = self._siteoper.list() - if not site_list: - self.post_message(title="没有维护任何站点信息!") - title = f"共有 {len(site_list)} 个站点,回复 `/site_disable` `[id]` 禁用站点,回复 `/site_enable` `[id]` 启用站点:" - messages = [] - for site in site_list: - if site.render: - render_str = "🧭" - else: - render_str = "" - if site.is_active: - messages.append(f"{site.id}. [{site.name}]({site.url}){render_str}") - else: - messages.append(f"{site.id}. {site.name} 🈲️") - # 发送列表 - self.post_message(title=title, text="\n".join(messages)) - - def disable(self, arg_str): - """ - 禁用站点 - """ - if not arg_str: - return - arg_str = arg_str.strip() - if not arg_str.isdigit(): - return - site_id = int(arg_str) - site = self._siteoper.get(site_id) - if not site: - self.post_message(title=f"站点编号 {site_id} 不存在!") - return - # 禁用站点 - self._siteoper.update(site_id, { - "is_active": False - }) - # 重新发送消息 - self.process() - - def enable(self, arg_str): - """ - 启用站点 - """ - if not arg_str: - return - arg_str = arg_str.strip() - if not arg_str.isdigit(): - return - site_id = int(arg_str) - site = self._siteoper.get(site_id) - if not site: - self.post_message(title=f"站点编号 {site_id} 不存在!") - return - # 禁用站点 - self._siteoper.update(site_id, { - "is_active": True - }) - # 重新发送消息 - self.process() diff --git a/app/chain/site_message.py b/app/chain/site_message.py new file mode 100644 index 00000000..e19592cc --- /dev/null +++ b/app/chain/site_message.py @@ -0,0 +1,134 @@ +from app.chain import ChainBase +from app.core.config import settings +from app.db.site_oper import SiteOper +from app.helper.cookie import CookieHelper +from app.log import logger + + +class SiteMessageChain(ChainBase): + """ + 站点远程管理处理链 + """ + + _siteoper: SiteOper = None + _cookiehelper: CookieHelper = None + + def __init__(self): + super().__init__() + self._siteoper = SiteOper() + self._cookiehelper = CookieHelper() + + def process(self): + """ + 查询所有站点,发送消息 + """ + site_list = self._siteoper.list() + if not site_list: + self.post_message(title="没有维护任何站点信息!") + title = f"共有 {len(site_list)} 个站点,回复对应指令操作:" \ + f"\n- 禁用站点:/site_disable [id]" \ + f"\n- 启用站点:/site_enable [id]" \ + f"\n- 更新站点Cookie:/site_cookie [id] [username] [password]" + messages = [] + for site in site_list: + if site.render: + render_str = "🧭" + else: + render_str = "" + if site.is_active: + messages.append(f"{site.id}. [{site.name}]({site.url}){render_str}") + else: + messages.append(f"{site.id}. {site.name}") + # 发送列表 + self.post_message(title=title, text="\n".join(messages)) + + def disable(self, arg_str): + """ + 禁用站点 + """ + if not arg_str: + return + arg_str = arg_str.strip() + if not arg_str.isdigit(): + return + site_id = int(arg_str) + site = self._siteoper.get(site_id) + if not site: + self.post_message(title=f"站点编号 {site_id} 不存在!") + return + # 禁用站点 + self._siteoper.update(site_id, { + "is_active": False + }) + # 重新发送消息 + self.process() + + def enable(self, arg_str): + """ + 启用站点 + """ + if not arg_str: + return + arg_str = arg_str.strip() + if not arg_str.isdigit(): + return + site_id = int(arg_str) + site = self._siteoper.get(site_id) + if not site: + self.post_message(title=f"站点编号 {site_id} 不存在!") + return + # 禁用站点 + self._siteoper.update(site_id, { + "is_active": True + }) + # 重新发送消息 + self.process() + + def get_cookie(self, arg_str: str): + """ + 使用用户名密码更新站点Cookie + """ + err_title = "请输入正确的命令格式:/site_cookie [id] [username] [password]," \ + "[id]为站点编号,[uername]为站点用户名,[password]为站点密码" + if not arg_str: + self.post_message(title=err_title) + return + arg_str = arg_str.strip() + args = arg_str.split() + if len(args) != 3: + self.post_message(title=err_title) + return + site_id = args[0] + if not site_id.isdigit(): + self.post_message(title=err_title) + return + # 站点ID + site_id = int(site_id) + # 站点信息 + site_info = self._siteoper.get(site_id) + if not site_info: + self.post_message(title=f"站点编号 {site_id} 不存在!") + return + # 用户名 + username = args[1] + # 密码 + password = args[2] + # 更新站点Cookie + result = self._cookiehelper.get_site_cookie_ua( + url=site_info.url, + username=username, + password=password, + proxies=settings.PROXY if site_info.proxy else None + ) + if result: + cookie, ua, msg = result + if not cookie: + logger.error(msg) + self.post_message(title=f"【{site_info.name}】 Cookie&UA更新失败!", + text=f"错误原因:{msg}") + return + self._siteoper.update(site_id, { + "cookie": cookie, + "ua": ua + }) + self.post_message(title=f"【{site_info.name}】 Cookie&UA更新成功") diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index bc7dd4d6..bced38fc 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -303,7 +303,8 @@ class SubscribeChain(ChainBase): if not subscribes: self.post_message(title='没有任何订阅!') return - title = f"共有 {len(subscribes)} 个订阅,回复 `/subscribe_delete` `[id]` 删除订阅:" + title = f"共有 {len(subscribes)} 个订阅,回复对应指令操作: " \ + f"\n- 删除订阅:/subscribe_delete [id]" messages = [] for subscribe in subscribes: if subscribe.type == MediaType.MOVIE.value: @@ -313,8 +314,8 @@ class SubscribeChain(ChainBase): tmdb_link = f"https://www.themoviedb.org/tv/{subscribe.tmdbid}" messages.append(f"{subscribe.id}. [{subscribe.name}({subscribe.year})]({tmdb_link}) " f"第{subscribe.season}季 " - f"【{subscribe.total_episode - (subscribe.lack_episode or subscribe.total_episode)}" - f"/{subscribe.total_episode}】") + f"_{subscribe.total_episode - (subscribe.lack_episode or subscribe.total_episode)}" + f"/{subscribe.total_episode}_") # 发送列表 self.post_message(title=title, text='\n'.join(messages)) diff --git a/app/command.py b/app/command.py index 225ce410..1cdc2d6b 100644 --- a/app/command.py +++ b/app/command.py @@ -5,7 +5,7 @@ from typing import Any from app.chain import ChainBase from app.chain.cookiecloud import CookieCloudChain from app.chain.douban_sync import DoubanSyncChain -from app.chain.site_manage import SiteManageChain +from app.chain.site_message import SiteMessageChain from app.chain.subscribe import SubscribeChain from app.chain.transfer import TransferChain from app.core.event import eventmanager, EventManager @@ -44,7 +44,27 @@ class Command(metaclass=Singleton): self._commands = { "/cookiecloud": { "func": CookieCloudChain().process, - "description": "同步站点Cookie", + "description": "同步站点", + "data": {} + }, + "/sites": { + "func": SiteMessageChain().process, + "description": "查询站点", + "data": {} + }, + "/site_cookie": { + "func": SiteMessageChain().get_cookie, + "description": "更新站点Cookie", + "data": {} + }, + "/site_enable": { + "func": SiteMessageChain().enable, + "description": "启用站点", + "data": {} + }, + "/site_disable": { + "func": SiteMessageChain().disable, + "description": "禁用站点", "data": {} }, "/douban_sync": { @@ -52,6 +72,11 @@ class Command(metaclass=Singleton): "description": "同步豆瓣想看", "data": {} }, + "/subscribes": { + "func": SubscribeChain().list, + "description": "查询订阅", + "data": {} + }, "/subscribe_refresh": { "func": SubscribeChain().refresh, "description": "刷新订阅", @@ -64,11 +89,6 @@ class Command(metaclass=Singleton): 'state': 'R', } }, - "/subscribes": { - "func": SubscribeChain().list, - "description": "查询订阅", - "data": {} - }, "/subscribe_delete": { "func": SubscribeChain().delete, "description": "删除订阅", @@ -78,21 +98,6 @@ class Command(metaclass=Singleton): "func": TransferChain().process, "description": "下载文件整理", "data": {} - }, - "/sites": { - "func": SiteManageChain().process, - "description": "查询站点", - "data": {} - }, - "/site_enable": { - "func": SiteManageChain().enable, - "description": "启用站点", - "data": {} - }, - "/site_disable": { - "func": SiteManageChain().disable, - "description": "禁用站点", - "data": {} } } plugin_commands = self.pluginmanager.get_plugin_commands() @@ -179,6 +184,7 @@ class Command(metaclass=Singleton): command['func']() except Exception as err: logger.error(f"执行命令 {cmd} 出错:{str(err)}") + traceback.print_exc() @staticmethod def send_plugin_event(etype: EventType, data: dict) -> None: diff --git a/app/helper/browser.py b/app/helper/browser.py index f2da41d5..b6af24e1 100644 --- a/app/helper/browser.py +++ b/app/helper/browser.py @@ -1,4 +1,6 @@ -from playwright.sync_api import sync_playwright +from typing import Callable, Any + +from playwright.sync_api import sync_playwright, Page from cf_clearance import sync_cf_retry, sync_stealth from app.log import logger @@ -7,6 +9,53 @@ class PlaywrightHelper: def __init__(self, browser_type="chromium"): self.browser_type = browser_type + @staticmethod + def __pass_cloudflare(url: str, page: Page) -> bool: + """ + 尝试跳过cloudfare验证 + """ + sync_stealth(page, pure=True) + page.goto(url) + return sync_cf_retry(page) + + def action(self, url: str, + callback: Callable, + cookies: str = None, + ua: str = None, + proxies: dict = None, + headless: bool = False, + timeout: int = 30) -> Any: + """ + 访问网页,接收Page对象并执行操作 + :param url: 网页地址 + :param callback: 回调函数,需要接收page对象 + :param cookies: cookies + :param ua: user-agent + :param proxies: 代理 + :param headless: 是否无头模式 + :param timeout: 超时时间 + """ + try: + with sync_playwright() as playwright: + browser = playwright[self.browser_type].launch(headless=headless) + context = browser.new_context(user_agent=ua, proxy=proxies) + page = context.new_page() + if cookies: + page.set_extra_http_headers({"cookie": cookies}) + try: + if not self.__pass_cloudflare(url, page): + logger.warn("cloudflare challenge fail!") + page.wait_for_load_state("networkidle", timeout=timeout * 1000) + # 回调函数 + return callback(page) + except Exception as e: + logger.error(f"网页操作失败: {e}") + finally: + browser.close() + except Exception as e: + logger.error(f"网页操作失败: {e}") + return None + def get_page_source(self, url: str, cookies: str = None, ua: str = None, @@ -31,10 +80,7 @@ class PlaywrightHelper: if cookies: page.set_extra_http_headers({"cookie": cookies}) try: - sync_stealth(page, pure=True) - page.goto(url) - res = sync_cf_retry(page) - if not res: + if not self.__pass_cloudflare(url, page): logger.warn("cloudflare challenge fail!") page.wait_for_load_state("networkidle", timeout=timeout * 1000) source = page.content() diff --git a/app/helper/cookie.py b/app/helper/cookie.py new file mode 100644 index 00000000..4a9dface --- /dev/null +++ b/app/helper/cookie.py @@ -0,0 +1,223 @@ +import base64 +from typing import Tuple, Optional + +from lxml import etree +from playwright.sync_api import Page + +from app.helper.browser import PlaywrightHelper +from app.helper.ocr import OcrHelper +from app.log import logger +from app.utils.http import RequestUtils +from app.utils.site import SiteUtils +from app.utils.string import StringUtils + + +class CookieHelper: + # 站点登录界面元素XPATH + _SITE_LOGIN_XPATH = { + "username": [ + '//input[@name="username"]', + '//input[@id="form_item_username"]', + '//input[@id="username"]' + ], + "password": [ + '//input[@name="password"]', + '//input[@id="form_item_password"]', + '//input[@id="password"]' + ], + "captcha": [ + '//input[@name="imagestring"]', + '//input[@name="captcha"]', + '//input[@id="form_item_captcha"]' + ], + "captcha_img": [ + '//img[@alt="CAPTCHA"]/@src', + '//img[@alt="SECURITY CODE"]/@src', + '//img[@id="LAY-user-get-vercode"]/@src', + '//img[contains(@src,"/api/getCaptcha")]/@src' + ], + "submit": [ + '//input[@type="submit"]', + '//button[@type="submit"]', + '//button[@lay-filter="login"]', + '//button[@lay-filter="formLogin"]', + '//input[@type="button"][@value="登录"]' + ], + "error": [ + "//table[@class='main']//td[@class='text']/text()" + ], + "twostep": [ + '//input[@name="two_step_code"]', + '//input[@name="2fa_secret"]' + ] + } + + @staticmethod + def parse_cookies(cookies: list) -> str: + """ + 将浏览器返回的cookies转化为字符串 + """ + if not cookies: + return "" + cookie_str = "" + for cookie in cookies: + cookie_str += f"{cookie['name']}={cookie['value']}; " + return cookie_str + + def get_site_cookie_ua(self, + url: str, + username: str, + password: str, + proxies: dict = None) -> Tuple[Optional[str], Optional[str], str]: + """ + 获取站点cookie和ua + :param url: 站点地址 + :param username: 用户名 + :param password: 密码 + :param proxies: 代理 + :return: cookie、ua、message + """ + + def __page_handler(page: Page) -> Tuple[Optional[str], Optional[str], str]: + """ + 页面处理 + :return: Cookie和UA + """ + # 登录页面代码 + html_text = page.content() + if not html_text: + return None, None, "获取源码失败" + # 查找用户名输入框 + html = etree.HTML(html_text) + username_xpath = None + for xpath in self._SITE_LOGIN_XPATH.get("username"): + if html.xpath(xpath): + username_xpath = xpath + break + if not username_xpath: + return None, None, "未找到用户名输入框" + # 查找密码输入框 + password_xpath = None + for xpath in self._SITE_LOGIN_XPATH.get("password"): + if html.xpath(xpath): + password_xpath = xpath + break + if not password_xpath: + return None, None, "未找到密码输入框" + # 查找验证码输入框 + captcha_xpath = None + for xpath in self._SITE_LOGIN_XPATH.get("captcha"): + if html.xpath(xpath): + captcha_xpath = xpath + break + # 查找验证码图片 + captcha_img_url = None + if captcha_xpath: + for xpath in self._SITE_LOGIN_XPATH.get("captcha_img"): + if html.xpath(xpath): + captcha_img_url = html.xpath(xpath)[0] + break + if not captcha_img_url: + return None, None, "未找到验证码图片" + # 查找登录按钮 + submit_xpath = None + for xpath in self._SITE_LOGIN_XPATH.get("submit"): + if html.xpath(xpath): + submit_xpath = xpath + break + if not submit_xpath: + return None, None, "未找到登录按钮" + # 点击登录按钮 + try: + # 等待登录按钮准备好 + page.wait_for_selector(submit_xpath) + # 输入用户名 + page.fill(username_xpath, username) + # 输入密码 + page.fill(password_xpath, password) + # 识别验证码 + if captcha_xpath and captcha_img_url: + captcha_element = page.query_selector(captcha_xpath) + if captcha_element.is_visible(): + # 验证码图片地址 + code_url = self.__get_captcha_url(url, captcha_img_url) + # 获取当前的cookie和ua + cookie = self.parse_cookies(page.context.cookies()) + ua = page.evaluate("() => window.navigator.userAgent") + # 自动OCR识别验证码 + captcha = self.get_captcha_text(cookie=cookie, ua=ua, code_url=code_url) + if captcha: + logger.info("验证码地址为:%s,识别结果:%s" % (code_url, captcha)) + else: + return None, None, "验证码识别失败" + # 输入验证码 + captcha_element.fill(captcha) + else: + # 不可见元素不处理 + pass + # 点击登录按钮 + page.click(submit_xpath) + page.wait_for_load_state("networkidle", timeout=30 * 1000) + except Exception as e: + logger.error(f"仿真登录失败:{e}") + return None, None, f"仿真登录失败:{e}" + # 登录后的源码 + html_text = page.content() + if not html_text: + return None, None, "获取网页源码失败" + if SiteUtils.is_logged_in(html_text): + return self.parse_cookies(page.context.cookies()), \ + page.evaluate("() => window.navigator.userAgent"), "" + else: + # 读取错误信息 + error_xpath = None + for xpath in self._SITE_LOGIN_XPATH.get("error"): + if html.xpath(xpath): + error_xpath = xpath + break + if not error_xpath: + return None, None, "登录失败" + else: + error_msg = html.xpath(error_xpath)[0] + return None, None, error_msg + + if not url or not username or not password: + return None, None, "参数错误" + + return PlaywrightHelper().action(url=url, + callback=__page_handler, + proxies=proxies) + + @staticmethod + def get_captcha_base64(cookie: str, ua: str, image_url: str) -> str: + """ + 根据图片地址,使用浏览器获取验证码图片base64编码 + """ + if not image_url: + return "" + ret = RequestUtils(ua=ua, cookies=cookie).get_res(image_url) + if ret: + return base64.b64encode(ret.content).decode() + return "" + + def get_captcha_text(self, cookie: str, ua: str, code_url: str) -> str: + """ + 识别验证码图片的内容 + """ + code_b64 = self.get_captcha_base64(cookie=cookie, + ua=ua, + image_url=code_url) + if not code_b64: + return "" + return OcrHelper().get_captcha_text(image_b64=code_b64) + + @staticmethod + def __get_captcha_url(siteurl: str, imageurl: str) -> str: + """ + 获取验证码图片的URL + """ + if not siteurl or not imageurl: + return "" + if imageurl.startswith("/"): + imageurl = imageurl[1:] + return "%s/%s" % (StringUtils.get_base_url(siteurl), imageurl) diff --git a/app/helper/torrent.py b/app/helper/torrent.py index a5f7e610..2a98419b 100644 --- a/app/helper/torrent.py +++ b/app/helper/torrent.py @@ -209,7 +209,7 @@ class TorrentHelper: media = context.media_info if media.type == MediaType.TV: media_name = "%s%s" % (media.title_year, - meta.get_season_episode_string()) + meta.season_episode) else: media_name = media.title_year if media_name not in _added: diff --git a/app/modules/slack/slack.py b/app/modules/slack/slack.py index 0f7c1274..59e1ce2b 100644 --- a/app/modules/slack/slack.py +++ b/app/modules/slack/slack.py @@ -248,7 +248,7 @@ class Slack: "type": "section", "text": { "type": "mrkdwn", - "text": f"**{title}**" + "text": f"*{title}*" } } blocks = [title_section, { diff --git a/app/modules/telegram/telegram.py b/app/modules/telegram/telegram.py index 41a2441c..0033c1c7 100644 --- a/app/modules/telegram/telegram.py +++ b/app/modules/telegram/telegram.py @@ -79,9 +79,9 @@ class Telegram(metaclass=Singleton): try: if text: - caption = f"**{title}**\n{text}" + caption = f"*{title}*\n{text}" else: - caption = f"**{title}**" + caption = f"*{title}*" if userid: chat_id = userid