From c7d745a752fa35df1cacd305f9c0faa8723275ca Mon Sep 17 00:00:00 2001 From: jxxghp Date: Thu, 8 Jun 2023 13:18:26 +0800 Subject: [PATCH] add plugins --- app/chain/__init__.py | 2 +- app/chain/common.py | 4 +- app/chain/cookiecloud.py | 4 +- app/chain/douban_sync.py | 4 +- app/chain/identify.py | 4 +- app/chain/search.py | 4 +- app/chain/subscribe.py | 4 +- app/chain/transfer.py | 4 +- app/chain/user_message.py | 4 +- app/chain/webhook_message.py | 4 +- app/core/plugin_manager.py | 17 +- app/db/models/site.py | 1 + app/helper/cloudflare.py | 201 +++++++++ app/helper/ocr.py | 33 ++ app/plugins/__init__.py | 54 +-- app/plugins/autosignin/__init__.py | 225 ++++++++++ app/plugins/autosignin/sites/52pt.py | 145 +++++++ app/plugins/autosignin/sites/__init__.py | 46 ++ app/plugins/autosignin/sites/btschool.py | 73 ++++ app/plugins/autosignin/sites/chdbits.py | 146 +++++++ app/plugins/autosignin/sites/haidan.py | 65 +++ app/plugins/autosignin/sites/hares.py | 80 ++++ app/plugins/autosignin/sites/hdarea.py | 69 +++ app/plugins/autosignin/sites/hdchina.py | 117 +++++ app/plugins/autosignin/sites/hdcity.py | 66 +++ app/plugins/autosignin/sites/hdsky.py | 131 ++++++ app/plugins/autosignin/sites/hdupt.py | 81 ++++ app/plugins/autosignin/sites/opencd.py | 129 ++++++ app/plugins/autosignin/sites/pterclub.py | 58 +++ app/plugins/autosignin/sites/tjupt.py | 272 ++++++++++++ app/plugins/autosignin/sites/ttg.py | 96 +++++ app/plugins/autosignin/sites/u2.py | 122 ++++++ app/plugins/autosignin/sites/zhuque.py | 86 ++++ app/plugins/sitestatistics/__init__.py | 262 ++++++++++++ .../sitestatistics/siteuserinfo/__init__.py | 360 ++++++++++++++++ .../sitestatistics/siteuserinfo/discuz.py | 140 ++++++ .../sitestatistics/siteuserinfo/file_list.py | 119 ++++++ .../sitestatistics/siteuserinfo/gazelle.py | 164 +++++++ .../siteuserinfo/ipt_project.py | 94 ++++ .../sitestatistics/siteuserinfo/nexus_php.py | 401 ++++++++++++++++++ .../siteuserinfo/nexus_project.py | 25 ++ 
.../siteuserinfo/nexus_rabbit.py | 58 +++ .../siteuserinfo/small_horse.py | 111 +++++ .../sitestatistics/siteuserinfo/tnode.py | 104 +++++ .../siteuserinfo/torrent_leech.py | 110 +++++ .../sitestatistics/siteuserinfo/unit3d.py | 131 ++++++ app/schemas/site.py | 1 + app/utils/types.py | 17 + requirements.txt | 8 +- 49 files changed, 4383 insertions(+), 73 deletions(-) create mode 100644 app/helper/cloudflare.py create mode 100644 app/helper/ocr.py create mode 100644 app/plugins/autosignin/__init__.py create mode 100644 app/plugins/autosignin/sites/52pt.py create mode 100644 app/plugins/autosignin/sites/__init__.py create mode 100644 app/plugins/autosignin/sites/btschool.py create mode 100644 app/plugins/autosignin/sites/chdbits.py create mode 100644 app/plugins/autosignin/sites/haidan.py create mode 100644 app/plugins/autosignin/sites/hares.py create mode 100644 app/plugins/autosignin/sites/hdarea.py create mode 100644 app/plugins/autosignin/sites/hdchina.py create mode 100644 app/plugins/autosignin/sites/hdcity.py create mode 100644 app/plugins/autosignin/sites/hdsky.py create mode 100644 app/plugins/autosignin/sites/hdupt.py create mode 100644 app/plugins/autosignin/sites/opencd.py create mode 100644 app/plugins/autosignin/sites/pterclub.py create mode 100644 app/plugins/autosignin/sites/tjupt.py create mode 100644 app/plugins/autosignin/sites/ttg.py create mode 100644 app/plugins/autosignin/sites/u2.py create mode 100644 app/plugins/autosignin/sites/zhuque.py create mode 100644 app/plugins/sitestatistics/__init__.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/__init__.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/discuz.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/file_list.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/gazelle.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/ipt_project.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/nexus_php.py create mode 100644 
app/plugins/sitestatistics/siteuserinfo/nexus_project.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/nexus_rabbit.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/small_horse.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/tnode.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/torrent_leech.py create mode 100644 app/plugins/sitestatistics/siteuserinfo/unit3d.py diff --git a/app/chain/__init__.py b/app/chain/__init__.py index ac67e090..fc1e5c90 100644 --- a/app/chain/__init__.py +++ b/app/chain/__init__.py @@ -7,7 +7,7 @@ from app.log import logger from app.utils.singleton import AbstractSingleton, Singleton -class _ChainBase(AbstractSingleton, metaclass=Singleton): +class ChainBase(AbstractSingleton, metaclass=Singleton): """ 处理链基类 """ diff --git a/app/chain/common.py b/app/chain/common.py index 101e8a02..a17b748a 100644 --- a/app/chain/common.py +++ b/app/chain/common.py @@ -2,7 +2,7 @@ import re from pathlib import Path from typing import List, Optional, Tuple, Set -from app.chain import _ChainBase +from app.chain import ChainBase from app.core import MediaInfo from app.core import TorrentInfo, Context from app.core.meta import MetaBase @@ -12,7 +12,7 @@ from app.utils.string import StringUtils from app.utils.types import MediaType -class CommonChain(_ChainBase): +class CommonChain(ChainBase): def __init__(self): super().__init__() diff --git a/app/chain/cookiecloud.py b/app/chain/cookiecloud.py index 78165699..d7297e73 100644 --- a/app/chain/cookiecloud.py +++ b/app/chain/cookiecloud.py @@ -1,6 +1,6 @@ from typing import Tuple -from app.chain import _ChainBase +from app.chain import ChainBase from app.core import settings from app.db.sites import Sites from app.helper.cookiecloud import CookieCloudHelper @@ -8,7 +8,7 @@ from app.helper.sites import SitesHelper from app.log import logger -class CookieCloudChain(_ChainBase): +class CookieCloudChain(ChainBase): """ 同步站点Cookie """ diff --git 
a/app/chain/douban_sync.py b/app/chain/douban_sync.py index 5e410780..4acefd2e 100644 --- a/app/chain/douban_sync.py +++ b/app/chain/douban_sync.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Optional -from app.chain import _ChainBase +from app.chain import ChainBase from app.chain.common import CommonChain from app.chain.search import SearchChain from app.core import settings, MetaInfo, MediaInfo @@ -10,7 +10,7 @@ from app.helper.rss import RssHelper from app.log import logger -class DoubanSyncChain(_ChainBase): +class DoubanSyncChain(ChainBase): """ 同步豆瓣想看数据 """ diff --git a/app/chain/identify.py b/app/chain/identify.py index f925640d..6e377eb0 100644 --- a/app/chain/identify.py +++ b/app/chain/identify.py @@ -1,11 +1,11 @@ from typing import Optional -from app.chain import _ChainBase +from app.chain import ChainBase from app.core import Context, MetaInfo, MediaInfo from app.log import logger -class IdentifyChain(_ChainBase): +class IdentifyChain(ChainBase): """ 识别处理链 """ diff --git a/app/chain/search.py b/app/chain/search.py index 7c4a258c..ee075234 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -1,6 +1,6 @@ from typing import Optional, List -from app.chain import _ChainBase +from app.chain import ChainBase from app.chain.common import CommonChain from app.core import Context, MetaInfo, MediaInfo, TorrentInfo, settings from app.core.meta import MetaBase @@ -8,7 +8,7 @@ from app.helper.sites import SitesHelper from app.log import logger -class SearchChain(_ChainBase): +class SearchChain(ChainBase): """ 站点资源搜索处理链 """ diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index 572ee20a..fdebba6a 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional -from app.chain import _ChainBase +from app.chain import ChainBase from app.chain.common import CommonChain from app.chain.search import SearchChain from app.core import MetaInfo, TorrentInfo, Context, MediaInfo, 
settings @@ -11,7 +11,7 @@ from app.utils.string import StringUtils from app.utils.types import MediaType -class SubscribeChain(_ChainBase): +class SubscribeChain(ChainBase): """ 订阅处理链 """ diff --git a/app/chain/transfer.py b/app/chain/transfer.py index 2d11c789..3aded307 100644 --- a/app/chain/transfer.py +++ b/app/chain/transfer.py @@ -1,13 +1,13 @@ from pathlib import Path from typing import List, Optional -from app.chain import _ChainBase +from app.chain import ChainBase from app.core import MetaInfo, MediaInfo, settings from app.log import logger from app.utils.types import TorrentStatus -class TransferChain(_ChainBase): +class TransferChain(ChainBase): """ 文件转移处理链 """ diff --git a/app/chain/user_message.py b/app/chain/user_message.py index c267f496..0734304b 100644 --- a/app/chain/user_message.py +++ b/app/chain/user_message.py @@ -2,7 +2,7 @@ from typing import Dict from fastapi import Request -from app.chain import _ChainBase +from app.chain import ChainBase from app.chain.common import * from app.chain.search import SearchChain from app.core import MediaInfo, TorrentInfo, MetaInfo @@ -11,7 +11,7 @@ from app.log import logger from app.utils.types import EventType -class UserMessageChain(_ChainBase): +class UserMessageChain(ChainBase): """ 外来消息处理链 """ diff --git a/app/chain/webhook_message.py b/app/chain/webhook_message.py index d08e4114..014881d2 100644 --- a/app/chain/webhook_message.py +++ b/app/chain/webhook_message.py @@ -1,7 +1,7 @@ -from app.chain import _ChainBase +from app.chain import ChainBase -class WebhookMessageChain(_ChainBase): +class WebhookMessageChain(ChainBase): """ 响应Webhook事件 """ diff --git a/app/core/plugin_manager.py b/app/core/plugin_manager.py index d4b1e98d..4362ecd8 100644 --- a/app/core/plugin_manager.py +++ b/app/core/plugin_manager.py @@ -2,13 +2,11 @@ import traceback from threading import Thread from typing import Tuple, Optional, List, Any -from app.helper import ModuleHelper - from app.core import EventManager from 
app.db.systemconfigs import SystemConfigs +from app.helper import ModuleHelper from app.log import logger from app.utils.singleton import Singleton -from app.utils.types import SystemConfigKey class PluginManager(metaclass=Singleton): @@ -91,21 +89,16 @@ class PluginManager(metaclass=Singleton): ) # 排序 plugins.sort(key=lambda x: x.plugin_order if hasattr(x, "plugin_order") else 0) - # 用户已安装插件列表 - user_plugins = self.systemconfigs.get(SystemConfigKey.UserInstalledPlugins) or [] self._running_plugins = {} self._plugins = {} for plugin in plugins: plugin_id = plugin.__name__ self._plugins[plugin_id] = plugin - # 未安装的跳过加载 - if plugin_id not in user_plugins: - continue # 生成实例 self._running_plugins[plugin_id] = plugin() # 初始化配置 self.reload_plugin(plugin_id) - logger.info(f"加载插件:{plugin}") + logger.info(f"Plugin Loaded:{plugin.__name__}") def reload_plugin(self, pid: str): """ @@ -237,7 +230,6 @@ class PluginManager(metaclass=Singleton): 获取所有插件 """ all_confs = {} - installed_apps = self.systemconfigs.get(SystemConfigKey.UserInstalledPlugins) or [] for pid, plugin in self._plugins.items(): # 基本属性 conf = {} @@ -247,11 +239,6 @@ class PluginManager(metaclass=Singleton): continue # ID conf.update({"id": pid}) - # 安装状态 - if pid in installed_apps: - conf.update({"installed": True}) - else: - conf.update({"installed": False}) # 名称 if hasattr(plugin, "plugin_name"): conf.update({"name": plugin.plugin_name}) diff --git a/app/db/models/site.py b/app/db/models/site.py index a846defe..cdc95430 100644 --- a/app/db/models/site.py +++ b/app/db/models/site.py @@ -15,6 +15,7 @@ class Site(Base): rss = Column(String) cookie = Column(String) ua = Column(String) + proxy = Column(Integer) filter = Column(String) note = Column(String) limit_interval = Column(Integer) diff --git a/app/helper/cloudflare.py b/app/helper/cloudflare.py new file mode 100644 index 00000000..cc0945b1 --- /dev/null +++ b/app/helper/cloudflare.py @@ -0,0 +1,201 @@ +import time +import os + +from func_timeout import 
func_timeout, FunctionTimedOut +from pyquery import PyQuery +from selenium.common import TimeoutException +from selenium.webdriver import ActionChains +from selenium.webdriver.common.by import By +from selenium.webdriver.remote.webdriver import WebDriver +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from app.log import logger + +ACCESS_DENIED_TITLES = [ + # Cloudflare + 'Access denied', + # Cloudflare http://bitturk.net/ Firefox + 'Attention Required! | Cloudflare' +] +ACCESS_DENIED_SELECTORS = [ + # Cloudflare + 'div.cf-error-title span.cf-code-label span', + # Cloudflare http://bitturk.net/ Firefox + '#cf-error-details div.cf-error-overview h1' +] +CHALLENGE_TITLES = [ + # Cloudflare + 'Just a moment...', + '请稍候…', + # DDoS-GUARD + 'DDOS-GUARD', +] +CHALLENGE_SELECTORS = [ + # Cloudflare + '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', + # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands + 'td.info #js_info', + # Fairlane / pararius.com + 'div.vc div.text-box h2' +] +SHORT_TIMEOUT = 6 +CF_TIMEOUT = int(os.getenv("NASTOOL_CF_TIMEOUT", "60")) + + +def resolve_challenge(driver: WebDriver, timeout=CF_TIMEOUT): + start_ts = time.time() + try: + func_timeout(timeout, _evil_logic, args=(driver,)) + return True + except FunctionTimedOut: + logger.error(f'Error solving the challenge. Timeout {timeout} after {round(time.time() - start_ts, 1)} seconds.') + return False + except Exception as e: + logger.error('Error solving the challenge. 
' + str(e)) + return False + + +def under_challenge(html_text: str): + """ + Check if the page is under challenge + :param html_text: + :return: + """ + # get the page title + if not html_text: + return False + page_title = PyQuery(html_text)('title').text() + logger.debug("under_challenge page_title=" + page_title) + for title in CHALLENGE_TITLES: + if page_title.lower() == title.lower(): + return True + for selector in CHALLENGE_SELECTORS: + html_doc = PyQuery(html_text) + if html_doc(selector): + return True + return False + + +def _until_title_changes(driver: WebDriver, titles): + WebDriverWait(driver, SHORT_TIMEOUT).until_not(lambda x: _any_match_titles(x, titles)) + + +def _any_match_titles(driver: WebDriver, titles): + page_title = driver.title + for title in titles: + if page_title.lower() == title.lower(): + return True + return False + + +def _until_selectors_disappear(driver: WebDriver, selectors): + WebDriverWait(driver, SHORT_TIMEOUT).until_not(lambda x: _any_match_selectors(x, selectors)) + + +def _any_match_selectors(driver: WebDriver, selectors): + for selector in selectors: + html_doc = PyQuery(driver.page_source) + if html_doc(selector): + return True + return False + + +def _evil_logic(driver: WebDriver): + driver.implicitly_wait(SHORT_TIMEOUT) + # wait for the page + html_element = driver.find_element(By.TAG_NAME, "html") + + # find access denied titles + if _any_match_titles(driver, ACCESS_DENIED_TITLES): + raise Exception('Cloudflare has blocked this request. ' + 'Probably your IP is banned for this site, check in your web browser.') + # find access denied selectors + if _any_match_selectors(driver, ACCESS_DENIED_SELECTORS): + raise Exception('Cloudflare has blocked this request. ' + 'Probably your IP is banned for this site, check in your web browser.') + + # find challenge by title + challenge_found = False + if _any_match_titles(driver, CHALLENGE_TITLES): + challenge_found = True + logger.info("Challenge detected. 
Title found: " + driver.title) + if not challenge_found: + # find challenge by selectors + if _any_match_selectors(driver, CHALLENGE_SELECTORS): + challenge_found = True + logger.info("Challenge detected. Selector found") + + attempt = 0 + if challenge_found: + while True: + try: + attempt = attempt + 1 + # wait until the title changes + _until_title_changes(driver, CHALLENGE_TITLES) + + # then wait until all the selectors disappear + _until_selectors_disappear(driver, CHALLENGE_SELECTORS) + + # all elements not found + break + + except TimeoutException: + logger.debug("Timeout waiting for selector") + + click_verify(driver) + + # update the html (cloudflare reloads the page every 5 s) + html_element = driver.find_element(By.TAG_NAME, "html") + + # waits until cloudflare redirection ends + logger.debug("Waiting for redirect") + # noinspection PyBroadException + try: + WebDriverWait(driver, SHORT_TIMEOUT).until(EC.staleness_of(html_element)) + except Exception: + logger.debug("Timeout waiting for redirect") + + logger.info("Challenge solved!") + else: + logger.info("Challenge not detected!") + + +def click_verify(driver: WebDriver): + try: + logger.debug("Try to find the Cloudflare verify checkbox") + iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']") + driver.switch_to.frame(iframe) + checkbox = driver.find_element( + by=By.XPATH, + value='//*[@id="cf-stage"]//label[@class="ctp-checkbox-label"]/input', + ) + if checkbox: + actions = ActionChains(driver) + actions.move_to_element_with_offset(checkbox, 5, 7) + actions.click(checkbox) + actions.perform() + logger.debug("Cloudflare verify checkbox found and clicked") + except Exception as e: + logger.debug(f"Cloudflare verify checkbox not found on the page: {str(e)}") + # print(e) + finally: + driver.switch_to.default_content() + + try: + logger.debug("Try to find the Cloudflare 'Verify you are human' button") + button = driver.find_element( + by=By.XPATH, + 
value="//input[@type='button' and @value='Verify you are human']", + ) + if button: + actions = ActionChains(driver) + actions.move_to_element_with_offset(button, 5, 7) + actions.click(button) + actions.perform() + logger.debug("The Cloudflare 'Verify you are human' button found and clicked") + except Exception as e: + logger.debug(f"The Cloudflare 'Verify you are human' button not found on the page:{str(e)}") + # print(e) + + time.sleep(2) diff --git a/app/helper/ocr.py b/app/helper/ocr.py new file mode 100644 index 00000000..f74f8623 --- /dev/null +++ b/app/helper/ocr.py @@ -0,0 +1,33 @@ +import base64 + +from app.utils.http import RequestUtils + + +class OcrHelper: + + _ocr_b64_url = "https://nastool.org/captcha/base64" + + def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None): + """ + 根据图片地址,获取验证码图片,并识别内容 + :param image_url: 图片地址 + :param image_b64: 图片base64,跳过图片地址下载 + :param cookie: 下载图片使用的cookie + :param ua: 下载图片使用的ua + """ + if image_url: + ret = RequestUtils(headers=ua, + cookies=cookie).get_res(image_url) + if ret is not None: + image_bin = ret.content + if not image_bin: + return "" + image_b64 = base64.b64encode(image_bin).decode() + if not image_b64: + return "" + ret = RequestUtils(content_type="application/json").post_res( + url=self._ocr_b64_url, + json={"base64_img": image_b64}) + if ret: + return ret.json().get("result") + return "" diff --git a/app/plugins/__init__.py b/app/plugins/__init__.py index d96361a9..419a1976 100644 --- a/app/plugins/__init__.py +++ b/app/plugins/__init__.py @@ -1,23 +1,29 @@ from abc import ABCMeta, abstractmethod from pathlib import Path -from typing import Any +from typing import Any, Optional -from app.core import settings +from app.chain import ChainBase +from app.core import settings, Context from app.db.systemconfigs import SystemConfigs +class PluginChian(ChainBase): + """ + 插件处理链 + """ + + def process(self, *args, **kwargs) -> Optional[Context]: + pass + + class 
_PluginBase(metaclass=ABCMeta): """ 插件模块基类,通过继续该类实现插件功能 除内置属性外,还有以下方法可以扩展或调用: - - get_fields() 获取配置字典,用于生成插件配置表单 - - get_state() 获取插件启用状态,用于展示运行状态 - stop_service() 停止插件服务 - get_config() 获取配置信息 - update_config() 更新配置信息 - - init_config() 生效配置信息 - - get_page() 插件额外页面数据,在插件配置页面左下解按钮展示 - - get_script() 插件额外脚本(Javascript),将会写入插件页面,可在插件元素中绑定使用,,XX_PluginInit为初始化函数 + - init_plugin() 生效配置信息 - get_data_path() 获取插件数据保存目录 - get_command() 获取插件命令,使用消息机制通过远程控制 @@ -26,37 +32,9 @@ class _PluginBase(metaclass=ABCMeta): plugin_name: str = "" # 插件描述 plugin_desc: str = "" - # 插件图标 - plugin_icon: str = "" - # 主题色 - plugin_color: str = "" - # 插件版本 - plugin_version: str = "1.0" - # 插件作者 - plugin_author: str = "" - # 作者主页 - author_url: str = "" - # 插件配置项ID前缀:为了避免各插件配置表单相冲突,配置表单元素ID自动在前面加上此前缀 - plugin_config_prefix: str = "plugin_" - # 显示顺序 - plugin_order: int = 0 - # 可使用的用户级别 - auth_level: int = 1 - - @staticmethod - @abstractmethod - def get_fields() -> dict: - """ - 获取配置字典,用于生成表单 - """ - pass - - @abstractmethod - def get_state(self) -> bool: - """ - 获取插件启用状态 - """ - pass + + def __init__(self): + self.chain = PluginChian() @abstractmethod def init_plugin(self, config: dict = None): diff --git a/app/plugins/autosignin/__init__.py b/app/plugins/autosignin/__init__.py new file mode 100644 index 00000000..d2e0c0c0 --- /dev/null +++ b/app/plugins/autosignin/__init__.py @@ -0,0 +1,225 @@ +from multiprocessing.dummy import Pool as ThreadPool +from multiprocessing.pool import ThreadPool +from threading import Event +from typing import Any +from urllib.parse import urljoin + +from apscheduler.schedulers.background import BackgroundScheduler +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import EventManager, settings, eventmanager +from app.helper import ModuleHelper +from app.helper.cloudflare import under_challenge +from app.helper.sites import SitesHelper +from app.log import logger +from app.plugins import _PluginBase +from app.utils.http import RequestUtils 
+from app.utils.timer import TimerUtils +from app.utils.types import EventType + + +class AutoSignIn(_PluginBase): + # 插件名称 + plugin_name = "站点自动签到" + # 插件描述 + plugin_desc = "站点每日自动模拟登录或签到,避免长期未登录封号。" + + # 私有属性 + sites: SitesHelper = None + # 事件管理器 + event: EventManager = None + # 定时器 + _scheduler = None + + # 加载的模块 + _site_schema: list = [] + # 退出事件 + _event = Event() + + def init_plugin(self, config: dict = None): + self.sites = SitesHelper() + self.event = EventManager() + + # 停止现有任务 + self.stop_service() + + # 加载模块 + self._site_schema = ModuleHelper.load('app.plugins.autosignin.sites', + filter_func=lambda _, obj: hasattr(obj, 'match')) + + # 定时服务 + self._scheduler = BackgroundScheduler(timezone=settings.TZ) + triggers = TimerUtils.random_scheduler(num_executions=2, + begin_hour=9, + end_hour=23, + max_interval=12 * 60, + min_interval=6 * 60) + for trigger in triggers: + self._scheduler.add_job(self.sign_in, "cron", hour=trigger.hour, minute=trigger.minute) + + # 启动任务 + if self._scheduler.get_jobs(): + self._scheduler.print_jobs() + self._scheduler.start() + + @staticmethod + def get_command() -> dict: + """ + 定义远程控制命令 + :return: 命令关键字、事件、描述、附带数据 + """ + return { + "cmd": "/pts", + "event": EventType.SiteSignin, + "desc": "站点自动签到", + "data": {} + } + + @eventmanager.register(EventType.SiteSignin) + def sign_in(self, event: Event = None): + """ + 自动签到 + """ + # 查询签到站点 + sign_sites = self.sites.get_indexers() + if not sign_sites: + logger.info("没有需要签到的站点") + return + + # 执行签到 + logger.info("开始执行签到任务 ...") + with ThreadPool(min(len(sign_sites), 5)) as p: + status = p.map(self.signin_site, sign_sites) + + if status: + logger.info("站点签到任务完成!") + # 发送通知 + self.chain.run_module("post_message", title="站点自动签到", text="\n".join(status)) + else: + logger.error("站点签到任务失败!") + + def __build_class(self, url) -> Any: + for site_schema in self._site_schema: + try: + if site_schema.match(url): + return site_schema + except Exception as e: + logger.error("站点模块加载失败:%s" % str(e)) 
+ return None + + def signin_site(self, site_info: CommentedMap) -> str: + """ + 签到一个站点 + """ + site_module = self.__build_class(site_info.get("url")) + if site_module and hasattr(site_module, "signin"): + try: + status, msg = site_module().signin(site_info) + # 特殊站点直接返回签到信息,防止仿真签到、模拟登陆有歧义 + return msg + except Exception as e: + return f"【{site_info.get('name')}】签到失败:{str(e)}" + else: + return self.__signin_base(site_info) + + def __signin_base(self, site_info: CommentedMap) -> str: + """ + 通用签到处理 + :param site_info: 站点信息 + :return: 签到结果信息 + """ + if not site_info: + return "" + site = site_info.get("name") + site_url = site_info.get("url") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + if not site_url or not site_cookie: + logger.warn(f"未配置 {site} 的站点地址或Cookie,无法签到") + return "" + # 模拟登录 + try: + # 访问链接 + checkin_url = site_url + if site_url.find("attendance.php") == -1: + # 拼登签到地址 + checkin_url = urljoin(site_url, "attendance.php") + logger.info(f"开始站点签到:{site},地址:{checkin_url}...") + res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=settings.PROXY if site_info.get("proxy") else None + ).get_res(url=checkin_url) + if not res and site_url != checkin_url: + logger.info(f"开始站点模拟登录:{site},地址:{site_url}...") + res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=settings.PROXY if site_info.get("proxy") else None + ).get_res(url=site_url) + # 判断登录状态 + if res and res.status_code in [200, 500, 403]: + if not self.is_logged_in(res.text): + if under_challenge(res.text): + msg = "站点被Cloudflare防护,请更换Cookie和UA!" + elif res.status_code == 200: + msg = "Cookie已失效" + else: + msg = f"状态码:{res.status_code}" + logger.warn(f"{site} 签到失败,{msg}") + return f"【{site}】签到失败,{msg}!" + else: + logger.info(f"{site} 签到成功") + return f"【{site}】签到成功" + elif res is not None: + logger.warn(f"{site} 签到失败,状态码:{res.status_code}") + return f"【{site}】签到失败,状态码:{res.status_code}!" + else: + logger.warn(f"{site} 签到失败,无法打开网站") + return f"【{site}】签到失败,无法打开网站!" 
+ except Exception as e: + logger.warn("%s 签到失败:%s" % (site, str(e))) + return f"【{site}】签到失败:{str(e)}!" + + def stop_service(self): + """ + 退出插件 + """ + try: + if self._scheduler: + self._scheduler.remove_all_jobs() + if self._scheduler.running: + self._event.set() + self._scheduler.shutdown() + self._event.clear() + self._scheduler = None + except Exception as e: + logger.error("退出插件失败:%s" % str(e)) + + @classmethod + def is_logged_in(cls, html_text: str) -> bool: + """ + 判断站点是否已经登陆 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return False + # 存在明显的密码输入框,说明未登录 + if html.xpath("//input[@type='password']"): + return False + # 是否存在登出和用户面板等链接 + xpaths = ['//a[contains(@href, "logout")' + ' or contains(@data-url, "logout")' + ' or contains(@href, "mybonus") ' + ' or contains(@onclick, "logout")' + ' or contains(@href, "usercp")]', + '//form[contains(@action, "logout")]'] + for xpath in xpaths: + if html.xpath(xpath): + return True + user_info_div = html.xpath('//div[@class="user-info-side"]') + if user_info_div: + return True + + return False diff --git a/app/plugins/autosignin/sites/52pt.py b/app/plugins/autosignin/sites/52pt.py new file mode 100644 index 00000000..ef805d4d --- /dev/null +++ b/app/plugins/autosignin/sites/52pt.py @@ -0,0 +1,145 @@ +import random +import re +from typing import Tuple + +from lxml import etree + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class FWpt(_ISiteSigninHandler): + """ + 52pt + 如果填写openai key则调用chatgpt获取答案 + 否则随机 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "52pt.site" + + # 已签到 + _sign_regex = ['今天已经签过到了'] + + # 签到成功,待补充 + _success_regex = ['\\d+点魔力值'] + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return 
True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: dict) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 判断今日是否已签到 + index_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url='https://52pt.site/bakatest.php') + + if not index_res or index_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in index_res.text: + logger.error(f"签到失败,Cookie失效") + return False, f'【{site}】签到失败,Cookie失效' + + sign_status = self.sign_in_result(html_res=index_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 没有签到则解析html + html = etree.HTML(index_res.text) + + if not html: + return False, f'【{site}】签到失败' + + # 获取页面问题、答案 + questionid = html.xpath("//input[@name='questionid']/@value")[0] + option_ids = html.xpath("//input[@name='choice[]']/@value") + question_str = html.xpath("//td[@class='text' and contains(text(),'请问:')]/text()")[0] + + # 正则获取问题 + match = re.search(r'请问:(.+)', question_str) + if match: + question_str = match.group(1) + logger.debug(f"获取到签到问题 {question_str}") + else: + logger.error(f"未获取到签到问题") + return False, f"【{site}】签到失败,未获取到签到问题" + + # 正确答案,默认随机,如果gpt返回则用gpt返回的答案提交 + choice = [option_ids[random.randint(0, len(option_ids) - 1)]] + + # 签到 + return self.__signin(questionid=questionid, + choice=choice, + site_cookie=site_cookie, + ua=ua, + proxy=proxy, + site=site) + + def __signin(self, questionid: str, + choice: list, + site: str, + site_cookie: str, + ua: str, + proxy: dict) -> Tuple[bool, str]: + """ + 签到请求 + questionid: 450 + choice[]: 8 + choice[]: 4 + usercomment: 此刻心情:无 + submit: 提交 + 多选会有多个choice[].... 
+ """ + data = { + 'questionid': questionid, + 'choice[]': choice[0] if len(choice) == 1 else choice, + 'usercomment': '太难了!', + 'wantskip': '不会' + } + logger.debug(f"签到请求参数 {data}") + + sign_res = RequestUtils(cookies=site_cookie, + ua=ua, + proxies=proxy + ).post_res(url='https://52pt.site/bakatest.php', data=data) + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + # 判断是否签到成功 + sign_status = self.sign_in_result(html_res=sign_res.text, + regexs=self._success_regex) + if sign_status: + logger.info(f"{site}签到成功") + return True, f'【{site}】签到成功' + else: + sign_status = self.sign_in_result(html_res=sign_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + logger.error(f"签到失败,请到页面查看") + return False, f'【{site}】签到失败,请到页面查看' diff --git a/app/plugins/autosignin/sites/__init__.py b/app/plugins/autosignin/sites/__init__.py new file mode 100644 index 00000000..60604cf1 --- /dev/null +++ b/app/plugins/autosignin/sites/__init__.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +import re +from abc import ABCMeta, abstractmethod +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.utils.string import StringUtils + + +class _ISiteSigninHandler(metaclass=ABCMeta): + """ + 实现站点签到的基类,所有站点签到类都需要继承此类,并实现match和signin方法 + 实现类放置到sitesignin目录下将会自动加载 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "" + + @abstractmethod + def match(self, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, self.site_url) else False + + @abstractmethod + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: True|False,签到结果信息 + """ + pass + + @staticmethod + def sign_in_result(html_res: str, regexs: list) -> bool: + """ + 判断是否签到成功 + """ + html_text = 
re.sub(r"#\d+", "", re.sub(r"\d+px", "", html_res)) + for regex in regexs: + if re.search(str(regex), html_text): + return True + return False diff --git a/app/plugins/autosignin/sites/btschool.py b/app/plugins/autosignin/sites/btschool.py new file mode 100644 index 00000000..15913e45 --- /dev/null +++ b/app/plugins/autosignin/sites/btschool.py @@ -0,0 +1,73 @@ +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class BTSchool(_ISiteSigninHandler): + """ + 学校签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "pt.btschool.club" + + # 已签到 + _sign_text = '每日签到' + + @classmethod + def match(cls, url) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + logger.info(f"{site} 开始签到") + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://pt.btschool.club") + + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 已签到 + if self._sign_text not in html_res.text: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://pt.btschool.club/index.php?action=addbonus") + + if not sign_res or 
sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + # 签到成功 + if self._sign_text not in sign_res.text: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' diff --git a/app/plugins/autosignin/sites/chdbits.py b/app/plugins/autosignin/sites/chdbits.py new file mode 100644 index 00000000..adf09544 --- /dev/null +++ b/app/plugins/autosignin/sites/chdbits.py @@ -0,0 +1,146 @@ +import random +import re +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class CHDBits(_ISiteSigninHandler): + """ + 彩虹岛签到 + 如果填写openai key则调用chatgpt获取答案 + 否则随机 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "chdbits.co" + + # 已签到 + _sign_regex = ['今天已经签过到了'] + + # 签到成功,待补充 + _success_regex = ['\\d+点魔力值'] + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 判断今日是否已签到 + index_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url='https://chdbits.co/bakatest.php') + + if not index_res or index_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in index_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + sign_status = self.sign_in_result(html_res=index_res.text, + 
regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 没有签到则解析html + html = etree.HTML(index_res.text) + + if not html: + return False, f'【{site}】签到失败' + + # 获取页面问题、答案 + questionid = html.xpath("//input[@name='questionid']/@value")[0] + option_ids = html.xpath("//input[@name='choice[]']/@value") + question_str = html.xpath("//td[@class='text' and contains(text(),'请问:')]/text()")[0] + + # 正则获取问题 + match = re.search(r'请问:(.+)', question_str) + if match: + question_str = match.group(1) + logger.debug(f"获取到签到问题 {question_str}") + else: + logger.error(f"未获取到签到问题") + return False, f"【{site}】签到失败,未获取到签到问题" + + # 正确答案,默认随机,如果gpt返回则用gpt返回的答案提交 + choice = [option_ids[random.randint(0, len(option_ids) - 1)]] + + # 签到 + return self.__signin(questionid=questionid, + choice=choice, + site_cookie=site_cookie, + ua=ua, + proxy=proxy, + site=site) + + def __signin(self, questionid: str, + choice: list, + site: str, + site_cookie: str, + ua: str, + proxy: dict) -> Tuple[bool, str]: + """ + 签到请求 + questionid: 450 + choice[]: 8 + choice[]: 4 + usercomment: 此刻心情:无 + submit: 提交 + 多选会有多个choice[].... 
+ """ + data = { + 'questionid': questionid, + 'choice[]': choice[0] if len(choice) == 1 else choice, + 'usercomment': '太难了!', + 'wantskip': '不会' + } + logger.debug(f"签到请求参数 {data}") + + sign_res = RequestUtils(cookies=site_cookie, + ua=ua, + proxies=proxy + ).post_res(url='https://chdbits.co/bakatest.php', data=data) + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + # 判断是否签到成功 + sign_status = self.sign_in_result(html_res=sign_res.text, + regexs=self._success_regex) + if sign_status: + logger.info(f"{site} 签到成功") + return True, f'【{site}】签到成功' + else: + sign_status = self.sign_in_result(html_res=sign_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + logger.error(f"签到失败,请到页面查看") + return False, f'【{site}】签到失败,请到页面查看' diff --git a/app/plugins/autosignin/sites/haidan.py b/app/plugins/autosignin/sites/haidan.py new file mode 100644 index 00000000..15342091 --- /dev/null +++ b/app/plugins/autosignin/sites/haidan.py @@ -0,0 +1,65 @@ +import random +import re +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HaiDan(_ISiteSigninHandler): + """ + 海胆签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "haidan.video" + + # 签到成功 + _succeed_regex = ['(?<=value=")已经打卡(?=")'] + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + 
site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 签到 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://www.haidan.video/signin.php") + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in sign_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + sign_status = self.sign_in_result(html_res=sign_res.text, + regexs=self._succeed_regex) + if sign_status: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + + logger.error(f"签到失败,签到接口返回 {sign_res.text}") + return False, f'【{site}】签到失败' diff --git a/app/plugins/autosignin/sites/hares.py b/app/plugins/autosignin/sites/hares.py new file mode 100644 index 00000000..c8bdca29 --- /dev/null +++ b/app/plugins/autosignin/sites/hares.py @@ -0,0 +1,80 @@ +import json +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class Hares(_ISiteSigninHandler): + """ + 白兔签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "club.hares.top" + + # 已签到 + _sign_text = '已签到' + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + 
headers=ua, + proxies=proxy + ).get_res(url="https://club.hares.top") + if not html_res or html_res.status_code != 200: + logger.error(f"模拟访问失败,请检查站点连通性") + return False, f'【{site}】模拟访问失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"模拟访问失败,cookie失效") + return False, f'【{site}】模拟访问失败,cookie失效' + + # if self._sign_text in html_res.text: + # logger.info(f"今日已签到") + # return True, f'【{site}】今日已签到' + + headers = { + 'Accept': 'application/json', + "User-Agent": ua + } + sign_res = RequestUtils(cookies=site_cookie, + headers=headers, + proxies=proxy + ).get_res(url="https://club.hares.top/attendance.php?action=sign") + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + # {"code":1,"msg":"您今天已经签到过了"} + # {"code":0,"msg":"签到成功"} + sign_dict = json.loads(sign_res.text) + if sign_dict['code'] == 0: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + else: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' diff --git a/app/plugins/autosignin/sites/hdarea.py b/app/plugins/autosignin/sites/hdarea.py new file mode 100644 index 00000000..f9132c82 --- /dev/null +++ b/app/plugins/autosignin/sites/hdarea.py @@ -0,0 +1,69 @@ +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HDArea(_ISiteSigninHandler): + """ + 好大签到 + """ + + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "hdarea.co" + + # 签到成功 + _success_text = "此次签到您获得" + _repeat_text = "请不要重复签到哦" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + 
:param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + data = { + 'action': 'sign_in' + } + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url="https://www.hdarea.co/sign_in.php", data=data) + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 判断是否已签到 + # '已连续签到278天,此次签到您获得了100魔力值奖励!' + if self._success_text in html_res.text: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + if self._repeat_text in html_res.text: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + logger.error(f"签到失败,签到接口返回 {html_res.text}") + return False, f'【{site}】签到失败' diff --git a/app/plugins/autosignin/sites/hdchina.py b/app/plugins/autosignin/sites/hdchina.py new file mode 100644 index 00000000..a0bc6a46 --- /dev/null +++ b/app/plugins/autosignin/sites/hdchina.py @@ -0,0 +1,117 @@ +import json +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HDChina(_ISiteSigninHandler): + """ + 瓷器签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "hdchina.org" + + # 已签到 + _sign_regex = ['已签到'] + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 
站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 尝试解决瓷器cookie每天签到后过期,只保留hdchina=部分 + cookie = "" + # 按照分号进行字符串拆分 + sub_strs = site_cookie.split(";") + # 遍历每个子字符串 + for sub_str in sub_strs: + if "hdchina=" in sub_str: + # 如果子字符串包含"hdchina=",则保留该子字符串 + cookie += sub_str + ";" + + if "hdchina=" not in cookie: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + site_cookie = cookie + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://hdchina.org/index.php") + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text or "阻断页面" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 获取新返回的cookie进行签到 + site_cookie = ';'.join(['{}={}'.format(k, v) for k, v in html_res.cookies.get_dict().items()]) + + # 判断是否已签到 + html_res.encoding = "utf-8" + sign_status = self.sign_in_result(html_res=html_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 没有签到则解析html + html = etree.HTML(html_res.text) + + if not html: + return False, f'【{site}】签到失败' + + # x_csrf + x_csrf = html.xpath("//meta[@name='x-csrf']/@content")[0] + if not x_csrf: + logger.error("签到失败,获取x-csrf失败") + return False, f'【{site}】签到失败' + logger.debug(f"获取到x-csrf {x_csrf}") + + # 签到 + data = { + 'csrf': x_csrf + } + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url="https://hdchina.org/plugin_sign-in.php?cmd=signin", data=data) + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + sign_dict = json.loads(sign_res.text) + logger.debug(f"签到返回结果 {sign_dict}") 
+ if sign_dict['state']: + # {'state': 'success', 'signindays': 10, 'integral': 20} + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + else: + # {'state': False, 'msg': '不正确的CSRF / Incorrect CSRF token'} + logger.error(f"签到失败,不正确的CSRF / Incorrect CSRF token") + return False, f'【{site}】签到失败' diff --git a/app/plugins/autosignin/sites/hdcity.py b/app/plugins/autosignin/sites/hdcity.py new file mode 100644 index 00000000..87c02f5a --- /dev/null +++ b/app/plugins/autosignin/sites/hdcity.py @@ -0,0 +1,66 @@ +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HDCity(_ISiteSigninHandler): + """ + 城市签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "hdcity.city" + + # 签到成功 + _success_text = '本次签到获得魅力' + # 重复签到 + _repeat_text = '已签到' + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://hdcity.city/sign") + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 判断是否已签到 + # '已连续签到278天,此次签到您获得了100魔力值奖励!' 
+ if self._success_text in html_res.text: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + if self._repeat_text in html_res.text: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + logger.error(f"签到失败,签到接口返回 {html_res.text}") + return False, f'【{site}】签到失败' diff --git a/app/plugins/autosignin/sites/hdsky.py b/app/plugins/autosignin/sites/hdsky.py new file mode 100644 index 00000000..f57b5082 --- /dev/null +++ b/app/plugins/autosignin/sites/hdsky.py @@ -0,0 +1,131 @@ +import json +import time +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.helper.ocr import OcrHelper +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HDSky(_ISiteSigninHandler): + """ + 天空ocr签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "hdsky.me" + + # 已签到 + _sign_regex = ['已签到'] + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 判断今日是否已签到 + index_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url='https://hdsky.me') + if not index_res or index_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in index_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + sign_status = self.sign_in_result(html_res=index_res.text, + regexs=self._sign_regex) + if sign_status: + 
logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 获取验证码请求,考虑到网络问题获取失败,多获取几次试试 + res_times = 0 + img_hash = None + while not img_hash and res_times <= 3: + image_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url='https://hdsky.me/image_code_ajax.php', + data={'action': 'new'}) + if image_res and image_res.status_code == 200: + image_json = json.loads(image_res.text) + if image_json["success"]: + img_hash = image_json["code"] + break + res_times += 1 + logger.debug(f"获取{site}验证码失败,正在进行重试,目前重试次数 {res_times}") + time.sleep(1) + + # 获取到二维码hash + if img_hash: + # 完整验证码url + img_get_url = 'https://hdsky.me/image.php?action=regimage&imagehash=%s' % img_hash + logger.debug(f"获取到{site}验证码链接 {img_get_url}") + # ocr识别多次,获取6位验证码 + times = 0 + ocr_result = None + # 识别几次 + while times <= 3: + # ocr二维码识别 + ocr_result = OcrHelper().get_captcha_text(image_url=img_get_url, + cookie=site_cookie, + ua=ua) + logger.debug(f"ocr识别{site}验证码 {ocr_result}") + if ocr_result: + if len(ocr_result) == 6: + logger.info(f"ocr识别{site}验证码成功 {ocr_result}") + break + times += 1 + logger.debug(f"ocr识别{site}验证码失败,正在进行重试,目前重试次数 {times}") + time.sleep(1) + + if ocr_result: + # 组装请求参数 + data = { + 'action': 'showup', + 'imagehash': img_hash, + 'imagestring': ocr_result + } + # 访问签到链接 + res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url='https://hdsky.me/showup.php', data=data) + if res and res.status_code == 200: + if json.loads(res.text)["success"]: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + elif str(json.loads(res.text)["message"]) == "date_unmatch": + # 重复签到 + logger.warn(f"重复成功") + return True, f'【{site}】今日已签到' + elif str(json.loads(res.text)["message"]) == "invalid_imagehash": + # 验证码错误 + logger.warn(f"签到失败:验证码错误") + return False, f'【{site}】签到失败:验证码错误' + + logger.error(f'签到失败:未获取到验证码') + return False, f'【{site}】签到失败:未获取到验证码' diff --git a/app/plugins/autosignin/sites/hdupt.py 
b/app/plugins/autosignin/sites/hdupt.py new file mode 100644 index 00000000..c3dbbc09 --- /dev/null +++ b/app/plugins/autosignin/sites/hdupt.py @@ -0,0 +1,81 @@ +import re +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class HDUpt(_ISiteSigninHandler): + """ + hdu签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "pt.hdupt.com" + + # 已签到 + _sign_regex = [''] + + # 签到成功 + _success_text = '本次签到获得魅力' + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + index_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://pt.hdupt.com") + if not index_res or index_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in index_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + sign_status = self.sign_in_result(html_res=index_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 签到 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url="https://pt.hdupt.com/added.php?action=qiandao") + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + 
logger.debug(f"签到接口返回 {sign_res.text}") + # 判断是否已签到 sign_res.text = ".23" + if len(list(map(int, re.findall(r"\d+", sign_res.text)))) > 0: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + + logger.error(f"签到失败,签到接口返回 {sign_res.text}") + return False, f'【{site}】签到失败' diff --git a/app/plugins/autosignin/sites/opencd.py b/app/plugins/autosignin/sites/opencd.py new file mode 100644 index 00000000..bfaca8bb --- /dev/null +++ b/app/plugins/autosignin/sites/opencd.py @@ -0,0 +1,129 @@ +import json +import time +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.helper.ocr import OcrHelper +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class Opencd(_ISiteSigninHandler): + """ + 皇后ocr签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "open.cd" + + # 已签到 + _repeat_text = "/plugin_sign-in.php?cmd=show-log" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 判断今日是否已签到 + index_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url='https://www.open.cd') + if not index_res or index_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in index_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + if self._repeat_text in index_res.text: + 
logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 获取签到参数 + sign_param_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url='https://www.open.cd/plugin_sign-in.php') + if not sign_param_res or sign_param_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + # 没有签到则解析html + html = etree.HTML(sign_param_res.text) + if not html: + return False, f'【{site}】签到失败' + + # 签到参数 + img_url = html.xpath('//form[@id="frmSignin"]//img/@src')[0] + img_hash = html.xpath('//form[@id="frmSignin"]//input[@name="imagehash"]/@value')[0] + if not img_url or not img_hash: + logger.error(f"签到失败,获取签到参数失败") + return False, f'【{site}】签到失败,获取签到参数失败' + + # 完整验证码url + img_get_url = 'https://www.open.cd/%s' % img_url + logger.debug(f"获取到{site}验证码链接 {img_get_url}") + + # ocr识别多次,获取6位验证码 + times = 0 + ocr_result = None + # 识别几次 + while times <= 3: + # ocr二维码识别 + ocr_result = OcrHelper().get_captcha_text(image_url=img_get_url, + cookie=site_cookie, + ua=ua) + logger.debug(f"ocr识别{site}验证码 {ocr_result}") + if ocr_result: + if len(ocr_result) == 6: + logger.info(f"ocr识别{site}验证码成功 {ocr_result}") + break + times += 1 + logger.debug(f"ocr识别{site}验证码失败,正在进行重试,目前重试次数 {times}") + time.sleep(1) + + if ocr_result: + # 组装请求参数 + data = { + 'imagehash': img_hash, + 'imagestring': ocr_result + } + # 访问签到链接 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url='https://www.open.cd/plugin_sign-in.php?cmd=signin', data=data) + if sign_res and sign_res.status_code == 200: + logger.debug(f"sign_res返回 {sign_res.text}") + # sign_res.text = '{"state":"success","signindays":"0","integral":"10"}' + sign_dict = json.loads(sign_res.text) + if sign_dict['state']: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + else: + logger.error(f"签到失败,签到接口返回 {sign_dict}") + return False, f'【{site}】签到失败' + + logger.error(f'签到失败:未获取到验证码') + return False, f'【{site}】签到失败:未获取到验证码' diff --git 
a/app/plugins/autosignin/sites/pterclub.py b/app/plugins/autosignin/sites/pterclub.py new file mode 100644 index 00000000..fc6b67a1 --- /dev/null +++ b/app/plugins/autosignin/sites/pterclub.py @@ -0,0 +1,58 @@ +import json +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class PTerClub(_ISiteSigninHandler): + """ + 猫签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "pterclub.com" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 签到 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://pterclub.com/attendance-ajax.php") + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,请检查cookie是否失效' + + sign_dict = json.loads(sign_res.text) + if sign_dict['status'] == '1': + # {"status":"1","data":" (签到已成功300)","message":"

这是您的第237次签到, + # 已连续签到237天。

本次签到获得300克猫粮。

"} + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + else: + # {"status":"0","data":"抱歉","message":"您今天已经签到过了,请勿重复刷新。"} + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' diff --git a/app/plugins/autosignin/sites/tjupt.py b/app/plugins/autosignin/sites/tjupt.py new file mode 100644 index 00000000..f66eed91 --- /dev/null +++ b/app/plugins/autosignin/sites/tjupt.py @@ -0,0 +1,272 @@ +import json +import os +import time +from io import BytesIO +from typing import Tuple + +from PIL import Image +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class Tjupt(_ISiteSigninHandler): + """ + 北洋签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "tjupt.org" + + # 签到地址 + _sign_in_url = 'https://www.tjupt.org/attendance.php' + + # 已签到 + _sign_regex = ['今日已签到'] + + # 签到成功 + _succeed_regex = ['这是您的首次签到,本次签到获得\\d+个魔力值。', + '签到成功,这是您的第\\d+次签到,已连续签到\\d+天,本次签到获得\\d+个魔力值。', + '重新签到成功,本次签到获得\\d+个魔力值'] + + # 存储正确的答案,后续可直接查 + _answer_path = settings.TEMP_PATH / "signin/" + _answer_file = _answer_path / "tjupt.json" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 创建正确答案存储目录 + if not os.path.exists(os.path.dirname(self._answer_file)): + os.makedirs(os.path.dirname(self._answer_file)) + + # 获取北洋签到页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + 
proxies=proxy + ).get_res(url=self._sign_in_url) + + # 获取签到后返回html,判断是否签到成功 + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + sign_status = self.sign_in_result(html_res=html_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 没有签到则解析html + html = etree.HTML(html_res.text) + if not html: + return False, f'【{site}】签到失败' + img_url = html.xpath('//table[@class="captcha"]//img/@src')[0] + + if not img_url: + logger.error(f"签到失败,未获取到签到图片") + return False, f'【{site}】签到失败,未获取到签到图片' + + # 签到图片 + img_url = "https://www.tjupt.org" + img_url + logger.info(f"获取到签到图片 {img_url}") + # 获取签到图片hash + captcha_img_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url=img_url) + if not captcha_img_res or captcha_img_res.status_code != 200: + logger.error(f"签到图片 {img_url} 请求失败") + return False, f'【{site}】签到失败,未获取到签到图片' + captcha_img = Image.open(BytesIO(captcha_img_res.content)) + captcha_img_hash = self._tohash(captcha_img) + logger.debug(f"签到图片hash {captcha_img_hash}") + + # 签到答案选项 + values = html.xpath("//input[@name='answer']/@value") + options = html.xpath("//input[@name='answer']/following-sibling::text()") + + if not values or not options: + logger.error(f"签到失败,未获取到答案选项") + return False, f'【{site}】签到失败,未获取到答案选项' + + # value+选项 + answers = list(zip(values, options)) + logger.debug(f"获取到所有签到选项 {answers}") + + # 查询已有答案 + exits_answers = {} + try: + with open(self._answer_file, 'r') as f: + json_str = f.read() + exits_answers = json.loads(json_str) + # 查询本地本次验证码hash答案 + captcha_answer = exits_answers[captcha_img_hash] + + # 本地存在本次hash对应的正确答案再遍历查询 + if captcha_answer: + for value, answer in answers: + if str(captcha_answer) == str(answer): + # 确实是答案 + return self.__signin(answer=value, + 
site_cookie=site_cookie, + ua=ua, + proxy=proxy, + site=site) + except (FileNotFoundError, IOError, OSError) as e: + logger.debug(f"查询本地已知答案失败:{e},继续请求豆瓣查询") + + # 本地不存在正确答案则请求豆瓣查询匹配 + for value, answer in answers: + if answer: + # 豆瓣检索 + db_res = RequestUtils().get_res(url=f'https://movie.douban.com/j/subject_suggest?q={answer}') + if not db_res or db_res.status_code != 200: + logger.debug(f"签到选项 {answer} 未查询到豆瓣数据") + continue + + # 豆瓣返回结果 + db_answers = json.loads(db_res.text) + if not isinstance(db_answers, list): + db_answers = [db_answers] + + if len(db_answers) == 0: + logger.debug(f"签到选项 {answer} 查询到豆瓣数据为空") + + for db_answer in db_answers: + answer_img_url = db_answer['img'] + + # 获取答案hash + answer_img_res = RequestUtils().get_res(url=answer_img_url) + if not answer_img_res or answer_img_res.status_code != 200: + logger.debug(f"签到答案 {answer} {answer_img_url} 请求失败") + continue + + answer_img = Image.open(BytesIO(answer_img_res.content)) + answer_img_hash = self._tohash(answer_img) + logger.debug(f"签到答案图片hash {answer} {answer_img_hash}") + + # 获取选项图片与签到图片相似度,大于0.9默认是正确答案 + score = self._comparehash(captcha_img_hash, answer_img_hash) + logger.info(f"签到图片与选项 {answer} 豆瓣图片相似度 {score}") + if score > 0.9: + # 确实是答案 + return self.__signin(answer=value, + site_cookie=site_cookie, + ua=ua, + proxy=proxy, + site=site, + exits_answers=exits_answers, + captcha_img_hash=captcha_img_hash) + + # 间隔5s,防止请求太频繁被豆瓣屏蔽ip + time.sleep(5) + logger.error(f"豆瓣图片匹配,未获取到匹配答案") + + # 没有匹配签到成功,则签到失败 + return False, f'【{site}】签到失败,未获取到匹配答案' + + def __signin(self, answer, site_cookie, ua, proxy, site, exits_answers=None, captcha_img_hash=None): + """ + 签到请求 + """ + data = { + 'answer': answer, + 'submit': '提交' + } + logger.debug(f"提交data {data}") + sign_in_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url=self._sign_in_url, data=data) + if not sign_in_res or sign_in_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, 
f'【{site}】签到失败,签到接口请求失败' + + # 获取签到后返回html,判断是否签到成功 + sign_status = self.sign_in_result(html_res=sign_in_res.text, + regexs=self._succeed_regex) + if sign_status: + logger.info(f"签到成功") + if exits_answers and captcha_img_hash: + # 签到成功写入本地文件 + self.__write_local_answer(exits_answers=exits_answers or {}, + captcha_img_hash=captcha_img_hash, + answer=answer) + return True, f'【{site}】签到成功' + else: + logger.error(f"签到失败,请到页面查看") + return False, f'【{site}】签到失败,请到页面查看' + + def __write_local_answer(self, exits_answers, captcha_img_hash, answer): + """ + 签到成功写入本地文件 + """ + try: + exits_answers[captcha_img_hash] = answer + # 序列化数据 + formatted_data = json.dumps(exits_answers, indent=4) + with open(self._answer_file, 'w') as f: + f.write(formatted_data) + except (FileNotFoundError, IOError, OSError) as e: + logger.debug(f"签到成功写入本地文件失败:{e}") + + @staticmethod + def _tohash(img, shape=(10, 10)): + """ + 获取图片hash + """ + img = img.resize(shape) + gray = img.convert('L') + s = 0 + hash_str = '' + for i in range(shape[1]): + for j in range(shape[0]): + s = s + gray.getpixel((j, i)) + avg = s / (shape[0] * shape[1]) + for i in range(shape[1]): + for j in range(shape[0]): + if gray.getpixel((j, i)) > avg: + hash_str = hash_str + '1' + else: + hash_str = hash_str + '0' + return hash_str + + @staticmethod + def _comparehash(hash1, hash2, shape=(10, 10)): + """ + 比较图片hash + 返回相似度 + """ + n = 0 + if len(hash1) != len(hash2): + return -1 + for i in range(len(hash1)): + if hash1[i] == hash2[i]: + n = n + 1 + return n / (shape[0] * shape[1]) diff --git a/app/plugins/autosignin/sites/ttg.py b/app/plugins/autosignin/sites/ttg.py new file mode 100644 index 00000000..156b9854 --- /dev/null +++ b/app/plugins/autosignin/sites/ttg.py @@ -0,0 +1,96 @@ +import re +from typing import Tuple + +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from 
app.utils.string import StringUtils + + +class TTG(_ISiteSigninHandler): + """ + TTG签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "totheglory.im" + + # 已签到 + _sign_regex = ['已签到'] + _sign_text = '亲,您今天已签到过,不要太贪哦' + + # 签到成功 + _success_text = '您已连续签到' + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://totheglory.im") + if not html_res or html_res.status_code != 200: + logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 判断是否已签到 + html_res.encoding = "utf-8" + sign_status = self.sign_in_result(html_res=html_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 获取签到参数 + signed_timestamp = re.search('(?<=signed_timestamp: ")\\d{10}', html_res.text).group() + signed_token = re.search('(?<=signed_token: ").*(?=")', html_res.text).group() + logger.debug(f"signed_timestamp={signed_timestamp} signed_token={signed_token}") + + data = { + 'signed_timestamp': signed_timestamp, + 'signed_token': signed_token + } + # 签到 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url="https://totheglory.im/signed.php", + data=data) + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, 
f'【{site}】签到失败,签到接口请求失败' + + sign_res.encoding = "utf-8" + if self._success_text in sign_res.text: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + if self._sign_text in sign_res.text: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + logger.error(f"签到失败,未知原因") + return False, f'【{site}】签到失败,未知原因' diff --git a/app/plugins/autosignin/sites/u2.py b/app/plugins/autosignin/sites/u2.py new file mode 100644 index 00000000..d1af38c5 --- /dev/null +++ b/app/plugins/autosignin/sites/u2.py @@ -0,0 +1,122 @@ +import datetime +import random +import re +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class U2(_ISiteSigninHandler): + """ + U2签到 随机 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "u2.dmhy.org" + + # 已签到 + _sign_regex = ['已签到', + 'Show Up', + 'Показать', + '已簽到', + '已簽到'] + + # 签到成功 + _success_text = "window.location.href = 'showup.php';" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + now = datetime.datetime.now() + # 判断当前时间是否小于9点 + if now.hour < 9: + logger.error(f"签到失败,9点前不签到") + return False, f'【{site}】签到失败,9点前不签到' + + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://u2.dmhy.org/showup.php") + if not html_res or html_res.status_code != 200: + 
logger.error(f"签到失败,请检查站点连通性") + return False, f'【{site}】签到失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"签到失败,cookie失效") + return False, f'【{site}】签到失败,cookie失效' + + # 判断是否已签到 + html_res.encoding = "utf-8" + sign_status = self.sign_in_result(html_res=html_res.text, + regexs=self._sign_regex) + if sign_status: + logger.info(f"今日已签到") + return True, f'【{site}】今日已签到' + + # 没有签到则解析html + html = etree.HTML(html_res.text) + + if not html: + return False, f'【{site}】签到失败' + + # 获取签到参数 + req = html.xpath("//form//td/input[@name='req']/@value")[0] + hash_str = html.xpath("//form//td/input[@name='hash']/@value")[0] + form = html.xpath("//form//td/input[@name='form']/@value")[0] + submit_name = html.xpath("//form//td/input[@type='submit']/@name") + submit_value = html.xpath("//form//td/input[@type='submit']/@value") + if not req or not hash_str or not form or not submit_name or not submit_value: + logger.error("签到失败,未获取到相关签到参数") + return False, f'【{site}】签到失败' + + # 随机一个答案 + answer_num = random.randint(0, 3) + data = { + 'req': req, + 'hash': hash_str, + 'form': form, + 'message': '一切随缘~', + submit_name[answer_num]: submit_value[answer_num] + } + # 签到 + sign_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).post_res(url="https://u2.dmhy.org/showup.php?action=show", + data=data) + if not sign_res or sign_res.status_code != 200: + logger.error(f"签到失败,签到接口请求失败") + return False, f'【{site}】签到失败,签到接口请求失败' + + # 判断是否签到成功 + # sign_res.text = "" + if self._success_text in sign_res.text: + logger.info(f"签到成功") + return True, f'【{site}】签到成功' + else: + logger.error(f"签到失败,未知原因") + return False, f'【{site}】签到失败,未知原因' diff --git a/app/plugins/autosignin/sites/zhuque.py b/app/plugins/autosignin/sites/zhuque.py new file mode 100644 index 00000000..270e0f4f --- /dev/null +++ b/app/plugins/autosignin/sites/zhuque.py @@ -0,0 +1,86 @@ +import json +from typing import Tuple + +from lxml import etree +from ruamel.yaml import CommentedMap + +from app.core 
import settings +from app.log import logger +from app.plugins.autosignin.sites import _ISiteSigninHandler +from app.utils.http import RequestUtils +from app.utils.string import StringUtils + + +class ZhuQue(_ISiteSigninHandler): + """ + ZHUQUE签到 + """ + # 匹配的站点Url,每一个实现类都需要设置为自己的站点Url + site_url = "zhuque.in" + + @classmethod + def match(cls, url: str) -> bool: + """ + 根据站点Url判断是否匹配当前站点签到类,大部分情况使用默认实现即可 + :param url: 站点Url + :return: 是否匹配,如匹配则会调用该类的signin方法 + """ + return True if StringUtils.url_equal(url, cls.site_url) else False + + def signin(self, site_info: CommentedMap) -> Tuple[bool, str]: + """ + 执行签到操作 + :param site_info: 站点信息,含有站点Url、站点Cookie、UA等信息 + :return: 签到结果信息 + """ + site = site_info.get("name") + site_cookie = site_info.get("cookie") + ua = site_info.get("ua") + proxy = settings.PROXY if site_info.get("proxy") else None + + # 获取页面html + html_res = RequestUtils(cookies=site_cookie, + headers=ua, + proxies=proxy + ).get_res(url="https://zhuque.in") + if not html_res or html_res.status_code != 200: + logger.error(f"模拟登录失败,请检查站点连通性") + return False, f'【{site}】模拟登录失败,请检查站点连通性' + + if "login.php" in html_res.text: + logger.error(f"模拟登录失败,cookie失效") + return False, f'【{site}】模拟登录失败,cookie失效' + + html = etree.HTML(html_res.text) + + if not html: + return False, f'【{site}】模拟登录失败' + + # 释放技能 + msg = '失败' + x_csrf_token = html.xpath("//meta[@name='x-csrf-token']/@content")[0] + if x_csrf_token: + data = { + "all": 1, + "resetModal": "true" + } + headers = { + "x-csrf-token": str(x_csrf_token), + "Content-Type": "application/json; charset=utf-8", + "User-Agent": ua + } + skill_res = RequestUtils(cookies=site_cookie, + headers=headers, + proxies=proxy + ).post_res(url="https://zhuque.in/api/gaming/fireGenshinCharacterMagic", json=data) + if not skill_res or skill_res.status_code != 200: + logger.error(f"模拟登录失败,释放技能失败") + return False, f'【{site}】模拟登录失败,释放技能失败' + # '{"status":200,"data":{"code":"FIRE_GENSHIN_CHARACTER_MAGIC_SUCCESS","bonus":0}}' + skill_dict = json.loads(skill_res.text) + if 
skill_dict['status'] == 200: + bonus = int(skill_dict['data']['bonus']) + msg = f'成功,获得{bonus}魔力' + + logger.info(f'【{site}】模拟登录成功,技能释放{msg}') + return True, f'【{site}】模拟登录成功,技能释放{msg}' diff --git a/app/plugins/sitestatistics/__init__.py b/app/plugins/sitestatistics/__init__.py new file mode 100644 index 00000000..4abea59d --- /dev/null +++ b/app/plugins/sitestatistics/__init__.py @@ -0,0 +1,262 @@ +from datetime import datetime +from multiprocessing.dummy import Pool as ThreadPool +from threading import Lock +from typing import Optional, Any + +import requests +from ruamel.yaml import CommentedMap + +from app.core import settings +from app.helper import ModuleHelper +from app.helper.sites import SitesHelper +from app.log import logger +from app.plugins import _PluginBase +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo +from app.utils.http import RequestUtils + +lock = Lock() + + +class SiteStatistics(_PluginBase): + sites = None + + _MAX_CONCURRENCY: int = 10 + _last_update_time: Optional[datetime] = None + _sites_data: dict = {} + _site_schema: list = None + + def init_plugin(self, config: dict = None): + # 加载模块 + self._site_schema = ModuleHelper.load('app.plugins.sitestatistics.siteuserinfo', + filter_func=lambda _, obj: hasattr(obj, 'schema')) + self._site_schema.sort(key=lambda x: x.order) + # 站点管理 + self.sites = SitesHelper() + # 站点上一次更新时间 + self._last_update_time = None + # 站点数据 + self._sites_data = {} + + def stop_service(self): + pass + + def __build_class(self, html_text: str) -> Any: + for site_schema in self._site_schema: + try: + if site_schema.match(html_text): + return site_schema + except Exception as e: + logger.error(f"站点 {site_schema.name} 匹配失败 {e}") + return None + + def build(self, url: str, site_name: str, + site_cookie: str = None, + ua: str = None, + proxy: bool = False) -> Any: + if not site_cookie: + return None + session = requests.Session() + logger.debug(f"站点 {site_name} url={url} site_cookie={site_cookie} ua={ua}") + 
proxies = settings.PROXY if proxy else None + res = RequestUtils(cookies=site_cookie, + session=session, + headers=ua, + proxies=proxies + ).get_res(url=url) + if res and res.status_code == 200: + if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: + res.encoding = "UTF-8" + else: + res.encoding = res.apparent_encoding + html_text = res.text + # 第一次登录反爬 + if html_text.find("title") == -1: + i = html_text.find("window.location") + if i == -1: + return None + tmp_url = url + html_text[i:html_text.find(";")] \ + .replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "") + res = RequestUtils(cookies=site_cookie, + session=session, + headers=ua, + proxies=proxies + ).get_res(url=tmp_url) + if res and res.status_code == 200: + if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: + res.encoding = "UTF-8" + else: + res.encoding = res.apparent_encoding + html_text = res.text + if not html_text: + return None + else: + logger.error("站点 %s 被反爬限制:%s, 状态码:%s" % (site_name, url, res.status_code)) + return None + + # 兼容假首页情况,假首页通常没有 0: + for head, date, content in site_user_info.message_unread_contents: + msg_title = f"【站点 {site_user_info.site_name} 消息】" + msg_text = f"时间:{date}\n标题:{head}\n内容:\n{content}" + self.chain.run_module("post_message", title=msg_title, text=msg_text) + else: + self.chain.run_module("post_message", + title=f"站点 {site_user_info.site_name} 收到 " + f"{site_user_info.message_unread} 条新消息,请登陆查看") + + def refresh_all_site_data(self, force: bool = False, specify_sites: list = None): + """ + 多线程刷新站点下载上传量,默认间隔6小时 + """ + if not self.sites.get_indexers(): + return + + with lock: + + if not force \ + and not specify_sites \ + and self._last_update_time: + return + + if specify_sites \ + and not isinstance(specify_sites, list): + specify_sites = [specify_sites] + + # 没有指定站点,默认使用全部站点 + if not specify_sites: + refresh_sites = self.sites.get_indexers() + else: + refresh_sites = [site for site in self.sites.get_indexers() if 
+ site.get("name") in specify_sites] + + if not refresh_sites: + return + + # 并发刷新 + with ThreadPool(min(len(refresh_sites), self._MAX_CONCURRENCY)) as p: + site_user_infos = p.map(self.__refresh_site_data, refresh_sites) + site_user_infos = [info for info in site_user_infos if info] + + print(site_user_infos) + # TODO 登记历史数据 + # TODO 实时用户数据 + # TODO 更新站点图标 + # TODO 实时做种信息 + + # 更新时间 + self._last_update_time = datetime.now() + + @staticmethod + def __todict(raw_statistics): + statistics = [] + for site in raw_statistics: + statistics.append({"site": site.SITE, + "username": site.USERNAME, + "user_level": site.USER_LEVEL, + "join_at": site.JOIN_AT, + "update_at": site.UPDATE_AT, + "upload": site.UPLOAD, + "download": site.DOWNLOAD, + "ratio": site.RATIO, + "seeding": site.SEEDING, + "leeching": site.LEECHING, + "seeding_size": site.SEEDING_SIZE, + "bonus": site.BONUS, + "url": site.URL, + "msg_unread": site.MSG_UNREAD + }) + return statistics diff --git a/app/plugins/sitestatistics/siteuserinfo/__init__.py b/app/plugins/sitestatistics/siteuserinfo/__init__.py new file mode 100644 index 00000000..38d01073 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/__init__.py @@ -0,0 +1,360 @@ +# -*- coding: utf-8 -*- +import base64 +import json +import re +from abc import ABCMeta, abstractmethod +from typing import Optional +from urllib.parse import urljoin, urlsplit + +import requests +from lxml import etree +from requests import Session + +from app.core import settings +from app.helper.cloudflare import under_challenge +from app.log import logger +from app.utils.http import RequestUtils +from app.utils.types import SiteSchema + +SITE_BASE_ORDER = 1000 + + +class ISiteUserInfo(metaclass=ABCMeta): + # 站点模版 + schema = SiteSchema.NexusPhp + # 站点解析时判断顺序,值越小越先解析 + order = SITE_BASE_ORDER + + def __init__(self, site_name: str, + url: str, + site_cookie: str, + index_html: str, + session: Session = None, + ua: str = None, + emulate: bool = False, + proxy: bool = None): + 
super().__init__() + # 站点信息 + self.site_name = None + self.site_url = None + self.site_favicon = None + # 用户信息 + self.username = None + self.userid = None + # 未读消息 + self.message_unread = 0 + self.message_unread_contents = [] + + # 流量信息 + self.upload = 0 + self.download = 0 + self.ratio = 0 + + # 种子信息 + self.seeding = 0 + self.leeching = 0 + self.uploaded = 0 + self.completed = 0 + self.incomplete = 0 + self.seeding_size = 0 + self.leeching_size = 0 + self.uploaded_size = 0 + self.completed_size = 0 + self.incomplete_size = 0 + # 做种人数, 种子大小 + self.seeding_info = [] + + # 用户详细信息 + self.user_level = None + self.join_at = None + self.bonus = 0.0 + + # 错误信息 + self.err_msg = None + # 内部数据 + self._base_url = None + self._site_cookie = None + self._index_html = None + self._addition_headers = None + + # 站点页面 + self._brief_page = "index.php" + self._user_detail_page = "userdetails.php?id=" + self._user_traffic_page = "index.php" + self._torrent_seeding_page = "getusertorrentlistajax.php?userid=" + self._user_mail_unread_page = "messages.php?action=viewmailbox&box=1&unread=yes" + self._sys_mail_unread_page = "messages.php?action=viewmailbox&box=-2&unread=yes" + self._torrent_seeding_params = None + self._torrent_seeding_headers = None + + split_url = urlsplit(url) + self.site_name = site_name + self.site_url = url + self._base_url = f"{split_url.scheme}://{split_url.netloc}" + self._favicon_url = urljoin(self._base_url, "favicon.ico") + self.site_favicon = "" + self._site_cookie = site_cookie + self._index_html = index_html + self._session = session if session else requests.Session() + self._ua = ua + + self._emulate = emulate + self._proxy = proxy + + def site_schema(self): + """ + 站点解析模型 + :return: 站点解析模型 + """ + return self.schema + + @classmethod + def match(cls, html_text: str) -> bool: + """ + 是否匹配当前解析模型 + :param html_text: 站点首页html + :return: 是否匹配 + """ + pass + + def parse(self): + """ + 解析站点信息 + :return: + """ + self._parse_favicon(self._index_html) + if not 
self._parse_logged_in(self._index_html): + return + + self._parse_site_page(self._index_html) + self._parse_user_base_info(self._index_html) + self._pase_unread_msgs() + if self._user_traffic_page: + self._parse_user_traffic_info(self._get_page_content(urljoin(self._base_url, self._user_traffic_page))) + if self._user_detail_page: + self._parse_user_detail_info(self._get_page_content(urljoin(self._base_url, self._user_detail_page))) + + self._parse_seeding_pages() + self.seeding_info = json.dumps(self.seeding_info) + + def _pase_unread_msgs(self): + """ + 解析所有未读消息标题和内容 + :return: + """ + unread_msg_links = [] + if self.message_unread > 0: + links = {self._user_mail_unread_page, self._sys_mail_unread_page} + for link in links: + if not link: + continue + + msg_links = [] + next_page = self._parse_message_unread_links( + self._get_page_content(urljoin(self._base_url, link)), msg_links) + while next_page: + next_page = self._parse_message_unread_links( + self._get_page_content(urljoin(self._base_url, next_page)), msg_links) + + unread_msg_links.extend(msg_links) + + for msg_link in unread_msg_links: + logger.debug(f"{self.site_name} 信息链接 {msg_link}") + head, date, content = self._parse_message_content(self._get_page_content(urljoin(self._base_url, msg_link))) + logger.debug(f"{self.site_name} 标题 {head} 时间 {date} 内容 {content}") + self.message_unread_contents.append((head, date, content)) + + def _parse_seeding_pages(self): + if self._torrent_seeding_page: + # 第一页 + next_page = self._parse_user_torrent_seeding_info( + self._get_page_content(urljoin(self._base_url, self._torrent_seeding_page), + self._torrent_seeding_params, + self._torrent_seeding_headers)) + + # 其他页处理 + while next_page: + next_page = self._parse_user_torrent_seeding_info( + self._get_page_content(urljoin(urljoin(self._base_url, self._torrent_seeding_page), next_page), + self._torrent_seeding_params, + self._torrent_seeding_headers), + multi_page=True) + + @staticmethod + def 
_prepare_html_text(html_text): + """ + 处理掉HTML中的干扰部分 + """ + return re.sub(r"#\d+", "", re.sub(r"\d+px", "", html_text)) + + @abstractmethod + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + """ + 获取未阅读消息链接 + :param html_text: + :return: + """ + pass + + def _parse_favicon(self, html_text): + """ + 解析站点favicon,返回base64 fav图标 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if html: + fav_link = html.xpath('//head/link[contains(@rel, "icon")]/@href') + if fav_link: + self._favicon_url = urljoin(self._base_url, fav_link[0]) + + res = RequestUtils(cookies=self._site_cookie, session=self._session, timeout=60, headers=self._ua).get_res( + url=self._favicon_url) + if res: + self.site_favicon = base64.b64encode(res.content).decode() + + def _get_page_content(self, url, params=None, headers=None): + """ + :param url: 网页地址 + :param params: post参数 + :param headers: 额外的请求头 + :return: + """ + req_headers = None + proxies = settings.PROXY if self._proxy else None + if self._ua or headers or self._addition_headers: + req_headers = {} + if headers: + req_headers.update(headers) + + req_headers.update({ + "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", + "User-Agent": f"{self._ua}" + }) + + if self._addition_headers: + req_headers.update(self._addition_headers) + + if params: + res = RequestUtils(cookies=self._site_cookie, + session=self._session, + timeout=60, + proxies=proxies, + headers=req_headers).post_res(url=url, data=params) + else: + res = RequestUtils(cookies=self._site_cookie, + session=self._session, + timeout=60, + proxies=proxies, + headers=req_headers).get_res(url=url) + if res is not None and res.status_code in (200, 500, 403): + # 如果cloudflare 有防护,尝试使用浏览器仿真 + if under_challenge(res.text): + logger.warn( + f"{self.site_name} 检测到Cloudflare,请更新Cookie和UA") + return "" + if "charset=utf-8" in res.text or "charset=UTF-8" in res.text: + res.encoding = "UTF-8" + else: + res.encoding = 
res.apparent_encoding + return res.text + + return "" + + @abstractmethod + def _parse_site_page(self, html_text: str): + """ + 解析站点相关信息页面 + :param html_text: + :return: + """ + pass + + @abstractmethod + def _parse_user_base_info(self, html_text: str): + """ + 解析用户基础信息 + :param html_text: + :return: + """ + pass + + def _parse_logged_in(self, html_text): + """ + 解析用户是否已经登陆 + :param html_text: + :return: True/False + """ + logged_in = self.is_logged_in(html_text) + if not logged_in: + self.err_msg = "未检测到已登陆,请检查cookies是否过期" + logger.warn(f"{self.site_name} 未登录,跳过后续操作") + + return logged_in + + @abstractmethod + def _parse_user_traffic_info(self, html_text: str): + """ + 解析用户的上传,下载,分享率等信息 + :param html_text: + :return: + """ + pass + + @abstractmethod + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 解析用户的做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + pass + + @abstractmethod + def _parse_user_detail_info(self, html_text: str): + """ + 解析用户的详细信息 + 加入时间/等级/魔力值等 + :param html_text: + :return: + """ + pass + + @abstractmethod + def _parse_message_content(self, html_text): + """ + 解析短消息内容 + :param html_text: + :return: head: message, date: time, content: message content + """ + pass + + @classmethod + def is_logged_in(cls, html_text: str) -> bool: + """ + 判断站点是否已经登陆 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return False + # 存在明显的密码输入框,说明未登录 + if html.xpath("//input[@type='password']"): + return False + # 是否存在登出和用户面板等链接 + xpaths = ['//a[contains(@href, "logout")' + ' or contains(@data-url, "logout")' + ' or contains(@href, "mybonus") ' + ' or contains(@onclick, "logout")' + ' or contains(@href, "usercp")]', + '//form[contains(@action, "logout")]'] + for xpath in xpaths: + if html.xpath(xpath): + return True + user_info_div = html.xpath('//div[@class="user-info-side"]') + if user_info_div: + return True + + return False diff --git 
a/app/plugins/sitestatistics/siteuserinfo/discuz.py b/app/plugins/sitestatistics/siteuserinfo/discuz.py new file mode 100644 index 00000000..9c67f78f --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/discuz.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class DiscuzUserInfo(ISiteUserInfo): + schema = SiteSchema.DiscuzX + order = SITE_BASE_ORDER + 10 + + @classmethod + def match(cls, html_text: str) -> bool: + html = etree.HTML(html_text) + if not html: + return False + + printable_text = html.xpath("string(.)") if html else "" + return 'Powered by Discuz!' in printable_text + + def _parse_user_base_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + + user_info = html.xpath('//a[contains(@href, "&uid=")]') + if user_info: + user_id_match = re.search(r"&uid=(\d+)", user_info[0].attrib['href']) + if user_id_match and user_id_match.group().strip(): + self.userid = user_id_match.group(1) + self._torrent_seeding_page = f"forum.php?&mod=torrents&cat_5up=on" + self._user_detail_page = user_info[0].attrib['href'] + self.username = user_info[0].text.strip() + + def _parse_site_page(self, html_text: str): + # TODO + pass + + def _parse_user_detail_info(self, html_text: str): + """ + 解析用户额外信息,加入时间,等级 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return None + + # 用户等级 + user_levels_text = html.xpath('//a[contains(@href, "usergroup")]/text()') + if user_levels_text: + self.user_level = user_levels_text[-1].strip() + + # 加入日期 + join_at_text = html.xpath('//li[em[text()="注册时间"]]/text()') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip()) + + # 分享率 + ratio_text = html.xpath('//li[contains(.//text(), 
"分享率")]//text()') + if ratio_text: + ratio_match = re.search(r"\(([\d,.]+)\)", ratio_text[0]) + if ratio_match and ratio_match.group(1).strip(): + self.ratio = StringUtils.str_float(ratio_match.group(1)) + + # 积分 + bouns_text = html.xpath('//li[em[text()="积分"]]/text()') + if bouns_text: + self.bonus = StringUtils.str_float(bouns_text[0].strip()) + + # 上传 + upload_text = html.xpath('//li[em[contains(text(),"上传量")]]/text()') + if upload_text: + self.upload = StringUtils.num_filesize(upload_text[0].strip().split('/')[-1]) + + # 下载 + download_text = html.xpath('//li[em[contains(text(),"下载量")]]/text()') + if download_text: + self.download = StringUtils.num_filesize(download_text[0].strip().split('/')[-1]) + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 3 + seeders_col = 4 + # 搜索size列 + if html.xpath('//tr[position()=1]/td[.//img[@class="size"] and .//img[@alt="size"]]'): + size_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="size"] ' + 'and .//img[@alt="size"]]/preceding-sibling::td')) + 1 + # 搜索seeders列 + if html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] and .//img[@alt="seeders"]]'): + seeders_col = len(html.xpath('//tr[position()=1]/td[.//img[@class="seeders"] ' + 'and .//img[@alt="seeders"]]/preceding-sibling::td')) + 1 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = html.xpath(f'//tr[position()>1]/td[{size_col}]') + seeding_seeders = html.xpath(f'//tr[position()>1]/td[{seeders_col}]//text()') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i]) + + page_seeding_size += size + 
page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href') + if next_page_text: + next_page = next_page_text[-1].strip() + + return next_page + + def _parse_user_traffic_info(self, html_text: str): + pass + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/file_list.py b/app/plugins/sitestatistics/siteuserinfo/file_list.py new file mode 100644 index 00000000..0c4e4d54 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/file_list.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class FileListSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.FileList + order = SITE_BASE_ORDER + 50 + + @classmethod + def match(cls, html_text: str) -> bool: + html = etree.HTML(html_text) + if not html: + return False + + printable_text = html.xpath("string(.)") if html else "" + return 'Powered by FileList' in printable_text + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + user_detail = re.search(r"userdetails.php\?id=(\d+)", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') + self.userid = user_detail.group(1) + + self._torrent_seeding_page = f"snatchlist.php?id={self.userid}&action=torrents&type=seeding" + + def _parse_user_base_info(self, html_text: str): + html_text = 
self._prepare_html_text(html_text) + html = etree.HTML(html_text) + + ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()') + if ret: + self.username = str(ret[0]) + + def _parse_user_traffic_info(self, html_text: str): + """ + 上传/下载/分享率 [做种数/魔力值] + :param html_text: + :return: + """ + return + + def _parse_user_detail_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + + upload_html = html.xpath('//table//tr/td[text()="Uploaded"]/following-sibling::td//text()') + if upload_html: + self.upload = StringUtils.num_filesize(upload_html[0]) + download_html = html.xpath('//table//tr/td[text()="Downloaded"]/following-sibling::td//text()') + if download_html: + self.download = StringUtils.num_filesize(download_html[0]) + + self.ratio = 0 if self.download == 0 else self.upload / self.download + + user_level_html = html.xpath('//table//tr/td[text()="Class"]/following-sibling::td//text()') + if user_level_html: + self.user_level = user_level_html[0].strip() + + join_at_html = html.xpath('//table//tr/td[contains(text(), "Join")]/following-sibling::td//text()') + if join_at_html: + self.join_at = StringUtils.unify_datetime_str(join_at_html[0].strip()) + + bonus_html = html.xpath('//a[contains(@href, "shop.php")]') + if bonus_html: + self.bonus = StringUtils.str_float(bonus_html[0].xpath("string(.)").strip()) + pass + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 6 + seeders_col = 7 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = html.xpath(f'//table/tr[position()>1]/td[{size_col}]') + seeding_seeders = html.xpath(f'//table/tr[position()>1]/td[{seeders_col}]') + if seeding_sizes and seeding_seeders: + page_seeding = 
len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip()) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + + return next_page + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/gazelle.py b/app/plugins/sitestatistics/siteuserinfo/gazelle.py new file mode 100644 index 00000000..cc53b0ba --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/gazelle.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class GazelleSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.Gazelle + order = SITE_BASE_ORDER + + @classmethod + def match(cls, html_text: str) -> bool: + html = etree.HTML(html_text) + if not html: + return False + + printable_text = html.xpath("string(.)") if html else "" + + return "Powered by Gazelle" in printable_text or "DIC Music" in printable_text + + def _parse_user_base_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + + tmps = html.xpath('//a[contains(@href, "user.php?id=")]') + if tmps: + user_id_match = re.search(r"user.php\?id=(\d+)", tmps[0].attrib['href']) + if user_id_match and user_id_match.group().strip(): + self.userid = user_id_match.group(1) + self._torrent_seeding_page = f"torrents.php?type=seeding&userid={self.userid}" + 
self._user_detail_page = f"user.php?id={self.userid}" + self.username = tmps[0].text.strip() + + tmps = html.xpath('//*[@id="header-uploaded-value"]/@data-value') + if tmps: + self.upload = StringUtils.num_filesize(tmps[0]) + else: + tmps = html.xpath('//li[@id="stats_seeding"]/span/text()') + if tmps: + self.upload = StringUtils.num_filesize(tmps[0]) + + tmps = html.xpath('//*[@id="header-downloaded-value"]/@data-value') + if tmps: + self.download = StringUtils.num_filesize(tmps[0]) + else: + tmps = html.xpath('//li[@id="stats_leeching"]/span/text()') + if tmps: + self.download = StringUtils.num_filesize(tmps[0]) + + self.ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3) + + tmps = html.xpath('//a[contains(@href, "bonus.php")]/@data-tooltip') + if tmps: + bonus_match = re.search(r"([\d,.]+)", tmps[0]) + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1)) + else: + tmps = html.xpath('//a[contains(@href, "bonus.php")]') + if tmps: + bonus_text = tmps[0].xpath("string(.)") + bonus_match = re.search(r"([\d,.]+)", bonus_text) + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1)) + + def _parse_site_page(self, html_text: str): + # TODO + pass + + def _parse_user_detail_info(self, html_text: str): + """ + 解析用户额外信息,加入时间,等级 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return None + + # 用户等级 + user_levels_text = html.xpath('//*[@id="class-value"]/@data-value') + if user_levels_text: + self.user_level = user_levels_text[0].strip() + else: + user_levels_text = html.xpath('//li[contains(text(), "用户等级")]/text()') + if user_levels_text: + self.user_level = user_levels_text[0].split(':')[1].strip() + + # 加入日期 + join_at_text = html.xpath('//*[@id="join-date-value"]/@data-value') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip()) + else: + join_at_text = 
html.xpath( + '//div[contains(@class, "box_userinfo_stats")]//li[contains(text(), "加入时间")]/span/text()') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str(join_at_text[0].strip()) + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 3 + # 搜索size列 + if html.xpath('//table[contains(@id, "torrent")]//tr[1]/td'): + size_col = len(html.xpath('//table[contains(@id, "torrent")]//tr[1]/td')) - 3 + # 搜索seeders列 + seeders_col = size_col + 2 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{size_col}]') + seeding_seeders = html.xpath(f'//table[contains(@id, "torrent")]//tr[position()>1]/td[{seeders_col}]/text()') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = int(seeding_seeders[i]) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + if multi_page: + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + else: + if not self.seeding: + self.seeding = page_seeding + if not self.seeding_size: + self.seeding_size = page_seeding_size + if not self.seeding_info: + self.seeding_info = page_seeding_info + + # 是否存在下页数据 + next_page = None + next_page_text = html.xpath('//a[contains(.//text(), "Next") or contains(.//text(), "下一页")]/@href') + if next_page_text: + next_page = next_page_text[-1].strip() + + return next_page + + def _parse_user_traffic_info(self, html_text: str): + # TODO + pass + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def 
_parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/ipt_project.py b/app/plugins/sitestatistics/siteuserinfo/ipt_project.py new file mode 100644 index 00000000..26af3202 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/ipt_project.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class IptSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.Ipt + order = SITE_BASE_ORDER + 35 + + @classmethod + def match(cls, html_text: str) -> bool: + return 'IPTorrents' in html_text + + def _parse_user_base_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + tmps = html.xpath('//a[contains(@href, "/u/")]//text()') + tmps_id = html.xpath('//a[contains(@href, "/u/")]/@href') + if tmps: + self.username = str(tmps[-1]) + if tmps_id: + user_id_match = re.search(r"/u/(\d+)", tmps_id[0]) + if user_id_match and user_id_match.group().strip(): + self.userid = user_id_match.group(1) + self._user_detail_page = f"user.php?u={self.userid}" + self._torrent_seeding_page = f"peers?u={self.userid}" + + tmps = html.xpath('//div[@class = "stats"]/div/div') + if tmps: + self.upload = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[1]).strip()) + self.download = StringUtils.num_filesize(str(tmps[0].xpath('span/text()')[2]).strip()) + self.seeding = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[0]) + self.leeching = StringUtils.str_int(tmps[0].xpath('a')[2].xpath('text()')[1]) + self.ratio = StringUtils.str_float(str(tmps[0].xpath('span/text()')[0]).strip().replace('-', '0')) + self.bonus = StringUtils.str_float(tmps[0].xpath('a')[3].xpath('text()')[0]) + + def _parse_site_page(self, html_text: str): + # TODO + pass + + 
def _parse_user_detail_info(self, html_text: str): + html = etree.HTML(html_text) + if not html: + return + + user_levels_text = html.xpath('//tr/th[text()="Class"]/following-sibling::td[1]/text()') + if user_levels_text: + self.user_level = user_levels_text[0].strip() + + # 加入日期 + join_at_text = html.xpath('//tr/th[text()="Join date"]/following-sibling::td[1]/text()') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0]) + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + html = etree.HTML(html_text) + if not html: + return + # seeding start + seeding_end_pos = 3 + if html.xpath('//tr/td[text() = "Leechers"]'): + seeding_end_pos = len(html.xpath('//tr/td[text() = "Leechers"]/../preceding-sibling::tr')) + 1 + seeding_end_pos = seeding_end_pos - 3 + + page_seeding = 0 + page_seeding_size = 0 + seeding_torrents = html.xpath('//tr/td[text() = "Seeders"]/../following-sibling::tr/td[position()=6]/text()') + if seeding_torrents: + page_seeding = seeding_end_pos + for per_size in seeding_torrents[:seeding_end_pos]: + if '(' in per_size and ')' in per_size: + per_size = per_size.split('(')[-1] + per_size = per_size.split(')')[0] + + page_seeding_size += StringUtils.num_filesize(per_size) + + self.seeding = page_seeding + self.seeding_size = page_seeding_size + + def _parse_user_traffic_info(self, html_text: str): + # TODO + pass + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/nexus_php.py b/app/plugins/sitestatistics/siteuserinfo/nexus_php.py new file mode 100644 index 00000000..d9ade094 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/nexus_php.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.log import 
logger +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class NexusPhpSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.NexusPhp + order = SITE_BASE_ORDER * 2 + + @classmethod + def match(cls, html_text: str) -> bool: + """ + 默认使用NexusPhp解析 + :param html_text: + :return: + """ + return True + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + user_detail = re.search(r"userdetails.php\?id=(\d+)", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') + self.userid = user_detail.group(1) + self._torrent_seeding_page = f"getusertorrentlistajax.php?userid={self.userid}&type=seeding" + else: + user_detail = re.search(r"(userdetails)", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') + self.userid = None + self._torrent_seeding_page = None + + def _parse_message_unread(self, html_text): + """ + 解析未读短消息数量 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return + + message_labels = html.xpath('//a[@href="messages.php"]/..') + message_labels.extend(html.xpath('//a[contains(@href, "messages.php")]/..')) + if message_labels: + message_text = message_labels[0].xpath("string(.)") + + logger.debug(f"{self.site_name} 消息原始信息 {message_text}") + message_unread_match = re.findall(r"[^Date](信息箱\s*|\(|你有\xa0)(\d+)", message_text) + + if message_unread_match and len(message_unread_match[-1]) == 2: + self.message_unread = StringUtils.str_int(message_unread_match[-1][1]) + elif message_text.isdigit(): + self.message_unread = StringUtils.str_int(message_text) + + def _parse_user_base_info(self, html_text: str): + # 合并解析,减少额外请求调用 + self.__parse_user_traffic_info(html_text) + self._user_traffic_page = None + + 
self._parse_message_unread(html_text) + + html = etree.HTML(html_text) + if not html: + return + + ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//b//text()') + if ret: + self.username = str(ret[0]) + return + ret = html.xpath(f'//a[contains(@href, "userdetails") and contains(@href, "{self.userid}")]//text()') + if ret: + self.username = str(ret[0]) + + ret = html.xpath('//a[contains(@href, "userdetails")]//strong//text()') + if ret: + self.username = str(ret[0]) + return + + def __parse_user_traffic_info(self, html_text): + html_text = self._prepare_html_text(html_text) + upload_match = re.search(r"[^总]上[传傳]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, + re.IGNORECASE) + self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0 + download_match = re.search(r"[^总子影力]下[载載]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, + re.IGNORECASE) + self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0 + ratio_match = re.search(r"分享率[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)", html_text) + # 计算分享率 + calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3) + # 优先使用页面上的分享率 + self.ratio = StringUtils.str_float(ratio_match.group(1)) if ( + ratio_match and ratio_match.group(1).strip()) else calc_ratio + leeching_match = re.search(r"(Torrents leeching|下载中)[\u4E00-\u9FA5\D\s]+(\d+)[\s\S]+<", html_text) + self.leeching = StringUtils.str_int(leeching_match.group(2)) if leeching_match and leeching_match.group( + 2).strip() else 0 + html = etree.HTML(html_text) + has_ucoin, self.bonus = self.__parse_ucoin(html) + if has_ucoin: + return + tmps = html.xpath('//a[contains(@href,"mybonus")]/text()') if html else None + if tmps: + bonus_text = str(tmps[0]).strip() + bonus_match = re.search(r"([\d,.]+)", bonus_text) + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1)) 
+ return + bonus_match = re.search(r"mybonus.[\[\]::<>/a-zA-Z_\-=\"'\s#;.(使用魔力值豆]+\s*([\d,.]+)[<()&\s]", html_text) + try: + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1)) + return + bonus_match = re.search(r"[魔力值|\]][\[\]::<>/a-zA-Z_\-=\"'\s#;]+\s*([\d,.]+|\"[\d,.]+\")[<>()&\s]", + html_text, + flags=re.S) + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1).strip('"')) + except Exception as err: + logger.error(f"{self.site_name} 解析魔力值出错, 错误信息: {err}") + + @staticmethod + def __parse_ucoin(html): + """ + 解析ucoin, 统一转换为铜币 + :param html: + :return: + """ + if html: + gold, silver, copper = None, None, None + + golds = html.xpath('//span[@class = "ucoin-symbol ucoin-gold"]//text()') + if golds: + gold = StringUtils.str_float(str(golds[-1])) + silvers = html.xpath('//span[@class = "ucoin-symbol ucoin-silver"]//text()') + if silvers: + silver = StringUtils.str_float(str(silvers[-1])) + coppers = html.xpath('//span[@class = "ucoin-symbol ucoin-copper"]//text()') + if coppers: + copper = StringUtils.str_float(str(coppers[-1])) + if gold or silver or copper: + gold = gold if gold else 0 + silver = silver if silver else 0 + copper = copper if copper else 0 + return True, gold * 100 * 100 + silver * 100 + copper + return False, 0.0 + + def _parse_user_traffic_info(self, html_text: str): + """ + 上传/下载/分享率 [做种数/魔力值] + :param html_text: + :return: + """ + pass + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(str(html_text).replace(r'\/', '/')) + if not html: + return None + + # 首页存在扩展链接,使用扩展链接 + seeding_url_text = html.xpath('//a[contains(@href,"torrents.php") ' + 'and contains(@href,"seeding")]/@href') + if multi_page is False and seeding_url_text and seeding_url_text[0].strip(): + 
self._torrent_seeding_page = seeding_url_text[0].strip() + return self._torrent_seeding_page + + size_col = 3 + seeders_col = 4 + # 搜索size列 + size_col_xpath = '//tr[position()=1]/' \ + 'td[(img[@class="size"] and img[@alt="size"])' \ + ' or (text() = "大小")' \ + ' or (a/img[@class="size" and @alt="size"])]' + if html.xpath(size_col_xpath): + size_col = len(html.xpath(f'{size_col_xpath}/preceding-sibling::td')) + 1 + # 搜索seeders列 + seeders_col_xpath = '//tr[position()=1]/' \ + 'td[(img[@class="seeders"] and img[@alt="seeders"])' \ + ' or (text() = "在做种")' \ + ' or (a/img[@class="seeders" and @alt="seeders"])]' + if html.xpath(seeders_col_xpath): + seeders_col = len(html.xpath(f'{seeders_col_xpath}/preceding-sibling::td')) + 1 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + # 如果 table class="torrents",则增加table[@class="torrents"] + table_class = '//table[@class="torrents"]' if html.xpath('//table[@class="torrents"]') else '' + seeding_sizes = html.xpath(f'{table_class}//tr[position()>1]/td[{size_col}]') + seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]/b/a/text()') + if not seeding_seeders: + seeding_seeders = html.xpath(f'{table_class}//tr[position()>1]/td[{seeders_col}]//text()') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i]) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href') + if next_page_text: + next_page = next_page_text[-1].strip() + # fix up page url + if self.userid not in next_page: + next_page = 
f'{next_page}&userid={self.userid}&type=seeding' + + return next_page + + def _parse_user_detail_info(self, html_text: str): + """ + 解析用户额外信息,加入时间,等级 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return + + self.__get_user_level(html) + + self.__fixup_traffic_info(html) + + # 加入日期 + join_at_text = html.xpath( + '//tr/td[text()="加入日期" or text()="注册日期" or *[text()="加入日期"]]/following-sibling::td[1]//text()' + '|//div/b[text()="加入日期"]/../text()') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip()) + + # 做种体积 & 做种数 + # seeding 页面获取不到的话,此处再获取一次 + seeding_sizes = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//' + 'table[tr[1][td[4 and text()="尺寸"]]]//tr[position()>1]/td[4]') + seeding_seeders = html.xpath('//tr/td[text()="当前上传"]/following-sibling::td[1]//' + 'table[tr[1][td[5 and text()="做种者"]]]//tr[position()>1]/td[5]//text()') + tmp_seeding = len(seeding_sizes) + tmp_seeding_size = 0 + tmp_seeding_info = [] + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i]) + + tmp_seeding_size += size + tmp_seeding_info.append([seeders, size]) + + if not self.seeding_size: + self.seeding_size = tmp_seeding_size + if not self.seeding: + self.seeding = tmp_seeding + if not self.seeding_info: + self.seeding_info = tmp_seeding_info + + seeding_sizes = html.xpath('//tr/td[text()="做种统计"]/following-sibling::td[1]//text()') + if seeding_sizes: + seeding_match = re.search(r"总做种数:\s+(\d+)", seeding_sizes[0], re.IGNORECASE) + seeding_size_match = re.search(r"总做种体积:\s+([\d,.\s]+[KMGTPI]*B)", seeding_sizes[0], re.IGNORECASE) + tmp_seeding = StringUtils.str_int(seeding_match.group(1)) if ( + seeding_match and seeding_match.group(1)) else 0 + tmp_seeding_size = StringUtils.num_filesize( + seeding_size_match.group(1).strip()) if seeding_size_match else 0 + if not 
self.seeding_size: + self.seeding_size = tmp_seeding_size + if not self.seeding: + self.seeding = tmp_seeding + + self.__fixup_torrent_seeding_page(html) + + def __fixup_torrent_seeding_page(self, html): + """ + 修正种子页面链接 + :param html: + :return: + """ + # 单独的种子页面 + seeding_url_text = html.xpath('//a[contains(@href,"getusertorrentlist.php") ' + 'and contains(@href,"seeding")]/@href') + if seeding_url_text: + self._torrent_seeding_page = seeding_url_text[0].strip() + # 从JS调用种获取用户ID + seeding_url_text = html.xpath('//a[contains(@href, "javascript: getusertorrentlistajax") ' + 'and contains(@href,"seeding")]/@href') + csrf_text = html.xpath('//meta[@name="x-csrf"]/@content') + if not self._torrent_seeding_page and seeding_url_text: + user_js = re.search(r"javascript: getusertorrentlistajax\(\s*'(\d+)", seeding_url_text[0]) + if user_js and user_js.group(1).strip(): + self.userid = user_js.group(1).strip() + self._torrent_seeding_page = f"getusertorrentlistajax.php?userid={self.userid}&type=seeding" + elif seeding_url_text and csrf_text: + if csrf_text[0].strip(): + self._torrent_seeding_page \ + = f"ajax_getusertorrentlist.php" + self._torrent_seeding_params = {'userid': self.userid, 'type': 'seeding', 'csrf': csrf_text[0].strip()} + + # 分类做种模式 + # 临时屏蔽 + # seeding_url_text = html.xpath('//tr/td[text()="当前做种"]/following-sibling::td[1]' + # '/table//td/a[contains(@href,"seeding")]/@href') + # if seeding_url_text: + # self._torrent_seeding_page = seeding_url_text + + def __get_user_level(self, html): + # 等级 获取同一行等级数据,图片格式等级,取title信息,否则取文本信息 + user_levels_text = html.xpath('//tr/td[text()="等級" or text()="等级" or *[text()="等级"]]/' + 'following-sibling::td[1]/img[1]/@title') + if user_levels_text: + self.user_level = user_levels_text[0].strip() + return + + user_levels_text = html.xpath('//tr/td[text()="等級" or text()="等级"]/' + 'following-sibling::td[1 and not(img)]' + '|//tr/td[text()="等級" or text()="等级"]/' + 'following-sibling::td[1 and img[not(@title)]]') + if 
user_levels_text: + self.user_level = user_levels_text[0].xpath("string(.)").strip() + return + + user_levels_text = html.xpath('//tr/td[text()="等級" or text()="等级"]/' + 'following-sibling::td[1]') + if user_levels_text: + self.user_level = user_levels_text[0].xpath("string(.)").strip() + return + + user_levels_text = html.xpath('//a[contains(@href, "userdetails")]/text()') + if not self.user_level and user_levels_text: + for user_level_text in user_levels_text: + user_level_match = re.search(r"\[(.*)]", user_level_text) + if user_level_match and user_level_match.group(1).strip(): + self.user_level = user_level_match.group(1).strip() + break + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + html = etree.HTML(html_text) + if not html: + return None + + message_links = html.xpath('//tr[not(./td/img[@alt="Read"])]/td/a[contains(@href, "viewmessage")]/@href') + msg_links.extend(message_links) + # 是否存在下页数据 + next_page = None + next_page_text = html.xpath('//a[contains(.//text(), "下一页") or contains(.//text(), "下一頁")]/@href') + if next_page_text: + next_page = next_page_text[-1].strip() + + return next_page + + def _parse_message_content(self, html_text): + html = etree.HTML(html_text) + if not html: + return None, None, None + # 标题 + message_head_text = None + message_head = html.xpath('//h1/text()' + '|//div[@class="layui-card-header"]/span[1]/text()') + if message_head: + message_head_text = message_head[-1].strip() + + # 消息时间 + message_date_text = None + message_date = html.xpath('//h1/following-sibling::table[.//tr/td[@class="colhead"]]//tr[2]/td[2]' + '|//div[@class="layui-card-header"]/span[2]/span[2]') + if message_date: + message_date_text = message_date[0].xpath("string(.)").strip() + + # 消息内容 + message_content_text = None + message_content = html.xpath('//h1/following-sibling::table[.//tr/td[@class="colhead"]]//tr[3]/td' + '|//div[contains(@class,"layui-card-body")]') + if message_content: + message_content_text = 
message_content[0].xpath("string(.)").strip() + + return message_head_text, message_date_text, message_content_text + + def __fixup_traffic_info(self, html): + # fixup bonus + if not self.bonus: + bonus_text = html.xpath('//tr/td[text()="魔力值" or text()="猫粮"]/following-sibling::td[1]/text()') + if bonus_text: + self.bonus = StringUtils.str_float(bonus_text[0].strip()) diff --git a/app/plugins/sitestatistics/siteuserinfo/nexus_project.py b/app/plugins/sitestatistics/siteuserinfo/nexus_project.py new file mode 100644 index 00000000..54c49fe5 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/nexus_project.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +import re + +from app.plugins.sitestatistics.siteuserinfo import SITE_BASE_ORDER +from app.plugins.sitestatistics.siteuserinfo.nexus_php import NexusPhpSiteUserInfo +from app.utils.types import SiteSchema + + +class NexusProjectSiteUserInfo(NexusPhpSiteUserInfo): + schema = SiteSchema.NexusProject + order = SITE_BASE_ORDER + 25 + + @classmethod + def match(cls, html_text: str) -> bool: + return 'Nexus Project' in html_text + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + user_detail = re.search(r"userdetails.php\?id=(\d+)", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') + self.userid = user_detail.group(1) + + self._torrent_seeding_page = f"viewusertorrents.php?id={self.userid}&show=seeding" diff --git a/app/plugins/sitestatistics/siteuserinfo/nexus_rabbit.py b/app/plugins/sitestatistics/siteuserinfo/nexus_rabbit.py new file mode 100644 index 00000000..07f865ef --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/nexus_rabbit.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +import json +from typing import Optional + +from lxml import etree + +from app.log import logger +from app.plugins.sitestatistics.siteuserinfo import SITE_BASE_ORDER +from 
app.plugins.sitestatistics.siteuserinfo.nexus_php import NexusPhpSiteUserInfo +from app.utils.types import SiteSchema + + +class NexusRabbitSiteUserInfo(NexusPhpSiteUserInfo): + schema = SiteSchema.NexusRabbit + order = SITE_BASE_ORDER + 5 + + @classmethod + def match(cls, html_text: str) -> bool: + html = etree.HTML(html_text) + if not html: + return False + + printable_text = html.xpath("string(.)") if html else "" + return 'Style by Rabbit' in printable_text + + def _parse_site_page(self, html_text: str): + super()._parse_site_page(html_text) + self._torrent_seeding_page = f"getusertorrentlistajax.php?page=1&limit=5000000&type=seeding&uid={self.userid}" + self._torrent_seeding_headers = {"Accept": "application/json, text/javascript, */*; q=0.01"} + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + + try: + torrents = json.loads(html_text).get('data') + except Exception as e: + logger.error(f"解析做种信息失败: {e}") + return + + page_seeding_size = 0 + page_seeding_info = [] + + page_seeding = len(torrents) + for torrent in torrents: + seeders = int(torrent.get('seeders', 0)) + size = int(torrent.get('size', 0)) + page_seeding_size += int(torrent.get('size', 0)) + + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) diff --git a/app/plugins/sitestatistics/siteuserinfo/small_horse.py b/app/plugins/sitestatistics/siteuserinfo/small_horse.py new file mode 100644 index 00000000..5a4cf8ff --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/small_horse.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import 
SiteSchema + + +class SmallHorseSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.SmallHorse + order = SITE_BASE_ORDER + 30 + + @classmethod + def match(cls, html_text: str) -> bool: + return 'Small Horse' in html_text + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + user_detail = re.search(r"user.php\?id=(\d+)", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') + self.userid = user_detail.group(1) + self._torrent_seeding_page = f"torrents.php?type=seeding&userid={self.userid}" + self._user_traffic_page = f"user.php?id={self.userid}" + + def _parse_user_base_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + ret = html.xpath('//a[contains(@href, "user.php")]//text()') + if ret: + self.username = str(ret[0]) + + def _parse_user_traffic_info(self, html_text: str): + """ + 上传/下载/分享率 [做种数/魔力值] + :param html_text: + :return: + """ + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + tmps = html.xpath('//ul[@class = "stats nobullet"]') + if tmps: + if tmps[1].xpath("li") and tmps[1].xpath("li")[0].xpath("span//text()"): + self.join_at = StringUtils.unify_datetime_str(tmps[1].xpath("li")[0].xpath("span//text()")[0]) + self.upload = StringUtils.num_filesize(str(tmps[1].xpath("li")[2].xpath("text()")[0]).split(":")[1].strip()) + self.download = StringUtils.num_filesize( + str(tmps[1].xpath("li")[3].xpath("text()")[0]).split(":")[1].strip()) + if tmps[1].xpath("li")[4].xpath("span//text()"): + self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[4].xpath("span//text()")[0]).replace('∞', '0')) + else: + self.ratio = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1]) + self.bonus = StringUtils.str_float(str(tmps[1].xpath("li")[5].xpath("text()")[0]).split(":")[1]) + self.user_level = 
str(tmps[3].xpath("li")[0].xpath("text()")[0]).split(":")[1].strip() + self.leeching = StringUtils.str_int( + (tmps[4].xpath("li")[6].xpath("text()")[0]).split(":")[1].replace("[", "")) + + def _parse_user_detail_info(self, html_text: str): + pass + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 6 + seeders_col = 8 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{size_col}]') + seeding_seeders = html.xpath(f'//table[@id="torrent_table"]//tr[position()>1]/td[{seeders_col}]') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip()) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li') + if next_pages and len(next_pages) > 1: + page_num = next_pages[0].xpath("string(.)").strip() + if page_num.isdigit(): + next_page = f"{self._torrent_seeding_page}&page={page_num}" + + return next_page + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/tnode.py b/app/plugins/sitestatistics/siteuserinfo/tnode.py new file mode 100644 index 00000000..3ca99e3f --- /dev/null +++ 
b/app/plugins/sitestatistics/siteuserinfo/tnode.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +import json +import re +from typing import Optional + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class TNodeSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.TNode + order = SITE_BASE_ORDER + 60 + + @classmethod + def match(cls, html_text: str) -> bool: + return 'Powered By TNode' in html_text + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + # + csrf_token = re.search(r'', html_text) + if csrf_token: + self._addition_headers = {'X-CSRF-TOKEN': csrf_token.group(1)} + self._user_detail_page = "api/user/getMainInfo" + self._torrent_seeding_page = "api/user/listTorrentActivity?id=&type=seeding&page=1&size=20000" + + def _parse_logged_in(self, html_text): + """ + 判断是否登录成功, 通过判断是否存在用户信息 + 暂时跳过检测,待后续优化 + :param html_text: + :return: + """ + return True + + def _parse_user_base_info(self, html_text: str): + self.username = self.userid + + def _parse_user_traffic_info(self, html_text: str): + pass + + def _parse_user_detail_info(self, html_text: str): + detail = json.loads(html_text) + if detail.get("status") != 200: + return + + user_info = detail.get("data", {}) + self.userid = user_info.get("id") + self.username = user_info.get("username") + self.user_level = user_info.get("class", {}).get("name") + self.join_at = user_info.get("regTime", 0) + self.join_at = StringUtils.unify_datetime_str(str(self.join_at)) + + self.upload = user_info.get("upload") + self.download = user_info.get("download") + self.ratio = 0 if self.download <= 0 else round(self.upload / self.download, 3) + self.bonus = user_info.get("bonus") + + self.message_unread = user_info.get("unreadAdmin", 0) + user_info.get("unreadInbox", 0) + user_info.get( + "unreadSystem", 0) + pass + + def _parse_user_torrent_seeding_info(self, html_text: 
str, multi_page: bool = False) -> Optional[str]: + """ + 解析用户做种信息 + """ + seeding_info = json.loads(html_text) + if seeding_info.get("status") != 200: + return + + torrents = seeding_info.get("data", {}).get("torrents", []) + + page_seeding_size = 0 + page_seeding_info = [] + for torrent in torrents: + size = torrent.get("size", 0) + seeders = torrent.get("seeding", 0) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += len(torrents) + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + + return next_page + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + """ + 系统信息 api/message/listSystem?page=1&size=20 + 收件箱信息 api/message/listInbox?page=1&size=20 + 管理员信息 api/message/listAdmin?page=1&size=20 + :param html_text: + :return: + """ + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/torrent_leech.py b/app/plugins/sitestatistics/siteuserinfo/torrent_leech.py new file mode 100644 index 00000000..72431d13 --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/torrent_leech.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class TorrentLeechSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.TorrentLeech + order = SITE_BASE_ORDER + 40 + + @classmethod + def match(cls, html_text: str) -> bool: + return 'TorrentLeech' in html_text + + def _parse_site_page(self, html_text: str): + html_text = self._prepare_html_text(html_text) + + user_detail = re.search(r"/profile/([^/]+)/", html_text) + if user_detail and user_detail.group().strip(): + self._user_detail_page = user_detail.group().strip().lstrip('/') 
+ self.userid = user_detail.group(1) + self._user_traffic_page = f"profile/{self.userid}/view" + self._torrent_seeding_page = f"profile/{self.userid}/seeding" + + def _parse_user_base_info(self, html_text: str): + self.username = self.userid + + def _parse_user_traffic_info(self, html_text: str): + """ + 上传/下载/分享率 [做种数/魔力值] + :param html_text: + :return: + """ + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + upload_html = html.xpath('//div[contains(@class,"profile-uploaded")]//span/text()') + if upload_html: + self.upload = StringUtils.num_filesize(upload_html[0]) + download_html = html.xpath('//div[contains(@class,"profile-downloaded")]//span/text()') + if download_html: + self.download = StringUtils.num_filesize(download_html[0]) + ratio_html = html.xpath('//div[contains(@class,"profile-ratio")]//span/text()') + if ratio_html: + self.ratio = StringUtils.str_float(ratio_html[0].replace('∞', '0')) + + user_level_html = html.xpath('//table[contains(@class, "profileViewTable")]' + '//tr/td[text()="Class"]/following-sibling::td/text()') + if user_level_html: + self.user_level = user_level_html[0].strip() + + join_at_html = html.xpath('//table[contains(@class, "profileViewTable")]' + '//tr/td[text()="Registration date"]/following-sibling::td/text()') + if join_at_html: + self.join_at = StringUtils.unify_datetime_str(join_at_html[0].strip()) + + bonus_html = html.xpath('//span[contains(@class, "total-TL-points")]/text()') + if bonus_html: + self.bonus = StringUtils.str_float(bonus_html[0].strip()) + + def _parse_user_detail_info(self, html_text: str): + pass + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 2 + seeders_col = 7 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = 
html.xpath(f'//tbody/tr/td[{size_col}]') + seeding_seeders = html.xpath(f'//tbody/tr/td[{seeders_col}]/text()') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i]) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + + return next_page + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/plugins/sitestatistics/siteuserinfo/unit3d.py b/app/plugins/sitestatistics/siteuserinfo/unit3d.py new file mode 100644 index 00000000..a03430ac --- /dev/null +++ b/app/plugins/sitestatistics/siteuserinfo/unit3d.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +import re +from typing import Optional + +from lxml import etree + +from app.plugins.sitestatistics.siteuserinfo import ISiteUserInfo, SITE_BASE_ORDER +from app.utils.string import StringUtils +from app.utils.types import SiteSchema + + +class Unit3dSiteUserInfo(ISiteUserInfo): + schema = SiteSchema.Unit3d + order = SITE_BASE_ORDER + 15 + + @classmethod + def match(cls, html_text: str) -> bool: + return "unit3d.js" in html_text + + def _parse_user_base_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + html = etree.HTML(html_text) + + tmps = html.xpath('//a[contains(@href, "/users/") and contains(@href, "settings")]/@href') + if tmps: + user_name_match = re.search(r"/users/(.+)/settings", tmps[0]) + if user_name_match and user_name_match.group().strip(): + self.username = user_name_match.group(1) + self._torrent_seeding_page = 
f"/users/{self.username}/active?perPage=100&client=&seeding=include" + self._user_detail_page = f"/users/{self.username}" + + tmps = html.xpath('//a[contains(@href, "bonus/earnings")]') + if tmps: + bonus_text = tmps[0].xpath("string(.)") + bonus_match = re.search(r"([\d,.]+)", bonus_text) + if bonus_match and bonus_match.group(1).strip(): + self.bonus = StringUtils.str_float(bonus_match.group(1)) + + def _parse_site_page(self, html_text: str): + # TODO + pass + + def _parse_user_detail_info(self, html_text: str): + """ + 解析用户额外信息,加入时间,等级 + :param html_text: + :return: + """ + html = etree.HTML(html_text) + if not html: + return None + + # 用户等级 + user_levels_text = html.xpath('//div[contains(@class, "content")]//span[contains(@class, "badge-user")]/text()') + if user_levels_text: + self.user_level = user_levels_text[0].strip() + + # 加入日期 + join_at_text = html.xpath('//div[contains(@class, "content")]//h4[contains(text(), "注册日期") ' + 'or contains(text(), "註冊日期") ' + 'or contains(text(), "Registration date")]/text()') + if join_at_text: + self.join_at = StringUtils.unify_datetime_str( + join_at_text[0].replace('注册日期', '').replace('註冊日期', '').replace('Registration date', '')) + + def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: bool = False) -> Optional[str]: + """ + 做种相关信息 + :param html_text: + :param multi_page: 是否多页数据 + :return: 下页地址 + """ + html = etree.HTML(html_text) + if not html: + return None + + size_col = 9 + seeders_col = 2 + # 搜索size列 + if html.xpath('//thead//th[contains(@class,"size")]'): + size_col = len(html.xpath('//thead//th[contains(@class,"size")][1]/preceding-sibling::th')) + 1 + # 搜索seeders列 + if html.xpath('//thead//th[contains(@class,"seeders")]'): + seeders_col = len(html.xpath('//thead//th[contains(@class,"seeders")]/preceding-sibling::th')) + 1 + + page_seeding = 0 + page_seeding_size = 0 + page_seeding_info = [] + seeding_sizes = html.xpath(f'//tr[position()]/td[{size_col}]') + seeding_seeders = 
html.xpath(f'//tr[position()]/td[{seeders_col}]') + if seeding_sizes and seeding_seeders: + page_seeding = len(seeding_sizes) + + for i in range(0, len(seeding_sizes)): + size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip()) + seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip()) + + page_seeding_size += size + page_seeding_info.append([seeders, size]) + + self.seeding += page_seeding + self.seeding_size += page_seeding_size + self.seeding_info.extend(page_seeding_info) + + # 是否存在下页数据 + next_page = None + next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li') + if next_pages and len(next_pages) > 1: + page_num = next_pages[0].xpath("string(.)").strip() + if page_num.isdigit(): + next_page = f"{self._torrent_seeding_page}&page={page_num}" + + return next_page + + def _parse_user_traffic_info(self, html_text: str): + html_text = self._prepare_html_text(html_text) + upload_match = re.search(r"[^总]上[传傳]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, + re.IGNORECASE) + self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0 + download_match = re.search(r"[^总子影力]下[载載]量?[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)", html_text, + re.IGNORECASE) + self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0 + ratio_match = re.search(r"分享率[::_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)", html_text) + self.ratio = StringUtils.str_float(ratio_match.group(1)) if ( + ratio_match and ratio_match.group(1).strip()) else 0.0 + + def _parse_message_unread_links(self, html_text: str, msg_links: list) -> Optional[str]: + return None + + def _parse_message_content(self, html_text): + return None, None, None diff --git a/app/schemas/site.py b/app/schemas/site.py index 22caac9a..b264cc0e 100644 --- a/app/schemas/site.py +++ b/app/schemas/site.py @@ -12,6 +12,7 @@ class Site(BaseModel): rss: Optional[str] = None cookie: 
Optional[str] = None ua: Optional[str] = None + proxy: Optional[int] = 0 filter: Optional[str] = None note: Optional[str] = None limit_interval: Optional[int] = 0 diff --git a/app/utils/types.py b/app/utils/types.py index 0cbee166..f2296cab 100644 --- a/app/utils/types.py +++ b/app/utils/types.py @@ -17,9 +17,26 @@ class EventType(Enum): PluginReload = "plugin.reload" # 执行命令 CommandExcute = "command.excute" + # 站点签到 + SiteSignin = "site.signin" # 系统配置Key字典 class SystemConfigKey(Enum): # 用户已安装的插件 UserInstalledPlugins = "UserInstalledPlugins" + + +# 站点框架 +class SiteSchema(Enum): + DiscuzX = "Discuz!" + Gazelle = "Gazelle" + Ipt = "IPTorrents" + NexusPhp = "NexusPhp" + NexusProject = "NexusProject" + NexusRabbit = "NexusRabbit" + SmallHorse = "Small Horse" + Unit3d = "Unit3d" + TorrentLeech = "TorrentLeech" + FileList = "FileList" + TNode = "TNode" diff --git a/requirements.txt b/requirements.txt index 61cb5516..c8285de2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +Cython~=0.29.35 fast-bencode~=1.1.3 pydantic~=1.10.8 SQLAlchemy~=2.0.15 @@ -29,4 +30,9 @@ plexapi~=4.14.0 transmission-rpc~=4.3.0 feapder~=1.8.5 Jinja2~=3.1.2 -pyparsing~=3.0.9 \ No newline at end of file +pyparsing~=3.0.9 +func_timeout==4.3.5 +selenium~=4.9.1 +bs4~=0.0.1 +beautifulsoup4~=4.12.2 +pillow==9.5.0 \ No newline at end of file