fix hhanclub数据统计

This commit is contained in:
jxxghp
2023-08-30 14:51:55 +08:00
parent f8221bb526
commit 16289d86b6
3 changed files with 71 additions and 18 deletions

View File

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from app.plugins.sitestatistic.siteuserinfo import SITE_BASE_ORDER, SiteSchema
from app.plugins.sitestatistic.siteuserinfo.nexus_php import NexusPhpSiteUserInfo
from app.utils.string import StringUtils
class NexusHhanclubSiteUserInfo(NexusPhpSiteUserInfo):
    """User-info parser for pages that mention ``hhanclub.top``.

    Each override first runs the inherited NexusPHP parsing and then re-reads
    the values from the page nodes addressed by the XPath expressions below.
    A missing node is tolerated (the lookup simply yields nothing) instead of
    raising ``IndexError``.
    """

    schema = SiteSchema.NexusHhanclub
    order = SITE_BASE_ORDER + 20

    # Leading markup/label characters followed by the captured size value.
    _SIZE_PATTERN = r"[_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+[KMGTPI]*B)"
    # Ratio label followed by the captured numeric value.
    _RATIO_PATTERN = r"分享率][:_<>/a-zA-Z-=\"'\s#;]+([\d,.\s]+)"

    @classmethod
    def match(cls, html_text: str) -> bool:
        """Return True when the page text contains ``hhanclub.top``."""
        return 'hhanclub.top' in html_text

    @staticmethod
    def _first_text(html, xpath: str) -> str:
        """Return the first result of ``xpath`` on ``html``, or "" if none."""
        found = html.xpath(xpath)
        return found[0] if found else ""

    def _parse_user_traffic_info(self, html_text):
        """Parse upload, download and share ratio from the user info panel.

        Sets ``self.upload``, ``self.download`` and ``self.ratio``. A value
        whose node is missing or whose text does not match falls back to 0
        (upload/download) or to the ratio computed from upload/download.

        :param html_text: raw page HTML
        :return: None
        """
        super()._parse_user_traffic_info(html_text)
        html_text = self._prepare_html_text(html_text)
        if not html_text:
            # Nothing to parse; keep whatever the parent parser produced.
            return
        html = etree.HTML(html_text)
        if html is None:
            return

        # Upload, download and share ratio
        upload_match = re.search(
            self._SIZE_PATTERN,
            self._first_text(html, '//*[@id="user-info-panel"]/div[2]/div[2]/div[4]/text()'))
        download_match = re.search(
            self._SIZE_PATTERN,
            self._first_text(html, '//*[@id="user-info-panel"]/div[2]/div[2]/div[5]/text()'))
        ratio_match = re.search(
            self._RATIO_PATTERN,
            self._first_text(html, '//*[@id="user-info-panel"]/div[2]/div[1]/div[1]/div/text()'))

        # Convert the captured size strings to numbers
        self.upload = StringUtils.num_filesize(upload_match.group(1).strip()) if upload_match else 0
        self.download = StringUtils.num_filesize(download_match.group(1).strip()) if download_match else 0

        # Prefer the ratio shown on the page; otherwise compute it
        calc_ratio = 0.0 if self.download <= 0.0 else round(self.upload / self.download, 3)
        if ratio_match and ratio_match.group(1).strip():
            self.ratio = StringUtils.str_float(ratio_match.group(1))
        else:
            self.ratio = calc_ratio

    def _parse_user_detail_info(self, html_text: str):
        """Parse extra user details; this override reads the join time.

        Sets ``self.join_at`` when the join-time node is present.

        :param html_text: raw page HTML
        :return: None
        """
        super()._parse_user_detail_info(html_text)
        if not html_text:
            return
        html = etree.HTML(html_text)
        # Compare with None explicitly: truth-testing an lxml element checks
        # for children and is deprecated.
        if html is None:
            return

        # Join time; the text after " (" is dropped before normalising
        join_at_text = html.xpath('//*[@id="mainContent"]/div/div[2]/div[4]/div[3]/span[2]/text()[1]')
        if join_at_text:
            self.join_at = StringUtils.unify_datetime_str(join_at_text[0].split(' (')[0].strip())

    def _get_user_level(self, html):
        """Read the user level from the ``title`` of the level image.

        The inherited result is kept when the image node is not found.

        :param html: parsed lxml document
        :return: None
        """
        super()._get_user_level(html)
        if html is None:
            return
        titles = html.xpath('//*[@id="mainContent"]/div/div[2]/div[2]/div[4]/img/@title')
        if titles:
            self.user_level = titles[0]