diff --git a/app/chain/search.py b/app/chain/search.py index a61d3621..086f4c92 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -135,28 +135,8 @@ class SearchChain(ChainBase): if not torrents: logger.warn(f'{keyword or mediainfo.title} 未搜索到资源') return [] - # 过滤种子 - if priority_rule is None: - # 取搜索优先级规则 - priority_rule = self.systemconfig.get(SystemConfigKey.SearchFilterRules) - if priority_rule: - logger.info(f'开始过滤资源,当前规则:{priority_rule} ...') - result: List[TorrentInfo] = self.filter_torrents(rule_string=priority_rule, - torrent_list=torrents, - season_episodes=season_episodes, - mediainfo=mediainfo) - if result is not None: - torrents = result - if not torrents: - logger.warn(f'{keyword or mediainfo.title} 没有符合优先级规则的资源') - return [] - # 使用过滤规则再次过滤 - torrents = self.filter_torrents_by_rule(torrents=torrents, - mediainfo=mediainfo, - filter_rule=filter_rule) - if not torrents: - logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') - return [] + # 开始新进度 + self.progress.start(ProgressKey.Search) # 匹配的资源 _match_torrents = [] # 总数 @@ -164,29 +144,32 @@ class SearchChain(ChainBase): # 已处理数 _count = 0 if mediainfo: - self.progress.start(ProgressKey.Search) - logger.info(f'开始匹配,总 {_total} 个资源 ...') # 英文标题应该在别名/原标题中,不需要再匹配 logger.info(f"标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}") self.progress.update(value=0, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search) for torrent in torrents: _count += 1 - self.progress.update(value=(_count / _total) * 100, + self.progress.update(value=(_count / _total) * 96, text=f'正在匹配 {torrent.site_name},已完成 {_count} / {_total} ...', key=ProgressKey.Search) # 比对IMDBID if torrent.imdbid \ and mediainfo.imdb_id \ and torrent.imdbid == mediainfo.imdb_id: - logger.info(f'{mediainfo.title} 匹配到资源:{torrent.site_name} - {torrent.title}') + logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源:{torrent.site_name} - {torrent.title}') _match_torrents.append(torrent) continue # 识别 torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description) - # 比对类型 - if (torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV) \ - or (torrent_meta.type != MediaType.TV and mediainfo.type == MediaType.TV): - logger.warn(f'{torrent.site_name} - {torrent.title} 类型不匹配') + # 比对种子识别类型 + if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: + logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},' + f'需要是 {mediainfo.type.value},不匹配') + continue + # 比对种子在站点中的类型 + if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV: + logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},' + f'需要是 {mediainfo.type.value},不匹配') continue # 比对年份 if mediainfo.year: @@ -231,21 +214,55 @@ class SearchChain(ChainBase): break else: logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配') - self.progress.update(value=100, + logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") + self.progress.update(value=97, text=f'匹配完成,共匹配到 {len(_match_torrents)} 个资源', key=ProgressKey.Search) - self.progress.end(ProgressKey.Search) else: _match_torrents = torrents - logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") + # 开始过滤 + self.progress.update(value=98, text=f'开始过滤,总 {len(_match_torrents)} 个资源,请稍候...', + key=ProgressKey.Search) + # 过滤种子 + if priority_rule is None: + # 取搜索优先级规则 + priority_rule = self.systemconfig.get(SystemConfigKey.SearchFilterRules) + if priority_rule: + logger.info(f'开始优先级规则过滤,当前规则:{priority_rule} ...') + result: List[TorrentInfo] = self.filter_torrents(rule_string=priority_rule, + torrent_list=_match_torrents, + season_episodes=season_episodes, + mediainfo=mediainfo) + if result is not None: + _match_torrents = result + if not _match_torrents: + logger.warn(f'{keyword or mediainfo.title} 没有符合优先级规则的资源') + return [] + # 使用过滤规则再次过滤 + if filter_rule: + logger.info(f'开始过滤规则过滤,当前规则:{filter_rule} ...') + _match_torrents = self.filter_torrents_by_rule(torrents=_match_torrents, + mediainfo=mediainfo, + filter_rule=filter_rule) + if not _match_torrents: + logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') + return [] # 去掉mediainfo中多余的数据 mediainfo.clear() # 组装上下文 contexts = [Context(meta_info=MetaInfo(title=torrent.title, subtitle=torrent.description), media_info=mediainfo, torrent_info=torrent) for torrent in _match_torrents] + + logger.info(f"过滤完成,剩余 {_total} 个资源") + self.progress.update(value=99, text=f'过滤完成,剩余 {_total} 个资源', key=ProgressKey.Search) # 排序 + self.progress.update(value=100, + text=f'正在对 {len(contexts)} 个资源进行排序,请稍候...', + key=ProgressKey.Search) contexts = self.torrenthelper.sort_torrents(contexts) + # 结束进度 + self.progress.end(ProgressKey.Search) # 返回 return contexts diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index 03073ec6..7c06377c 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -566,12 +566,14 @@ class SubscribeChain(ChainBase): if torrent_mediainfo.douban_id \ and torrent_mediainfo.douban_id != mediainfo.douban_id: continue - logger.info(f'{mediainfo.title_year} 通过媒体信息匹配到资源:{torrent_info.site_name} - {torrent_info.title}') + logger.info(f'{mediainfo.title_year} 通过媒体信ID匹配到资源:{torrent_info.site_name} - {torrent_info.title}') else: # 按标题匹配 - # 比对类型 - if (torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV) \ - or (torrent_meta.type != MediaType.TV and mediainfo.type == MediaType.TV): + # 比对种子识别类型 + if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: + continue + # 比对种子在站点中的类型 + if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV: continue # 比对年份 if mediainfo.year: diff --git a/app/chain/torrents.py b/app/chain/torrents.py index 6974e832..5ad18ed2 100644 --- a/app/chain/torrents.py +++ b/app/chain/torrents.py @@ -16,7 +16,7 @@ from app.helper.sites import SitesHelper from app.helper.torrent import TorrentHelper from app.log import logger from app.schemas import Notification -from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType +from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType, MediaType from app.utils.singleton import Singleton from app.utils.string import StringUtils @@ -184,6 +184,10 @@ class TorrentsChain(ChainBase, metaclass=Singleton): logger.info(f'处理资源:{torrent.title} ...') # 识别 meta = MetaInfo(title=torrent.title, subtitle=torrent.description) + # 使用站点种子分类,校正类型识别 + if meta.type != MediaType.TV \ + and torrent.category == MediaType.TV.value: + meta.type = MediaType.TV # 识别媒体信息 mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta) if not mediainfo: diff --git a/app/core/context.py b/app/core/context.py index 29d00afb..3b9b8fc8 100644 --- a/app/core/context.py +++ b/app/core/context.py @@ -57,6 +57,8 @@ class TorrentInfo: labels: list = field(default_factory=list) # 种子优先级 pri_order: int = 0 + # 种子分类 电影/电视剧 + category: str = None def __setattr__(self, name: str, value: Any): self.__dict__[name] = value diff --git a/app/modules/indexer/spider.py b/app/modules/indexer/spider.py index d13ab9ac..ea8669bc 100644 --- a/app/modules/indexer/spider.py +++ b/app/modules/indexer/spider.py @@ -3,7 +3,7 @@ import datetime import re import traceback from typing import List -from urllib.parse import quote, urlencode +from urllib.parse import quote, urlencode, urlparse, parse_qs import chardet from jinja2 import Template @@ -276,7 +276,7 @@ class TorrentSpider: return self.parse(page_source) def __get_title(self, torrent): - # title default + # title default text if 'title' not in self.fields: return selector = self.fields.get('title', {}) @@ -306,7 +306,7 @@ class TorrentSpider: selector.get('filters')) def __get_description(self, torrent): - # title optional + # title optional text if 'description' not in self.fields: return selector = self.fields.get('description', {}) @@ -352,7 +352,7 @@ class TorrentSpider: selector.get('filters')) def __get_detail(self, torrent): - # details + # details page text if 'details' not in self.fields: return selector = self.fields.get('details', {}) @@ -373,7 +373,7 @@ class TorrentSpider: self.torrents_info['page_url'] = detail_link def __get_download(self, torrent): - # download link + # download link text if 'download' not in self.fields: return selector = self.fields.get('download', {}) @@ -403,7 +403,7 @@ class TorrentSpider: selector.get('filters')) def __get_size(self, torrent): - # torrent size + # torrent size int if 'size' not in self.fields: return selector = self.fields.get('size', {}) @@ -420,7 +420,7 @@ class TorrentSpider: self.torrents_info['size'] = 0 def __get_leechers(self, torrent): - # torrent leechers + # torrent leechers int if 'leechers' not in self.fields: return selector = self.fields.get('leechers', {}) @@ -438,7 +438,7 @@ class TorrentSpider: self.torrents_info['peers'] = 0 def __get_seeders(self, torrent): - # torrent leechers + # torrent leechers int if 'seeders' not in self.fields: return selector = self.fields.get('seeders', {}) @@ -456,7 +456,7 @@ class TorrentSpider: self.torrents_info['seeders'] = 0 def __get_grabs(self, torrent): - # torrent grabs + # torrent grabs int if 'grabs' not in self.fields: return selector = self.fields.get('grabs', {}) @@ -474,7 +474,7 @@ class TorrentSpider: self.torrents_info['grabs'] = 0 def __get_pubdate(self, torrent): - # torrent pubdate + # torrent pubdate yyyy-mm-dd hh:mm:ss if 'date_added' not in self.fields: return selector = self.fields.get('date_added', {}) @@ -486,7 +486,7 @@ class TorrentSpider: selector.get('filters')) def __get_date_elapsed(self, torrent): - # torrent pubdate + # torrent data elaspsed text if 'date_elapsed' not in self.fields: return selector = self.fields.get('date_elapsed', {}) @@ -498,7 +498,7 @@ class TorrentSpider: selector.get('filters')) def __get_downloadvolumefactor(self, torrent): - # downloadvolumefactor + # downloadvolumefactor int selector = self.fields.get('downloadvolumefactor', {}) if not selector: return @@ -521,7 +521,7 @@ class TorrentSpider: self.torrents_info['downloadvolumefactor'] = int(downloadvolumefactor.group(1)) def __get_uploadvolumefactor(self, torrent): - # uploadvolumefactor + # uploadvolumefactor int selector = self.fields.get('uploadvolumefactor', {}) if not selector: return @@ -544,7 +544,7 @@ class TorrentSpider: self.torrents_info['uploadvolumefactor'] = int(uploadvolumefactor.group(1)) def __get_labels(self, torrent): - # labels + # labels ['label1', 'label2'] if 'labels' not in self.fields: return selector = self.fields.get('labels', {}) @@ -557,7 +557,7 @@ class TorrentSpider: self.torrents_info['labels'] = [] def __get_free_date(self, torrent): - # free date + # free date yyyy-mm-dd hh:mm:ss if 'freedate' not in self.fields: return selector = self.fields.get('freedate', {}) @@ -569,7 +569,7 @@ class TorrentSpider: selector.get('filters')) def __get_hit_and_run(self, torrent): - # hitandrun + # hitandrun True/False if 'hr' not in self.fields: return selector = self.fields.get('hr', {}) @@ -579,28 +579,71 @@ class TorrentSpider: else: self.torrents_info['hit_and_run'] = False + def __get_category(self, torrent): + # category 电影/电视剧 + if 'category' not in self.fields: + return + selector = self.fields.get('category', {}) + category = torrent(selector.get('selector', '')).clone() + self.__remove(category, selector) + items = self.__attribute_or_text(category, selector) + category_value = self.__index(items, selector) + category_value = self.__filter_text(category_value, + selector.get('filters')) + if category_value and self.category: + tv_cats = [str(cat.get("id")) for cat in self.category.get("tv") or []] + movie_cats = [str(cat.get("id")) for cat in self.category.get("movie") or []] + if category_value in tv_cats \ + and category_value not in movie_cats: + self.torrents_info['category'] = MediaType.TV.value + elif category_value in movie_cats: + self.torrents_info['category'] = MediaType.MOVIE.value + else: + self.torrents_info['category'] = MediaType.UNKNOWN.value + else: + self.torrents_info['category'] = MediaType.UNKNOWN.value + def get_info(self, torrent) -> dict: """ 解析单条种子数据 """ self.torrents_info = {} try: + # 标题 self.__get_title(torrent) + # 描述 self.__get_description(torrent) + # 详情页面 self.__get_detail(torrent) + # 下载链接 self.__get_download(torrent) + # 完成数 self.__get_grabs(torrent) + # 下载数 self.__get_leechers(torrent) + # 做种数 self.__get_seeders(torrent) + # 大小 self.__get_size(torrent) + # IMDBID self.__get_imdbid(torrent) + # 下载系数 self.__get_downloadvolumefactor(torrent) + # 上传系数 self.__get_uploadvolumefactor(torrent) + # 发布时间 self.__get_pubdate(torrent) + # 已发布时间 self.__get_date_elapsed(torrent) + # 免费载止时间 self.__get_free_date(torrent) + # 标签 self.__get_labels(torrent) + # HR self.__get_hit_and_run(torrent) + # 分类 + self.__get_category(torrent) + except Exception as err: logger.error("%s 搜索出现错误:%s" % (self.indexername, str(err))) return self.torrents_info @@ -632,6 +675,11 @@ class TorrentSpider: text = text.strip() elif method_name == "appendleft": text = f"{args}{text}" + elif method_name == "querystring": + parsed_url = urlparse(text) + query_params = parse_qs(parsed_url.query) + param_value = query_params.get(args) + text = param_value[0] if param_value else '' except Exception as err: logger.debug(f'过滤器 {method_name} 处理失败:{str(err)} - {traceback.format_exc()}') return text.strip()