From f365d93316195c7e6c14c1c318518926ac367d7c Mon Sep 17 00:00:00 2001 From: jxxghp Date: Wed, 10 Apr 2024 20:02:02 +0800 Subject: [PATCH] fix torrents match --- app/chain/search.py | 73 +++---------------------------------- app/chain/subscribe.py | 57 ++++------------------------- app/chain/torrents.py | 4 ++- app/helper/torrent.py | 82 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 95 insertions(+), 121 deletions(-) diff --git a/app/chain/search.py b/app/chain/search.py index da403e67..7b785068 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -1,5 +1,4 @@ import pickle -import re import traceback from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime @@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper from app.log import logger from app.schemas import NotExistMediaInfo from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType -from app.utils.string import StringUtils class SearchChain(ChainBase): @@ -179,73 +177,12 @@ class SearchChain(ChainBase): torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description) if torrent.title != torrent_meta.org_string: logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}") - # 比对种子识别类型 - if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: - logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},' - f'不匹配 {mediainfo.type.value}') - continue - # 比对种子在站点中的类型 - if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV: - logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},' - f'不匹配 {mediainfo.type.value}') - continue - # 比对年份 - if mediainfo.year: - if mediainfo.type == MediaType.TV: - # 剧集年份,每季的年份可能不同 - if torrent_meta.year and torrent_meta.year not in [year for year in - mediainfo.season_years.values()]: - logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}') - continue - else: - # 电影年份,上下浮动1年 - if torrent_meta.year not in [str(int(mediainfo.year) - 1), - mediainfo.year, - str(int(mediainfo.year) + 1)]: - logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}') - continue - # 识别的中英文名 - meta_names = { - StringUtils.clear_upper(torrent_meta.cn_name), - StringUtils.clear_upper(torrent_meta.en_name) - } - {""} - # 媒体标题、原标题 - media_titles = { - StringUtils.clear_upper(mediainfo.title), - StringUtils.clear_upper(mediainfo.original_title) - } - {""} - # 比对标题和原语种标题 - if meta_names.intersection(media_titles): - logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}') + # 比对种子 + if self.torrenthelper.match_torrent(mediainfo=mediainfo, + torrent_meta=torrent_meta, + torrent=torrent): + # 匹配成功 _match_torrents.append(torrent) - continue - # 比对别名和译名 - media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name} - if media_names: - if meta_names.intersection(media_names): - logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}') - _match_torrents.append(torrent) - continue - # 标题拆分 - titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+', - torrent_meta.org_string) if t] - # 在标题中判断是否存在标题、原语种标题、别名、译名 - if meta_names.intersection(titles) or media_names.intersection(titles): - logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title},' - f'标题:{torrent.title}') - _match_torrents.append(torrent) - continue - # 在副标题中判断是否存在标题、原语种标题、别名、译名 - if torrent.description: - subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+', - torrent.description) if t} - if meta_names.intersection(subtitles) or media_names.intersection(subtitles): - logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},' - f'副标题:{torrent.description}') - _match_torrents.append(torrent) - continue - # 未匹配 - logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}') # 匹配完成 logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") self.progress.update(value=97, diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index 8fd4203e..3b7b0cf0 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -1,6 +1,5 @@ import json import random -import re import time from datetime import datetime from typing import Dict, List, Optional, Union, Tuple @@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper from app.log import logger from app.schemas import NotExistMediaInfo, Notification from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType -from app.utils.string import StringUtils class SubscribeChain(ChainBase): @@ -589,9 +587,9 @@ class SubscribeChain(ChainBase): torrent_meta = context.meta_info torrent_mediainfo = context.media_info torrent_info = context.torrent_info - # 如果识别了媒体信息,则比对TMDBID和类型 if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id: + # 直接比对媒体信息 if torrent_mediainfo.type != mediainfo.type: continue if torrent_mediainfo.tmdb_id \ @@ -603,55 +601,12 @@ class SubscribeChain(ChainBase): logger.info( f'{mediainfo.title_year} 通过媒体信ID匹配到资源:{torrent_info.site_name} - {torrent_info.title}') else: - # 按标题匹配 - # 比对种子识别类型 - if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: + # 没有torrent_mediainfo媒体信息,按标题匹配 + if not self.torrenthelper.match_torrent(mediainfo=mediainfo, + torrent_meta=torrent_meta, + torrent=torrent_info, + logerror=False): continue - # 比对种子在站点中的类型 - if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV: - continue - # 比对年份 - if mediainfo.year: - if mediainfo.type == MediaType.TV: - # 剧集年份,每季的年份可能不同 - if torrent_meta.year and torrent_meta.year not in [year for year in - mediainfo.season_years.values()]: - continue - else: - # 电影年份,上下浮动1年 - if torrent_meta.year not in [str(int(mediainfo.year) - 1), - mediainfo.year, - str(int(mediainfo.year) + 1)]: - continue - # 标题匹配标志 - title_match = False - # 比对标题和原语种标题 - meta_name = StringUtils.clear_upper(torrent_meta.name) - if meta_name in [ - StringUtils.clear_upper(mediainfo.title), - StringUtils.clear_upper(mediainfo.original_title) - ]: - title_match = True - # 在副标题中判断是否存在标题与原语种标题 - if not title_match and torrent_info.description: - subtitle = re.split(r'[\s/|]+', torrent_info.description) - if (StringUtils.is_chinese(mediainfo.title) - and str(mediainfo.title) in subtitle) \ - or (StringUtils.is_chinese(mediainfo.original_title) - and str(mediainfo.original_title) in subtitle): - title_match = True - # 比对别名和译名 - if not title_match: - for name in mediainfo.names: - if StringUtils.clear_upper(name) == meta_name: - title_match = True - break - if not title_match: - continue - # 标题匹配成功 - logger.info( - f'{mediainfo.title_year} 通过名称匹配到资源:{torrent_info.site_name} - {torrent_info.title}') - # 优先级过滤规则 if subscribe.best_version: priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules) diff --git a/app/chain/torrents.py b/app/chain/torrents.py index 5ad18ed2..bc63073c 100644 --- a/app/chain/torrents.py +++ b/app/chain/torrents.py @@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton): logger.info(f'处理资源:{torrent.title} ...') # 识别 meta = MetaInfo(title=torrent.title, subtitle=torrent.description) + if torrent.title != meta.org_string: + logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}') # 使用站点种子分类,校正类型识别 if meta.type != MediaType.TV \ and torrent.category == MediaType.TV.value: @@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton): # 识别媒体信息 mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta) if not mediainfo: - logger.warn(f'未识别到媒体信息,标题:{torrent.title}') + logger.warn(f'{torrent.title} 未识别到媒体信息') # 存储空的媒体信息 mediainfo = MediaInfo() # 清理多余数据 diff --git a/app/helper/torrent.py b/app/helper/torrent.py index deefd912..3a3624ba 100644 --- a/app/helper/torrent.py +++ b/app/helper/torrent.py @@ -330,7 +330,7 @@ class TorrentHelper(metaclass=Singleton): f"{torrent_info.description} " f"{' '.join(torrent_info.labels or [])} " f"{torrent_info.volume_factor}") - + # 最少做种人数 min_seeders = filter_rule.get("min_seeders") if min_seeders and torrent_info.seeders < int(min_seeders): @@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton): f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}") return False return True + + @staticmethod + def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo, + torrent: TorrentInfo, logerror: bool = True) -> bool: + """ + 检查种子是否匹配媒体信息 + :param mediainfo: 需要匹配的媒体信息 + :param torrent_meta: 种子识别信息 + :param torrent: 种子信息 + :param logerror: 是否记录错误日志 + """ + # 要匹配的媒体标题、原标题 + media_titles = { + StringUtils.clear_upper(mediainfo.title), + StringUtils.clear_upper(mediainfo.original_title) + } - {""} + # 要匹配的媒体别名、译名 + media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name} + # 识别的种子中英文名 + meta_names = { + StringUtils.clear_upper(torrent_meta.cn_name), + StringUtils.clear_upper(torrent_meta.en_name) + } - {""} + # 比对种子识别类型 + if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: + if logerror: + logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},' + f'不匹配 {mediainfo.type.value}') + return False + # 比对种子在站点中的类型 + if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV: + if logerror: + logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},' + f'不匹配 {mediainfo.type.value}') + return False + # 比对年份 + if mediainfo.year: + if mediainfo.type == MediaType.TV: + # 剧集年份,每季的年份可能不同 + if torrent_meta.year and torrent_meta.year not in [year for year in + mediainfo.season_years.values()]: + if logerror: + logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}') + return False + else: + # 电影年份,上下浮动1年 + if torrent_meta.year not in [str(int(mediainfo.year) - 1), + mediainfo.year, + str(int(mediainfo.year) + 1)]: + if logerror: + logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}') + return False + # 比对标题和原语种标题 + if meta_names.intersection(media_titles): + logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}') + return True + # 比对别名和译名 + if media_names: + if meta_names.intersection(media_names): + logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}') + return True + # 标题拆分 + titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+', + torrent_meta.org_string) if t] + # 在标题中判断是否存在标题、原语种标题、别名、译名 + if meta_names.intersection(titles) or media_names.intersection(titles): + logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}') + return True + # 在副标题中判断是否存在标题、原语种标题、别名、译名 + if torrent.description: + subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+', + torrent.description) if t} + if meta_names.intersection(subtitles) or media_names.intersection(subtitles): + logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},' + f'副标题:{torrent.description}') + return True + # 未匹配 + if logerror: + logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}') + return False