fix torrents match
This commit is contained in:
parent
facd20ba3c
commit
f365d93316
@ -1,5 +1,4 @@
|
||||
import pickle
|
||||
import re
|
||||
import traceback
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper
|
||||
from app.log import logger
|
||||
from app.schemas import NotExistMediaInfo
|
||||
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
|
||||
class SearchChain(ChainBase):
|
||||
@ -179,73 +177,12 @@ class SearchChain(ChainBase):
|
||||
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
||||
if torrent.title != torrent_meta.org_string:
|
||||
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
|
||||
# 比对种子识别类型
|
||||
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},'
|
||||
f'不匹配 {mediainfo.type.value}')
|
||||
continue
|
||||
# 比对种子在站点中的类型
|
||||
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},'
|
||||
f'不匹配 {mediainfo.type.value}')
|
||||
continue
|
||||
# 比对年份
|
||||
if mediainfo.year:
|
||||
if mediainfo.type == MediaType.TV:
|
||||
# 剧集年份,每季的年份可能不同
|
||||
if torrent_meta.year and torrent_meta.year not in [year for year in
|
||||
mediainfo.season_years.values()]:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
|
||||
continue
|
||||
else:
|
||||
# 电影年份,上下浮动1年
|
||||
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
|
||||
mediainfo.year,
|
||||
str(int(mediainfo.year) + 1)]:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
|
||||
continue
|
||||
# 识别的中英文名
|
||||
meta_names = {
|
||||
StringUtils.clear_upper(torrent_meta.cn_name),
|
||||
StringUtils.clear_upper(torrent_meta.en_name)
|
||||
} - {""}
|
||||
# 媒体标题、原标题
|
||||
media_titles = {
|
||||
StringUtils.clear_upper(mediainfo.title),
|
||||
StringUtils.clear_upper(mediainfo.original_title)
|
||||
} - {""}
|
||||
# 比对标题和原语种标题
|
||||
if meta_names.intersection(media_titles):
|
||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
|
||||
# 比对种子
|
||||
if self.torrenthelper.match_torrent(mediainfo=mediainfo,
|
||||
torrent_meta=torrent_meta,
|
||||
torrent=torrent):
|
||||
# 匹配成功
|
||||
_match_torrents.append(torrent)
|
||||
continue
|
||||
# 比对别名和译名
|
||||
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
|
||||
if media_names:
|
||||
if meta_names.intersection(media_names):
|
||||
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
|
||||
_match_torrents.append(torrent)
|
||||
continue
|
||||
# 标题拆分
|
||||
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
|
||||
torrent_meta.org_string) if t]
|
||||
# 在标题中判断是否存在标题、原语种标题、别名、译名
|
||||
if meta_names.intersection(titles) or media_names.intersection(titles):
|
||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title},'
|
||||
f'标题:{torrent.title}')
|
||||
_match_torrents.append(torrent)
|
||||
continue
|
||||
# 在副标题中判断是否存在标题、原语种标题、别名、译名
|
||||
if torrent.description:
|
||||
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
|
||||
torrent.description) if t}
|
||||
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
|
||||
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},'
|
||||
f'副标题:{torrent.description}')
|
||||
_match_torrents.append(torrent)
|
||||
continue
|
||||
# 未匹配
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
|
||||
# 匹配完成
|
||||
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
|
||||
self.progress.update(value=97,
|
||||
|
@ -1,6 +1,5 @@
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Union, Tuple
|
||||
@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper
|
||||
from app.log import logger
|
||||
from app.schemas import NotExistMediaInfo, Notification
|
||||
from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
|
||||
from app.utils.string import StringUtils
|
||||
|
||||
|
||||
class SubscribeChain(ChainBase):
|
||||
@ -589,9 +587,9 @@ class SubscribeChain(ChainBase):
|
||||
torrent_meta = context.meta_info
|
||||
torrent_mediainfo = context.media_info
|
||||
torrent_info = context.torrent_info
|
||||
|
||||
# 如果识别了媒体信息,则比对TMDBID和类型
|
||||
if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
|
||||
# 直接比对媒体信息
|
||||
if torrent_mediainfo.type != mediainfo.type:
|
||||
continue
|
||||
if torrent_mediainfo.tmdb_id \
|
||||
@ -603,55 +601,12 @@ class SubscribeChain(ChainBase):
|
||||
logger.info(
|
||||
f'{mediainfo.title_year} 通过媒体信ID匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
|
||||
else:
|
||||
# 按标题匹配
|
||||
# 比对种子识别类型
|
||||
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
|
||||
# 没有torrent_mediainfo媒体信息,按标题匹配
|
||||
if not self.torrenthelper.match_torrent(mediainfo=mediainfo,
|
||||
torrent_meta=torrent_meta,
|
||||
torrent=torrent_info,
|
||||
logerror=False):
|
||||
continue
|
||||
# 比对种子在站点中的类型
|
||||
if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
|
||||
continue
|
||||
# 比对年份
|
||||
if mediainfo.year:
|
||||
if mediainfo.type == MediaType.TV:
|
||||
# 剧集年份,每季的年份可能不同
|
||||
if torrent_meta.year and torrent_meta.year not in [year for year in
|
||||
mediainfo.season_years.values()]:
|
||||
continue
|
||||
else:
|
||||
# 电影年份,上下浮动1年
|
||||
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
|
||||
mediainfo.year,
|
||||
str(int(mediainfo.year) + 1)]:
|
||||
continue
|
||||
# 标题匹配标志
|
||||
title_match = False
|
||||
# 比对标题和原语种标题
|
||||
meta_name = StringUtils.clear_upper(torrent_meta.name)
|
||||
if meta_name in [
|
||||
StringUtils.clear_upper(mediainfo.title),
|
||||
StringUtils.clear_upper(mediainfo.original_title)
|
||||
]:
|
||||
title_match = True
|
||||
# 在副标题中判断是否存在标题与原语种标题
|
||||
if not title_match and torrent_info.description:
|
||||
subtitle = re.split(r'[\s/|]+', torrent_info.description)
|
||||
if (StringUtils.is_chinese(mediainfo.title)
|
||||
and str(mediainfo.title) in subtitle) \
|
||||
or (StringUtils.is_chinese(mediainfo.original_title)
|
||||
and str(mediainfo.original_title) in subtitle):
|
||||
title_match = True
|
||||
# 比对别名和译名
|
||||
if not title_match:
|
||||
for name in mediainfo.names:
|
||||
if StringUtils.clear_upper(name) == meta_name:
|
||||
title_match = True
|
||||
break
|
||||
if not title_match:
|
||||
continue
|
||||
# 标题匹配成功
|
||||
logger.info(
|
||||
f'{mediainfo.title_year} 通过名称匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
|
||||
|
||||
# 优先级过滤规则
|
||||
if subscribe.best_version:
|
||||
priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)
|
||||
|
@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
|
||||
logger.info(f'处理资源:{torrent.title} ...')
|
||||
# 识别
|
||||
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
||||
if torrent.title != meta.org_string:
|
||||
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
|
||||
# 使用站点种子分类,校正类型识别
|
||||
if meta.type != MediaType.TV \
|
||||
and torrent.category == MediaType.TV.value:
|
||||
@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
|
||||
# 识别媒体信息
|
||||
mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
|
||||
if not mediainfo:
|
||||
logger.warn(f'未识别到媒体信息,标题:{torrent.title}')
|
||||
logger.warn(f'{torrent.title} 未识别到媒体信息')
|
||||
# 存储空的媒体信息
|
||||
mediainfo = MediaInfo()
|
||||
# 清理多余数据
|
||||
|
@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton):
|
||||
f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
|
||||
return False
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo,
|
||||
torrent: TorrentInfo, logerror: bool = True) -> bool:
|
||||
"""
|
||||
检查种子是否匹配媒体信息
|
||||
:param mediainfo: 需要匹配的媒体信息
|
||||
:param torrent_meta: 种子识别信息
|
||||
:param torrent: 种子信息
|
||||
:param logerror: 是否记录错误日志
|
||||
"""
|
||||
# 要匹配的媒体标题、原标题
|
||||
media_titles = {
|
||||
StringUtils.clear_upper(mediainfo.title),
|
||||
StringUtils.clear_upper(mediainfo.original_title)
|
||||
} - {""}
|
||||
# 要匹配的媒体别名、译名
|
||||
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
|
||||
# 识别的种子中英文名
|
||||
meta_names = {
|
||||
StringUtils.clear_upper(torrent_meta.cn_name),
|
||||
StringUtils.clear_upper(torrent_meta.en_name)
|
||||
} - {""}
|
||||
# 比对种子识别类型
|
||||
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
|
||||
if logerror:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},'
|
||||
f'不匹配 {mediainfo.type.value}')
|
||||
return False
|
||||
# 比对种子在站点中的类型
|
||||
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
|
||||
if logerror:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},'
|
||||
f'不匹配 {mediainfo.type.value}')
|
||||
return False
|
||||
# 比对年份
|
||||
if mediainfo.year:
|
||||
if mediainfo.type == MediaType.TV:
|
||||
# 剧集年份,每季的年份可能不同
|
||||
if torrent_meta.year and torrent_meta.year not in [year for year in
|
||||
mediainfo.season_years.values()]:
|
||||
if logerror:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
|
||||
return False
|
||||
else:
|
||||
# 电影年份,上下浮动1年
|
||||
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
|
||||
mediainfo.year,
|
||||
str(int(mediainfo.year) + 1)]:
|
||||
if logerror:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
|
||||
return False
|
||||
# 比对标题和原语种标题
|
||||
if meta_names.intersection(media_titles):
|
||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
|
||||
return True
|
||||
# 比对别名和译名
|
||||
if media_names:
|
||||
if meta_names.intersection(media_names):
|
||||
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
|
||||
return True
|
||||
# 标题拆分
|
||||
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
|
||||
torrent_meta.org_string) if t]
|
||||
# 在标题中判断是否存在标题、原语种标题、别名、译名
|
||||
if meta_names.intersection(titles) or media_names.intersection(titles):
|
||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
|
||||
return True
|
||||
# 在副标题中判断是否存在标题、原语种标题、别名、译名
|
||||
if torrent.description:
|
||||
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
|
||||
torrent.description) if t}
|
||||
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
|
||||
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},'
|
||||
f'副标题:{torrent.description}')
|
||||
return True
|
||||
# 未匹配
|
||||
if logerror:
|
||||
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
|
||||
return False
|
||||
|
Loading…
x
Reference in New Issue
Block a user