fix torrents match

This commit is contained in:
jxxghp 2024-04-10 20:02:02 +08:00
parent facd20ba3c
commit f365d93316
4 changed files with 95 additions and 121 deletions

View File

@ -1,5 +1,4 @@
import pickle
import re
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper
from app.log import logger
from app.schemas import NotExistMediaInfo
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
from app.utils.string import StringUtils
class SearchChain(ChainBase):
@ -179,73 +177,12 @@ class SearchChain(ChainBase):
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
if torrent.title != torrent_meta.org_string:
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
# 比对种子识别类型
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}'
f'不匹配 {mediainfo.type.value}')
continue
# 比对种子在站点中的类型
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}'
f'不匹配 {mediainfo.type.value}')
continue
# 比对年份
if mediainfo.year:
if mediainfo.type == MediaType.TV:
# 剧集年份,每季的年份可能不同
if torrent_meta.year and torrent_meta.year not in [year for year in
mediainfo.season_years.values()]:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
continue
else:
# 电影年份上下浮动1年
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
mediainfo.year,
str(int(mediainfo.year) + 1)]:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
continue
# 识别的中英文名
meta_names = {
StringUtils.clear_upper(torrent_meta.cn_name),
StringUtils.clear_upper(torrent_meta.en_name)
} - {""}
# 媒体标题、原标题
media_titles = {
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
} - {""}
# 比对标题和原语种标题
if meta_names.intersection(media_titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
# 比对种子
if self.torrenthelper.match_torrent(mediainfo=mediainfo,
torrent_meta=torrent_meta,
torrent=torrent):
# 匹配成功
_match_torrents.append(torrent)
continue
# 比对别名和译名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 标题拆分
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
torrent_meta.org_string) if t]
# 在标题中判断是否存在标题、原语种标题、别名、译名
if meta_names.intersection(titles) or media_names.intersection(titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'标题:{torrent.title}')
_match_torrents.append(torrent)
continue
# 在副标题中判断是否存在标题、原语种标题、别名、译名
if torrent.description:
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
torrent.description) if t}
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'副标题:{torrent.description}')
_match_torrents.append(torrent)
continue
# 未匹配
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
# 匹配完成
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
self.progress.update(value=97,

View File

@ -1,6 +1,5 @@
import json
import random
import re
import time
from datetime import datetime
from typing import Dict, List, Optional, Union, Tuple
@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper
from app.log import logger
from app.schemas import NotExistMediaInfo, Notification
from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
from app.utils.string import StringUtils
class SubscribeChain(ChainBase):
@ -589,9 +587,9 @@ class SubscribeChain(ChainBase):
torrent_meta = context.meta_info
torrent_mediainfo = context.media_info
torrent_info = context.torrent_info
# 如果识别了媒体信息则比对TMDBID和类型
if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
# 直接比对媒体信息
if torrent_mediainfo.type != mediainfo.type:
continue
if torrent_mediainfo.tmdb_id \
@ -603,55 +601,12 @@ class SubscribeChain(ChainBase):
logger.info(
f'{mediainfo.title_year} 通过媒体信ID匹配到资源{torrent_info.site_name} - {torrent_info.title}')
else:
# 按标题匹配
# 比对种子识别类型
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
# 没有torrent_mediainfo媒体信息按标题匹配
if not self.torrenthelper.match_torrent(mediainfo=mediainfo,
torrent_meta=torrent_meta,
torrent=torrent_info,
logerror=False):
continue
# 比对种子在站点中的类型
if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
continue
# 比对年份
if mediainfo.year:
if mediainfo.type == MediaType.TV:
# 剧集年份,每季的年份可能不同
if torrent_meta.year and torrent_meta.year not in [year for year in
mediainfo.season_years.values()]:
continue
else:
# 电影年份上下浮动1年
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
mediainfo.year,
str(int(mediainfo.year) + 1)]:
continue
# 标题匹配标志
title_match = False
# 比对标题和原语种标题
meta_name = StringUtils.clear_upper(torrent_meta.name)
if meta_name in [
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
]:
title_match = True
# 在副标题中判断是否存在标题与原语种标题
if not title_match and torrent_info.description:
subtitle = re.split(r'[\s/|]+', torrent_info.description)
if (StringUtils.is_chinese(mediainfo.title)
and str(mediainfo.title) in subtitle) \
or (StringUtils.is_chinese(mediainfo.original_title)
and str(mediainfo.original_title) in subtitle):
title_match = True
# 比对别名和译名
if not title_match:
for name in mediainfo.names:
if StringUtils.clear_upper(name) == meta_name:
title_match = True
break
if not title_match:
continue
# 标题匹配成功
logger.info(
f'{mediainfo.title_year} 通过名称匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
# 优先级过滤规则
if subscribe.best_version:
priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)

View File

@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
logger.info(f'处理资源:{torrent.title} ...')
# 识别
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
if torrent.title != meta.org_string:
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
# 使用站点种子分类,校正类型识别
if meta.type != MediaType.TV \
and torrent.category == MediaType.TV.value:
@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
# 识别媒体信息
mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
if not mediainfo:
logger.warn(f'未识别到媒体信息,标题:{torrent.title}')
logger.warn(f'{torrent.title} 未识别到媒体信息')
# 存储空的媒体信息
mediainfo = MediaInfo()
# 清理多余数据

View File

@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton):
f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
return False
return True
@staticmethod
def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo,
                  torrent: TorrentInfo, logerror: bool = True) -> bool:
    """
    Check whether a torrent matches the given media.

    The checks run in this order and the first decisive one wins:
    recognized type, site category, year, recognized names against the
    media title / original title, recognized names against the media
    aliases, media names against the tokens of the torrent title, and
    finally media names against the tokens of the torrent description.

    :param mediainfo: the media the torrent is expected to belong to
    :param torrent_meta: metadata recognized from the torrent title
    :param torrent: the torrent record as returned by the site
    :param logerror: when True, log a warning explaining each rejection;
                     successful matches are logged regardless
    :return: True when the torrent is considered a match, otherwise False
    """
    # Title and original-language title of the media to match
    media_titles = {
        StringUtils.clear_upper(mediainfo.title),
        StringUtils.clear_upper(mediainfo.original_title)
    } - {""}
    # Aliases and translated names of the media to match; empty results are
    # dropped so that an empty string can never count as a match
    media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name} - {""}
    # Chinese and English names recognized from the torrent
    meta_names = {
        StringUtils.clear_upper(torrent_meta.cn_name),
        StringUtils.clear_upper(torrent_meta.en_name)
    } - {""}

    # The torrent title was recognized as TV but the media is not TV
    if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
        if logerror:
            logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}'
                        f'不匹配 {mediainfo.type.value}')
        return False

    # The site categorizes the torrent as TV but the media is not TV
    if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
        if logerror:
            logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}'
                        f'不匹配 {mediainfo.type.value}')
        return False

    # Year comparison, only possible when the media has a year
    if mediainfo.year:
        if mediainfo.type == MediaType.TV:
            # Each season may have its own year; a torrent without a
            # recognized year is not rejected here
            if torrent_meta.year and torrent_meta.year not in mediainfo.season_years.values():
                if logerror:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
                return False
        else:
            # Non-TV media tolerate a difference of one year; a torrent
            # without a recognized year fails this check
            if torrent_meta.year not in [str(int(mediainfo.year) - 1),
                                         mediainfo.year,
                                         str(int(mediainfo.year) + 1)]:
                if logerror:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
                return False

    # Recognized names against the media title / original title
    if meta_names.intersection(media_titles):
        logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
        return True

    # Recognized names against the media aliases / translated names
    if meta_names.intersection(media_names):
        logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
        return True

    # Every name the media is known by. The token checks below must use
    # these rather than meta_names: meta_names come from the torrent itself,
    # so comparing them with the torrent's own tokens would match any media.
    media_all_names = media_titles | media_names

    # Look for any media name among the tokens of the torrent title
    if torrent_meta.org_string:
        titles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
                                                               torrent_meta.org_string) if t} - {""}
        if media_all_names.intersection(titles):
            logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
            return True

    # Look for any media name among the tokens of the torrent description
    if torrent.description:
        subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
                                                                  torrent.description) if t} - {""}
        if media_all_names.intersection(subtitles):
            logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
                        f'副标题:{torrent.description}')
            return True

    # Nothing matched
    if logerror:
        logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
    return False