fix torrents match

This commit is contained in:
jxxghp 2024-04-10 20:02:02 +08:00
parent facd20ba3c
commit f365d93316
4 changed files with 95 additions and 121 deletions

View File

@ -1,5 +1,4 @@
import pickle import pickle
import re
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime from datetime import datetime
@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper
from app.log import logger from app.log import logger
from app.schemas import NotExistMediaInfo from app.schemas import NotExistMediaInfo
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
from app.utils.string import StringUtils
class SearchChain(ChainBase): class SearchChain(ChainBase):
@ -179,73 +177,12 @@ class SearchChain(ChainBase):
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description) torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
if torrent.title != torrent_meta.org_string: if torrent.title != torrent_meta.org_string:
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}") logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
# 比对种子识别类型 # 比对种子
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: if self.torrenthelper.match_torrent(mediainfo=mediainfo,
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}' torrent_meta=torrent_meta,
f'不匹配 {mediainfo.type.value}') torrent=torrent):
continue # 匹配成功
# 比对种子在站点中的类型
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}'
f'不匹配 {mediainfo.type.value}')
continue
# 比对年份
if mediainfo.year:
if mediainfo.type == MediaType.TV:
# 剧集年份,每季的年份可能不同
if torrent_meta.year and torrent_meta.year not in [year for year in
mediainfo.season_years.values()]:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
continue
else:
# 电影年份上下浮动1年
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
mediainfo.year,
str(int(mediainfo.year) + 1)]:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
continue
# 识别的中英文名
meta_names = {
StringUtils.clear_upper(torrent_meta.cn_name),
StringUtils.clear_upper(torrent_meta.en_name)
} - {""}
# 媒体标题、原标题
media_titles = {
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
} - {""}
# 比对标题和原语种标题
if meta_names.intersection(media_titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue
# 比对别名和译名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 标题拆分
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
torrent_meta.org_string) if t]
# 在标题中判断是否存在标题、原语种标题、别名、译名
if meta_names.intersection(titles) or media_names.intersection(titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'标题:{torrent.title}')
_match_torrents.append(torrent)
continue
# 在副标题中判断是否存在标题、原语种标题、别名、译名
if torrent.description:
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
torrent.description) if t}
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'副标题:{torrent.description}')
_match_torrents.append(torrent)
continue
# 未匹配
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
# 匹配完成 # 匹配完成
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
self.progress.update(value=97, self.progress.update(value=97,

View File

@ -1,6 +1,5 @@
import json import json
import random import random
import re
import time import time
from datetime import datetime from datetime import datetime
from typing import Dict, List, Optional, Union, Tuple from typing import Dict, List, Optional, Union, Tuple
@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper
from app.log import logger from app.log import logger
from app.schemas import NotExistMediaInfo, Notification from app.schemas import NotExistMediaInfo, Notification
from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
from app.utils.string import StringUtils
class SubscribeChain(ChainBase): class SubscribeChain(ChainBase):
@ -589,9 +587,9 @@ class SubscribeChain(ChainBase):
torrent_meta = context.meta_info torrent_meta = context.meta_info
torrent_mediainfo = context.media_info torrent_mediainfo = context.media_info
torrent_info = context.torrent_info torrent_info = context.torrent_info
# 如果识别了媒体信息则比对TMDBID和类型 # 如果识别了媒体信息则比对TMDBID和类型
if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id: if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
# 直接比对媒体信息
if torrent_mediainfo.type != mediainfo.type: if torrent_mediainfo.type != mediainfo.type:
continue continue
if torrent_mediainfo.tmdb_id \ if torrent_mediainfo.tmdb_id \
@ -603,55 +601,12 @@ class SubscribeChain(ChainBase):
logger.info( logger.info(
f'{mediainfo.title_year} 通过媒体信ID匹配到资源{torrent_info.site_name} - {torrent_info.title}') f'{mediainfo.title_year} 通过媒体信ID匹配到资源{torrent_info.site_name} - {torrent_info.title}')
else: else:
# 按标题匹配 # 没有torrent_mediainfo媒体信息按标题匹配
# 比对种子识别类型 if not self.torrenthelper.match_torrent(mediainfo=mediainfo,
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV: torrent_meta=torrent_meta,
torrent=torrent_info,
logerror=False):
continue continue
# 比对种子在站点中的类型
if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
continue
# 比对年份
if mediainfo.year:
if mediainfo.type == MediaType.TV:
# 剧集年份,每季的年份可能不同
if torrent_meta.year and torrent_meta.year not in [year for year in
mediainfo.season_years.values()]:
continue
else:
# 电影年份上下浮动1年
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
mediainfo.year,
str(int(mediainfo.year) + 1)]:
continue
# 标题匹配标志
title_match = False
# 比对标题和原语种标题
meta_name = StringUtils.clear_upper(torrent_meta.name)
if meta_name in [
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
]:
title_match = True
# 在副标题中判断是否存在标题与原语种标题
if not title_match and torrent_info.description:
subtitle = re.split(r'[\s/|]+', torrent_info.description)
if (StringUtils.is_chinese(mediainfo.title)
and str(mediainfo.title) in subtitle) \
or (StringUtils.is_chinese(mediainfo.original_title)
and str(mediainfo.original_title) in subtitle):
title_match = True
# 比对别名和译名
if not title_match:
for name in mediainfo.names:
if StringUtils.clear_upper(name) == meta_name:
title_match = True
break
if not title_match:
continue
# 标题匹配成功
logger.info(
f'{mediainfo.title_year} 通过名称匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
# 优先级过滤规则 # 优先级过滤规则
if subscribe.best_version: if subscribe.best_version:
priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules) priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)

View File

@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
logger.info(f'处理资源:{torrent.title} ...') logger.info(f'处理资源:{torrent.title} ...')
# 识别 # 识别
meta = MetaInfo(title=torrent.title, subtitle=torrent.description) meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
if torrent.title != meta.org_string:
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
# 使用站点种子分类,校正类型识别 # 使用站点种子分类,校正类型识别
if meta.type != MediaType.TV \ if meta.type != MediaType.TV \
and torrent.category == MediaType.TV.value: and torrent.category == MediaType.TV.value:
@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
# 识别媒体信息 # 识别媒体信息
mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta) mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
if not mediainfo: if not mediainfo:
logger.warn(f'未识别到媒体信息,标题:{torrent.title}') logger.warn(f'{torrent.title} 未识别到媒体信息')
# 存储空的媒体信息 # 存储空的媒体信息
mediainfo = MediaInfo() mediainfo = MediaInfo()
# 清理多余数据 # 清理多余数据

View File

@ -330,7 +330,7 @@ class TorrentHelper(metaclass=Singleton):
f"{torrent_info.description} " f"{torrent_info.description} "
f"{' '.join(torrent_info.labels or [])} " f"{' '.join(torrent_info.labels or [])} "
f"{torrent_info.volume_factor}") f"{torrent_info.volume_factor}")
# 最少做种人数 # 最少做种人数
min_seeders = filter_rule.get("min_seeders") min_seeders = filter_rule.get("min_seeders")
if min_seeders and torrent_info.seeders < int(min_seeders): if min_seeders and torrent_info.seeders < int(min_seeders):
@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton):
f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}") f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
return False return False
return True return True
@staticmethod
def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo,
torrent: TorrentInfo, logerror: bool = True) -> bool:
"""
检查种子是否匹配媒体信息
:param mediainfo: 需要匹配的媒体信息
:param torrent_meta: 种子识别信息
:param torrent: 种子信息
:param logerror: 是否记录错误日志
"""
# 要匹配的媒体标题、原标题
media_titles = {
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
} - {""}
# 要匹配的媒体别名、译名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
# 识别的种子中英文名
meta_names = {
StringUtils.clear_upper(torrent_meta.cn_name),
StringUtils.clear_upper(torrent_meta.en_name)
} - {""}
# 比对种子识别类型
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
if logerror:
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}'
f'不匹配 {mediainfo.type.value}')
return False
# 比对种子在站点中的类型
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
if logerror:
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}'
f'不匹配 {mediainfo.type.value}')
return False
# 比对年份
if mediainfo.year:
if mediainfo.type == MediaType.TV:
# 剧集年份,每季的年份可能不同
if torrent_meta.year and torrent_meta.year not in [year for year in
mediainfo.season_years.values()]:
if logerror:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
return False
else:
# 电影年份上下浮动1年
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
mediainfo.year,
str(int(mediainfo.year) + 1)]:
if logerror:
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
return False
# 比对标题和原语种标题
if meta_names.intersection(media_titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
return True
# 比对别名和译名
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
return True
# 标题拆分
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
torrent_meta.org_string) if t]
# 在标题中判断是否存在标题、原语种标题、别名、译名
if meta_names.intersection(titles) or media_names.intersection(titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
return True
# 在副标题中判断是否存在标题、原语种标题、别名、译名
if torrent.description:
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
torrent.description) if t}
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'副标题:{torrent.description}')
return True
# 未匹配
if logerror:
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
return False