fix torrents match
This commit is contained in:
parent
facd20ba3c
commit
f365d93316
@ -1,5 +1,4 @@
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
|
||||||
import traceback
|
import traceback
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper
|
|||||||
from app.log import logger
|
from app.log import logger
|
||||||
from app.schemas import NotExistMediaInfo
|
from app.schemas import NotExistMediaInfo
|
||||||
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
|
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
|
||||||
from app.utils.string import StringUtils
|
|
||||||
|
|
||||||
|
|
||||||
class SearchChain(ChainBase):
|
class SearchChain(ChainBase):
|
||||||
@ -179,73 +177,12 @@ class SearchChain(ChainBase):
|
|||||||
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
||||||
if torrent.title != torrent_meta.org_string:
|
if torrent.title != torrent_meta.org_string:
|
||||||
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
|
logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
|
||||||
# 比对种子识别类型
|
# 比对种子
|
||||||
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
|
if self.torrenthelper.match_torrent(mediainfo=mediainfo,
|
||||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},'
|
torrent_meta=torrent_meta,
|
||||||
f'不匹配 {mediainfo.type.value}')
|
torrent=torrent):
|
||||||
continue
|
# 匹配成功
|
||||||
# 比对种子在站点中的类型
|
|
||||||
if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
|
|
||||||
logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},'
|
|
||||||
f'不匹配 {mediainfo.type.value}')
|
|
||||||
continue
|
|
||||||
# 比对年份
|
|
||||||
if mediainfo.year:
|
|
||||||
if mediainfo.type == MediaType.TV:
|
|
||||||
# 剧集年份,每季的年份可能不同
|
|
||||||
if torrent_meta.year and torrent_meta.year not in [year for year in
|
|
||||||
mediainfo.season_years.values()]:
|
|
||||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# 电影年份,上下浮动1年
|
|
||||||
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
|
|
||||||
mediainfo.year,
|
|
||||||
str(int(mediainfo.year) + 1)]:
|
|
||||||
logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
|
|
||||||
continue
|
|
||||||
# 识别的中英文名
|
|
||||||
meta_names = {
|
|
||||||
StringUtils.clear_upper(torrent_meta.cn_name),
|
|
||||||
StringUtils.clear_upper(torrent_meta.en_name)
|
|
||||||
} - {""}
|
|
||||||
# 媒体标题、原标题
|
|
||||||
media_titles = {
|
|
||||||
StringUtils.clear_upper(mediainfo.title),
|
|
||||||
StringUtils.clear_upper(mediainfo.original_title)
|
|
||||||
} - {""}
|
|
||||||
# 比对标题和原语种标题
|
|
||||||
if meta_names.intersection(media_titles):
|
|
||||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
|
|
||||||
_match_torrents.append(torrent)
|
_match_torrents.append(torrent)
|
||||||
continue
|
|
||||||
# 比对别名和译名
|
|
||||||
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
|
|
||||||
if media_names:
|
|
||||||
if meta_names.intersection(media_names):
|
|
||||||
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
|
|
||||||
_match_torrents.append(torrent)
|
|
||||||
continue
|
|
||||||
# 标题拆分
|
|
||||||
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
|
|
||||||
torrent_meta.org_string) if t]
|
|
||||||
# 在标题中判断是否存在标题、原语种标题、别名、译名
|
|
||||||
if meta_names.intersection(titles) or media_names.intersection(titles):
|
|
||||||
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title},'
|
|
||||||
f'标题:{torrent.title}')
|
|
||||||
_match_torrents.append(torrent)
|
|
||||||
continue
|
|
||||||
# 在副标题中判断是否存在标题、原语种标题、别名、译名
|
|
||||||
if torrent.description:
|
|
||||||
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
|
|
||||||
torrent.description) if t}
|
|
||||||
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
|
|
||||||
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},'
|
|
||||||
f'副标题:{torrent.description}')
|
|
||||||
_match_torrents.append(torrent)
|
|
||||||
continue
|
|
||||||
# 未匹配
|
|
||||||
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
|
|
||||||
# 匹配完成
|
# 匹配完成
|
||||||
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
|
logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
|
||||||
self.progress.update(value=97,
|
self.progress.update(value=97,
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List, Optional, Union, Tuple
|
from typing import Dict, List, Optional, Union, Tuple
|
||||||
@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper
|
|||||||
from app.log import logger
|
from app.log import logger
|
||||||
from app.schemas import NotExistMediaInfo, Notification
|
from app.schemas import NotExistMediaInfo, Notification
|
||||||
from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
|
from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
|
||||||
from app.utils.string import StringUtils
|
|
||||||
|
|
||||||
|
|
||||||
class SubscribeChain(ChainBase):
|
class SubscribeChain(ChainBase):
|
||||||
@ -589,9 +587,9 @@ class SubscribeChain(ChainBase):
|
|||||||
torrent_meta = context.meta_info
|
torrent_meta = context.meta_info
|
||||||
torrent_mediainfo = context.media_info
|
torrent_mediainfo = context.media_info
|
||||||
torrent_info = context.torrent_info
|
torrent_info = context.torrent_info
|
||||||
|
|
||||||
# 如果识别了媒体信息,则比对TMDBID和类型
|
# 如果识别了媒体信息,则比对TMDBID和类型
|
||||||
if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
|
if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
|
||||||
|
# 直接比对媒体信息
|
||||||
if torrent_mediainfo.type != mediainfo.type:
|
if torrent_mediainfo.type != mediainfo.type:
|
||||||
continue
|
continue
|
||||||
if torrent_mediainfo.tmdb_id \
|
if torrent_mediainfo.tmdb_id \
|
||||||
@ -603,55 +601,12 @@ class SubscribeChain(ChainBase):
|
|||||||
logger.info(
|
logger.info(
|
||||||
f'{mediainfo.title_year} 通过媒体信ID匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
|
f'{mediainfo.title_year} 通过媒体信ID匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
|
||||||
else:
|
else:
|
||||||
# 按标题匹配
|
# 没有torrent_mediainfo媒体信息,按标题匹配
|
||||||
# 比对种子识别类型
|
if not self.torrenthelper.match_torrent(mediainfo=mediainfo,
|
||||||
if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
|
torrent_meta=torrent_meta,
|
||||||
|
torrent=torrent_info,
|
||||||
|
logerror=False):
|
||||||
continue
|
continue
|
||||||
# 比对种子在站点中的类型
|
|
||||||
if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
|
|
||||||
continue
|
|
||||||
# 比对年份
|
|
||||||
if mediainfo.year:
|
|
||||||
if mediainfo.type == MediaType.TV:
|
|
||||||
# 剧集年份,每季的年份可能不同
|
|
||||||
if torrent_meta.year and torrent_meta.year not in [year for year in
|
|
||||||
mediainfo.season_years.values()]:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# 电影年份,上下浮动1年
|
|
||||||
if torrent_meta.year not in [str(int(mediainfo.year) - 1),
|
|
||||||
mediainfo.year,
|
|
||||||
str(int(mediainfo.year) + 1)]:
|
|
||||||
continue
|
|
||||||
# 标题匹配标志
|
|
||||||
title_match = False
|
|
||||||
# 比对标题和原语种标题
|
|
||||||
meta_name = StringUtils.clear_upper(torrent_meta.name)
|
|
||||||
if meta_name in [
|
|
||||||
StringUtils.clear_upper(mediainfo.title),
|
|
||||||
StringUtils.clear_upper(mediainfo.original_title)
|
|
||||||
]:
|
|
||||||
title_match = True
|
|
||||||
# 在副标题中判断是否存在标题与原语种标题
|
|
||||||
if not title_match and torrent_info.description:
|
|
||||||
subtitle = re.split(r'[\s/|]+', torrent_info.description)
|
|
||||||
if (StringUtils.is_chinese(mediainfo.title)
|
|
||||||
and str(mediainfo.title) in subtitle) \
|
|
||||||
or (StringUtils.is_chinese(mediainfo.original_title)
|
|
||||||
and str(mediainfo.original_title) in subtitle):
|
|
||||||
title_match = True
|
|
||||||
# 比对别名和译名
|
|
||||||
if not title_match:
|
|
||||||
for name in mediainfo.names:
|
|
||||||
if StringUtils.clear_upper(name) == meta_name:
|
|
||||||
title_match = True
|
|
||||||
break
|
|
||||||
if not title_match:
|
|
||||||
continue
|
|
||||||
# 标题匹配成功
|
|
||||||
logger.info(
|
|
||||||
f'{mediainfo.title_year} 通过名称匹配到资源:{torrent_info.site_name} - {torrent_info.title}')
|
|
||||||
|
|
||||||
# 优先级过滤规则
|
# 优先级过滤规则
|
||||||
if subscribe.best_version:
|
if subscribe.best_version:
|
||||||
priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)
|
priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)
|
||||||
|
@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
|
|||||||
logger.info(f'处理资源:{torrent.title} ...')
|
logger.info(f'处理资源:{torrent.title} ...')
|
||||||
# 识别
|
# 识别
|
||||||
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
|
||||||
|
if torrent.title != meta.org_string:
|
||||||
|
logger.info(f'种子名称应用识别词后发生改变:{torrent.title} => {meta.org_string}')
|
||||||
# 使用站点种子分类,校正类型识别
|
# 使用站点种子分类,校正类型识别
|
||||||
if meta.type != MediaType.TV \
|
if meta.type != MediaType.TV \
|
||||||
and torrent.category == MediaType.TV.value:
|
and torrent.category == MediaType.TV.value:
|
||||||
@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
|
|||||||
# 识别媒体信息
|
# 识别媒体信息
|
||||||
mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
|
mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
|
||||||
if not mediainfo:
|
if not mediainfo:
|
||||||
logger.warn(f'未识别到媒体信息,标题:{torrent.title}')
|
logger.warn(f'{torrent.title} 未识别到媒体信息')
|
||||||
# 存储空的媒体信息
|
# 存储空的媒体信息
|
||||||
mediainfo = MediaInfo()
|
mediainfo = MediaInfo()
|
||||||
# 清理多余数据
|
# 清理多余数据
|
||||||
|
@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton):
|
|||||||
f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
|
f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@staticmethod
def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo,
                  torrent: TorrentInfo, logerror: bool = True) -> bool:
    """
    Check whether a torrent matches the requested media.

    The checks run in order: media type (recognised and site category),
    year, recognised names against the media titles, recognised names
    against the media aliases/translations, and finally the media
    titles/aliases against the tokens of the torrent title and subtitle.

    :param mediainfo: media information the torrent must match
    :param torrent_meta: metadata recognised from the torrent title/subtitle
    :param torrent: torrent information as reported by the site
    :param logerror: whether to log a warning when the torrent is rejected
                     (successful matches are always logged at info level)
    :return: True when the torrent matches the media, otherwise False
    """
    # Media title and original-language title to match against
    media_titles = {
        StringUtils.clear_upper(mediainfo.title),
        StringUtils.clear_upper(mediainfo.original_title)
    } - {""}
    # Media aliases and translated names to match against
    media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
    # Chinese / English names recognised from the torrent itself
    meta_names = {
        StringUtils.clear_upper(torrent_meta.cn_name),
        StringUtils.clear_upper(torrent_meta.en_name)
    } - {""}

    # Type recognised from the torrent title: a TV torrent never matches non-TV media
    if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
        if logerror:
            logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value},'
                        f'不匹配 {mediainfo.type.value}')
        return False

    # Category assigned by the site: same rule as above
    if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
        if logerror:
            logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category},'
                        f'不匹配 {mediainfo.type.value}')
        return False

    # Year comparison (only when the media has a year)
    if mediainfo.year:
        if mediainfo.type == MediaType.TV:
            # Each season may have its own year; a torrent without a year is let through
            if torrent_meta.year and torrent_meta.year not in mediainfo.season_years.values():
                if logerror:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
                return False
        else:
            # Non-TV media: allow the year to be off by one in either direction.
            # A torrent with no recognised year is rejected here as well.
            if torrent_meta.year not in [str(int(mediainfo.year) - 1),
                                         mediainfo.year,
                                         str(int(mediainfo.year) + 1)]:
                if logerror:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
                return False

    # Recognised names vs. media title / original title
    if meta_names.intersection(media_titles):
        logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
        return True

    # Recognised names vs. media aliases / translated names
    if media_names:
        if meta_names.intersection(media_names):
            logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
            return True

    # Split the torrent title into tokens
    titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
                                                           torrent_meta.org_string) if t]
    # Look for the media title, original title, aliases or translations among
    # the title tokens. The media-side names must be used here: meta_names is
    # parsed from this very title, so comparing it with the title's own tokens
    # would accept torrents regardless of the requested media.
    if media_titles.intersection(titles) or media_names.intersection(titles):
        logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
        return True

    # Same lookup in the subtitle (site description) tokens
    if torrent.description:
        subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
                                                                  torrent.description) if t}
        if media_titles.intersection(subtitles) or media_names.intersection(subtitles):
            logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title},'
                        f'副标题:{torrent.description}')
            return True

    # Nothing matched
    if logerror:
        logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配,识别名称:{meta_names}')
    return False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user