fix torrents match

2024-04-10 20:02:02 +08:00
parent facd20ba3c
commit f365d93316
4 changed files with 95 additions and 121 deletions
--- a/app/chain/search.py
+++ b/app/chain/search.py
@ -1,5 +1,4 @@
 import pickle
 import re
 import traceback
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
@ -18,7 +17,6 @@ from app.helper.torrent import TorrentHelper
 from app.log import logger
 from app.schemas import NotExistMediaInfo
 from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
 from app.utils.string import StringUtils
 class SearchChain(ChainBase):
@ -179,73 +177,12 @@ class SearchChain(ChainBase):
                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
                if torrent.title != torrent_meta.org_string:
                    logger.info(f"种子名称应用识别词后发生改变：{torrent.title} => {torrent_meta.org_string}")
-                # 比对种子识别类型
+                # 比对种子
-                if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
+                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
-                    logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}，'
+                                                    torrent_meta=torrent_meta,
-                                f'不匹配 {mediainfo.type.value}')
+                                                    torrent=torrent):
-                    continue
+                    # 匹配成功
                # 比对种子在站点中的类型
                if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}，'
                                f'不匹配 {mediainfo.type.value}')
                    continue
                # 比对年份
                if mediainfo.year:
                    if mediainfo.type == MediaType.TV:
                        # 剧集年份，每季的年份可能不同
                        if torrent_meta.year and torrent_meta.year not in [year for year in
                                                                           mediainfo.season_years.values()]:
                            logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
                            continue
                    else:
                        # 电影年份，上下浮动1年
                        if torrent_meta.year not in [str(int(mediainfo.year) - 1),
                                                     mediainfo.year,
                                                     str(int(mediainfo.year) + 1)]:
                            logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
                            continue
                # 识别的中英文名
                meta_names = {
                                 StringUtils.clear_upper(torrent_meta.cn_name),
                                 StringUtils.clear_upper(torrent_meta.en_name)
                             } - {""}
                # 媒体标题、原标题
                media_titles = {
                                   StringUtils.clear_upper(mediainfo.title),
                                   StringUtils.clear_upper(mediainfo.original_title)
                               } - {""}
                # 比对标题和原语种标题
                if meta_names.intersection(media_titles):
                    logger.info(f'{mediainfo.title} 通过标题匹配到资源：{torrent.site_name} - {torrent.title}')
                    _match_torrents.append(torrent)
                    continue
                # 比对别名和译名
                media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
                if media_names:
                    if meta_names.intersection(media_names):
                        logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源：{torrent.site_name} - {torrent.title}')
                        _match_torrents.append(torrent)
                        continue
                # 标题拆分
                titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
                                                                       torrent_meta.org_string) if t]
                # 在标题中判断是否存在标题、原语种标题、别名、译名
                if meta_names.intersection(titles) or media_names.intersection(titles):
                    logger.info(f'{mediainfo.title} 通过标题匹配到资源：{torrent.site_name} - {torrent.title}，'
                                f'标题：{torrent.title}')
                    _match_torrents.append(torrent)
                    continue
                # 在副标题中判断是否存在标题、原语种标题、别名、译名
                if torrent.description:
                    subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
                                                                              torrent.description) if t}
                    if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
                        logger.info(f'{mediainfo.title} 通过副标题匹配到资源：{torrent.site_name} - {torrent.title}，'
                                    f'副标题：{torrent.description}')
                        _match_torrents.append(torrent)
                        continue
                # 未匹配
                logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配，识别名称：{meta_names}')
            # 匹配完成
            logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
            self.progress.update(value=97,
--- a/app/chain/subscribe.py
+++ b/app/chain/subscribe.py
@ -1,6 +1,5 @@
 import json
 import random
 import re
 import time
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Tuple
@ -23,7 +22,6 @@ from app.helper.torrent import TorrentHelper
 from app.log import logger
 from app.schemas import NotExistMediaInfo, Notification
 from app.schemas.types import MediaType, SystemConfigKey, MessageChannel, NotificationType, EventType
 from app.utils.string import StringUtils
 class SubscribeChain(ChainBase):
@ -589,9 +587,9 @@ class SubscribeChain(ChainBase):
                    torrent_meta = context.meta_info
                    torrent_mediainfo = context.media_info
                    torrent_info = context.torrent_info
                    # 如果识别了媒体信息，则比对TMDBID和类型
                    if torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id:
                        # 直接比对媒体信息
                        if torrent_mediainfo.type != mediainfo.type:
                            continue
                        if torrent_mediainfo.tmdb_id \
@ -603,55 +601,12 @@ class SubscribeChain(ChainBase):
                        logger.info(
                            f'{mediainfo.title_year} 通过媒体信ID匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
                    else:
-                        # 按标题匹配
+                        # 没有torrent_mediainfo媒体信息，按标题匹配
-                        # 比对种子识别类型
+                        if not self.torrenthelper.match_torrent(mediainfo=mediainfo,
-                        if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
+                                                                torrent_meta=torrent_meta,
                                                                torrent=torrent_info,
                                                                logerror=False):
                            continue
                        # 比对种子在站点中的类型
                        if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
                            continue
                        # 比对年份
                        if mediainfo.year:
                            if mediainfo.type == MediaType.TV:
                                # 剧集年份，每季的年份可能不同
                                if torrent_meta.year and torrent_meta.year not in [year for year in
                                                                                   mediainfo.season_years.values()]:
                                    continue
                            else:
                                # 电影年份，上下浮动1年
                                if torrent_meta.year not in [str(int(mediainfo.year) - 1),
                                                             mediainfo.year,
                                                             str(int(mediainfo.year) + 1)]:
                                    continue
                        # 标题匹配标志
                        title_match = False
                        # 比对标题和原语种标题
                        meta_name = StringUtils.clear_upper(torrent_meta.name)
                        if meta_name in [
                            StringUtils.clear_upper(mediainfo.title),
                            StringUtils.clear_upper(mediainfo.original_title)
                        ]:
                            title_match = True
                        # 在副标题中判断是否存在标题与原语种标题
                        if not title_match and torrent_info.description:
                            subtitle = re.split(r'[\s/|]+', torrent_info.description)
                            if (StringUtils.is_chinese(mediainfo.title)
                                and str(mediainfo.title) in subtitle) \
                                    or (StringUtils.is_chinese(mediainfo.original_title)
                                        and str(mediainfo.original_title) in subtitle):
                                title_match = True
                        # 比对别名和译名
                        if not title_match:
                            for name in mediainfo.names:
                                if StringUtils.clear_upper(name) == meta_name:
                                    title_match = True
                                    break
                        if not title_match:
                            continue
                        # 标题匹配成功
                        logger.info(
                            f'{mediainfo.title_year} 通过名称匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
                    # 优先级过滤规则
                    if subscribe.best_version:
                        priority_rule = self.systemconfig.get(SystemConfigKey.BestVersionFilterRules)
--- a/app/chain/torrents.py
+++ b/app/chain/torrents.py
@ -184,6 +184,8 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
                    logger.info(f'处理资源：{torrent.title} ...')
                    # 识别
                    meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
                    if torrent.title != meta.org_string:
                        logger.info(f'种子名称应用识别词后发生改变：{torrent.title} => {meta.org_string}')
                    # 使用站点种子分类，校正类型识别
                    if meta.type != MediaType.TV \
                            and torrent.category == MediaType.TV.value:
@ -191,7 +193,7 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
                    # 识别媒体信息
                    mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
                    if not mediainfo:
-                        logger.warn(f'未识别到媒体信息，标题：{torrent.title}')
+                        logger.warn(f'{torrent.title} 未识别到媒体信息')
                        # 存储空的媒体信息
                        mediainfo = MediaInfo()
                    # 清理多余数据
--- a/app/helper/torrent.py
+++ b/app/helper/torrent.py
@ -330,7 +330,7 @@ class TorrentHelper(metaclass=Singleton):
                   f"{torrent_info.description} "
                   f"{' '.join(torrent_info.labels or [])} "
                   f"{torrent_info.volume_factor}")
-        
+
        # 最少做种人数
        min_seeders = filter_rule.get("min_seeders")
        if min_seeders and torrent_info.seeders < int(min_seeders):
@ -402,3 +402,83 @@ class TorrentHelper(metaclass=Singleton):
                            f"{torrent_info.title} {StringUtils.str_filesize(torrent_info.size)} 不匹配大小规则 {size}")
                        return False
        return True
    @staticmethod
    def match_torrent(mediainfo: MediaInfo, torrent_meta: MetaInfo,
                      torrent: TorrentInfo, logerror: bool = True) -> bool:
        """
        Check whether a torrent matches the given media information.

        The checks run in this order and the first decisive one wins:
        recognized type, site category, year, recognized names against the
        media's names, then the media's names against tokens of the torrent
        title and of the torrent description (subtitle).

        :param mediainfo: media information the torrent must match
        :param torrent_meta: meta information recognized from the torrent
        :param torrent: torrent information (site name, title, category, description)
        :param logerror: whether to log a warning when the torrent is rejected;
                         successful matches are always logged at info level
        :return: True when the torrent matches the media, False otherwise
        """
        # Media title and original title, normalized; empty results are dropped
        media_titles = {
            StringUtils.clear_upper(mediainfo.title),
            StringUtils.clear_upper(mediainfo.original_title)
        } - {""}
        # Media aliases and translated names (tolerate a missing names list)
        media_names = {StringUtils.clear_upper(name) for name in (mediainfo.names or []) if name}
        # Chinese / English names recognized from the torrent itself
        meta_names = {
            StringUtils.clear_upper(torrent_meta.cn_name),
            StringUtils.clear_upper(torrent_meta.en_name)
        } - {""}

        # Recognized type: a torrent recognized as TV can only match TV media
        if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
            if logerror:
                logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}，'
                            f'不匹配 {mediainfo.type.value}')
            return False

        # Site category: a torrent the site files under TV can only match TV media
        if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
            if logerror:
                logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}，'
                            f'不匹配 {mediainfo.type.value}')
            return False

        # Year comparison, only when the media has a year
        if mediainfo.year:
            if mediainfo.type == MediaType.TV:
                # Each season may have its own year; a torrent without a
                # recognized year is not rejected here
                if torrent_meta.year and torrent_meta.year not in mediainfo.season_years.values():
                    if logerror:
                        logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.season_years}')
                    return False
            else:
                # Non-TV media: accept the media year +/- 1; a torrent without
                # a recognized year fails this check
                if torrent_meta.year not in [str(int(mediainfo.year) - 1),
                                             mediainfo.year,
                                             str(int(mediainfo.year) + 1)]:
                    if logerror:
                        logger.warn(f'{torrent.site_name} - {torrent.title} 年份不匹配 {mediainfo.year}')
                    return False

        # Recognized names against the media title / original title
        if meta_names.intersection(media_titles):
            logger.info(f'{mediainfo.title} 通过标题匹配到资源：{torrent.site_name} - {torrent.title}')
            return True

        # Recognized names against the media aliases / translated names
        if meta_names.intersection(media_names):
            logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源：{torrent.site_name} - {torrent.title}')
            return True

        # Split the torrent title into tokens (empty when there is no title string)
        titles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
                                                               torrent_meta.org_string or "") if t}
        # Look for the MEDIA's title, original title, aliases or translated
        # names among the title tokens. The names recognized from the torrent
        # must not be used here: they come from this same title, so the check
        # would succeed regardless of which media is being matched.
        if media_titles.intersection(titles) or media_names.intersection(titles):
            logger.info(f'{mediainfo.title} 通过标题匹配到资源：{torrent.site_name} - {torrent.title}')
            return True

        # Same lookup among the tokens of the torrent description (subtitle)
        if torrent.description:
            subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
                                                                      torrent.description) if t}
            if media_titles.intersection(subtitles) or media_names.intersection(subtitles):
                logger.info(f'{mediainfo.title} 通过副标题匹配到资源：{torrent.site_name} - {torrent.title}，'
                            f'副标题：{torrent.description}')
                return True

        # Nothing matched
        if logerror:
            logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配，识别名称：{meta_names}')
        return False