feat：使用站点种子归类优化识别匹配

fix：优先级规则复杂时，过滤时间很长，调整到最后 fix #1432
2024-02-29 17:18:01 +08:00
parent 2e661f8759
commit 5dd36e95e0
5 changed files with 127 additions and 54 deletions
--- a/app/chain/search.py
+++ b/app/chain/search.py
@@ -135,28 +135,8 @@ class SearchChain(ChainBase):
        if not torrents:
            logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
            return []
-        # 过滤种子
-        if priority_rule is None:
-            # 取搜索优先级规则
-            priority_rule = self.systemconfig.get(SystemConfigKey.SearchFilterRules)
-        if priority_rule:
-            logger.info(f'开始过滤资源，当前规则：{priority_rule} ...')
-            result: List[TorrentInfo] = self.filter_torrents(rule_string=priority_rule,
-                                                             torrent_list=torrents,
-                                                             season_episodes=season_episodes,
-                                                             mediainfo=mediainfo)
-            if result is not None:
-                torrents = result
-            if not torrents:
-                logger.warn(f'{keyword or mediainfo.title} 没有符合优先级规则的资源')
-                return []
-        # 使用过滤规则再次过滤
-        torrents = self.filter_torrents_by_rule(torrents=torrents,
-                                                mediainfo=mediainfo,
-                                                filter_rule=filter_rule)
-        if not torrents:
-            logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
-            return []
+        # 开始新进度
+        self.progress.start(ProgressKey.Search)
        # 匹配的资源
        _match_torrents = []
        # 总数
@@ -164,29 +144,32 @@ class SearchChain(ChainBase):
        # 已处理数
        _count = 0
        if mediainfo:
-            self.progress.start(ProgressKey.Search)
-            logger.info(f'开始匹配，总 {_total} 个资源 ...')
            # 英文标题应该在别名/原标题中，不需要再匹配
            logger.info(f"标题：{mediainfo.title}，原标题：{mediainfo.original_title}，别名：{mediainfo.names}")
            self.progress.update(value=0, text=f'开始匹配，总 {_total} 个资源 ...', key=ProgressKey.Search)
            for torrent in torrents:
                _count += 1
-                self.progress.update(value=(_count / _total) * 100,
+                self.progress.update(value=(_count / _total) * 96,
                                     text=f'正在匹配 {torrent.site_name}，已完成 {_count} / {_total} ...',
                                     key=ProgressKey.Search)
                # 比对IMDBID
                if torrent.imdbid \
                        and mediainfo.imdb_id \
                        and torrent.imdbid == mediainfo.imdb_id:
-                    logger.info(f'{mediainfo.title} 匹配到资源：{torrent.site_name} - {torrent.title}')
+                    logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源：{torrent.site_name} - {torrent.title}')
                    _match_torrents.append(torrent)
                    continue
                # 识别
                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
-                # 比对类型
-                if (torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV) \
-                        or (torrent_meta.type != MediaType.TV and mediainfo.type == MediaType.TV):
-                    logger.warn(f'{torrent.site_name} - {torrent.title} 类型不匹配')
+                # 比对种子识别类型
+                if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
+                    logger.warn(f'{torrent.site_name} - {torrent.title} 种子标题类型为 {torrent_meta.type.value}，'
+                                f'需要是 {mediainfo.type.value}，不匹配')
+                    continue
+                # 比对种子在站点中的类型
+                if torrent.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
+                    logger.warn(f'{torrent.site_name} - {torrent.title} 种子在站点中归类为 {torrent.category}，'
+                                f'需要是 {mediainfo.type.value}，不匹配')
                    continue
                # 比对年份
                if mediainfo.year:
@@ -231,21 +214,55 @@ class SearchChain(ChainBase):
                        break
                else:
                    logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配')
-            self.progress.update(value=100,
+            logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
+            self.progress.update(value=97,
                                 text=f'匹配完成，共匹配到 {len(_match_torrents)} 个资源',
                                 key=ProgressKey.Search)
-            self.progress.end(ProgressKey.Search)
        else:
            _match_torrents = torrents
-        logger.info(f"匹配完成，共匹配到 {len(_match_torrents)} 个资源")
+        # 开始过滤
+        self.progress.update(value=98, text=f'开始过滤，总 {len(_match_torrents)} 个资源，请稍候...',
+                             key=ProgressKey.Search)
+        # 过滤种子
+        if priority_rule is None:
+            # 取搜索优先级规则
+            priority_rule = self.systemconfig.get(SystemConfigKey.SearchFilterRules)
+        if priority_rule:
+            logger.info(f'开始优先级规则过滤，当前规则：{priority_rule} ...')
+            result: List[TorrentInfo] = self.filter_torrents(rule_string=priority_rule,
+                                                             torrent_list=_match_torrents,
+                                                             season_episodes=season_episodes,
+                                                             mediainfo=mediainfo)
+            if result is not None:
+                _match_torrents = result
+            if not _match_torrents:
+                logger.warn(f'{keyword or mediainfo.title} 没有符合优先级规则的资源')
+                return []
+        # 使用过滤规则再次过滤
+        if filter_rule:
+            logger.info(f'开始过滤规则过滤，当前规则：{filter_rule} ...')
+            _match_torrents = self.filter_torrents_by_rule(torrents=_match_torrents,
+                                                           mediainfo=mediainfo,
+                                                           filter_rule=filter_rule)
+            if not _match_torrents:
+                logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
+                return []
        # 去掉mediainfo中多余的数据
        mediainfo.clear()
        # 组装上下文
        contexts = [Context(meta_info=MetaInfo(title=torrent.title, subtitle=torrent.description),
                            media_info=mediainfo,
                            torrent_info=torrent) for torrent in _match_torrents]
+
+        logger.info(f"过滤完成，剩余 {_total} 个资源")
+        self.progress.update(value=99, text=f'过滤完成，剩余 {_total} 个资源', key=ProgressKey.Search)
        # 排序
+        self.progress.update(value=100,
+                             text=f'正在对 {len(contexts)} 个资源进行排序，请稍候...',
+                             key=ProgressKey.Search)
        contexts = self.torrenthelper.sort_torrents(contexts)
+        # 结束进度
+        self.progress.end(ProgressKey.Search)
        # 返回
        return contexts

--- a/app/chain/subscribe.py
+++ b/app/chain/subscribe.py
@@ -566,12 +566,14 @@ class SubscribeChain(ChainBase):
                        if torrent_mediainfo.douban_id \
                                and torrent_mediainfo.douban_id != mediainfo.douban_id:
                            continue
-                        logger.info(f'{mediainfo.title_year} 通过媒体信息匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
+                        logger.info(f'{mediainfo.title_year} 通过媒体信ID匹配到资源：{torrent_info.site_name} - {torrent_info.title}')
                    else:
                        # 按标题匹配
-                        # 比对类型
-                        if (torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV) \
-                                or (torrent_meta.type != MediaType.TV and mediainfo.type == MediaType.TV):
+                        # 比对种子识别类型
+                        if torrent_meta.type == MediaType.TV and mediainfo.type != MediaType.TV:
+                            continue
+                        # 比对种子在站点中的类型
+                        if torrent_info.category == MediaType.TV.value and mediainfo.type != MediaType.TV:
                            continue
                        # 比对年份
                        if mediainfo.year:
--- a/app/chain/torrents.py
+++ b/app/chain/torrents.py
@@ -16,7 +16,7 @@ from app.helper.sites import SitesHelper
 from app.helper.torrent import TorrentHelper
 from app.log import logger
 from app.schemas import Notification
-from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType
+from app.schemas.types import SystemConfigKey, MessageChannel, NotificationType, MediaType
 from app.utils.singleton import Singleton
 from app.utils.string import StringUtils

@@ -184,6 +184,10 @@ class TorrentsChain(ChainBase, metaclass=Singleton):
                    logger.info(f'处理资源：{torrent.title} ...')
                    # 识别
                    meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
+                    # 使用站点种子分类，校正类型识别
+                    if meta.type != MediaType.TV \
+                            and torrent.category == MediaType.TV.value:
+                        meta.type = MediaType.TV
                    # 识别媒体信息
                    mediainfo: MediaInfo = self.mediachain.recognize_by_meta(meta)
                    if not mediainfo: