From 1d3754de3a0897c35927726509d1c6aaa1193d10 Mon Sep 17 00:00:00 2001
From: jxxghp <jxxghp@gmail.com>
Date: Sun, 11 Jun 2023 07:45:12 +0800
Subject: [PATCH] fix tmdb match

---
 app/chain/search.py            |  20 +++--
 app/core/context.py            |   5 ++
 app/modules/themoviedb/tmdb.py | 148 +++++++++++++++------------------
 app/utils/string.py            |   7 +-
 4 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/app/chain/search.py b/app/chain/search.py
index ce67a192..ad6a3326 100644
--- a/app/chain/search.py
+++ b/app/chain/search.py
@@ -7,6 +7,7 @@ from app.core.meta import MetaBase
 from app.core.metainfo import MetaInfo
 from app.helper.sites import SitesHelper
 from app.log import logger
+from app.utils.string import StringUtils
 
 
 class SearchChain(ChainBase):
@@ -70,20 +71,23 @@ class SearchChain(ChainBase):
                 if torrent.imdbid \
                         and mediainfo.imdb_id \
                         and torrent.imdbid == mediainfo.imdb_id:
+                    logger.info(f'{mediainfo.title} 匹配到资源：{torrent.title}')
                     _match_torrents.append(torrent)
                     continue
                 # 识别
                 torrent_meta = MetaInfo(torrent.title, torrent.description)
-                # 识别媒体信息
-                torrent_mediainfo: MediaInfo = self.recognize_media(meta=torrent_meta)
-                if not torrent_mediainfo:
-                    logger.warn(f'未识别到媒体信息，标题：{torrent.title}')
-                    continue
-                # 过滤
-                if torrent_mediainfo.tmdb_id == mediainfo.tmdb_id \
-                        and torrent_mediainfo.type == mediainfo.type:
+                # 比对标题
+                if torrent_meta.get_name() in [mediainfo.title, mediainfo.original_title]:
                     logger.info(f'{mediainfo.title} 匹配到资源：{torrent.title}')
                     _match_torrents.append(torrent)
+                    continue
+                # Compare against aliases/translated titles; names may be unset (None), so fall back to []
+                for name in mediainfo.names or []:
+                    if StringUtils.clear(name).strip().upper() == \
+                            StringUtils.clear(torrent_meta.get_name()).strip().upper():
+                        logger.info(f'{mediainfo.title} 匹配到资源：{torrent.title}')
+                        _match_torrents.append(torrent)
+                        break
         else:
             _match_torrents = torrents
         # 组装上下文返回
diff --git a/app/core/context.py b/app/core/context.py
index a3860cc4..596f6f5e 100644
--- a/app/core/context.py
+++ b/app/core/context.py
@@ -118,6 +118,8 @@ class MediaInfo:
     vote_average: int = 0
     # 描述
     overview: Optional[str] = None
+    # 所有别名和译名
+    names: Optional[list] = []
     # 各季的剧集清单信息
     seasons: Optional[dict] = {}
     # 二级分类
@@ -134,6 +136,7 @@ class MediaInfo:
     def __init__(self, tmdb_info: dict = None, douban_info: dict = None):
         # 初始化
         self.seasons = {}
+        self.names = []
         self.directors = []
         self.actors = []
         self.tmdb_info = {}
@@ -276,6 +279,8 @@ class MediaInfo:
             self.backdrop_path = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{info.get('backdrop_path')}"
         # 导演和演员
         self.directors, self.actors = __directors_actors(info)
+        # Aliases and translated titles; fall back to [] so consumers can always iterate
+        self.names = info.get('names') or []
 
     def set_douban_info(self, info: dict):
         """
diff --git a/app/modules/themoviedb/tmdb.py b/app/modules/themoviedb/tmdb.py
index f26d857d..b10c53ef 100644
--- a/app/modules/themoviedb/tmdb.py
+++ b/app/modules/themoviedb/tmdb.py
@@ -1,6 +1,6 @@
 import traceback
 from functools import lru_cache
-from typing import Optional, Tuple, List
+from typing import Optional, List
 
 import zhconv
 from lxml import etree
@@ -108,27 +108,24 @@ class TmdbHelper:
             return False
         if not isinstance(tmdb_names, list):
             tmdb_names = [tmdb_names]
-        file_name = StringUtils.clear_special_chars(file_name).upper()
+        file_name = StringUtils.clear(file_name).upper()
         for tmdb_name in tmdb_names:
-            tmdb_name = StringUtils.clear_special_chars(tmdb_name).strip().upper()
+            tmdb_name = StringUtils.clear(tmdb_name).strip().upper()
             if file_name == tmdb_name:
                 return True
         return False
 
-    def __get_names(self, mtype: MediaType, tmdb_id: int) -> Tuple[Optional[dict], List[str]]:
+    @staticmethod
+    def __get_names(tmdb_info: dict) -> List[str]:
         """
         搜索tmdb中所有的标题和译名，用于名称匹配
-        :param mtype: 类型：电影、电视剧、动漫
-        :param tmdb_id: TMDB的ID
+        :param tmdb_info: TMDB信息
         :return: 所有译名的清单
         """
-        if not mtype or not tmdb_id:
-            return {}, []
-        ret_names = []
-        tmdb_info = self.get_info(mtype=mtype, tmdbid=tmdb_id)
         if not tmdb_info:
-            return tmdb_info, []
-        if mtype == MediaType.MOVIE:
+            return []
+        ret_names = []
+        if tmdb_info.get('media_type') == MediaType.MOVIE:
             alternative_titles = tmdb_info.get("alternative_titles", {}).get("titles", [])
             for alternative_title in alternative_titles:
                 title = alternative_title.get("title")
@@ -150,7 +147,7 @@ class TmdbHelper:
                 name = translation.get("data", {}).get("name")
                 if name and name not in ret_names:
                     ret_names.append(name)
-        return tmdb_info, ret_names
+        return ret_names
 
     def match(self, name: str,
               mtype: MediaType,
@@ -239,7 +236,7 @@ class TmdbHelper:
             logger.debug(f"{name} 未找到相关电影信息!")
             return {}
         else:
-            info = {}
+            # 匹配标题、原标题
             if year:
                 for movie in movies:
                     if movie.get('release_date'):
@@ -254,25 +251,22 @@ class TmdbHelper:
                     if self.__compare_names(name, movie.get('title')) \
                             or self.__compare_names(name, movie.get('original_title')):
                         return movie
-            if not info:
-                index = 0
-                for movie in movies:
-                    if year:
-                        if not movie.get('release_date'):
-                            continue
-                        if movie.get('release_date')[0:4] != str(year):
-                            continue
-                        index += 1
-                        info, names = self.__get_names(MediaType.MOVIE, movie.get("id"))
-                        if self.__compare_names(name, names):
-                            return info
-                    else:
-                        index += 1
-                        info, names = self.__get_names(MediaType.MOVIE, movie.get("id"))
-                        if self.__compare_names(name, names):
-                            return info
-                    if index > 5:
-                        break
+            # 匹配别名、译名
+            index = 0
+            for movie in movies:
+                # 有年份先过滤
+                if year:
+                    if not movie.get('release_date'):
+                        continue
+                    if movie.get('release_date')[0:4] != str(year):
+                        continue
+                index += 1
+                if not movie.get("names"):
+                    movie = self.get_info(MediaType.MOVIE, movie.get("id"))
+                if movie and self.__compare_names(name, movie.get("names")):
+                    return movie
+                if index > 5:
+                    break
         return {}
 
     def __search_tv_by_name(self, name: str, year: str) -> Optional[dict]:
@@ -299,7 +293,7 @@ class TmdbHelper:
             logger.debug(f"{name} 未找到相关剧集信息!")
             return {}
         else:
-            info = {}
+            # 匹配标题、原标题
             if year:
                 for tv in tvs:
                     if tv.get('first_air_date'):
@@ -314,25 +308,22 @@ class TmdbHelper:
                     if self.__compare_names(name, tv.get('name')) \
                             or self.__compare_names(name, tv.get('original_name')):
                         return tv
-            if not info:
-                index = 0
-                for tv in tvs:
-                    if year:
-                        if not tv.get('first_air_date'):
-                            continue
-                        if tv.get('first_air_date')[0:4] != str(year):
-                            continue
-                        index += 1
-                        info, names = self.__get_names(MediaType.TV, tv.get("id"))
-                        if self.__compare_names(name, names):
-                            return info
-                    else:
-                        index += 1
-                        info, names = self.__get_names(MediaType.TV, tv.get("id"))
-                        if self.__compare_names(name, names):
-                            return info
-                    if index > 5:
-                        break
+            # 匹配别名、译名
+            index = 0
+            for tv in tvs:
+                # 有年份先过滤
+                if year:
+                    if not tv.get('first_air_date'):
+                        continue
+                    if tv.get('first_air_date')[0:4] != str(year):
+                        continue
+                index += 1
+                if not tv.get("names"):
+                    tv = self.get_info(MediaType.TV, tv.get("id"))  # may come back empty
+                if tv and self.__compare_names(name, tv.get("names")):  # same guard as the movie path
+                    return tv
+                if index > 5:
+                    break
         return {}
 
     def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]:
@@ -374,18 +365,20 @@ class TmdbHelper:
             logger.debug("%s 未找到季%s相关信息!" % (name, season_number))
             return {}
         else:
+            # 匹配标题、原标题
             for tv in tvs:
                 if (self.__compare_names(name, tv.get('name'))
                     or self.__compare_names(name, tv.get('original_name'))) \
                         and (tv.get('first_air_date') and tv.get('first_air_date')[0:4] == str(season_year)):
                     return tv
-
+            # 匹配别名、译名
             for tv in tvs[:5]:
-                info, names = self.__get_names(MediaType.TV, tv.get("id"))
-                if not self.__compare_names(name, names):
+                if not tv.get("names"):
+                    tv = self.get_info(MediaType.TV, tv.get("id"))  # may come back empty
+                if not tv or not self.__compare_names(name, tv.get("names")):
                     continue
-                if __season_match(tv_info=info, _season_year=season_year):
-                    return info
+                if __season_match(tv_info=tv, _season_year=season_year):
+                    return tv
         return {}
 
     @staticmethod
@@ -444,35 +437,30 @@ class TmdbHelper:
         logger.debug(f"API返回：{str(self.search.total_results)}")
         if len(multis) == 0:
             logger.debug(f"{name} 未找到相关媒体息!")
-            return {}
         else:
-            info = {}
+            # 匹配标题、原标题
             for multi in multis:
                 if multi.get("media_type") == "movie":
                     if self.__compare_names(name, multi.get('title')) \
                             or self.__compare_names(name, multi.get('original_title')):
-                        info = multi
+                        return multi
                 elif multi.get("media_type") == "tv":
                     if self.__compare_names(name, multi.get('name')) \
                             or self.__compare_names(name, multi.get('original_name')):
-                        info = multi
-            if not info:
-                for multi in multis[:5]:
-                    if multi.get("media_type") == "movie":
-                        movie_info, names = self.__get_names(MediaType.MOVIE, multi.get("id"))
-                        if self.__compare_names(name, names):
-                            info = movie_info
-                    elif multi.get("media_type") == "tv":
-                        tv_info, names = self.__get_names(MediaType.TV, multi.get("id"))
-                        if self.__compare_names(name, names):
-                            info = tv_info
-        # 返回
-        if info:
-            info['media_type'] = MediaType.MOVIE if info.get('media_type') in ['movie',
-                                                                               MediaType.MOVIE] else MediaType.TV
-        else:
-            logger.info("%s 在TMDB中未找到媒体信息!" % name)
-        return info
+                        return multi
+            # 匹配别名、译名
+            for multi in multis[:5]:
+                if multi.get("media_type") == "movie":
+                    if not multi.get("names"):
+                        multi = self.get_info(MediaType.MOVIE, multi.get("id"))  # may come back empty
+                    if multi and self.__compare_names(name, multi.get("names")):
+                        return multi
+                elif multi.get("media_type") == "tv":
+                    if not multi.get("names"):
+                        multi = self.get_info(MediaType.TV, multi.get("id"))  # may come back empty
+                    if multi and self.__compare_names(name, multi.get("names")):
+                        return multi
+        return {}
 
     @lru_cache(maxsize=128)
     def search_web(self, name: str, mtype: MediaType) -> Optional[dict]:
@@ -565,6 +553,8 @@ class TmdbHelper:
         if tmdb_info:
             # 转换genreid
             tmdb_info['genre_ids'] = __get_genre_ids(tmdb_info.get('genres'))
+            # 别名和译名
+            tmdb_info['names'] = self.__get_names(tmdb_info)
             # 转换中文标题
             self.__update_tmdbinfo_cn_title(tmdb_info)
 
diff --git a/app/utils/string.py b/app/utils/string.py
index f5c7ae14..932ab862 100644
--- a/app/utils/string.py
+++ b/app/utils/string.py
@@ -148,8 +148,8 @@ class StringUtils:
         return 0.0
 
     @staticmethod
-    def clear_special_chars(text: Union[list, str], replace_word: str = "",
-                            allow_space: bool = False) -> Union[list, str]:
+    def clear(text: Union[list, str], replace_word: str = "",
+              allow_space: bool = False) -> Union[list, str]:
         """
         忽略特殊字符
         """
@@ -167,7 +167,7 @@ class StringUtils:
             else:
                 return re.sub(r"\s+", " ", text).strip()
         else:
-            return [StringUtils.clear_special_chars(x) for x in text]
+            return [StringUtils.clear(x) for x in text]
 
     @staticmethod
     def str_filesize(size: Union[str, float, int], pre: int = 2) -> str:
@@ -552,4 +552,3 @@ class StringUtils:
             # 端口号不是整数，返回 None 表示无效
             return None, None
         return domain, port
-