From 1d3754de3a0897c35927726509d1c6aaa1193d10 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Sun, 11 Jun 2023 07:45:12 +0800 Subject: [PATCH] fix tmdb match --- app/chain/search.py | 20 +++-- app/core/context.py | 5 ++ app/modules/themoviedb/tmdb.py | 148 +++++++++++++++------------------ app/utils/string.py | 7 +- 4 files changed, 89 insertions(+), 91 deletions(-) diff --git a/app/chain/search.py b/app/chain/search.py index ce67a192..ad6a3326 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -7,6 +7,7 @@ from app.core.meta import MetaBase from app.core.metainfo import MetaInfo from app.helper.sites import SitesHelper from app.log import logger +from app.utils.string import StringUtils class SearchChain(ChainBase): @@ -70,20 +71,23 @@ class SearchChain(ChainBase): if torrent.imdbid \ and mediainfo.imdb_id \ and torrent.imdbid == mediainfo.imdb_id: + logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}') _match_torrents.append(torrent) continue # 识别 torrent_meta = MetaInfo(torrent.title, torrent.description) - # 识别媒体信息 - torrent_mediainfo: MediaInfo = self.recognize_media(meta=torrent_meta) - if not torrent_mediainfo: - logger.warn(f'未识别到媒体信息,标题:{torrent.title}') - continue - # 过滤 - if torrent_mediainfo.tmdb_id == mediainfo.tmdb_id \ - and torrent_mediainfo.type == mediainfo.type: + # 比对标题 + if torrent_meta.get_name() in [mediainfo.title, mediainfo.original_title]: logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}') _match_torrents.append(torrent) + continue + # 比对别名和译名 + for name in mediainfo.names: + if StringUtils.clear(name).strip().upper() == \ + StringUtils.clear(torrent_meta.get_name()).strip().upper(): + logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}') + _match_torrents.append(torrent) + break else: _match_torrents = torrents # 组装上下文返回 diff --git a/app/core/context.py b/app/core/context.py index a3860cc4..596f6f5e 100644 --- a/app/core/context.py +++ b/app/core/context.py @@ -118,6 +118,8 @@ class MediaInfo: vote_average: int = 0 # 描述 overview: Optional[str] = None + # 所有别名和译名 + names: Optional[list] = [] # 各季的剧集清单信息 seasons: Optional[dict] = {} # 二级分类 @@ -134,6 +136,7 @@ class MediaInfo: def __init__(self, tmdb_info: dict = None, douban_info: dict = None): # 初始化 self.seasons = {} + self.names = [] self.directors = [] self.actors = [] self.tmdb_info = {} @@ -276,6 +279,8 @@ class MediaInfo: self.backdrop_path = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{info.get('backdrop_path')}" # 导演和演员 self.directors, self.actors = __directors_actors(info) + # 别名和译名 + self.names = info.get('names') def set_douban_info(self, info: dict): """ diff --git a/app/modules/themoviedb/tmdb.py b/app/modules/themoviedb/tmdb.py index f26d857d..b10c53ef 100644 --- a/app/modules/themoviedb/tmdb.py +++ b/app/modules/themoviedb/tmdb.py @@ -1,6 +1,6 @@ import traceback from functools import lru_cache -from typing import Optional, Tuple, List +from typing import Optional, List import zhconv from lxml import etree @@ -108,27 +108,24 @@ class TmdbHelper: return False if not isinstance(tmdb_names, list): tmdb_names = [tmdb_names] - file_name = StringUtils.clear_special_chars(file_name).upper() + file_name = StringUtils.clear(file_name).upper() for tmdb_name in tmdb_names: - tmdb_name = StringUtils.clear_special_chars(tmdb_name).strip().upper() + tmdb_name = StringUtils.clear(tmdb_name).strip().upper() if file_name == tmdb_name: return True return False - def __get_names(self, mtype: MediaType, tmdb_id: int) -> Tuple[Optional[dict], List[str]]: + @staticmethod + def __get_names(tmdb_info: dict) -> List[str]: """ 搜索tmdb中所有的标题和译名,用于名称匹配 - :param mtype: 类型:电影、电视剧、动漫 - :param tmdb_id: TMDB的ID + :param tmdb_info: TMDB信息 :return: 所有译名的清单 """ - if not mtype or not tmdb_id: - return {}, [] - ret_names = [] - tmdb_info = self.get_info(mtype=mtype, tmdbid=tmdb_id) if not tmdb_info: - return tmdb_info, [] - if mtype == MediaType.MOVIE: + return [] + ret_names = [] + if tmdb_info.get('media_type') == MediaType.MOVIE: alternative_titles = tmdb_info.get("alternative_titles", {}).get("titles", []) for alternative_title in alternative_titles: title = alternative_title.get("title") @@ -150,7 +147,7 @@ class TmdbHelper: name = translation.get("data", {}).get("name") if name and name not in ret_names: ret_names.append(name) - return tmdb_info, ret_names + return ret_names def match(self, name: str, mtype: MediaType, @@ -239,7 +236,7 @@ class TmdbHelper: logger.debug(f"{name} 未找到相关电影信息!") return {} else: - info = {} + # 匹配标题、原标题 if year: for movie in movies: if movie.get('release_date'): @@ -254,25 +251,22 @@ class TmdbHelper: if self.__compare_names(name, movie.get('title')) \ or self.__compare_names(name, movie.get('original_title')): return movie - if not info: - index = 0 - for movie in movies: - if year: - if not movie.get('release_date'): - continue - if movie.get('release_date')[0:4] != str(year): - continue - index += 1 - info, names = self.__get_names(MediaType.MOVIE, movie.get("id")) - if self.__compare_names(name, names): - return info - else: - index += 1 - info, names = self.__get_names(MediaType.MOVIE, movie.get("id")) - if self.__compare_names(name, names): - return info - if index > 5: - break + # 匹配别名、译名 + index = 0 + for movie in movies: + # 有年份先过滤 + if year: + if not movie.get('release_date'): + continue + if movie.get('release_date')[0:4] != str(year): + continue + index += 1 + if not movie.get("names"): + movie = self.get_info(MediaType.MOVIE, movie.get("id")) + if movie and self.__compare_names(name, movie.get("names")): + return movie + if index > 5: + break return {} def __search_tv_by_name(self, name: str, year: str) -> Optional[dict]: @@ -299,7 +293,7 @@ class TmdbHelper: logger.debug(f"{name} 未找到相关剧集信息!") return {} else: - info = {} + # 匹配标题、原标题 if year: for tv in tvs: if tv.get('first_air_date'): @@ -314,25 +308,22 @@ class TmdbHelper: if self.__compare_names(name, tv.get('name')) \ or self.__compare_names(name, tv.get('original_name')): return tv - if not info: - index = 0 - for tv in tvs: - if year: - if not tv.get('first_air_date'): - continue - if tv.get('first_air_date')[0:4] != str(year): - continue - index += 1 - info, names = self.__get_names(MediaType.TV, tv.get("id")) - if self.__compare_names(name, names): - return info - else: - index += 1 - info, names = self.__get_names(MediaType.TV, tv.get("id")) - if self.__compare_names(name, names): - return info - if index > 5: - break + # 匹配别名、译名 + index = 0 + for tv in tvs: + # 有年份先过滤 + if year: + if not tv.get('first_air_date'): + continue + if tv.get('first_air_date')[0:4] != str(year): + continue + index += 1 + if not tv.get("names"): + tv = self.get_info(MediaType.TV, tv.get("id")) + if self.__compare_names(name, tv.get("names")): + return tv + if index > 5: + break return {} def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]: @@ -374,18 +365,20 @@ class TmdbHelper: logger.debug("%s 未找到季%s相关信息!" % (name, season_number)) return {} else: + # 匹配标题、原标题 for tv in tvs: if (self.__compare_names(name, tv.get('name')) or self.__compare_names(name, tv.get('original_name'))) \ and (tv.get('first_air_date') and tv.get('first_air_date')[0:4] == str(season_year)): return tv - + # 匹配别名、译名 for tv in tvs[:5]: - info, names = self.__get_names(MediaType.TV, tv.get("id")) - if not self.__compare_names(name, names): + if not tv.get("names"): + tv = self.get_info(MediaType.TV, tv.get("id")) + if not self.__compare_names(name, tv.get("names")): continue - if __season_match(tv_info=info, _season_year=season_year): - return info + if __season_match(tv_info=tv, _season_year=season_year): + return tv return {} @staticmethod @@ -444,35 +437,30 @@ class TmdbHelper: logger.debug(f"API返回:{str(self.search.total_results)}") if len(multis) == 0: logger.debug(f"{name} 未找到相关媒体息!") - return {} else: - info = {} + # 匹配标题、原标题 for multi in multis: if multi.get("media_type") == "movie": if self.__compare_names(name, multi.get('title')) \ or self.__compare_names(name, multi.get('original_title')): - info = multi + return multi elif multi.get("media_type") == "tv": if self.__compare_names(name, multi.get('name')) \ or self.__compare_names(name, multi.get('original_name')): - info = multi - if not info: - for multi in multis[:5]: - if multi.get("media_type") == "movie": - movie_info, names = self.__get_names(MediaType.MOVIE, multi.get("id")) - if self.__compare_names(name, names): - info = movie_info - elif multi.get("media_type") == "tv": - tv_info, names = self.__get_names(MediaType.TV, multi.get("id")) - if self.__compare_names(name, names): - info = tv_info - # 返回 - if info: - info['media_type'] = MediaType.MOVIE if info.get('media_type') in ['movie', - MediaType.MOVIE] else MediaType.TV - else: - logger.info("%s 在TMDB中未找到媒体信息!" % name) - return info + return multi + # 匹配别名、译名 + for multi in multis[:5]: + if multi.get("media_type") == "movie": + if not multi.get("names"): + multi = self.get_info(MediaType.MOVIE, multi.get("id")) + if self.__compare_names(name, multi.get("names")): + return multi + elif multi.get("media_type") == "tv": + if not multi.get("names"): + multi = self.get_info(MediaType.TV, multi.get("id")) + if self.__compare_names(name, multi.get("names")): + return multi + return {} @lru_cache(maxsize=128) def search_web(self, name: str, mtype: MediaType) -> Optional[dict]: @@ -565,6 +553,8 @@ class TmdbHelper: if tmdb_info: # 转换genreid tmdb_info['genre_ids'] = __get_genre_ids(tmdb_info.get('genres')) + # 别名和译名 + tmdb_info['names'] = self.__get_names(tmdb_info) # 转换中文标题 self.__update_tmdbinfo_cn_title(tmdb_info) diff --git a/app/utils/string.py b/app/utils/string.py index f5c7ae14..932ab862 100644 --- a/app/utils/string.py +++ b/app/utils/string.py @@ -148,8 +148,8 @@ class StringUtils: return 0.0 @staticmethod - def clear_special_chars(text: Union[list, str], replace_word: str = "", - allow_space: bool = False) -> Union[list, str]: + def clear(text: Union[list, str], replace_word: str = "", + allow_space: bool = False) -> Union[list, str]: """ 忽略特殊字符 """ @@ -167,7 +167,7 @@ class StringUtils: else: return re.sub(r"\s+", " ", text).strip() else: - return [StringUtils.clear_special_chars(x) for x in text] + return [StringUtils.clear(x) for x in text] @staticmethod def str_filesize(size: Union[str, float, int], pre: int = 2) -> str: @@ -552,4 +552,3 @@ class StringUtils: # 端口号不是整数,返回 None 表示无效 return None, None return domain, port -