fix tmdb match

This commit is contained in:
jxxghp 2023-06-11 07:45:12 +08:00
parent 77c09bddcb
commit 1d3754de3a
4 changed files with 89 additions and 91 deletions

View File

@ -7,6 +7,7 @@ from app.core.meta import MetaBase
from app.core.metainfo import MetaInfo from app.core.metainfo import MetaInfo
from app.helper.sites import SitesHelper from app.helper.sites import SitesHelper
from app.log import logger from app.log import logger
from app.utils.string import StringUtils
class SearchChain(ChainBase): class SearchChain(ChainBase):
@ -70,20 +71,23 @@ class SearchChain(ChainBase):
if torrent.imdbid \ if torrent.imdbid \
and mediainfo.imdb_id \ and mediainfo.imdb_id \
and torrent.imdbid == mediainfo.imdb_id: and torrent.imdbid == mediainfo.imdb_id:
logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue continue
# 识别 # 识别
torrent_meta = MetaInfo(torrent.title, torrent.description) torrent_meta = MetaInfo(torrent.title, torrent.description)
# 识别媒体信息 # 比对标题
torrent_mediainfo: MediaInfo = self.recognize_media(meta=torrent_meta) if torrent_meta.get_name() in [mediainfo.title, mediainfo.original_title]:
if not torrent_mediainfo:
logger.warn(f'未识别到媒体信息,标题:{torrent.title}')
continue
# 过滤
if torrent_mediainfo.tmdb_id == mediainfo.tmdb_id \
and torrent_mediainfo.type == mediainfo.type:
logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}') logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue
# 比对别名和译名
for name in mediainfo.names:
if StringUtils.clear(name).strip().upper() == \
StringUtils.clear(torrent_meta.get_name()).strip().upper():
logger.info(f'{mediainfo.title} 匹配到资源:{torrent.title}')
_match_torrents.append(torrent)
break
else: else:
_match_torrents = torrents _match_torrents = torrents
# 组装上下文返回 # 组装上下文返回

View File

@ -118,6 +118,8 @@ class MediaInfo:
vote_average: int = 0 vote_average: int = 0
# 描述 # 描述
overview: Optional[str] = None overview: Optional[str] = None
# 所有别名和译名
names: Optional[list] = []
# 各季的剧集清单信息 # 各季的剧集清单信息
seasons: Optional[dict] = {} seasons: Optional[dict] = {}
# 二级分类 # 二级分类
@ -134,6 +136,7 @@ class MediaInfo:
def __init__(self, tmdb_info: dict = None, douban_info: dict = None): def __init__(self, tmdb_info: dict = None, douban_info: dict = None):
# 初始化 # 初始化
self.seasons = {} self.seasons = {}
self.names = []
self.directors = [] self.directors = []
self.actors = [] self.actors = []
self.tmdb_info = {} self.tmdb_info = {}
@ -276,6 +279,8 @@ class MediaInfo:
self.backdrop_path = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{info.get('backdrop_path')}" self.backdrop_path = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{info.get('backdrop_path')}"
# 导演和演员 # 导演和演员
self.directors, self.actors = __directors_actors(info) self.directors, self.actors = __directors_actors(info)
# 别名和译名
self.names = info.get('names')
def set_douban_info(self, info: dict): def set_douban_info(self, info: dict):
""" """

View File

@ -1,6 +1,6 @@
import traceback import traceback
from functools import lru_cache from functools import lru_cache
from typing import Optional, Tuple, List from typing import Optional, List
import zhconv import zhconv
from lxml import etree from lxml import etree
@ -108,27 +108,24 @@ class TmdbHelper:
return False return False
if not isinstance(tmdb_names, list): if not isinstance(tmdb_names, list):
tmdb_names = [tmdb_names] tmdb_names = [tmdb_names]
file_name = StringUtils.clear_special_chars(file_name).upper() file_name = StringUtils.clear(file_name).upper()
for tmdb_name in tmdb_names: for tmdb_name in tmdb_names:
tmdb_name = StringUtils.clear_special_chars(tmdb_name).strip().upper() tmdb_name = StringUtils.clear(tmdb_name).strip().upper()
if file_name == tmdb_name: if file_name == tmdb_name:
return True return True
return False return False
def __get_names(self, mtype: MediaType, tmdb_id: int) -> Tuple[Optional[dict], List[str]]: @staticmethod
def __get_names(tmdb_info: dict) -> List[str]:
""" """
搜索tmdb中所有的标题和译名用于名称匹配 搜索tmdb中所有的标题和译名用于名称匹配
:param mtype: 类型电影电视剧动漫 :param tmdb_info: TMDB信息
:param tmdb_id: TMDB的ID
:return: 所有译名的清单 :return: 所有译名的清单
""" """
if not mtype or not tmdb_id:
return {}, []
ret_names = []
tmdb_info = self.get_info(mtype=mtype, tmdbid=tmdb_id)
if not tmdb_info: if not tmdb_info:
return tmdb_info, [] return []
if mtype == MediaType.MOVIE: ret_names = []
if tmdb_info.get('media_type') == MediaType.MOVIE:
alternative_titles = tmdb_info.get("alternative_titles", {}).get("titles", []) alternative_titles = tmdb_info.get("alternative_titles", {}).get("titles", [])
for alternative_title in alternative_titles: for alternative_title in alternative_titles:
title = alternative_title.get("title") title = alternative_title.get("title")
@ -150,7 +147,7 @@ class TmdbHelper:
name = translation.get("data", {}).get("name") name = translation.get("data", {}).get("name")
if name and name not in ret_names: if name and name not in ret_names:
ret_names.append(name) ret_names.append(name)
return tmdb_info, ret_names return ret_names
def match(self, name: str, def match(self, name: str,
mtype: MediaType, mtype: MediaType,
@ -239,7 +236,7 @@ class TmdbHelper:
logger.debug(f"{name} 未找到相关电影信息!") logger.debug(f"{name} 未找到相关电影信息!")
return {} return {}
else: else:
info = {} # 匹配标题、原标题
if year: if year:
for movie in movies: for movie in movies:
if movie.get('release_date'): if movie.get('release_date'):
@ -254,25 +251,22 @@ class TmdbHelper:
if self.__compare_names(name, movie.get('title')) \ if self.__compare_names(name, movie.get('title')) \
or self.__compare_names(name, movie.get('original_title')): or self.__compare_names(name, movie.get('original_title')):
return movie return movie
if not info: # 匹配别名、译名
index = 0 index = 0
for movie in movies: for movie in movies:
if year: # 有年份先过滤
if not movie.get('release_date'): if year:
continue if not movie.get('release_date'):
if movie.get('release_date')[0:4] != str(year): continue
continue if movie.get('release_date')[0:4] != str(year):
index += 1 continue
info, names = self.__get_names(MediaType.MOVIE, movie.get("id")) index += 1
if self.__compare_names(name, names): if not movie.get("names"):
return info movie = self.get_info(MediaType.MOVIE, movie.get("id"))
else: if movie and self.__compare_names(name, movie.get("names")):
index += 1 return movie
info, names = self.__get_names(MediaType.MOVIE, movie.get("id")) if index > 5:
if self.__compare_names(name, names): break
return info
if index > 5:
break
return {} return {}
def __search_tv_by_name(self, name: str, year: str) -> Optional[dict]: def __search_tv_by_name(self, name: str, year: str) -> Optional[dict]:
@ -299,7 +293,7 @@ class TmdbHelper:
logger.debug(f"{name} 未找到相关剧集信息!") logger.debug(f"{name} 未找到相关剧集信息!")
return {} return {}
else: else:
info = {} # 匹配标题、原标题
if year: if year:
for tv in tvs: for tv in tvs:
if tv.get('first_air_date'): if tv.get('first_air_date'):
@ -314,25 +308,22 @@ class TmdbHelper:
if self.__compare_names(name, tv.get('name')) \ if self.__compare_names(name, tv.get('name')) \
or self.__compare_names(name, tv.get('original_name')): or self.__compare_names(name, tv.get('original_name')):
return tv return tv
if not info: # 匹配别名、译名
index = 0 index = 0
for tv in tvs: for tv in tvs:
if year: # 有年份先过滤
if not tv.get('first_air_date'): if year:
continue if not tv.get('first_air_date'):
if tv.get('first_air_date')[0:4] != str(year): continue
continue if tv.get('first_air_date')[0:4] != str(year):
index += 1 continue
info, names = self.__get_names(MediaType.TV, tv.get("id")) index += 1
if self.__compare_names(name, names): if not tv.get("names"):
return info tv = self.get_info(MediaType.TV, tv.get("id"))
else: if self.__compare_names(name, tv.get("names")):
index += 1 return tv
info, names = self.__get_names(MediaType.TV, tv.get("id")) if index > 5:
if self.__compare_names(name, names): break
return info
if index > 5:
break
return {} return {}
def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]: def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]:
@ -374,18 +365,20 @@ class TmdbHelper:
logger.debug("%s 未找到季%s相关信息!" % (name, season_number)) logger.debug("%s 未找到季%s相关信息!" % (name, season_number))
return {} return {}
else: else:
# 匹配标题、原标题
for tv in tvs: for tv in tvs:
if (self.__compare_names(name, tv.get('name')) if (self.__compare_names(name, tv.get('name'))
or self.__compare_names(name, tv.get('original_name'))) \ or self.__compare_names(name, tv.get('original_name'))) \
and (tv.get('first_air_date') and tv.get('first_air_date')[0:4] == str(season_year)): and (tv.get('first_air_date') and tv.get('first_air_date')[0:4] == str(season_year)):
return tv return tv
# 匹配别名、译名
for tv in tvs[:5]: for tv in tvs[:5]:
info, names = self.__get_names(MediaType.TV, tv.get("id")) if not tv.get("names"):
if not self.__compare_names(name, names): tv = self.get_info(MediaType.TV, tv.get("id"))
if not self.__compare_names(name, tv.get("names")):
continue continue
if __season_match(tv_info=info, _season_year=season_year): if __season_match(tv_info=tv, _season_year=season_year):
return info return tv
return {} return {}
@staticmethod @staticmethod
@ -444,35 +437,30 @@ class TmdbHelper:
logger.debug(f"API返回{str(self.search.total_results)}") logger.debug(f"API返回{str(self.search.total_results)}")
if len(multis) == 0: if len(multis) == 0:
logger.debug(f"{name} 未找到相关媒体息!") logger.debug(f"{name} 未找到相关媒体息!")
return {}
else: else:
info = {} # 匹配标题、原标题
for multi in multis: for multi in multis:
if multi.get("media_type") == "movie": if multi.get("media_type") == "movie":
if self.__compare_names(name, multi.get('title')) \ if self.__compare_names(name, multi.get('title')) \
or self.__compare_names(name, multi.get('original_title')): or self.__compare_names(name, multi.get('original_title')):
info = multi return multi
elif multi.get("media_type") == "tv": elif multi.get("media_type") == "tv":
if self.__compare_names(name, multi.get('name')) \ if self.__compare_names(name, multi.get('name')) \
or self.__compare_names(name, multi.get('original_name')): or self.__compare_names(name, multi.get('original_name')):
info = multi return multi
if not info: # 匹配别名、译名
for multi in multis[:5]: for multi in multis[:5]:
if multi.get("media_type") == "movie": if multi.get("media_type") == "movie":
movie_info, names = self.__get_names(MediaType.MOVIE, multi.get("id")) if not multi.get("names"):
if self.__compare_names(name, names): multi = self.get_info(MediaType.MOVIE, multi.get("id"))
info = movie_info if self.__compare_names(name, multi.get("names")):
elif multi.get("media_type") == "tv": return multi
tv_info, names = self.__get_names(MediaType.TV, multi.get("id")) elif multi.get("media_type") == "tv":
if self.__compare_names(name, names): if not multi.get("names"):
info = tv_info multi = self.get_info(MediaType.TV, multi.get("id"))
# 返回 if self.__compare_names(name, multi.get("names")):
if info: return multi
info['media_type'] = MediaType.MOVIE if info.get('media_type') in ['movie', return {}
MediaType.MOVIE] else MediaType.TV
else:
logger.info("%s 在TMDB中未找到媒体信息!" % name)
return info
@lru_cache(maxsize=128) @lru_cache(maxsize=128)
def search_web(self, name: str, mtype: MediaType) -> Optional[dict]: def search_web(self, name: str, mtype: MediaType) -> Optional[dict]:
@ -565,6 +553,8 @@ class TmdbHelper:
if tmdb_info: if tmdb_info:
# 转换genreid # 转换genreid
tmdb_info['genre_ids'] = __get_genre_ids(tmdb_info.get('genres')) tmdb_info['genre_ids'] = __get_genre_ids(tmdb_info.get('genres'))
# 别名和译名
tmdb_info['names'] = self.__get_names(tmdb_info)
# 转换中文标题 # 转换中文标题
self.__update_tmdbinfo_cn_title(tmdb_info) self.__update_tmdbinfo_cn_title(tmdb_info)

View File

@ -148,8 +148,8 @@ class StringUtils:
return 0.0 return 0.0
@staticmethod @staticmethod
def clear_special_chars(text: Union[list, str], replace_word: str = "", def clear(text: Union[list, str], replace_word: str = "",
allow_space: bool = False) -> Union[list, str]: allow_space: bool = False) -> Union[list, str]:
""" """
忽略特殊字符 忽略特殊字符
""" """
@ -167,7 +167,7 @@ class StringUtils:
else: else:
return re.sub(r"\s+", " ", text).strip() return re.sub(r"\s+", " ", text).strip()
else: else:
return [StringUtils.clear_special_chars(x) for x in text] return [StringUtils.clear(x) for x in text]
@staticmethod @staticmethod
def str_filesize(size: Union[str, float, int], pre: int = 2) -> str: def str_filesize(size: Union[str, float, int], pre: int = 2) -> str:
@ -552,4 +552,3 @@ class StringUtils:
# 端口号不是整数,返回 None 表示无效 # 端口号不是整数,返回 None 表示无效
return None, None return None, None
return domain, port return domain, port