This commit is contained in:
jxxghp
2023-06-06 07:15:17 +08:00
commit 4d06f86e62
217 changed files with 13959 additions and 0 deletions

View File

@ -0,0 +1,149 @@
from typing import Optional, List, Tuple, Union
from app.core import settings, MediaInfo
from app.core.meta import MetaBase
from app.modules import _ModuleBase
from app.modules.themoviedb.category import CategoryHelper
from app.modules.themoviedb.tmdb import TmdbHelper
from app.modules.themoviedb.tmdb_cache import TmdbCache
from app.utils.types import MediaType
class TheMovieDb(_ModuleBase):
    """
    TMDB media information matching module.

    Combines TmdbHelper (TMDB queries), TmdbCache (recognition cache) and
    CategoryHelper (secondary category rules).
    """

    # Recognition result cache
    cache: TmdbCache = None
    # TMDB query helper
    tmdb: TmdbHelper = None
    # Secondary category helper
    category: CategoryHelper = None

    def init_module(self) -> None:
        """Create the cache, the TMDB helper and the category helper."""
        self.cache = TmdbCache()
        self.tmdb = TmdbHelper()
        self.category = CategoryHelper()

    def init_setting(self) -> Tuple[str, Union[str, bool]]:
        """Placeholder: no setting is declared here, so None is returned."""
        pass

    def recognize_media(self, meta: MetaBase,
                        tmdbid: str = None) -> Optional[MediaInfo]:
        """
        Recognize media information for the given metadata.

        :param meta: parsed metadata to recognize
        :param tmdbid: TMDB id; when given (and nothing is cached) the detail
                       is fetched directly instead of searching by name
        :return: None when meta is empty, otherwise a MediaInfo built from the
                 TMDB info (which may itself be empty when nothing matched)
        """
        if not meta:
            return None
        cache_info = self.cache.get(meta)
        if not cache_info:
            # Nothing cached for this metadata: query TMDB
            if tmdbid:
                info = self.tmdb.get_tmdb_info(mtype=meta.type, tmdbid=tmdbid)
            else:
                info = self._search_tmdb_info(meta)
            # Search hits carry no "genres"; fetch the full detail record
            if info and not info.get("genres"):
                info = self.tmdb.get_tmdb_info(mtype=info.get("media_type"),
                                               tmdbid=info.get("id"))
            # Remember the result (misses are cached too)
            self.cache.update(meta, info)
        elif cache_info.get("title"):
            # Cached hit: reload the detail by the cached type and id
            info = self.tmdb.get_tmdb_info(mtype=cache_info.get("type"),
                                           tmdbid=cache_info.get("id"))
        else:
            # Cached entry without a title marks a previous miss
            info = None
        mediainfo = MediaInfo(tmdb_info=info)
        # Determine the secondary category
        if info:
            if info.get('media_type') == MediaType.MOVIE:
                cat = self.category.get_movie_category(info)
            else:
                cat = self.category.get_tv_category(info)
            mediainfo.set_category(cat)
        return mediainfo

    def _search_tmdb_info(self, meta: MetaBase) -> Optional[dict]:
        """Search TMDB by name, progressively relaxing the constraints."""
        name = meta.get_name()
        if meta.type != MediaType.TV and not meta.year:
            # Neither a confirmed TV type nor a year: search movies and TV together
            info = self.tmdb.search_multi_tmdb(name)
        else:
            if meta.type == MediaType.TV:
                # Confirmed TV: try the season-aware search first
                info = self.tmdb.search_tmdb(name=name,
                                             year=meta.year,
                                             mtype=meta.type,
                                             season_year=meta.year,
                                             season_number=meta.begin_season)
                # Retry without the year only when the first search found
                # nothing, so a successful match is never overwritten
                if not info and meta.year:
                    info = self.tmdb.search_tmdb(name=name, mtype=meta.type)
            else:
                # A year is known: try movie first, then TV
                info = self.tmdb.search_tmdb(name=name,
                                             year=meta.year,
                                             mtype=MediaType.MOVIE)
                if not info:
                    info = self.tmdb.search_tmdb(name=name,
                                                 year=meta.year,
                                                 mtype=MediaType.TV)
            if not info:
                # Drop both year and type and search once more
                info = self.tmdb.search_multi_tmdb(name=name)
        if not info:
            # Last resort: query the TMDB website
            info = self.tmdb.search_tmdb_web(name=name, mtype=meta.type)
        return info

    def search_medias(self, meta: MetaBase) -> Optional[List[MediaInfo]]:
        """
        Search candidate media for the given metadata.

        :param meta: parsed metadata
        :return: None when this search source is not enabled, otherwise a
                 (possibly empty) list of MediaInfo
        """
        # Return None when this source is not the configured one
        if settings.SEARCH_SOURCE != "themoviedb":
            return None
        if not meta or not meta.get_name():
            return []
        name = meta.get_name()
        if not meta.type and not meta.year:
            results = self.tmdb.search_multi_tmdbinfos(name)
        elif not meta.type:
            # Unknown type: combine movie and TV hits. Concatenate instead of
            # building sets, because plain dict results are unhashable.
            combined = [*self.tmdb.search_movie_tmdbinfos(name, meta.year),
                        *self.tmdb.search_tv_tmdbinfos(name, meta.year)]
            # Combined results are ordered newest first
            results = sorted(
                combined,
                key=lambda x: x.get("release_date") or x.get("first_air_date") or "0000-00-00",
                reverse=True
            )
        elif meta.type == MediaType.MOVIE:
            results = self.tmdb.search_movie_tmdbinfos(name, meta.year)
        else:
            results = self.tmdb.search_tv_tmdbinfos(name, meta.year)
        return [MediaInfo(tmdb_info=info) for info in results]

    def scrape_metadata(self, path: str, mediainfo: MediaInfo) -> None:
        """
        TODO scrape metadata (not implemented yet).

        :param path: media file path
        :param mediainfo: recognized media information
        :return: always None for now
        """
        if settings.SCRAP_SOURCE != "themoviedb":
            return None
        return None

View File

@ -0,0 +1,128 @@
import shutil
from pathlib import Path
import ruamel.yaml
from app.core import settings
from app.log import logger
from app.utils.singleton import Singleton
class CategoryHelper(metaclass=Singleton):
    """
    Secondary category rules loaded from category.yaml.

    The file is expected to hold a mapping with optional 'movie' and 'tv'
    sections, each mapping a category name to attribute conditions.
    """

    def __init__(self):
        self._category_path: Path = settings.CONFIG_PATH / "category.yaml"
        # Always define the rule tables so lookups stay safe when the feature
        # is disabled or loading fails
        self._categorys = {}
        self._movie_categorys = {}
        self._tv_categorys = {}
        # Secondary category feature is switched off
        if not settings.LIBRARY_CATEGORY:
            return
        try:
            # Seed the user config from the bundled default on first use
            if not self._category_path.exists():
                shutil.copy(settings.INNER_CONFIG_PATH / "category.yaml", self._category_path)
            with open(self._category_path, mode='r', encoding='utf-8') as f:
                try:
                    yaml = ruamel.yaml.YAML()
                    self._categorys = yaml.load(f)
                except Exception as e:
                    logger.warn(f"二级分类策略配置文件格式出现严重错误!请检查:{str(e)}")
                    self._categorys = {}
        except Exception as err:
            logger.warn(f"二级分类策略配置文件加载出错:{err}")
        if self._categorys:
            self._movie_categorys = self._categorys.get('movie')
            self._tv_categorys = self._categorys.get('tv')
            logger.info("已加载二级分类策略 category.yaml")

    @property
    def is_movie_category(self) -> bool:
        """Whether any movie category rule is configured."""
        return bool(self._movie_categorys)

    @property
    def is_tv_category(self) -> bool:
        """Whether any TV category rule is configured."""
        return bool(self._tv_categorys)

    @property
    def movie_categorys(self) -> list:
        """Names of the configured movie categories."""
        if not self._movie_categorys:
            return []
        return list(self._movie_categorys.keys())

    @property
    def tv_categorys(self) -> list:
        """Names of the configured TV categories."""
        if not self._tv_categorys:
            return []
        return list(self._tv_categorys.keys())

    def get_movie_category(self, tmdb_info) -> str:
        """
        Determine the category of a movie.

        :param tmdb_info: TMDB information of the movie
        :return: category name, or "" when nothing matches
        """
        return self.get_category(self._movie_categorys, tmdb_info)

    def get_tv_category(self, tmdb_info) -> str:
        """
        Determine the category of a TV show.

        :param tmdb_info: TMDB information of the show
        :return: category name, or "" when nothing matches
        """
        return self.get_category(self._tv_categorys, tmdb_info)

    @staticmethod
    def get_category(categorys: dict, tmdb_info: dict) -> str:
        """
        Compare TMDB information with the category rules, in rule order.

        A category with no conditions matches everything. Otherwise every
        non-empty condition must share at least one value (compared
        case-insensitively) with the corresponding TMDB attribute;
        a condition may list several comma-separated alternatives.

        :param categorys: category rules (name -> conditions)
        :param tmdb_info: TMDB information
        :return: name of the first matching category, or ""
        """
        if not tmdb_info or not categorys:
            return ""
        for key, item in categorys.items():
            # A rule without conditions is a catch-all
            if not item:
                return key
            match_flag = True
            for attr, value in item.items():
                if not value:
                    continue
                info_value = tmdb_info.get(attr)
                if not info_value:
                    match_flag = False
                    break
                if attr == "production_countries":
                    # Countries are compared by their ISO 3166-1 code
                    info_values = [str(val.get("iso_3166_1")).upper() for val in info_value]
                elif isinstance(info_value, list):
                    info_values = [str(val).upper() for val in info_value]
                else:
                    info_values = [str(info_value).upper()]
                # YAML may parse a lone value as a number, so stringify first;
                # strip blanks around comma-separated alternatives
                values = [val.strip().upper() for val in str(value).split(",")]
                if not set(values).intersection(info_values):
                    match_flag = False
                    break
            if match_flag:
                return key
        return ""

View File

@ -0,0 +1,895 @@
from functools import lru_cache
from typing import Optional, Tuple, List
import zhconv
from lxml import etree
from tmdbv3api import TMDb, Search, Movie, TV
from tmdbv3api.exceptions import TMDbException
from app.core import settings
from app.log import logger
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
from app.utils.types import MediaType
class TmdbHelper:
    """
    TMDB recognition and matching helper built on tmdbv3api.
    """

    tmdb: TMDb = None
    search: Search = None
    movie: Movie = None
    tv: TV = None

    def __init__(self):
        # TMDB client configuration
        self.tmdb = TMDb()
        # API domain
        self.tmdb.domain = settings.TMDB_API_DOMAIN
        # Enable response caching
        self.tmdb.cache = True
        # Cache size
        self.tmdb.REQUEST_CACHE_MAXSIZE = 256
        # API key
        self.tmdb.api_key = settings.TMDB_API_KEY
        # Query language
        self.tmdb.language = 'zh'
        # Proxies
        self.tmdb.proxies = settings.PROXY
        # Debug mode
        self.tmdb.debug = False
        # Query objects
        self.search = Search()
        self.movie = Movie()
        self.tv = TV()

    def search_multi_tmdbinfos(self, title: str) -> List[dict]:
        """
        Fuzzy-search movies and TV shows together.

        :param title: title to search for
        :return: movie/TV hits with 'media_type' converted to MediaType
        """
        if not title:
            return []
        ret_infos = []
        for multi in self.search.multi({"query": title}) or []:
            media_type = multi.get("media_type")
            if media_type in ("movie", "tv"):
                multi['media_type'] = MediaType.MOVIE if media_type == "movie" else MediaType.TV
                ret_infos.append(multi)
        return ret_infos

    def search_movie_tmdbinfos(self, title: str, year: str) -> List[dict]:
        """
        Fuzzy-search movies whose title contains the given title.

        :param title: title to search for
        :param year: optional release year filter
        :return: matching movie records tagged with MediaType.MOVIE
        """
        if not title:
            return []
        params = {"query": title}
        if year:
            params["year"] = year
        ret_infos = []
        for movie in self.search.movies(params) or []:
            # A hit may have no title; treat it as an empty string
            if title in (movie.get("title") or ""):
                movie['media_type'] = MediaType.MOVIE
                ret_infos.append(movie)
        return ret_infos

    def search_tv_tmdbinfos(self, title: str, year: str) -> List[dict]:
        """
        Fuzzy-search TV shows whose name contains the given title.

        :param title: title to search for
        :param year: optional first-air-date year filter
        :return: matching TV records tagged with MediaType.TV
        """
        if not title:
            return []
        params = {"query": title}
        if year:
            params["first_air_date_year"] = year
        ret_infos = []
        for tv in self.search.tv_shows(params) or []:
            # A hit may have no name; treat it as an empty string
            if title in (tv.get("name") or ""):
                tv['media_type'] = MediaType.TV
                ret_infos.append(tv)
        return ret_infos

    @staticmethod
    def __compare_tmdb_names(file_name: str, tmdb_names: list) -> bool:
        """
        Compare a recognized name with TMDB names, ignoring case and
        special characters.

        :param file_name: name recognized from a file or torrent
        :param tmdb_names: one TMDB name or a list of them
        :return: True when any TMDB name equals the recognized name
        """
        if not file_name or not tmdb_names:
            return False
        if not isinstance(tmdb_names, list):
            tmdb_names = [tmdb_names]
        file_name = StringUtils.clear_special_chars(file_name).upper()
        for tmdb_name in tmdb_names:
            # Skip missing names instead of passing None to the cleaner
            if not tmdb_name:
                continue
            if file_name == StringUtils.clear_special_chars(tmdb_name).strip().upper():
                return True
        return False

    def __get_tmdb_names(self, mtype: MediaType, tmdb_id: str) -> Tuple[Optional[dict], List[str]]:
        """
        Collect all alternative titles and translated names of a TMDB entry.

        :param mtype: media type
        :param tmdb_id: TMDB id
        :return: (detail info, list of distinct names)
        """
        if not mtype or not tmdb_id:
            return {}, []
        tmdb_info = self.get_tmdb_info(mtype=mtype, tmdbid=tmdb_id)
        if not tmdb_info:
            return tmdb_info, []
        # Movies and TV shows use different keys for the same data
        if mtype == MediaType.MOVIE:
            alt_key, name_key = "titles", "title"
        else:
            alt_key, name_key = "results", "name"
        ret_names = []
        alternative_titles = (tmdb_info.get("alternative_titles") or {}).get(alt_key) or []
        for alternative_title in alternative_titles:
            title = alternative_title.get("title")
            if title and title not in ret_names:
                ret_names.append(title)
        translations = (tmdb_info.get("translations") or {}).get("translations") or []
        for translation in translations:
            title = (translation.get("data") or {}).get(name_key)
            if title and title not in ret_names:
                ret_names.append(title)
        return tmdb_info, ret_names

    def search_tmdb(self, name: str,
                    mtype: MediaType,
                    year: str = None,
                    season_year: str = None,
                    season_number: int = None) -> Optional[dict]:
        """
        Search TMDB and return the single most plausible match.

        :param name: name to search for
        :param mtype: media type (movie or TV)
        :param year: year; for TV this is the first-air-date year
        :param season_year: year of the current season
        :param season_number: season number
        :return: TMDB info with 'media_type' set, or an empty/None value
        """
        if not self.search:
            return None
        if not name:
            return None
        type_name = mtype.value if mtype else ""
        info = {}
        if mtype == MediaType.MOVIE:
            # Also probe the neighbouring years, release dates often differ by one
            year_range = [year]
            if year and str(year).isdigit():
                year_range.append(str(int(year) + 1))
                year_range.append(str(int(year) - 1))
            # A separate loop variable keeps `year` intact for the final log
            for search_year in year_range:
                logger.debug(
                    f"正在识别{type_name}{name}, 年份={search_year} ...")
                info = self.__search_movie_by_name(name, search_year)
                if info:
                    info['media_type'] = MediaType.MOVIE
                    logger.info("%s 识别到 电影TMDBID=%s, 名称=%s, 上映日期=%s" % (
                        name,
                        info.get('id'),
                        info.get('title'),
                        info.get('release_date')))
                    break
        else:
            # With both season number and season year, try the exact season match
            if season_year and season_number:
                logger.debug(
                    f"正在识别{type_name}{name}, 季集={season_number}, 季集年份={season_year} ...")
                info = self.__search_tv_by_season(name,
                                                  season_year,
                                                  season_number)
            if not info:
                logger.debug(
                    f"正在识别{type_name}{name}, 年份={year} ...")
                info = self.__search_tv_by_name(name,
                                                year)
            if info:
                info['media_type'] = MediaType.TV
                logger.info("%s 识别到 电视剧TMDBID=%s, 名称=%s, 首播日期=%s" % (
                    name,
                    info.get('id'),
                    info.get('name'),
                    info.get('first_air_date')))
        if not info:
            logger.info("%s 以年份 %s 在TMDB中未找到%s信息!" % (
                name, year, type_name))
        return info

    def __match_search_results(self, name: str, year: str, results,
                               mtype: MediaType, date_key: str,
                               title_key: str, original_key: str) -> Optional[dict]:
        """
        Pick the search hit whose name equals `name`.

        First compares the title and original title of every hit, then falls
        back to the alternative/translated names of a limited number of hits
        (each of those costs one detail request). When `year` is given only
        hits dated in that year are considered.
        """
        def year_matches(item) -> bool:
            return not year or (item.get(date_key) or "")[0:4] == str(year)

        for item in results:
            if year_matches(item) and (
                    self.__compare_tmdb_names(name, item.get(title_key))
                    or self.__compare_tmdb_names(name, item.get(original_key))):
                return item
        checked = 0
        for item in results:
            if not year_matches(item):
                continue
            checked += 1
            info, names = self.__get_tmdb_names(mtype, item.get("id"))
            if self.__compare_tmdb_names(name, names):
                return info
            # Same bound as before: stop after the sixth detail lookup
            if checked > 5:
                break
        return {}

    def __search_movie_by_name(self, name: str, year: str) -> Optional[dict]:
        """
        Match a movie on TMDB by name.

        :param name: recognized file or torrent name
        :param year: release year
        :return: matching info, {} when nothing matches, None on query error
        """
        try:
            params = {"query": name}
            if year:
                params["year"] = year
            movies = self.search.movies(params)
        except Exception as err:
            logger.error(f"连接TMDB出错{err}")
            return None
        logger.debug(f"API返回{str(self.search.total_results)}")
        if movies is None or len(movies) == 0:
            logger.debug(f"{name} 未找到相关电影信息!")
            return {}
        return self.__match_search_results(name, year, movies, MediaType.MOVIE,
                                           'release_date', 'title', 'original_title')

    def __search_tv_by_name(self, name: str, year: str) -> Optional[dict]:
        """
        Match a TV show on TMDB by name.

        :param name: recognized file or torrent name
        :param year: first-air-date year
        :return: matching info, {} when nothing matches, None on query error
        """
        try:
            params = {"query": name}
            if year:
                params["first_air_date_year"] = year
            tvs = self.search.tv_shows(params)
        except Exception as err:
            logger.error(f"连接TMDB出错{err}")
            return None
        logger.debug(f"API返回{str(self.search.total_results)}")
        if tvs is None or len(tvs) == 0:
            logger.debug(f"{name} 未找到相关剧集信息!")
            return {}
        return self.__match_search_results(name, year, tvs, MediaType.TV,
                                           'first_air_date', 'name', 'original_name')

    @staticmethod
    def __season_match(tv_info: dict, season_year: str, season_number: int) -> bool:
        """
        Tell whether season `season_number` of the show aired in `season_year`.

        :param tv_info: TV detail info containing a "seasons" list
        :param season_year: expected year of the season's air date
        :param season_number: season number
        :return: True when that season exists and its air date is in that year
        """
        if not tv_info:
            return False
        try:
            seasons = TmdbHelper.__get_tmdb_tv_seasons(tv_info)
            season_info = seasons.get(int(season_number))
            if not season_info:
                return False
            air_date = season_info.get("air_date")
            return bool(air_date) and air_date[0:4] == str(season_year)
        except Exception as e1:
            logger.error(f"连接TMDB出错{e1}")
            return False

    def __search_tv_by_season(self, name: str, season_year: str, season_number: int) -> Optional[dict]:
        """
        Match a TV show on TMDB by name plus the year and number of a season.

        :param name: recognized file or torrent name
        :param season_year: year of the season
        :param season_number: season number
        :return: matching info, {} when nothing matches, None on query error
        """
        try:
            tvs = self.search.tv_shows({"query": name})
        except Exception as err:
            logger.error(f"连接TMDB出错{err}")
            return None
        if tvs is None or len(tvs) == 0:
            logger.debug("%s 未找到季%s相关信息!" % (name, season_number))
            return {}
        # A show first aired in the season year with an equal name wins outright
        for tv in tvs:
            if (self.__compare_tmdb_names(name, tv.get('name'))
                or self.__compare_tmdb_names(name, tv.get('original_name'))) \
                    and (tv.get('first_air_date') and tv.get('first_air_date')[0:4] == str(season_year)):
                return tv
        # Otherwise inspect the seasons of the first few hits
        for tv in tvs[:5]:
            info, names = self.__get_tmdb_names(MediaType.TV, tv.get("id"))
            if not self.__compare_tmdb_names(name, names):
                continue
            if self.__season_match(info, season_year, season_number):
                return info
        return {}

    @staticmethod
    def __get_tmdb_tv_seasons(tv_info: dict) -> Optional[dict]:
        """
        Index the seasons of a TV detail record by season number.

        Each element of tv_info["seasons"] looks like
        {"air_date": "2005-03-27", "episode_count": 9, "id": 3718,
         "name": ..., "overview": ..., "poster_path": ..., "season_number": 1}.
        Entries whose season_number is missing or 0 are skipped.

        :param tv_info: TMDB TV detail info
        :return: dict mapping season number to its season record
        """
        if not tv_info:
            return {}
        return {
            season_info.get("season_number"): season_info
            for season_info in tv_info.get("seasons") or []
            if season_info.get("season_number")
        }

    def search_multi_tmdb(self, name: str) -> Optional[dict]:
        """
        Search movies and TV shows together by name, without a year.

        :param name: recognized file or torrent name
        :return: first matching info, {} when nothing matches, None on query error
        """
        try:
            multis = self.search.multi({"query": name}) or []
        except Exception as err:
            logger.error(f"连接TMDB出错{err}")
            return None
        logger.debug(f"API返回{str(self.search.total_results)}")
        if len(multis) == 0:
            logger.debug(f"{name} 未找到相关媒体息!")
            return {}
        title_keys = {
            "movie": ('title', 'original_title'),
            "tv": ('name', 'original_name'),
        }
        info = {}
        # Take the first hit with an equal name instead of letting later,
        # lower-ranked hits overwrite it
        for multi in multis:
            keys = title_keys.get(multi.get("media_type"))
            if keys and any(self.__compare_tmdb_names(name, multi.get(k)) for k in keys):
                info = multi
                break
        if not info:
            # Fall back to alternative/translated names of the first few hits
            for multi in multis[:5]:
                if multi.get("media_type") == "movie":
                    mtype = MediaType.MOVIE
                elif multi.get("media_type") == "tv":
                    mtype = MediaType.TV
                else:
                    continue
                detail, names = self.__get_tmdb_names(mtype, multi.get("id"))
                if self.__compare_tmdb_names(name, names):
                    info = detail
                    break
        if info:
            info['media_type'] = MediaType.MOVIE if info.get('media_type') in ['movie',
                                                                               MediaType.MOVIE] else MediaType.TV
        else:
            logger.info("%s 在TMDB中未找到媒体信息!" % name)
        return info

    # NOTE(review): lru_cache on a method keys on `self` and also caches the
    # None returned after a failed request until the entry is evicted.
    @lru_cache(maxsize=128)
    def search_tmdb_web(self, name: str, mtype: MediaType) -> Optional[dict]:
        """
        Search the TMDB website directly; a result is returned only when the
        page lists exactly one candidate.

        :param name: name to search for
        :param mtype: media type
        :return: TMDB info, {} when skipped or of the wrong type, None otherwise
        """
        from urllib.parse import quote

        if not name:
            return None
        if StringUtils.is_chinese(name):
            return {}
        logger.info("正在从TheDbMovie网站查询%s ..." % name)
        # Percent-encode the name so characters such as & or # stay in the query
        tmdb_url = "https://www.themoviedb.org/search?query=%s" % quote(name, safe="")
        res = RequestUtils(timeout=5).get_res(url=tmdb_url)
        if not (res and res.status_code == 200):
            return None
        html_text = res.text
        if not html_text:
            return None
        try:
            html = etree.HTML(html_text)
            if mtype == MediaType.TV:
                links = html.xpath("//a[@data-id and @data-media-type='tv']/@href")
            else:
                links = html.xpath("//a[@data-id]/@href")
            # Keep distinct /tv... and /movie... links in page order
            tmdb_links = []
            for link in links:
                if not link or not link.startswith(("/tv", "/movie")):
                    continue
                if link not in tmdb_links:
                    tmdb_links.append(link)
            if len(tmdb_links) == 1:
                tmdbinfo = self.get_tmdb_info(
                    mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE,
                    tmdbid=tmdb_links[0].split("/")[-1])
                if tmdbinfo:
                    if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV:
                        return {}
                    if tmdbinfo.get('media_type') == MediaType.MOVIE:
                        logger.info("%s 从WEB识别到 电影TMDBID=%s, 名称=%s, 上映日期=%s" % (
                            name,
                            tmdbinfo.get('id'),
                            tmdbinfo.get('title'),
                            tmdbinfo.get('release_date')))
                    else:
                        logger.info("%s 从WEB识别到 电视剧TMDBID=%s, 名称=%s, 首播日期=%s" % (
                            name,
                            tmdbinfo.get('id'),
                            tmdbinfo.get('name'),
                            tmdbinfo.get('first_air_date')))
                return tmdbinfo
            elif len(tmdb_links) > 1:
                logger.info("%s TMDB网站返回数据过多%s" % (name, len(tmdb_links)))
            else:
                logger.info("%s TMDB网站未查询到媒体信息" % name)
        except Exception as err:
            logger.error(str(err))
            return None
        return None

    def get_tmdb_info(self,
                      mtype: MediaType,
                      tmdbid: str) -> dict:
        """
        Fetch one media detail record by TMDB id.

        :param mtype: media type; anything other than MediaType.MOVIE is
                      queried as a TV show
        :param tmdbid: TMDB id
        :return: detail info with 'media_type' and 'genre_ids' filled in and
                 the title localized where possible; empty/None on failure
        """
        if mtype == MediaType.MOVIE:
            tmdb_info = self.__get_tmdb_movie_detail(tmdbid)
            if tmdb_info:
                tmdb_info['media_type'] = MediaType.MOVIE
        else:
            tmdb_info = self.__get_tmdb_tv_detail(tmdbid)
            if tmdb_info:
                tmdb_info['media_type'] = MediaType.TV
        if tmdb_info:
            # Derive the genre id list from the detailed genre records
            tmdb_info['genre_ids'] = [genre.get('id') for genre in tmdb_info.get('genres') or []]
            # Prefer a Chinese title when the current one is not Chinese
            self.__update_tmdbinfo_cn_title(tmdb_info)
        return tmdb_info

    @staticmethod
    def __update_tmdbinfo_cn_title(tmdb_info: dict):
        """
        Replace a non-Chinese title/name with a simplified-Chinese alternative
        title when the detail record offers one.

        :param tmdb_info: TMDB detail info, modified in place
        """
        is_movie = tmdb_info.get("media_type") == MediaType.MOVIE
        title_key = "title" if is_movie else "name"

        def chinese_title():
            """Return the first CN alternative title in simplified Chinese, else the current title."""
            alternative_titles = tmdb_info.get("alternative_titles", {}).get(
                "titles" if is_movie else "results", [])
            for alternative_title in alternative_titles:
                if alternative_title.get("iso_3166_1") != "CN":
                    continue
                title = alternative_title.get("title")
                # Unchanged by a zh-hans conversion means already simplified
                if title and StringUtils.is_chinese(title) \
                        and zhconv.convert(title, "zh-hans") == title:
                    return title
            return tmdb_info.get(title_key)

        org_title = tmdb_info.get(title_key)
        if not StringUtils.is_chinese(org_title):
            cn_title = chinese_title()
            if cn_title and cn_title != org_title:
                tmdb_info[title_key] = cn_title

    def __get_tmdb_movie_detail(self,
                                tmdbid: str,
                                append_to_response: str = "images,"
                                                          "credits,"
                                                          "alternative_titles,"
                                                          "translations,"
                                                          "external_ids") -> Optional[dict]:
        """
        Fetch the detail record of a movie.

        :param tmdbid: TMDB id
        :param append_to_response: comma-separated extra sections to request
        :return: TMDB info, {} when unavailable, None when the query raised
        """
        # Abbreviated shape of a movie detail response:
        # {
        #   "id": 315162, "imdb_id": "tt3915174", "adult": false,
        #   "title": ..., "original_title": "Puss in Boots: The Last Wish",
        #   "original_language": "en", "overview": ..., "tagline": "",
        #   "release_date": "2022-12-07", "runtime": 102, "status": "Released",
        #   "genres": [{"id": 16, "name": ...}, ...],
        #   "belongs_to_collection": {"id": 94602, "name": ..., ...},
        #   "production_companies": [{"id": 33, "name": "Universal Pictures", "origin_country": "US", ...}],
        #   "production_countries": [{"iso_3166_1": "US", "name": "United States of America"}],
        #   "spoken_languages": [{"iso_639_1": "en", "name": "English", ...}],
        #   "poster_path": ..., "backdrop_path": ..., "homepage": "",
        #   "budget": 90000000, "revenue": 260725470, "popularity": 8842.129,
        #   "video": false, "vote_average": 8.614, "vote_count": 2291
        # }
        if not self.movie:
            return {}
        try:
            logger.info("正在查询TMDB电影%s ..." % tmdbid)
            tmdbinfo = self.movie.details(tmdbid, append_to_response)
            if tmdbinfo:
                logger.info(f"{tmdbid} 查询结果:{tmdbinfo.get('title')}")
            return tmdbinfo or {}
        except Exception as e:
            logger.error(str(e))
            return None

    def __get_tmdb_tv_detail(self,
                             tmdbid: str,
                             append_to_response: str = "images,"
                                                       "credits,"
                                                       "alternative_titles,"
                                                       "translations,"
                                                       "external_ids") -> Optional[dict]:
        """
        Fetch the detail record of a TV show.

        :param tmdbid: TMDB id
        :param append_to_response: comma-separated extra sections to request
        :return: TMDB info, {} when unavailable, None when the query raised
        """
        # Abbreviated shape of a TV detail response:
        # {
        #   "id": 100088, "adult": false,
        #   "name": ..., "original_name": "The Last of Us",
        #   "original_language": "en", "languages": ["en"], "overview": ...,
        #   "first_air_date": "2023-01-15", "last_air_date": "2023-01-15",
        #   "in_production": true, "status": "Returning Series", "type": "Scripted",
        #   "number_of_episodes": 9, "number_of_seasons": 1, "episode_run_time": [],
        #   "genres": [{"id": 18, "name": ...}, ...],
        #   "created_by": [{"id": 35796, "name": "Craig Mazin", ...}],
        #   "networks": [{"id": 49, "name": "HBO", "origin_country": "US", ...}],
        #   "origin_country": ["US"],
        #   "production_companies": [{"id": 3268, "name": "HBO", ...}],
        #   "production_countries": [{"iso_3166_1": "US", "name": "United States of America"}],
        #   "last_episode_to_air": {"air_date": ..., "episode_number": 1, "season_number": 1, ...},
        #   "next_episode_to_air": {"air_date": ..., "episode_number": 2, "season_number": 1, ...},
        #   "seasons": [{"air_date": "2023-01-15", "episode_count": 9, "id": 144593,
        #                "name": ..., "season_number": 1, ...}],
        #   "spoken_languages": [{"iso_639_1": "en", "name": "English", ...}],
        #   "poster_path": ..., "backdrop_path": ..., "homepage": ...,
        #   "popularity": 5585.639, "vote_average": 8.924, "vote_count": 601
        # }
        if not self.tv:
            return {}
        try:
            logger.info("正在查询TMDB电视剧%s ..." % tmdbid)
            tmdbinfo = self.tv.details(tmdbid, append_to_response)
            if tmdbinfo:
                logger.info(f"{tmdbid} 查询结果:{tmdbinfo.get('name')}")
            return tmdbinfo or {}
        except Exception as e:
            logger.error(str(e))
            return None

View File

@ -0,0 +1,235 @@
import pickle
import random
import threading
import time
from pathlib import Path
from threading import RLock
from typing import Optional
from app.core import settings
from app.core.meta import MetaBase
from app.utils.singleton import Singleton
from app.utils.types import MediaType
# Module-wide re-entrant lock used by TmdbCache around cache mutations
lock = RLock()
# Key under which a cache entry stores its expiry time (epoch seconds)
CACHE_EXPIRE_TIMESTAMP_STR = "cache_expire_timestamp"
# Lifetime added to the current time when an entry is stamped: 7 days in seconds
EXPIRE_TIMESTAMP = 7 * 24 * 3600
class TmdbCache(metaclass=Singleton):
    """
    TMDB recognition cache.

    Each entry has the form:
    {
        "id": '',
        "title": '',
        "year": '',
        "type": MediaType
    }
    plus poster/backdrop paths and an expiry timestamp. An entry of
    {'id': 0} records that recognition failed.
    """
    _meta_data: dict = {}
    # Cache file path
    _meta_path: Path = None
    # Whether expired entries are removed
    _tmdb_cache_expire: bool = True
    # Auto-save interval in seconds
    _save_interval: int = 600

    def __init__(self):
        # Create the save timer.
        # NOTE(review): the timer is only started from save(); nothing in this
        # class starts it initially - confirm the caller triggers the first save.
        self.timer = threading.Timer(self._save_interval, self.save)
        self.init_config()

    def init_config(self):
        """Resolve the cache file path and load the persisted entries."""
        self._meta_path = settings.TEMP_PATH / "__tmdb_cache__"
        self._meta_data = self.__load(self._meta_path)

    def clear(self):
        """
        Drop every cached entry.
        """
        with lock:
            self._meta_data = {}

    @staticmethod
    def __get_key(meta: MetaBase) -> str:
        """
        Build the cache key from type, name, year and first season.
        """
        # meta.type may be unset for unrecognized metadata
        type_name = meta.type.value if meta.type else ""
        return f"[{type_name}]{meta.get_name()}-{meta.year}-{meta.begin_season}"

    def get(self, meta: MetaBase):
        """
        Look up the cache entry for the metadata.

        A live entry (or one without an expiry) gets its expiry pushed
        forward; an expired entry is removed when expiry is enabled, but is
        still returned for this call.

        :return: the cache entry, or {} when nothing is cached
        """
        key = self.__get_key(meta)
        with lock:
            info: dict = self._meta_data.get(key)
            if info:
                expire = info.get(CACHE_EXPIRE_TIMESTAMP_STR)
                if not expire or int(time.time()) < expire:
                    # Refresh the stored entry in place. Passing it through
                    # update() would rebuild it as if it were raw TMDB info
                    # and wipe its title/type/year.
                    info[CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP
                elif self._tmdb_cache_expire:
                    self.delete(key)
            return info or {}

    def delete(self, key: str) -> dict:
        """
        Remove one cache entry.

        @param key: cache key
        @return: the removed entry, or None when the key was absent
        """
        with lock:
            return self._meta_data.pop(key, None)

    def delete_by_tmdbid(self, tmdbid: str) -> None:
        """
        Remove every entry of the given TMDB id so the next recognition
        fetches fresh data.
        """
        with lock:
            for key in list(self._meta_data):
                if str(self._meta_data.get(key, {}).get("id")) == str(tmdbid):
                    self._meta_data.pop(key, None)

    def delete_unknown(self) -> None:
        """
        Remove the entries recorded as unrecognized (id 0) so they are
        searched again.
        """
        with lock:
            for key in list(self._meta_data):
                if str(self._meta_data.get(key, {}).get("id")) == '0':
                    self._meta_data.pop(key, None)

    def modify(self, key: str, title: str) -> dict:
        """
        Change the title of a cache entry and renew its expiry.

        @param key: cache key
        @param title: new title
        @return: the entry after modification, or None when the key is absent
        """
        with lock:
            if self._meta_data.get(key):
                self._meta_data[key]['title'] = title
                self._meta_data[key][CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP
            return self._meta_data.get(key)

    @staticmethod
    def __load(path) -> dict:
        """
        Load the cache from a file; returns {} when the file is missing or
        cannot be read.
        """
        # NOTE(review): pickle must only read files this application wrote;
        # unpickling a file from an untrusted source can execute code.
        try:
            if Path(path).exists():
                with open(path, 'rb') as f:
                    data = pickle.load(f)
                return data
            return {}
        except Exception as e:
            print(str(e))
            return {}

    def update(self, meta: MetaBase, info: dict) -> None:
        """
        Add or replace the cache entry for the metadata.

        :param meta: metadata the entry is keyed by
        :param info: raw TMDB info; an empty value records a miss as {'id': 0}
        """
        key = self.__get_key(meta)
        if info:
            is_movie = info.get("media_type") == MediaType.MOVIE
            # Title to cache
            cache_title = info.get("title") if is_movie else info.get("name")
            # Year to cache
            cache_year = info.get('release_date') if is_movie else info.get('first_air_date')
            if cache_year:
                cache_year = cache_year[:4]
            entry = {
                "id": info.get("id"),
                "type": info.get("media_type"),
                "year": cache_year,
                "title": cache_title,
                "poster_path": info.get("poster_path"),
                "backdrop_path": info.get("backdrop_path"),
                CACHE_EXPIRE_TIMESTAMP_STR: int(time.time()) + EXPIRE_TIMESTAMP
            }
        else:
            entry = {'id': 0}
        with lock:
            self._meta_data[key] = entry

    def save(self, force: bool = False) -> None:
        """
        Persist the cache to its file.

        Unrecognized entries (id 0) are not written. Unless forced, the write
        is skipped when sampling finds nothing to refresh and the stored keys
        are unchanged; after a non-forced write the save timer is re-armed.
        """
        meta_data = self.__load(self._meta_path)
        with lock:
            # Snapshot under the lock so concurrent updates cannot break iteration
            new_meta_data = {k: v for k, v in self._meta_data.items() if str(v.get("id")) != '0'}

        if not force \
                and not self._random_sample(new_meta_data) \
                and meta_data.keys() == new_meta_data.keys():
            return

        with open(self._meta_path, 'wb') as f:
            pickle.dump(new_meta_data, f, pickle.HIGHEST_PROTOCOL)

        if not force:
            # Re-create and start the timer
            self.timer = threading.Timer(self._save_interval, self.save)
            self.timer.start()

    def _random_sample(self, new_meta_data: dict) -> bool:
        """
        Check entries to decide whether a save is needed.

        With fewer than 25 entries all are checked, otherwise 25 random ones.
        Entries without an expiry get one; expired entries are dropped from
        `new_meta_data` when expiry is enabled. When a random sample dropped
        5 or more entries, another sample is taken.

        :return: True when any entry was stamped or found expired
        """
        ret = False
        sampled = len(new_meta_data) >= 25
        if sampled:
            # random.sample needs a sequence; a dict keys view is rejected on
            # Python 3.11+
            keys = random.sample(list(new_meta_data), 25)
        else:
            keys = list(new_meta_data)
        removed = 0
        for k in keys:
            info = new_meta_data.get(k)
            expire = info.get(CACHE_EXPIRE_TIMESTAMP_STR)
            if not expire:
                ret = True
                info[CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP
            elif int(time.time()) >= expire:
                ret = True
                if self._tmdb_cache_expire:
                    new_meta_data.pop(k)
                    removed += 1
        if sampled and removed >= 5:
            ret |= self._random_sample(new_meta_data)
        return ret

    def get_title(self, key: str) -> Optional[str]:
        """
        Return the cached title, or None when the key is absent or marks a miss.
        """
        cache_media_info = self._meta_data.get(key)
        if not cache_media_info or not cache_media_info.get("id"):
            return None
        return cache_media_info.get("title")

    def set_title(self, key: str, cn_title: str) -> None:
        """
        Overwrite the cached title; does nothing when the key is absent.
        """
        cache_media_info = self._meta_data.get(key)
        if not cache_media_info:
            return
        self._meta_data[key]['title'] = cn_title

    def __del__(self):
        """
        Cancel the pending save timer on teardown.
        """
        timer = getattr(self, "timer", None)
        if timer:
            timer.cancel()