fix scrap

jxxghp 2024-06-22 19:59:24 +08:00
parent b43e591e4c
commit 0c266726ea
6 changed files with 121 additions and 89 deletions

View File

@@ -522,6 +522,14 @@ class ChainBase(metaclass=ABCMeta):
         self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
                         transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)
 
+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        return self.run_module("metadata_img", mediainfo=mediainfo, season=season)
+
     def media_category(self) -> Optional[Dict[str, list]]:
         """
         Get media categories
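To make the contract of the new chain hook concrete, here is a minimal, self-contained sketch of the `{image file name: image URL}` mapping that `metadata_img` is expected to return. `FakeMediaInfo`, the standalone function, and the example URLs are illustrative stand-ins and not code from this commit; the real method dispatches to the active scraping module via `run_module`.

# A minimal sketch (assumptions noted above) of the mapping returned by metadata_img.
from pathlib import Path
from typing import Optional


class FakeMediaInfo:
    """Stand-in for MediaInfo with only the fields used in this sketch."""

    def __init__(self, poster_path: str = None, backdrop_path: str = None):
        self.poster_path = poster_path
        self.backdrop_path = backdrop_path


def metadata_img(mediainfo: FakeMediaInfo, season: Optional[int] = None) -> dict:
    """Return a mapping of image file name -> image URL, mirroring the new hook's shape."""
    images = {}
    if mediainfo.poster_path:
        images[f"poster{Path(mediainfo.poster_path).suffix}"] = mediainfo.poster_path
    if mediainfo.backdrop_path:
        images[f"backdrop{Path(mediainfo.backdrop_path).suffix}"] = mediainfo.backdrop_path
    return images


if __name__ == "__main__":
    info = FakeMediaInfo(poster_path="https://example.org/p/abc.jpg",
                         backdrop_path="https://example.org/b/def.jpg")
    # {'poster.jpg': 'https://example.org/p/abc.jpg', 'backdrop.jpg': 'https://example.org/b/def.jpg'}
    print(metadata_img(info))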

View File

@@ -480,21 +480,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
                 __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                             _path=nfo_path, _content=season_nfo)
             # TMDB season poster image
-            if settings.SCRAP_SOURCE == "themoviedb":
-                sea_seq = str(meta.begin_season).rjust(2, '0')
-                # Query season details
-                seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
-                                            season=meta.begin_season)
-                if not seasoninfo:
-                    logger.warn(f"Failed to get media info for {mediainfo.title_year} season {meta.begin_season}!")
-                    return
-                # Generate the season poster image
-                if seasoninfo.get("poster_path"):
-                    # Download image
-                    content = __save_image(f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original"
-                                           f"{seasoninfo.get('poster_path')}")
-                    image_path = filepath.with_name(f"season{sea_seq}"
-                                                    f"-poster{Path(seasoninfo.get('poster_path')).suffix}")
+            image_dict = self.metadata_img(mediainfo=mediainfo, season=season_meta.begin_season)
+            if image_dict:
+                for image_name, image_url in image_dict.items():
+                    image_path = filepath.with_name(image_name)
+                    # Download image
+                    content = __save_image(image_url)
                     # Save the image file to the current directory
                     __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                 _path=image_path, _content=content)
@@ -509,15 +500,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
             __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                         _path=nfo_path, _content=tv_nfo)
             # Generate directory images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_name \
-                        and attr_name.endswith("_path") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = filepath.parent.with_name(image_name)
-                    content = __save_image(attr_value)
+            image_dict = self.metadata_img(mediainfo=mediainfo)
+            if image_dict:
+                for image_name, image_url in image_dict.items():
+                    image_path = filepath.parent.with_name(image_name)
+                    # Download image
+                    content = __save_image(image_url)
                     # Save the image file to the current directory
                     __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                 _path=image_path, _content=content)

View File

@@ -773,7 +773,16 @@ class DoubanModule(_ModuleBase):
         """
         if settings.SCRAP_SOURCE != "douban":
            return None
-        return self.scraper.get_meta_nfo(mediainfo=mediainfo, season=season)
+        return self.scraper.get_metadata_nfo(mediainfo=mediainfo, season=season)
+
+    def metadata_img(self, mediainfo: MediaInfo, **kwargs) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        """
+        if settings.SCRAP_SOURCE != "douban":
+            return {}
+        return self.scraper.get_metadata_img(mediainfo=mediainfo)
 
     def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
         """

View File

@@ -17,7 +17,7 @@ class DoubanScraper:
     _force_nfo = False
     _force_img = False
 
-    def get_meta_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
         """
         Get the NFO file content text
         :param mediainfo: media info
@@ -38,6 +38,19 @@ class DoubanScraper:
         return None
 
+    @staticmethod
+    def get_metadata_img(mediainfo: MediaInfo) -> Optional[dict]:
+        """
+        Get image content
+        :param mediainfo: media info
+        """
+        ret_dict = {}
+        if mediainfo.poster_path:
+            ret_dict[f"poster{Path(mediainfo.poster_path).suffix}"] = mediainfo.poster_path
+        if mediainfo.backdrop_path:
+            ret_dict[f"backdrop{Path(mediainfo.backdrop_path).suffix}"] = mediainfo.backdrop_path
+        return ret_dict
+
     def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
                           file_path: Path, transfer_type: str,
                           force_nfo: bool = False, force_img: bool = False):
@@ -68,15 +81,11 @@ class DoubanScraper:
             self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                       file_path=file_path)
             # Generate movie images
-            image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-            if self._force_img or not image_path.exists():
-                self.__save_image(url=mediainfo.poster_path,
-                                  file_path=image_path)
-            # Backdrop image
-            if mediainfo.backdrop_path:
-                image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.backdrop_path,
-                                      file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo)
+            for img_name, img_url in image_dict.items():
+                image_path = file_path.with_name(img_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=img_url,
+                                      file_path=image_path)
         # TV shows
         else:
@@ -86,15 +95,11 @@ class DoubanScraper:
                 self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                        dir_path=file_path.parents[1])
             # Generate root directory images
-            image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-            if self._force_img or not image_path.exists():
-                self.__save_image(url=mediainfo.poster_path,
-                                  file_path=image_path)
-            # Backdrop image
-            if mediainfo.backdrop_path:
-                image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.backdrop_path,
-                                      file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo)
+            for img_name, img_url in image_dict.items():
+                image_path = file_path.with_name(img_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=img_url,
+                                      file_path=image_path)
             # Season directory NFO
             if self._force_nfo or not file_path.with_name("season.nfo").exists():
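Both scrapers now consume the image mapping with the same guard: a file is only downloaded when it does not already exist, unless a forced image refresh was requested. Below is a minimal sketch of that loop; `download` stands in for the scrapers' private image-saving helper and the example path is made up, so treat it as an illustration of the pattern rather than the commit's code.

# Sketch of the shared "skip if present unless forced" download loop (assumptions above).
from pathlib import Path


def download(url: str, dest: Path) -> None:
    """Placeholder for the scraper's private image-saving helper."""
    print(f"would fetch {url} -> {dest}")


def save_images(image_dict: dict, file_path: Path, force_img: bool = False) -> None:
    """Save each named image next to file_path, skipping files that already exist."""
    for image_name, image_url in image_dict.items():
        image_path = file_path.with_name(image_name)
        if force_img or not image_path.exists():
            download(image_url, image_path)


if __name__ == "__main__":
    save_images({"poster.jpg": "https://example.org/p/abc.jpg"},
                Path("/tmp/Movie (2024)/Movie (2024).mkv"))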

View File

@@ -347,7 +347,17 @@ class TheMovieDbModule(_ModuleBase):
         """
         if settings.SCRAP_SOURCE != "themoviedb":
             return None
-        return self.scraper.get_meta_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+        return self.scraper.get_metadata_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+
+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        if settings.SCRAP_SOURCE != "themoviedb":
+            return {}
+        return self.scraper.get_metadata_img(mediainfo=mediainfo, season=season)
 
     def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
                       page: int = 1) -> Optional[List[MediaInfo]]:

View File

@@ -1,6 +1,6 @@
 import traceback
 from pathlib import Path
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 from xml.dom import minidom
 
 from requests import RequestException
@@ -26,8 +26,8 @@ class TmdbScraper:
     def __init__(self, tmdb):
         self.tmdb = tmdb
 
-    def get_meta_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
-                     season: int = None, episode: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
+                         season: int = None, episode: int = None) -> Optional[str]:
         """
         Get the NFO file content text
         :param meta: metadata
@@ -58,6 +58,48 @@ class TmdbScraper:
         return None
 
+    def get_metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        images = {}
+        if mediainfo.type == MediaType.MOVIE:
+            for attr_name, attr_value in vars(mediainfo).items():
+                if attr_value \
+                        and attr_name.endswith("_path") \
+                        and attr_value \
+                        and isinstance(attr_value, str) \
+                        and attr_value.startswith("http"):
+                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                    images[image_name] = attr_value
+        else:
+            if season:
+                # Query season info
+                seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
+                if seasoninfo:
+                    # TMDB season poster image
+                    poster_name, poster_url = self.get_season_poster(seasoninfo, season)
+                    if poster_name and poster_url:
+                        images[poster_name] = poster_url
+        return images
+
+    @staticmethod
+    def get_season_poster(seasoninfo: dict, season: int) -> Tuple[str, str]:
+        """
+        Get the season poster
+        """
+        # TMDB season poster image
+        sea_seq = str(season).rjust(2, '0')
+        if seasoninfo.get("poster_path"):
+            # Suffix
+            ext = Path(seasoninfo.get('poster_path')).suffix
+            # URL
+            url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
+            image_name = f"season{sea_seq}-poster{ext}"
+            return image_name, url
+
     @staticmethod
     def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
         """
@@ -97,17 +139,11 @@ class TmdbScraper:
                 self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                           file_path=file_path)
             # Generate movie images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_value \
-                        and attr_name.endswith("_path") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = file_path.with_name(image_name)
-                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=attr_value,
-                                          file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo=mediainfo)
+            for image_name, image_url in image_dict.items():
+                image_path = file_path.with_name(image_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=image_url, file_path=image_path)
         # TV show: the path is each season's file name, e.g. Name/Season xx/Name SxxExx.xxx
         else:
             # Use upstream metadata if provided, otherwise identify from the file name
@@ -120,18 +156,11 @@ class TmdbScraper:
                 self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                        dir_path=file_path.parents[1])
             # Generate root directory images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_name \
-                        and attr_name.endswith("_path") \
-                        and not attr_name.startswith("season") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = file_path.parent.with_name(image_name)
-                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=attr_value,
-                                          file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo=mediainfo)
+            for image_name, image_url in image_dict.items():
+                image_path = file_path.parent.with_name(image_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=image_url, file_path=image_path)
             # Query season info
             seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
             if seasoninfo:
@ -140,29 +169,12 @@ class TmdbScraper:
self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo, self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
season=meta.begin_season, season=meta.begin_season,
season_path=file_path) season_path=file_path)
# TMDB季poster图片 # TMDB季图片
sea_seq = str(meta.begin_season).rjust(2, '0') poster_name, poster_url = self.get_season_poster(seasoninfo, meta.begin_season)
if seasoninfo.get("poster_path"): if poster_name and poster_url:
# 后缀 image_path = file_path.parent.with_name(poster_name)
ext = Path(seasoninfo.get('poster_path')).suffix
# URL
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
image_path = file_path.parent.with_name(f"season{sea_seq}-poster{ext}")
if self._force_img or not image_path.exists(): if self._force_img or not image_path.exists():
self.__save_image(url=url, file_path=image_path) self.__save_image(url=poster_url, file_path=image_path)
# 季的其它图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.startswith("season") \
and not attr_name.endswith("poster_path") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_path = file_path.parent.with_name(image_name)
if self._force_img or not image_path.exists():
self.__save_image(url=attr_value,
file_path=image_path)
# 查询集详情 # 查询集详情
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode) episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
if episodeinfo: if episodeinfo:
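For reference, the season poster name pads the season number to two digits and keeps the poster's original suffix, while the URL prepends the configured TMDB image domain. A small worked example follows; the domain value, the trimmed season payload, and the season number are assumptions for illustration, not values taken from this commit.

# Worked example of the season poster name/URL construction (assumed inputs).
from pathlib import Path

TMDB_IMAGE_DOMAIN = "image.tmdb.org"      # assumption; the real code reads settings.TMDB_IMAGE_DOMAIN
seasoninfo = {"poster_path": "/abc123.jpg"}  # trimmed stand-in for a TMDB season payload
season = 1

sea_seq = str(season).rjust(2, '0')                       # "01"
ext = Path(seasoninfo["poster_path"]).suffix              # ".jpg"
url = f"https://{TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo['poster_path']}"
image_name = f"season{sea_seq}-poster{ext}"

print(image_name)  # season01-poster.jpg
print(url)         # https://image.tmdb.org/t/p/original/abc123.jpg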