fix scrap

This commit is contained in:
jxxghp 2024-06-22 19:59:24 +08:00
parent b43e591e4c
commit 0c266726ea
6 changed files with 121 additions and 89 deletions

View File

@ -522,6 +522,14 @@ class ChainBase(metaclass=ABCMeta):
self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)
def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Fetch artwork file names together with their URLs.

    The request is handed to the "metadata_img" module method through
    ``run_module``; this wrapper adds no logic of its own.

    :param mediainfo: media information the artwork belongs to
    :param season: season number, forwarded unchanged (may be None)
    :return: the value produced by ``run_module``
    """
    module_kwargs = {"mediainfo": mediainfo, "season": season}
    return self.run_module("metadata_img", **module_kwargs)
def media_category(self) -> Optional[Dict[str, list]]:
"""
获取媒体分类

View File

@ -480,21 +480,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
_path=nfo_path, _content=season_nfo)
# TMDB季poster图片
if settings.SCRAP_SOURCE == "themoviedb":
sea_seq = str(meta.begin_season).rjust(2, '0')
# 查询季剧详情
seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
season=meta.begin_season)
if not seasoninfo:
logger.warn(f"无法获取 {mediainfo.title_year}{meta.begin_season}季 的媒体信息!")
return
# 生成季poster图片
if seasoninfo.get("poster_path"):
image_dict = self.metadata_img(mediainfo=mediainfo, season=season_meta.begin_season)
if image_dict:
for image_name, image_url in image_dict.items():
image_path = filepath.with_name(image_name)
# 下载图片
content = __save_image(f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original"
f"{seasoninfo.get('poster_path')}")
image_path = filepath.with_name(f"season{sea_seq}"
f"-poster{Path(seasoninfo.get('poster_path')).suffix}")
content = __save_image(image_url)
# 保存图片文件到当前目录
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
_path=image_path, _content=content)
@ -509,15 +500,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
_path=nfo_path, _content=tv_nfo)
# 生成目录图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_name \
and attr_name.endswith("_path") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_dict = self.metadata_img(mediainfo=mediainfo)
if image_dict:
for image_name, image_url in image_dict.items():
image_path = filepath.parent.with_name(image_name)
content = __save_image(attr_value)
# 下载图片
content = __save_image(image_url)
# 保存图片文件到当前目录
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
_path=image_path, _content=content)

View File

@ -773,7 +773,16 @@ class DoubanModule(_ModuleBase):
"""
if settings.SCRAP_SOURCE != "douban":
return None
return self.scraper.get_meta_nfo(mediainfo=mediainfo, season=season)
return self.scraper.get_metadata_nfo(mediainfo=mediainfo, season=season)
def metadata_img(self, mediainfo: MediaInfo, **kwargs) -> dict:
    """
    Fetch artwork file names together with their URLs.

    :param mediainfo: media information
    :param kwargs: extra keyword arguments are accepted but not used here
    :return: an empty dict unless ``settings.SCRAP_SOURCE`` is "douban";
             otherwise the result of the scraper's ``get_metadata_img``
    """
    if settings.SCRAP_SOURCE == "douban":
        return self.scraper.get_metadata_img(mediainfo=mediainfo)
    # Another scrape source is configured, so this module contributes nothing.
    return {}
def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
"""

View File

@ -17,7 +17,7 @@ class DoubanScraper:
_force_nfo = False
_force_img = False
def get_meta_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
def get_metadata_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
"""
获取NFO文件内容文本
:param mediainfo: 媒体信息
@ -38,6 +38,19 @@ class DoubanScraper:
return None
@staticmethod
def get_metadata_img(mediainfo: MediaInfo) -> Optional[dict]:
"""
获取图片内容
:param mediainfo: 媒体信息
"""
ret_dict = {}
if mediainfo.poster_path:
ret_dict[f"poster{Path(mediainfo.poster_path).suffix}"] = mediainfo.poster_path
if mediainfo.backdrop_path:
ret_dict[f"backdrop{Path(mediainfo.backdrop_path).suffix}"] = mediainfo.backdrop_path
return ret_dict
def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
file_path: Path, transfer_type: str,
force_nfo: bool = False, force_img: bool = False):
@ -68,15 +81,11 @@ class DoubanScraper:
self.__gen_movie_nfo_file(mediainfo=mediainfo,
file_path=file_path)
# 生成电影图片
image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
image_dict = self.get_metadata_img(mediainfo)
for img_name, img_url in image_dict.items():
image_path = file_path.with_name(img_name)
if self._force_img or not image_path.exists():
self.__save_image(url=mediainfo.poster_path,
file_path=image_path)
# 背景图
if mediainfo.backdrop_path:
image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
if self._force_img or not image_path.exists():
self.__save_image(url=mediainfo.backdrop_path,
self.__save_image(url=img_url,
file_path=image_path)
# 电视剧
else:
@ -86,15 +95,11 @@ class DoubanScraper:
self.__gen_tv_nfo_file(mediainfo=mediainfo,
dir_path=file_path.parents[1])
# 生成根目录图片
image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
image_dict = self.get_metadata_img(mediainfo)
for img_name, img_url in image_dict.items():
image_path = file_path.with_name(img_name)
if self._force_img or not image_path.exists():
self.__save_image(url=mediainfo.poster_path,
file_path=image_path)
# 背景图
if mediainfo.backdrop_path:
image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
if self._force_img or not image_path.exists():
self.__save_image(url=mediainfo.backdrop_path,
self.__save_image(url=img_url,
file_path=image_path)
# 季目录NFO
if self._force_nfo or not file_path.with_name("season.nfo").exists():

View File

@ -347,7 +347,17 @@ class TheMovieDbModule(_ModuleBase):
"""
if settings.SCRAP_SOURCE != "themoviedb":
return None
return self.scraper.get_meta_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
return self.scraper.get_metadata_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Fetch artwork file names together with their URLs.

    :param mediainfo: media information
    :param season: season number, passed through to the scraper
    :return: an empty dict unless ``settings.SCRAP_SOURCE`` is "themoviedb";
             otherwise the result of the scraper's ``get_metadata_img``
    """
    if settings.SCRAP_SOURCE == "themoviedb":
        return self.scraper.get_metadata_img(mediainfo=mediainfo, season=season)
    # Another scrape source is configured, so this module contributes nothing.
    return {}
def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
page: int = 1) -> Optional[List[MediaInfo]]:

View File

@ -1,6 +1,6 @@
import traceback
from pathlib import Path
from typing import Union, Optional
from typing import Union, Optional, Tuple
from xml.dom import minidom
from requests import RequestException
@ -26,7 +26,7 @@ class TmdbScraper:
def __init__(self, tmdb):
    """
    Store the given object on the instance as ``self.tmdb``.

    :param tmdb: object kept for later lookups; other methods of this
                 class call ``get_tv_season_detail`` on it
    """
    self.tmdb = tmdb
def get_meta_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
def get_metadata_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
season: int = None, episode: int = None) -> Optional[str]:
"""
获取NFO文件内容文本
@ -58,6 +58,48 @@ class TmdbScraper:
return None
def get_metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Collect artwork file names and the URLs to download them from.

    Two modes:

    * ``season`` given and the media is not a movie: only the poster of
      that season is returned (looked up through
      ``self.tmdb.get_tv_season_detail``). Season 0 is handled like any
      other season.
    * otherwise: every attribute of ``mediainfo`` whose name ends in
      ``_path`` and whose value is an http(s) URL string is returned,
      named ``<attribute without "_path"><url suffix>``. This applies to
      movies and to TV shows alike, so callers that ask for a show's
      root-directory artwork (no season) get the show images back
      instead of an empty dict.

    :param mediainfo: media information
    :param season: season number, or None for the main artwork
    :return: dict of file name -> URL (possibly empty)
    """
    images = {}

    # ``season is None`` is tested first so the main-artwork path never
    # needs to look at the media type.
    wants_main_images = season is None or mediainfo.type == MediaType.MOVIE
    if not wants_main_images:
        seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
        if seasoninfo:
            poster = self.get_season_poster(seasoninfo, season)
            # The helper may yield nothing when the season has no poster;
            # only unpack an actual (name, url) pair.
            if poster:
                poster_name, poster_url = poster
                if poster_name and poster_url:
                    images[poster_name] = poster_url
        return images

    for attr_name, attr_value in vars(mediainfo).items():
        if attr_name.endswith("_path") \
                and isinstance(attr_value, str) \
                and attr_value.startswith("http"):
            image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
            images[image_name] = attr_value
    return images
@staticmethod
def get_season_poster(seasoninfo: dict, season: int) -> Tuple[str, str]:
"""
获取季的海报
"""
# TMDB季poster图片
sea_seq = str(season).rjust(2, '0')
if seasoninfo.get("poster_path"):
# 后缀
ext = Path(seasoninfo.get('poster_path')).suffix
# URL
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
image_name = f"season{sea_seq}-poster{ext}"
return image_name, url
@staticmethod
def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
"""
@ -97,17 +139,11 @@ class TmdbScraper:
self.__gen_movie_nfo_file(mediainfo=mediainfo,
file_path=file_path)
# 生成电影图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.endswith("_path") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_dict = self.get_metadata_img(mediainfo=mediainfo)
for image_name, image_url in image_dict.items():
image_path = file_path.with_name(image_name)
if self._force_img or not image_path.exists():
self.__save_image(url=attr_value,
file_path=image_path)
self.__save_image(url=image_url, file_path=image_path)
# 电视剧,路径为每一季的文件名 名称/Season xx/名称 SxxExx.xxx
else:
# 如果有上游传入的元信息则使用,否则使用文件名识别
@ -120,18 +156,11 @@ class TmdbScraper:
self.__gen_tv_nfo_file(mediainfo=mediainfo,
dir_path=file_path.parents[1])
# 生成根目录图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_name \
and attr_name.endswith("_path") \
and not attr_name.startswith("season") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_dict = self.get_metadata_img(mediainfo=mediainfo)
for image_name, image_url in image_dict.items():
image_path = file_path.parent.with_name(image_name)
if self._force_img or not image_path.exists():
self.__save_image(url=attr_value,
file_path=image_path)
self.__save_image(url=image_url, file_path=image_path)
# 查询季信息
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
if seasoninfo:
@ -140,29 +169,12 @@ class TmdbScraper:
self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
season=meta.begin_season,
season_path=file_path)
# TMDB季poster图片
sea_seq = str(meta.begin_season).rjust(2, '0')
if seasoninfo.get("poster_path"):
# 后缀
ext = Path(seasoninfo.get('poster_path')).suffix
# URL
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
image_path = file_path.parent.with_name(f"season{sea_seq}-poster{ext}")
# TMDB季图片
poster_name, poster_url = self.get_season_poster(seasoninfo, meta.begin_season)
if poster_name and poster_url:
image_path = file_path.parent.with_name(poster_name)
if self._force_img or not image_path.exists():
self.__save_image(url=url, file_path=image_path)
# 季的其它图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.startswith("season") \
and not attr_name.endswith("poster_path") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_path = file_path.parent.with_name(image_name)
if self._force_img or not image_path.exists():
self.__save_image(url=attr_value,
file_path=image_path)
self.__save_image(url=poster_url, file_path=image_path)
# 查询集详情
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
if episodeinfo: