fix scrap
This commit is contained in:
parent
b43e591e4c
commit
0c266726ea
@ -522,6 +522,14 @@ class ChainBase(metaclass=ABCMeta):
|
|||||||
self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
|
self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
|
||||||
transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)
|
transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)
|
||||||
|
|
||||||
|
def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Look up the image file names and URLs for a media item.

    Forwards the request to the ``metadata_img`` module method through
    ``run_module`` and hands back whatever that call returns.

    :param mediainfo: media information
    :param season: season number
    """
    module_kwargs = {"mediainfo": mediainfo, "season": season}
    return self.run_module("metadata_img", **module_kwargs)
|
||||||
|
|
||||||
def media_category(self) -> Optional[Dict[str, list]]:
|
def media_category(self) -> Optional[Dict[str, list]]:
|
||||||
"""
|
"""
|
||||||
获取媒体分类
|
获取媒体分类
|
||||||
|
@ -480,21 +480,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
|
|||||||
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
||||||
_path=nfo_path, _content=season_nfo)
|
_path=nfo_path, _content=season_nfo)
|
||||||
# TMDB季poster图片
|
# TMDB季poster图片
|
||||||
if settings.SCRAP_SOURCE == "themoviedb":
|
image_dict = self.metadata_img(mediainfo=mediainfo, season=season_meta.begin_season)
|
||||||
sea_seq = str(meta.begin_season).rjust(2, '0')
|
if image_dict:
|
||||||
# 查询季剧详情
|
for image_name, image_url in image_dict.items():
|
||||||
seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
|
image_path = filepath.with_name(image_name)
|
||||||
season=meta.begin_season)
|
|
||||||
if not seasoninfo:
|
|
||||||
logger.warn(f"无法获取 {mediainfo.title_year} 第{meta.begin_season}季 的媒体信息!")
|
|
||||||
return
|
|
||||||
# 生成季poster图片
|
|
||||||
if seasoninfo.get("poster_path"):
|
|
||||||
# 下载图片
|
# 下载图片
|
||||||
content = __save_image(f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original"
|
content = __save_image(image_url)
|
||||||
f"{seasoninfo.get('poster_path')}")
|
|
||||||
image_path = filepath.with_name(f"season{sea_seq}"
|
|
||||||
f"-poster{Path(seasoninfo.get('poster_path')).suffix}")
|
|
||||||
# 保存图片文件到当前目录
|
# 保存图片文件到当前目录
|
||||||
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
||||||
_path=image_path, _content=content)
|
_path=image_path, _content=content)
|
||||||
@ -509,15 +500,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
|
|||||||
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
||||||
_path=nfo_path, _content=tv_nfo)
|
_path=nfo_path, _content=tv_nfo)
|
||||||
# 生成目录图片
|
# 生成目录图片
|
||||||
for attr_name, attr_value in vars(mediainfo).items():
|
image_dict = self.metadata_img(mediainfo=mediainfo)
|
||||||
if attr_name \
|
if image_dict:
|
||||||
and attr_name.endswith("_path") \
|
for image_name, image_url in image_dict.items():
|
||||||
and attr_value \
|
|
||||||
and isinstance(attr_value, str) \
|
|
||||||
and attr_value.startswith("http"):
|
|
||||||
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
|
|
||||||
image_path = filepath.parent.with_name(image_name)
|
image_path = filepath.parent.with_name(image_name)
|
||||||
content = __save_image(attr_value)
|
# 下载图片
|
||||||
|
content = __save_image(image_url)
|
||||||
# 保存图片文件到当前目录
|
# 保存图片文件到当前目录
|
||||||
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
__save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
|
||||||
_path=image_path, _content=content)
|
_path=image_path, _content=content)
|
||||||
|
@ -773,7 +773,16 @@ class DoubanModule(_ModuleBase):
|
|||||||
"""
|
"""
|
||||||
if settings.SCRAP_SOURCE != "douban":
|
if settings.SCRAP_SOURCE != "douban":
|
||||||
return None
|
return None
|
||||||
return self.scraper.get_meta_nfo(mediainfo=mediainfo, season=season)
|
return self.scraper.get_metadata_nfo(mediainfo=mediainfo, season=season)
|
||||||
|
|
||||||
|
def metadata_img(self, mediainfo: MediaInfo, **kwargs) -> dict:
    """
    Return image file names and URLs for a media item from the Douban scraper.

    :param mediainfo: media information
    :param kwargs: accepted for signature compatibility and not used here
    :return: the scraper's result, or an empty dict when the configured
        scrape source is not ``douban``
    """
    douban_selected = settings.SCRAP_SOURCE == "douban"
    if douban_selected:
        return self.scraper.get_metadata_img(mediainfo=mediainfo)
    return {}
|
||||||
|
|
||||||
def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
|
def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
|
||||||
"""
|
"""
|
||||||
|
@ -17,7 +17,7 @@ class DoubanScraper:
|
|||||||
_force_nfo = False
|
_force_nfo = False
|
||||||
_force_img = False
|
_force_img = False
|
||||||
|
|
||||||
def get_meta_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
|
def get_metadata_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
获取NFO文件内容文本
|
获取NFO文件内容文本
|
||||||
:param mediainfo: 媒体信息
|
:param mediainfo: 媒体信息
|
||||||
@ -38,6 +38,19 @@ class DoubanScraper:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
def get_metadata_img(mediainfo: MediaInfo) -> Optional[dict]:
    """
    Map image file names to their source URLs for a media item.

    :param mediainfo: media information; its ``poster_path`` and
        ``backdrop_path`` attributes are read
    :return: dict keyed by ``poster<ext>`` / ``backdrop<ext>``, where the
        extension is taken from the URL; attributes that are empty are left out
    """
    found = {}
    # Poster first, then backdrop, so the dict keeps the same insertion order.
    for kind in ("poster", "backdrop"):
        source_url = getattr(mediainfo, f"{kind}_path")
        if not source_url:
            continue
        found[f"{kind}{Path(source_url).suffix}"] = source_url
    return found
|
||||||
|
|
||||||
def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
|
def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
|
||||||
file_path: Path, transfer_type: str,
|
file_path: Path, transfer_type: str,
|
||||||
force_nfo: bool = False, force_img: bool = False):
|
force_nfo: bool = False, force_img: bool = False):
|
||||||
@ -68,15 +81,11 @@ class DoubanScraper:
|
|||||||
self.__gen_movie_nfo_file(mediainfo=mediainfo,
|
self.__gen_movie_nfo_file(mediainfo=mediainfo,
|
||||||
file_path=file_path)
|
file_path=file_path)
|
||||||
# 生成电影图片
|
# 生成电影图片
|
||||||
image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
|
image_dict = self.get_metadata_img(mediainfo)
|
||||||
if self._force_img or not image_path.exists():
|
for img_name, img_url in image_dict.items():
|
||||||
self.__save_image(url=mediainfo.poster_path,
|
image_path = file_path.with_name(img_name)
|
||||||
file_path=image_path)
|
|
||||||
# 背景图
|
|
||||||
if mediainfo.backdrop_path:
|
|
||||||
image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
|
|
||||||
if self._force_img or not image_path.exists():
|
if self._force_img or not image_path.exists():
|
||||||
self.__save_image(url=mediainfo.backdrop_path,
|
self.__save_image(url=img_url,
|
||||||
file_path=image_path)
|
file_path=image_path)
|
||||||
# 电视剧
|
# 电视剧
|
||||||
else:
|
else:
|
||||||
@ -86,15 +95,11 @@ class DoubanScraper:
|
|||||||
self.__gen_tv_nfo_file(mediainfo=mediainfo,
|
self.__gen_tv_nfo_file(mediainfo=mediainfo,
|
||||||
dir_path=file_path.parents[1])
|
dir_path=file_path.parents[1])
|
||||||
# 生成根目录图片
|
# 生成根目录图片
|
||||||
image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
|
image_dict = self.get_metadata_img(mediainfo)
|
||||||
if self._force_img or not image_path.exists():
|
for img_name, img_url in image_dict.items():
|
||||||
self.__save_image(url=mediainfo.poster_path,
|
image_path = file_path.with_name(img_name)
|
||||||
file_path=image_path)
|
|
||||||
# 背景图
|
|
||||||
if mediainfo.backdrop_path:
|
|
||||||
image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
|
|
||||||
if self._force_img or not image_path.exists():
|
if self._force_img or not image_path.exists():
|
||||||
self.__save_image(url=mediainfo.backdrop_path,
|
self.__save_image(url=img_url,
|
||||||
file_path=image_path)
|
file_path=image_path)
|
||||||
# 季目录NFO
|
# 季目录NFO
|
||||||
if self._force_nfo or not file_path.with_name("season.nfo").exists():
|
if self._force_nfo or not file_path.with_name("season.nfo").exists():
|
||||||
|
@ -347,7 +347,17 @@ class TheMovieDbModule(_ModuleBase):
|
|||||||
"""
|
"""
|
||||||
if settings.SCRAP_SOURCE != "themoviedb":
|
if settings.SCRAP_SOURCE != "themoviedb":
|
||||||
return None
|
return None
|
||||||
return self.scraper.get_meta_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
|
return self.scraper.get_metadata_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
|
||||||
|
|
||||||
|
def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Return image file names and URLs for a media item from the TMDB scraper.

    :param mediainfo: media information
    :param season: season number
    :return: the scraper's result, or an empty dict when the configured
        scrape source is not ``themoviedb``
    """
    tmdb_selected = settings.SCRAP_SOURCE == "themoviedb"
    if tmdb_selected:
        return self.scraper.get_metadata_img(mediainfo=mediainfo, season=season)
    return {}
|
||||||
|
|
||||||
def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
|
def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
|
||||||
page: int = 1) -> Optional[List[MediaInfo]]:
|
page: int = 1) -> Optional[List[MediaInfo]]:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional, Tuple
|
||||||
from xml.dom import minidom
|
from xml.dom import minidom
|
||||||
|
|
||||||
from requests import RequestException
|
from requests import RequestException
|
||||||
@ -26,8 +26,8 @@ class TmdbScraper:
|
|||||||
def __init__(self, tmdb):
|
def __init__(self, tmdb):
|
||||||
self.tmdb = tmdb
|
self.tmdb = tmdb
|
||||||
|
|
||||||
def get_meta_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
|
def get_metadata_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
|
||||||
season: int = None, episode: int = None) -> Optional[str]:
|
season: int = None, episode: int = None) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
获取NFO文件内容文本
|
获取NFO文件内容文本
|
||||||
:param meta: 元数据
|
:param meta: 元数据
|
||||||
@ -58,6 +58,48 @@ class TmdbScraper:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
    """
    Collect the artwork to save for a media item as ``{file name: url}``.

    Three cases are handled:

    * movie: every ``*_path`` attribute of ``mediainfo`` that holds an
      http(s) URL;
    * non-movie with ``season`` given: only that season's poster, looked up
      through ``self.tmdb.get_tv_season_detail``;
    * non-movie without ``season``: the show-level ``*_path`` attributes,
      skipping attributes whose name starts with ``season``.

    :param mediainfo: media information
    :param season: season number; ``0`` is treated as a real season
    :return: mapping of image file name to download URL, empty when nothing
        is available
    """
    images = {}
    is_movie = mediainfo.type == MediaType.MOVIE

    if not is_movie and season is not None:
        # Season artwork only: query the season detail and take its poster.
        seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
        if seasoninfo:
            poster = self.get_season_poster(seasoninfo, season)
            # Guard before unpacking: the helper may yield nothing when the
            # season has no poster.
            if poster:
                poster_name, poster_url = poster
                if poster_name and poster_url:
                    images[poster_name] = poster_url
        return images

    # Main artwork of the movie / show, taken from the mediainfo attributes.
    for attr_name, attr_value in vars(mediainfo).items():
        if not attr_name.endswith("_path"):
            continue
        # Season-specific attributes do not belong in the show root directory.
        if not is_movie and attr_name.startswith("season"):
            continue
        if not isinstance(attr_value, str) or not attr_value.startswith("http"):
            continue
        image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
        images[image_name] = attr_value
    return images
|
||||||
|
|
||||||
|
@staticmethod
def get_season_poster(seasoninfo: dict, season: int) -> Tuple[str, str]:
    """
    Build the file name and download URL of a season poster.

    :param seasoninfo: season detail dict; only its ``poster_path`` entry is read
    :param season: season number, left-padded with zeros to two digits in the
        file name
    :return: ``(image_name, url)``, or ``("", "")`` when the season detail
        carries no poster path
    """
    poster_path = seasoninfo.get("poster_path")
    if not poster_path:
        # Callers unpack the result into two names, so always return a pair
        # instead of falling through to an implicit None.
        return "", ""
    sea_seq = str(season).rjust(2, '0')
    # File extension, taken from the poster path
    ext = Path(poster_path).suffix
    # Full-size image URL on the configured TMDB image domain
    url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{poster_path}"
    image_name = f"season{sea_seq}-poster{ext}"
    return image_name, url
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
|
def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
|
||||||
"""
|
"""
|
||||||
@ -97,17 +139,11 @@ class TmdbScraper:
|
|||||||
self.__gen_movie_nfo_file(mediainfo=mediainfo,
|
self.__gen_movie_nfo_file(mediainfo=mediainfo,
|
||||||
file_path=file_path)
|
file_path=file_path)
|
||||||
# 生成电影图片
|
# 生成电影图片
|
||||||
for attr_name, attr_value in vars(mediainfo).items():
|
image_dict = self.get_metadata_img(mediainfo=mediainfo)
|
||||||
if attr_value \
|
for image_name, image_url in image_dict.items():
|
||||||
and attr_name.endswith("_path") \
|
image_path = file_path.with_name(image_name)
|
||||||
and attr_value \
|
if self._force_img or not image_path.exists():
|
||||||
and isinstance(attr_value, str) \
|
self.__save_image(url=image_url, file_path=image_path)
|
||||||
and attr_value.startswith("http"):
|
|
||||||
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
|
|
||||||
image_path = file_path.with_name(image_name)
|
|
||||||
if self._force_img or not image_path.exists():
|
|
||||||
self.__save_image(url=attr_value,
|
|
||||||
file_path=image_path)
|
|
||||||
# 电视剧,路径为每一季的文件名 名称/Season xx/名称 SxxExx.xxx
|
# 电视剧,路径为每一季的文件名 名称/Season xx/名称 SxxExx.xxx
|
||||||
else:
|
else:
|
||||||
# 如果有上游传入的元信息则使用,否则使用文件名识别
|
# 如果有上游传入的元信息则使用,否则使用文件名识别
|
||||||
@ -120,18 +156,11 @@ class TmdbScraper:
|
|||||||
self.__gen_tv_nfo_file(mediainfo=mediainfo,
|
self.__gen_tv_nfo_file(mediainfo=mediainfo,
|
||||||
dir_path=file_path.parents[1])
|
dir_path=file_path.parents[1])
|
||||||
# 生成根目录图片
|
# 生成根目录图片
|
||||||
for attr_name, attr_value in vars(mediainfo).items():
|
image_dict = self.get_metadata_img(mediainfo=mediainfo)
|
||||||
if attr_name \
|
for image_name, image_url in image_dict.items():
|
||||||
and attr_name.endswith("_path") \
|
image_path = file_path.parent.with_name(image_name)
|
||||||
and not attr_name.startswith("season") \
|
if self._force_img or not image_path.exists():
|
||||||
and attr_value \
|
self.__save_image(url=image_url, file_path=image_path)
|
||||||
and isinstance(attr_value, str) \
|
|
||||||
and attr_value.startswith("http"):
|
|
||||||
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
|
|
||||||
image_path = file_path.parent.with_name(image_name)
|
|
||||||
if self._force_img or not image_path.exists():
|
|
||||||
self.__save_image(url=attr_value,
|
|
||||||
file_path=image_path)
|
|
||||||
# 查询季信息
|
# 查询季信息
|
||||||
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
|
seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
|
||||||
if seasoninfo:
|
if seasoninfo:
|
||||||
@ -140,29 +169,12 @@ class TmdbScraper:
|
|||||||
self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
|
self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
|
||||||
season=meta.begin_season,
|
season=meta.begin_season,
|
||||||
season_path=file_path)
|
season_path=file_path)
|
||||||
# TMDB季poster图片
|
# TMDB季图片
|
||||||
sea_seq = str(meta.begin_season).rjust(2, '0')
|
poster_name, poster_url = self.get_season_poster(seasoninfo, meta.begin_season)
|
||||||
if seasoninfo.get("poster_path"):
|
if poster_name and poster_url:
|
||||||
# 后缀
|
image_path = file_path.parent.with_name(poster_name)
|
||||||
ext = Path(seasoninfo.get('poster_path')).suffix
|
|
||||||
# URL
|
|
||||||
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
|
|
||||||
image_path = file_path.parent.with_name(f"season{sea_seq}-poster{ext}")
|
|
||||||
if self._force_img or not image_path.exists():
|
if self._force_img or not image_path.exists():
|
||||||
self.__save_image(url=url, file_path=image_path)
|
self.__save_image(url=poster_url, file_path=image_path)
|
||||||
# 季的其它图片
|
|
||||||
for attr_name, attr_value in vars(mediainfo).items():
|
|
||||||
if attr_value \
|
|
||||||
and attr_name.startswith("season") \
|
|
||||||
and not attr_name.endswith("poster_path") \
|
|
||||||
and attr_value \
|
|
||||||
and isinstance(attr_value, str) \
|
|
||||||
and attr_value.startswith("http"):
|
|
||||||
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
|
|
||||||
image_path = file_path.parent.with_name(image_name)
|
|
||||||
if self._force_img or not image_path.exists():
|
|
||||||
self.__save_image(url=attr_value,
|
|
||||||
file_path=image_path)
|
|
||||||
# 查询集详情
|
# 查询集详情
|
||||||
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
|
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
|
||||||
if episodeinfo:
|
if episodeinfo:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user