fix scrap

jxxghp 2024-06-22 19:59:24 +08:00
parent b43e591e4c
commit 0c266726ea
6 changed files with 121 additions and 89 deletions

View File

@@ -522,6 +522,14 @@ class ChainBase(metaclass=ABCMeta):
         self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
                         transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)
 
+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        return self.run_module("metadata_img", mediainfo=mediainfo, season=season)
+
     def media_category(self) -> Optional[Dict[str, list]]:
         """
         Get media categories
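To make the contract of the new chain hook concrete, here is a minimal, self-contained sketch of the `{image file name: image URL}` mapping that `metadata_img` is expected to return. `FakeMediaInfo`, the standalone function, and the example URLs are illustrative stand-ins and not code from this commit; the real method dispatches to the active scraping module via `run_module`.

# A minimal sketch (assumptions noted above) of the mapping returned by metadata_img.
from pathlib import Path
from typing import Optional


class FakeMediaInfo:
    """Stand-in for MediaInfo with only the fields used in this sketch."""

    def __init__(self, poster_path: str = None, backdrop_path: str = None):
        self.poster_path = poster_path
        self.backdrop_path = backdrop_path


def metadata_img(mediainfo: FakeMediaInfo, season: Optional[int] = None) -> dict:
    """Return a mapping of image file name -> image URL, mirroring the new hook's shape."""
    images = {}
    if mediainfo.poster_path:
        images[f"poster{Path(mediainfo.poster_path).suffix}"] = mediainfo.poster_path
    if mediainfo.backdrop_path:
        images[f"backdrop{Path(mediainfo.backdrop_path).suffix}"] = mediainfo.backdrop_path
    return images


if __name__ == "__main__":
    info = FakeMediaInfo(poster_path="https://example.org/p/abc.jpg",
                         backdrop_path="https://example.org/b/def.jpg")
    # {'poster.jpg': 'https://example.org/p/abc.jpg', 'backdrop.jpg': 'https://example.org/b/def.jpg'}
    print(metadata_img(info))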

View File

@@ -480,21 +480,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
                 __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                             _path=nfo_path, _content=season_nfo)
             # TMDB season poster image
-            if settings.SCRAP_SOURCE == "themoviedb":
-                sea_seq = str(meta.begin_season).rjust(2, '0')
-                # Query season details
-                seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
-                                            season=meta.begin_season)
-                if not seasoninfo:
-                    logger.warn(f"Failed to get media info for {mediainfo.title_year} season {meta.begin_season}!")
-                    return
-                # Generate the season poster image
-                if seasoninfo.get("poster_path"):
-                    # Download image
-                    content = __save_image(f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original"
-                                           f"{seasoninfo.get('poster_path')}")
-                    image_path = filepath.with_name(f"season{sea_seq}"
-                                                    f"-poster{Path(seasoninfo.get('poster_path')).suffix}")
+            image_dict = self.metadata_img(mediainfo=mediainfo, season=season_meta.begin_season)
+            if image_dict:
+                for image_name, image_url in image_dict.items():
+                    image_path = filepath.with_name(image_name)
+                    # Download image
+                    content = __save_image(image_url)
                     # Save the image file to the current directory
                     __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                 _path=image_path, _content=content)
@@ -509,15 +500,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
             __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                         _path=nfo_path, _content=tv_nfo)
             # Generate directory images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_name \
-                        and attr_name.endswith("_path") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = filepath.parent.with_name(image_name)
-                    content = __save_image(attr_value)
+            image_dict = self.metadata_img(mediainfo=mediainfo)
+            if image_dict:
+                for image_name, image_url in image_dict.items():
+                    image_path = filepath.parent.with_name(image_name)
+                    # Download image
+                    content = __save_image(image_url)
                     # Save the image file to the current directory
                     __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                 _path=image_path, _content=content)

View File

@@ -773,7 +773,16 @@ class DoubanModule(_ModuleBase):
         """
         if settings.SCRAP_SOURCE != "douban":
            return None
-        return self.scraper.get_meta_nfo(mediainfo=mediainfo, season=season)
+        return self.scraper.get_metadata_nfo(mediainfo=mediainfo, season=season)
+
+    def metadata_img(self, mediainfo: MediaInfo, **kwargs) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        """
+        if settings.SCRAP_SOURCE != "douban":
+            return {}
+        return self.scraper.get_metadata_img(mediainfo=mediainfo)
 
     def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
         """

View File

@@ -17,7 +17,7 @@ class DoubanScraper:
     _force_nfo = False
     _force_img = False
 
-    def get_meta_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
         """
         Get the NFO file content text
         :param mediainfo: media info
@@ -38,6 +38,19 @@ class DoubanScraper:
         return None
 
+    @staticmethod
+    def get_metadata_img(mediainfo: MediaInfo) -> Optional[dict]:
+        """
+        Get image content
+        :param mediainfo: media info
+        """
+        ret_dict = {}
+        if mediainfo.poster_path:
+            ret_dict[f"poster{Path(mediainfo.poster_path).suffix}"] = mediainfo.poster_path
+        if mediainfo.backdrop_path:
+            ret_dict[f"backdrop{Path(mediainfo.backdrop_path).suffix}"] = mediainfo.backdrop_path
+        return ret_dict
+
     def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
                           file_path: Path, transfer_type: str,
                           force_nfo: bool = False, force_img: bool = False):
@@ -68,15 +81,11 @@ class DoubanScraper:
             self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                       file_path=file_path)
             # Generate movie images
-            image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-            if self._force_img or not image_path.exists():
-                self.__save_image(url=mediainfo.poster_path,
-                                  file_path=image_path)
-            # Backdrop image
-            if mediainfo.backdrop_path:
-                image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.backdrop_path,
-                                      file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo)
+            for img_name, img_url in image_dict.items():
+                image_path = file_path.with_name(img_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=img_url,
+                                      file_path=image_path)
         # TV shows
         else:
@@ -86,15 +95,11 @@ class DoubanScraper:
                 self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                        dir_path=file_path.parents[1])
             # Generate root directory images
-            image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-            if self._force_img or not image_path.exists():
-                self.__save_image(url=mediainfo.poster_path,
-                                  file_path=image_path)
-            # Backdrop image
-            if mediainfo.backdrop_path:
-                image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.backdrop_path,
-                                      file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo)
+            for img_name, img_url in image_dict.items():
+                image_path = file_path.with_name(img_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=img_url,
+                                      file_path=image_path)
             # Season directory NFO
             if self._force_nfo or not file_path.with_name("season.nfo").exists():
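Both scrapers now consume the image mapping with the same guard: a file is only downloaded when it does not already exist, unless a forced image refresh was requested. Below is a minimal sketch of that loop; `download` stands in for the scrapers' private image-saving helper and the example path is made up, so treat it as an illustration of the pattern rather than the commit's code.

# Sketch of the shared "skip if present unless forced" download loop (assumptions above).
from pathlib import Path


def download(url: str, dest: Path) -> None:
    """Placeholder for the scraper's private image-saving helper."""
    print(f"would fetch {url} -> {dest}")


def save_images(image_dict: dict, file_path: Path, force_img: bool = False) -> None:
    """Save each named image next to file_path, skipping files that already exist."""
    for image_name, image_url in image_dict.items():
        image_path = file_path.with_name(image_name)
        if force_img or not image_path.exists():
            download(image_url, image_path)


if __name__ == "__main__":
    save_images({"poster.jpg": "https://example.org/p/abc.jpg"},
                Path("/tmp/Movie (2024)/Movie (2024).mkv"))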

View File

@@ -347,7 +347,17 @@ class TheMovieDbModule(_ModuleBase):
         """
         if settings.SCRAP_SOURCE != "themoviedb":
             return None
-        return self.scraper.get_meta_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+        return self.scraper.get_metadata_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+
+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        if settings.SCRAP_SOURCE != "themoviedb":
+            return {}
+        return self.scraper.get_metadata_img(mediainfo=mediainfo, season=season)
 
     def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
                       page: int = 1) -> Optional[List[MediaInfo]]:

View File

@@ -1,6 +1,6 @@
 import traceback
 from pathlib import Path
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 from xml.dom import minidom
 
 from requests import RequestException
@@ -26,8 +26,8 @@ class TmdbScraper:
     def __init__(self, tmdb):
         self.tmdb = tmdb
 
-    def get_meta_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
-                     season: int = None, episode: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
+                         season: int = None, episode: int = None) -> Optional[str]:
         """
         Get the NFO file content text
         :param meta: metadata
@@ -58,6 +58,48 @@ class TmdbScraper:
         return None
 
+    def get_metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image names and URLs
+        :param mediainfo: media info
+        :param season: season number
+        """
+        images = {}
+        if mediainfo.type == MediaType.MOVIE:
+            for attr_name, attr_value in vars(mediainfo).items():
+                if attr_value \
+                        and attr_name.endswith("_path") \
+                        and attr_value \
+                        and isinstance(attr_value, str) \
+                        and attr_value.startswith("http"):
+                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                    images[image_name] = attr_value
+        else:
+            if season:
+                # Query season info
+                seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
+                if seasoninfo:
+                    # TMDB season poster image
+                    poster_name, poster_url = self.get_season_poster(seasoninfo, season)
+                    if poster_name and poster_url:
+                        images[poster_name] = poster_url
+        return images
+
+    @staticmethod
+    def get_season_poster(seasoninfo: dict, season: int) -> Tuple[str, str]:
+        """
+        Get the season poster
+        """
+        # TMDB season poster image
+        sea_seq = str(season).rjust(2, '0')
+        if seasoninfo.get("poster_path"):
+            # Suffix
+            ext = Path(seasoninfo.get('poster_path')).suffix
+            # URL
+            url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
+            image_name = f"season{sea_seq}-poster{ext}"
+            return image_name, url
+
     @staticmethod
     def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
         """
@@ -97,17 +139,11 @@ class TmdbScraper:
                 self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                           file_path=file_path)
             # Generate movie images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_value \
-                        and attr_name.endswith("_path") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = file_path.with_name(image_name)
-                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=attr_value,
-                                          file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo=mediainfo)
+            for image_name, image_url in image_dict.items():
+                image_path = file_path.with_name(image_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=image_url, file_path=image_path)
         # TV show: the path is each season's file name, e.g. Name/Season xx/Name SxxExx.xxx
         else:
             # Use upstream metadata if provided, otherwise identify from the file name
@@ -120,18 +156,11 @@ class TmdbScraper:
                 self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                        dir_path=file_path.parents[1])
             # Generate root directory images
-            for attr_name, attr_value in vars(mediainfo).items():
-                if attr_name \
-                        and attr_name.endswith("_path") \
-                        and not attr_name.startswith("season") \
-                        and attr_value \
-                        and isinstance(attr_value, str) \
-                        and attr_value.startswith("http"):
-                    image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                    image_path = file_path.parent.with_name(image_name)
-                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=attr_value,
-                                          file_path=image_path)
+            image_dict = self.get_metadata_img(mediainfo=mediainfo)
+            for image_name, image_url in image_dict.items():
+                image_path = file_path.parent.with_name(image_name)
+                if self._force_img or not image_path.exists():
+                    self.__save_image(url=image_url, file_path=image_path)
             # Query season info
             seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
             if seasoninfo:
@ -140,29 +169,12 @@ class TmdbScraper:
self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo, self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
season=meta.begin_season, season=meta.begin_season,
season_path=file_path) season_path=file_path)
# TMDB季poster图片 # TMDB季图片
sea_seq = str(meta.begin_season).rjust(2, '0') poster_name, poster_url = self.get_season_poster(seasoninfo, meta.begin_season)
if seasoninfo.get("poster_path"): if poster_name and poster_url:
# 后缀 image_path = file_path.parent.with_name(poster_name)
ext = Path(seasoninfo.get('poster_path')).suffix
# URL
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
image_path = file_path.parent.with_name(f"season{sea_seq}-poster{ext}")
if self._force_img or not image_path.exists(): if self._force_img or not image_path.exists():
self.__save_image(url=url, file_path=image_path) self.__save_image(url=poster_url, file_path=image_path)
# 季的其它图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.startswith("season") \
and not attr_name.endswith("poster_path") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_path = file_path.parent.with_name(image_name)
if self._force_img or not image_path.exists():
self.__save_image(url=attr_value,
file_path=image_path)
# 查询集详情 # 查询集详情
episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode) episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
if episodeinfo: if episodeinfo:
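For reference, the season poster name pads the season number to two digits and keeps the poster's original suffix, while the URL prepends the configured TMDB image domain. A small worked example follows; the domain value, the trimmed season payload, and the season number are assumptions for illustration, not values taken from this commit.

# Worked example of the season poster name/URL construction (assumed inputs).
from pathlib import Path

TMDB_IMAGE_DOMAIN = "image.tmdb.org"      # assumption; the real code reads settings.TMDB_IMAGE_DOMAIN
seasoninfo = {"poster_path": "/abc123.jpg"}  # trimmed stand-in for a TMDB season payload
season = 1

sea_seq = str(season).rjust(2, '0')                       # "01"
ext = Path(seasoninfo["poster_path"]).suffix              # ".jpg"
url = f"https://{TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo['poster_path']}"
image_name = f"season{sea_seq}-poster{ext}"

print(image_name)  # season01-poster.jpg
print(url)         # https://image.tmdb.org/t/p/original/abc123.jpg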