fix scrap

2024-06-22 19:59:24 +08:00
parent b43e591e4c
commit 0c266726ea
6 changed files with 121 additions and 89 deletions
--- a/app/chain/init.py
+++ b/app/chain/init.py
@ -522,6 +522,14 @@ class ChainBase(metaclass=ABCMeta):
        self.run_module("scrape_metadata", path=path, mediainfo=mediainfo, metainfo=metainfo,
                        transfer_type=transfer_type, force_nfo=force_nfo, force_img=force_img)

+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Get image file names and URLs by delegating to the "metadata_img" module hook.
+        :param mediainfo: media information
+        :param season: season number
+        """
+        return self.run_module("metadata_img", mediainfo=mediainfo, season=season)
+
    def media_category(self) -> Optional[Dict[str, list]]:
        """
        获取媒体分类
--- a/app/chain/media.py
+++ b/app/chain/media.py
@ -480,21 +480,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
                        __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                    _path=nfo_path, _content=season_nfo)
                        # TMDB季poster图片
-                        if settings.SCRAP_SOURCE == "themoviedb":
-                            sea_seq = str(meta.begin_season).rjust(2, '0')
-                            # 查询季剧详情
-                            seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
-                                                        season=meta.begin_season)
-                            if not seasoninfo:
-                                logger.warn(f"无法获取 {mediainfo.title_year} 第{meta.begin_season}季 的媒体信息！")
-                                return
-                            # 生成季poster图片
-                            if seasoninfo.get("poster_path"):
+                        image_dict = self.metadata_img(mediainfo=mediainfo, season=season_meta.begin_season)
+                        if image_dict:
+                            for image_name, image_url in image_dict.items():
+                                image_path = filepath.with_name(image_name)
                                # 下载图片
-                                content = __save_image(f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original"
-                                                       f"{seasoninfo.get('poster_path')}")
-                                image_path = filepath.with_name(f"season{sea_seq}"
-                                                                f"-poster{Path(seasoninfo.get('poster_path')).suffix}")
+                                content = __save_image(image_url)
                                # 保存图片文件到当前目录
                                __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                            _path=image_path, _content=content)
@ -509,15 +500,12 @@ class MediaChain(ChainBase, metaclass=Singleton):
                        __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                    _path=nfo_path, _content=tv_nfo)
                        # 生成目录图片
-                        for attr_name, attr_value in vars(mediainfo).items():
-                            if attr_name \
-                                    and attr_name.endswith("_path") \
-                                    and attr_value \
-                                    and isinstance(attr_value, str) \
-                                    and attr_value.startswith("http"):
-                                image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                        image_dict = self.metadata_img(mediainfo=mediainfo)
+                        if image_dict:
+                            for image_name, image_url in image_dict.items():
                                image_path = filepath.parent.with_name(image_name)
-                                content = __save_image(attr_value)
+                                # 下载图片
+                                content = __save_image(image_url)
                                # 保存图片文件到当前目录
                                __save_file(_storage=storage, _drive_id=fileitem.drive_id, _fileid=fileitem.fileid,
                                            _path=image_path, _content=content)
--- a/app/modules/douban/init.py
+++ b/app/modules/douban/init.py
@ -773,7 +773,16 @@ class DoubanModule(_ModuleBase):
        """
        if settings.SCRAP_SOURCE != "douban":
            return None
-        return self.scraper.get_meta_nfo(mediainfo=mediainfo, season=season)
+        return self.scraper.get_metadata_nfo(mediainfo=mediainfo, season=season)
+
+    def metadata_img(self, mediainfo: MediaInfo, **kwargs) -> dict:
+        """
+        Return image file names and URLs while douban is the scrape source; kwargs are ignored.
+        :param mediainfo: media information
+        """
+        if settings.SCRAP_SOURCE == "douban":
+            return self.scraper.get_metadata_img(mediainfo=mediainfo)
+        return {}

    def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]:
        """
--- a/app/modules/douban/scraper.py
+++ b/app/modules/douban/scraper.py
@ -17,7 +17,7 @@ class DoubanScraper:
    _force_nfo = False
    _force_img = False

-    def get_meta_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, mediainfo: MediaInfo, season: int = None) -> Optional[str]:
        """
        获取NFO文件内容文本
        :param mediainfo: 媒体信息
@ -38,6 +38,19 @@ class DoubanScraper:

        return None

+    @staticmethod
+    def get_metadata_img(mediainfo: MediaInfo) -> Optional[dict]:
+        """
+        Map image file names to the poster and backdrop URLs carried by mediainfo.
+        :param mediainfo: media information
+        :return: dict of image file name -> URL; empty attributes are left out
+        """
+        candidates = (
+            ("poster", mediainfo.poster_path),
+            ("backdrop", mediainfo.backdrop_path),
+        )
+        return {f"{kind}{Path(link).suffix}": link for kind, link in candidates if link}
+
    def gen_scraper_files(self, meta: MetaBase, mediainfo: MediaInfo,
                          file_path: Path, transfer_type: str,
                          force_nfo: bool = False, force_img: bool = False):
@ -68,15 +81,11 @@ class DoubanScraper:
                    self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                              file_path=file_path)
                # 生成电影图片
-                image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.poster_path,
-                                      file_path=image_path)
-                # 背景图
-                if mediainfo.backdrop_path:
-                    image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
+                image_dict = self.get_metadata_img(mediainfo)
+                for img_name, img_url in image_dict.items():
+                    image_path = file_path.with_name(img_name)
                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=mediainfo.backdrop_path,
+                        self.__save_image(url=img_url,
                                          file_path=image_path)
            # 电视剧
            else:
@ -86,15 +95,11 @@ class DoubanScraper:
                    self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                           dir_path=file_path.parents[1])
                # 生成根目录图片
-                image_path = file_path.with_name(f"poster{Path(mediainfo.poster_path).suffix}")
-                if self._force_img or not image_path.exists():
-                    self.__save_image(url=mediainfo.poster_path,
-                                      file_path=image_path)
-                # 背景图
-                if mediainfo.backdrop_path:
-                    image_path = file_path.with_name(f"backdrop{Path(mediainfo.backdrop_path).suffix}")
+                image_dict = self.get_metadata_img(mediainfo)
+                for img_name, img_url in image_dict.items():
+                    image_path = file_path.with_name(img_name)
                    if self._force_img or not image_path.exists():
-                        self.__save_image(url=mediainfo.backdrop_path,
+                        self.__save_image(url=img_url,
                                          file_path=image_path)
                # 季目录NFO
                if self._force_nfo or not file_path.with_name("season.nfo").exists():
--- a/app/modules/themoviedb/init.py
+++ b/app/modules/themoviedb/init.py
@ -347,7 +347,17 @@ class TheMovieDbModule(_ModuleBase):
        """
        if settings.SCRAP_SOURCE != "themoviedb":
            return None
-        return self.scraper.get_meta_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+        return self.scraper.get_metadata_nfo(meta=meta, mediainfo=mediainfo, season=season, episode=episode)
+
+    def metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Return image file names and URLs, but only while themoviedb is the scrape source.
+        :param mediainfo: media information
+        :param season: season number
+        """
+        if settings.SCRAP_SOURCE == "themoviedb":
+            return self.scraper.get_metadata_img(mediainfo=mediainfo, season=season)
+        return {}

    def tmdb_discover(self, mtype: MediaType, sort_by: str, with_genres: str, with_original_language: str,
                      page: int = 1) -> Optional[List[MediaInfo]]:
--- a/app/modules/themoviedb/scraper.py
+++ b/app/modules/themoviedb/scraper.py
@ -1,6 +1,6 @@
 import traceback
 from pathlib import Path
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 from xml.dom import minidom

 from requests import RequestException
@ -26,8 +26,8 @@ class TmdbScraper:
    def __init__(self, tmdb):
        self.tmdb = tmdb

-    def get_meta_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
-                     season: int = None, episode: int = None) -> Optional[str]:
+    def get_metadata_nfo(self, meta: MetaBase, mediainfo: MediaInfo,
+                         season: int = None, episode: int = None) -> Optional[str]:
        """
        获取NFO文件内容文本
        :param meta: 元数据
@ -58,6 +58,48 @@ class TmdbScraper:

        return None

+    def get_metadata_img(self, mediainfo: MediaInfo, season: int = None) -> dict:
+        """
+        Collect image file names and URLs for a media item or for one of its seasons.
+        :param mediainfo: media information
+        :param season: season number; if set (non-movie only), just that season's poster is returned
+        :return: dict mapping image file name to image URL (empty when nothing is found)
+        """
+        images = {}
+        is_movie = mediainfo.type == MediaType.MOVIE
+        if season and not is_movie:
+            # Look up the season detail to find its poster
+            seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, season)
+            # Do not unpack blindly: the lookup can come back without a usable (name, url) pair
+            poster = self.get_season_poster(seasoninfo, season) if seasoninfo else None
+            if poster and all(poster):
+                images[poster[0]] = poster[1]
+            return images
+        # No season requested: take every "*_path" attribute that holds an http URL. Non-movie
+        # media skip "season*" attributes, matching the filter of the root-image loop this replaces.
+        for attr_name, attr_value in vars(mediainfo).items():
+            if not (attr_value and isinstance(attr_value, str) and attr_value.startswith("http")):
+                continue
+            if not attr_name.endswith("_path") or (attr_name.startswith("season") and not is_movie):
+                continue
+            images[attr_name.replace("_path", "") + Path(attr_value).suffix] = attr_value
+        return images
+
+    @staticmethod
+    def get_season_poster(seasoninfo: dict, season: int) -> Tuple[str, str]:
+        """
+        Build the file name and download URL of a season's TMDB poster.
+        :param seasoninfo: season detail dict; only its "poster_path" entry is read
+        :param season: season number, left-padded with zeros to two digits in the file name
+        :return: (image_name, url), or ("", "") when seasoninfo has no poster_path
+        """
+        poster_path = seasoninfo.get("poster_path")
+        if not poster_path:
+            # Always hand back a 2-tuple: callers unpack the result into (name, url)
+            return "", ""
+        url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{poster_path}"
+        return f"season{str(season).rjust(2, '0')}-poster{Path(poster_path).suffix}", url
+
    @staticmethod
    def __get_episode_detail(seasoninfo: dict, episode: int) -> dict:
        """
@ -97,17 +139,11 @@ class TmdbScraper:
                    self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                              file_path=file_path)
                # 生成电影图片
-                for attr_name, attr_value in vars(mediainfo).items():
-                    if attr_value \
-                            and attr_name.endswith("_path") \
-                            and attr_value \
-                            and isinstance(attr_value, str) \
-                            and attr_value.startswith("http"):
-                        image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                        image_path = file_path.with_name(image_name)
-                        if self._force_img or not image_path.exists():
-                            self.__save_image(url=attr_value,
-                                              file_path=image_path)
+                image_dict = self.get_metadata_img(mediainfo=mediainfo)
+                for image_name, image_url in image_dict.items():
+                    image_path = file_path.with_name(image_name)
+                    if self._force_img or not image_path.exists():
+                        self.__save_image(url=image_url, file_path=image_path)
            # 电视剧，路径为每一季的文件名 名称/Season xx/名称 SxxExx.xxx
            else:
                # 如果有上游传入的元信息则使用，否则使用文件名识别
@ -120,18 +156,11 @@ class TmdbScraper:
                    self.__gen_tv_nfo_file(mediainfo=mediainfo,
                                           dir_path=file_path.parents[1])
                # 生成根目录图片
-                for attr_name, attr_value in vars(mediainfo).items():
-                    if attr_name \
-                            and attr_name.endswith("_path") \
-                            and not attr_name.startswith("season") \
-                            and attr_value \
-                            and isinstance(attr_value, str) \
-                            and attr_value.startswith("http"):
-                        image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                        image_path = file_path.parent.with_name(image_name)
-                        if self._force_img or not image_path.exists():
-                            self.__save_image(url=attr_value,
-                                              file_path=image_path)
+                image_dict = self.get_metadata_img(mediainfo=mediainfo)
+                for image_name, image_url in image_dict.items():
+                    image_path = file_path.parent.with_name(image_name)
+                    if self._force_img or not image_path.exists():
+                        self.__save_image(url=image_url, file_path=image_path)
                # 查询季信息
                seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
                if seasoninfo:
@ -140,29 +169,12 @@ class TmdbScraper:
                        self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
                                                      season=meta.begin_season,
                                                      season_path=file_path)
-                    # TMDB季poster图片
-                    sea_seq = str(meta.begin_season).rjust(2, '0')
-                    if seasoninfo.get("poster_path"):
-                        # 后缀
-                        ext = Path(seasoninfo.get('poster_path')).suffix
-                        # URL
-                        url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
-                        image_path = file_path.parent.with_name(f"season{sea_seq}-poster{ext}")
+                    # TMDB季图片
+                    poster_name, poster_url = self.get_season_poster(seasoninfo, meta.begin_season)
+                    if poster_name and poster_url:
+                        image_path = file_path.parent.with_name(poster_name)
                        if self._force_img or not image_path.exists():
-                            self.__save_image(url=url, file_path=image_path)
-                    # 季的其它图片
-                    for attr_name, attr_value in vars(mediainfo).items():
-                        if attr_value \
-                                and attr_name.startswith("season") \
-                                and not attr_name.endswith("poster_path") \
-                                and attr_value \
-                                and isinstance(attr_value, str) \
-                                and attr_value.startswith("http"):
-                            image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
-                            image_path = file_path.parent.with_name(image_name)
-                            if self._force_img or not image_path.exists():
-                                self.__save_image(url=attr_value,
-                                                  file_path=image_path)
+                            self.__save_image(url=poster_url, file_path=image_path)
                # 查询集详情
                episodeinfo = self.__get_episode_detail(seasoninfo, meta.begin_episode)
                if episodeinfo: