fix douban scraper

2023-06-06 21:24:02 +08:00
parent cfe90a4522
commit 6405e087da
2 changed files with 333 additions and 40 deletions
--- a/app/modules/themoviedb/init.py
+++ b/app/modules/themoviedb/init.py
@@ -161,19 +161,14 @@ class TheMovieDb(_ModuleBase):
                continue
            logger.info(f"开始刮削媒体库文件：{file} ...")
            self.gen_scraper_files(mediainfo=mediainfo,
-                                   file_path=file,
-                                   force_nfo=force_nfo,
-                                   force_pic=force_pic)
+                                   file_path=file)
            logger.info(f"{file} 刮削完成")

-    def gen_scraper_files(self, mediainfo: MediaInfo, file_path: Path,
-                          force_nfo: bool = False, force_pic: bool = False):
+    def gen_scraper_files(self, mediainfo: MediaInfo, file_path: Path):
        """
        生成刮削文件
        :param mediainfo: 媒体信息
        :param file_path: 文件路径
-        :param force_nfo: 强制刮削nfo
-        :param force_pic: 强制刮削图片
        """

        def __get_episode_detail(_seasoninfo: dict, _episode: int):
@@ -189,9 +184,8 @@ class TheMovieDb(_ModuleBase):
            # 电影
            if mediainfo.type == MediaType.MOVIE:
                # 强制或者不已存在时才处理
-                if force_nfo \
-                        or (not file_path.with_name("movie.nfo").exists()
-                            and not file_path.with_suffix(".nfo").exists()):
+                if not file_path.with_name("movie.nfo").exists() \
+                        and not file_path.with_suffix(".nfo").exists():
                    #  生成电影描述文件
                    self.__gen_movie_nfo_file(mediainfo=mediainfo,
                                              file_path=file_path)
@@ -202,20 +196,17 @@ class TheMovieDb(_ModuleBase):
                            and attr_value.startswith("http"):
                        image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
                        self.__save_image(url=attr_value,
-                                          file_path=file_path.parent / image_name,
-                                          force=force_pic)
+                                          file_path=file_path.parent / image_name)
            # 电视剧
            else:
                # 识别
                meta = MetaInfo(file_path.stem)
-                # 根目录NFO
-                if force_nfo \
-                        or (not file_path.with_name("tvshow.nfo").exists()
-                            and not file_path.with_suffix(".nfo").exists()):
+                # 不存在时才处理
+                if not file_path.parent.with_name("tvshow.nfo").exists() \
+                        and not file_path.parent.with_name(file_path.stem + ".nfo").exists():
                    # 根目录描述文件
                    self.__gen_tv_nfo_file(mediainfo=mediainfo,
-                                           dir_path=file_path.parents[1],
-                                           force_nfo=force_nfo)
+                                           dir_path=file_path.parents[1])
                # 生成根目录图片
                for attr_name, attr_value in vars(mediainfo).items():
                    if attr_value \
@@ -224,16 +215,14 @@ class TheMovieDb(_ModuleBase):
                            and attr_value.startswith("http"):
                        image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
                        self.__save_image(url=attr_value,
-                                          file_path=file_path.parents[1] / image_name,
-                                          force=force_pic)
+                                          file_path=file_path.parents[1] / image_name)
                # 查询季信息
                seasoninfo = self.tmdb.get_tv_season_detail(mediainfo.tmdb_id, meta.begin_season)
                if seasoninfo:
                    # 季目录NFO
                    self.__gen_tv_season_nfo_file(seasoninfo=seasoninfo,
                                                  season=meta.begin_season,
-                                                  season_path=file_path.parent,
-                                                  force_nfo=force_nfo)
+                                                  season_path=file_path.parent)
                    # 季的图片
                    for attr_name, attr_value in vars(mediainfo).items():
                        if attr_value \
@@ -244,8 +233,7 @@ class TheMovieDb(_ModuleBase):
                                                                       f"{str(meta.begin_season).rjust(2, '0')}-")\
                                         + Path(attr_value).suffix
                            self.__save_image(url=attr_value,
-                                              file_path=file_path.parent / image_name,
-                                              force=force_pic)
+                                              file_path=file_path.parent / image_name)
                # 查询集详情
                episodeinfo = __get_episode_detail(seasoninfo, meta.begin_episode)
                if episodeinfo:
@@ -253,18 +241,19 @@ class TheMovieDb(_ModuleBase):
                    self.__gen_tv_episode_nfo_file(episodeinfo=episodeinfo,
                                                   season=meta.begin_season,
                                                   episode=meta.begin_episode,
-                                                   file_path=file_path,
-                                                   force_nfo=force_nfo)
+                                                   file_path=file_path)
                    # 集的图片
                    if episodeinfo.get('still_path'):
                        self.__save_image(f"https://image.tmdb.org/t/p/original{episodeinfo.get('still_path')}",
-                                          file_path.with_suffix(".jpg"),
-                                          force_pic)
+                                          file_path.with_suffix(".jpg"))
        except Exception as e:
            logger.error(f"{file_path} 刮削失败：{e}")

    @staticmethod
    def __gen_common_nfo(mediainfo: MediaInfo, doc, root):
+        """
+        生成公共NFO
+        """
        # TMDBINFO
        tmdbinfo = mediainfo.tmdb_info
        # 添加时间
@@ -335,6 +324,8 @@ class TheMovieDb(_ModuleBase):
        :param file_path: 电影文件路径
        :param force_nfo: 是否强制生成NFO文件
        """
+        if not force_nfo and file_path.with_suffix(".nfo").exists():
+            return
        # 开始生成XML
        logger.info(f"正在生成电影NFO文件：{file_path.name}")
        doc = minidom.Document()
@@ -351,7 +342,7 @@ class TheMovieDb(_ModuleBase):
        # 年份
        DomUtils.add_node(doc, root, "year", mediainfo.year or "")
        # 保存
-        self.__save_nfo(doc, file_path.with_suffix(".nfo"), force=force_nfo)
+        self.__save_nfo(doc, file_path.with_suffix(".nfo"))

    def __gen_tv_nfo_file(self,
                          mediainfo: MediaInfo,
@@ -363,6 +354,8 @@ class TheMovieDb(_ModuleBase):
        :param dir_path: 电视剧根目录
        :param force_nfo: 是否强制生成NFO文件
        """
+        if not force_nfo and dir_path.joinpath("tvshow.nfo").exists():
+            return
        # 开始生成XML
        logger.info(f"正在生成电视剧NFO文件：{dir_path.name}")
        doc = minidom.Document()
@@ -381,7 +374,7 @@ class TheMovieDb(_ModuleBase):
        DomUtils.add_node(doc, root, "season", "-1")
        DomUtils.add_node(doc, root, "episode", "-1")
        # 保存
-        self.__save_nfo(doc, dir_path.with_name("tvshow.nfo"), force_nfo)
+        self.__save_nfo(doc, dir_path.joinpath("tvshow.nfo"))

    def __gen_tv_season_nfo_file(self, seasoninfo: dict, season: int, season_path: Path,
                                 force_nfo: bool = False):
@@ -392,6 +385,10 @@ class TheMovieDb(_ModuleBase):
        :param season_path: 电视剧季的目录
        :param force_nfo: 是否强制生成NFO文件
        """
+
+        if not force_nfo and season_path.with_name("season.nfo").exists():
+            return
+
        logger.info(f"正在生成季NFO文件：{season_path.name}")
        doc = minidom.Document()
        root = DomUtils.add_node(doc, doc, "season")
@@ -412,7 +409,7 @@ class TheMovieDb(_ModuleBase):
        # seasonnumber
        DomUtils.add_node(doc, root, "seasonnumber", str(season))
        # 保存
-        self.__save_nfo(doc, season_path.with_name("season.nfo"), force_nfo)
+        self.__save_nfo(doc, season_path.with_name("season.nfo"))

    def __gen_tv_episode_nfo_file(self,
                                  episodeinfo: dict,
@@ -428,6 +425,8 @@ class TheMovieDb(_ModuleBase):
        :param file_path: 集文件的路径
        :param force_nfo: 是否强制生成NFO文件
        """
+        if not force_nfo and file_path.with_suffix(".nfo").exists():
+            return
        # 开始生成集的信息
        logger.info(f"正在生成剧集NFO文件：{file_path.name}")
        doc = minidom.Document()
@@ -473,14 +472,14 @@ class TheMovieDb(_ModuleBase):
                DomUtils.add_node(doc, xactor, "type", "Actor")
                DomUtils.add_node(doc, xactor, "tmdbid", actor.get("id") or "")
        # 保存文件
-        self.__save_nfo(doc, file_path.with_suffix(".nfo"), force_nfo)
+        self.__save_nfo(doc, file_path.with_suffix(".nfo"))

    @staticmethod
-    def __save_image(url: str, file_path: Path, force: bool = False):
+    def __save_image(url: str, file_path: Path):
        """
-        下载poster.jpg并保存
+        下载图片并保存
        """
-        if not force and file_path.exists():
+        if file_path.exists():
            return
        try:
            logger.info(f"正在下载{file_path.stem}图片：{url} ...")
@@ -494,10 +493,12 @@ class TheMovieDb(_ModuleBase):
            logger.error(f"{file_path.stem}图片下载失败：{err}")

    @staticmethod
-    def __save_nfo(doc, file_path: Path, force: bool = False):
-        if not force and file_path.exists():
+    def __save_nfo(doc, file_path: Path):
+        """
+        保存NFO
+        """
+        if file_path.exists():
            return
-        logger.info(f"正在保存NFO文件：{file_path.name}")
        xml_str = doc.toprettyxml(indent="  ", encoding="utf-8")
        file_path.write_bytes(xml_str)
        logger.info(f"NFO文件已保存：{file_path.name}")