fix scrape api

2024-06-21 19:19:10 +08:00
parent f9f4d97a51
commit e0a251b339
8 changed files with 198 additions and 53 deletions
--- a/app/api/endpoints/media.py
+++ b/app/api/endpoints/media.py
@@ -97,30 +97,31 @@ def search(title: str,
    return result[(page - 1) * count:page * count]


-@router.get("/scrape", summary="刮削媒体信息", response_model=schemas.Response)
+@router.post("/scrape/{storage}", summary="刮削媒体信息", response_model=schemas.Response)
 def scrape(fileitem: schemas.FileItem,
           storage: str = "local",
           _: schemas.TokenPayload = Depends(verify_token)) -> Any:
    """
    刮削媒体信息
    """
-    if not fileitem:
+    if not fileitem or not fileitem.path:
        return schemas.Response(success=False, message="刮削路径无效")
    chain = MediaChain()
    # 识别媒体信息
-    meta = MetaInfoPath(fileitem.path)
+    scrape_path = Path(fileitem.path)
+    meta = MetaInfoPath(scrape_path)
    mediainfo = chain.recognize_media(meta)
    if not media_info:
        return schemas.Response(success=False, message="刮削失败，无法识别媒体信息")
    if storage == "local":
-        scrape_path = Path(fileitem.path)
        if not scrape_path.exists():
            return schemas.Response(success=False, message="刮削路径不存在")
-        # 刮削
+        # 刮削本地
        chain.scrape_metadata(path=scrape_path, mediainfo=mediainfo, transfer_type=settings.TRANSFER_TYPE)
    else:
        if not fileitem.fileid:
            return schemas.Response(success=False, message="刮削文件ID无效")
+        # 刮削在线
        chain.scrape_metadata_online(storage=storage, fileitem=fileitem, meta=meta, mediainfo=mediainfo)
    return schemas.Response(success=True, message="刮削完成")

--- a/app/chain/init.py
+++ b/app/chain/init.py
@@ -231,14 +231,15 @@ class ChainBase(metaclass=ABCMeta):
        """
        return self.run_module("tvdb_info", tvdbid=tvdbid)

-    def tmdb_info(self, tmdbid: int, mtype: MediaType) -> Optional[dict]:
+    def tmdb_info(self, tmdbid: int, mtype: MediaType, season: int = None) -> Optional[dict]:
        """
        获取TMDB信息
        :param tmdbid: int
        :param mtype:  媒体类型
+        :param season: 季
        :return: TVDB信息
        """
-        return self.run_module("tmdb_info", tmdbid=tmdbid, mtype=mtype)
+        return self.run_module("tmdb_info", tmdbid=tmdbid, mtype=mtype, season=season)

    def bangumi_info(self, bangumiid: int) -> Optional[dict]:
        """
--- a/app/chain/media.py
+++ b/app/chain/media.py
@@ -15,6 +15,7 @@ from app.helper.aliyun import AliyunHelper
 from app.helper.u115 import U115Helper
 from app.log import logger
 from app.schemas.types import EventType, MediaType
+from app.utils.http import RequestUtils
 from app.utils.singleton import Singleton
 from app.utils.string import StringUtils

@@ -332,72 +333,203 @@ class MediaChain(ChainBase, metaclass=Singleton):
        return None

    def scrape_metadata_online(self, storage: str, fileitem: schemas.FileItem,
-                               meta: MetaBase, mediainfo: MediaInfo):
+                               meta: MetaBase, mediainfo: MediaInfo, init_folder: bool = True):
        """
        远程刮削媒体信息（网盘等）
        """

-        def __list_files(s: str, f: str):
-            if s == "aliyun":
-                return AliyunHelper().list(parent_file_id=f)
-            if s == "u115":
-                return U115Helper().list(parent_file_id=f)
+        def __list_files(_storage: str, _fileid: str, _path: str = None, _drive_id: str = None):
+            if _storage == "aliyun":
+                return AliyunHelper().list(drive_id=_drive_id, parent_file_id=_fileid, path=_path)
+            if _storage == "u115":
+                return U115Helper().list(parent_file_id=_fileid, path=_path)
            return []

-        def __upload_file(s: str, p: str, f: Path):
-            if s == "aliyun":
-                return AliyunHelper().upload(parent_file_id=p, file_path=f)
-            if s == "u115":
-                return U115Helper().upload(parent_file_id=p, file_path=f)
+        def __upload_file(_storage: str, _fileid: str, _path: Path):
+            if _storage == "aliyun":
+                return AliyunHelper().upload(parent_file_id=_fileid, file_path=_path)
+            if _storage == "u115":
+                return U115Helper().upload(parent_file_id=_fileid, file_path=_path)
+
+        def __save_image(u: str, f: Path):
+            """
+            下载图片并保存
+            """
+            try:
+                logger.info(f"正在下载{f.stem}图片：{u} ...")
+                r = RequestUtils(proxies=settings.PROXY).get_res(url=u)
+                if r:
+                    f.write_bytes(r.content)
+                else:
+                    logger.info(f"{f.stem}图片下载失败，请检查网络连通性！")
+            except Exception as err:
+                logger.error(f"{f.stem}图片下载失败：{str(err)}！")

        if storage not in ["aliyun", "u115"]:
            logger.warn(f"不支持的存储类型：{storage}")
            return
+
+        # 当前文件路径
        filepath = Path(fileitem.path)
+        if fileitem.type == "file" \
+                and (not filepath.suffix or filepath.suffix.lower() not in settings.RMT_MEDIAEXT):
+            return
+        logger.info(f"开始刮削：{filepath} ...")
        if mediainfo.type == MediaType.MOVIE:
+            # 电影
            if fileitem.type == "file":
                # 电影文件
+                logger.info(f"正在生成电影nfo：{mediainfo.title_year} - {filepath.name}")
                movie_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo)
                if not movie_nfo:
-                    logger.warn(f"无法生成电影NFO文件：{meta.name}")
+                    logger.warn(f"{filepath.name} nfo文件生成失败！")
                    return
                # 写入到临时目录
                nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo"
                nfo_path.write_bytes(movie_nfo)
                # 上传NFO文件
+                logger.info(f"上传NFO文件：{nfo_path.name} ...")
                __upload_file(storage, fileitem.parent_fileid, nfo_path)
+                logger.info(f"{nfo_path.name} 上传成功")
            else:
                # 电影目录
-                files = __list_files(storage, fileitem.fileid)
+                files = __list_files(_storage=storage, _fileid=fileitem.fileid,
+                                     _drive_id=fileitem.drive_id, _path=fileitem.path)
                for file in files:
                    self.scrape_metadata_online(storage=storage, fileitem=file,
-                                                meta=meta, mediainfo=mediainfo)
+                                                meta=meta, mediainfo=mediainfo,
+                                                init_folder=False)
+                # 生成图片文件和上传
+                if init_folder:
+                    for attr_name, attr_value in vars(mediainfo).items():
+                        if attr_value \
+                                and attr_name.endswith("_path") \
+                                and attr_value \
+                                and isinstance(attr_value, str) \
+                                and attr_value.startswith("http"):
+                            image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                            # 写入nfo到根目录
+                            image_path = settings.TEMP_PATH / image_name
+                            __save_image(attr_value, image_path)
+                            # 上传图片文件到当前目录
+                            logger.info(f"上传图片文件：{image_path.name} ...")
+                            __upload_file(storage, fileitem.fileid, image_path)
+                            logger.info(f"{image_path.name} 上传成功")
        else:
            # 电视剧
            if fileitem.type == "file":
-                # 电视剧文件
-                tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo, season=meta.begin_season, episode=meta.begin_episode)
-                if not tv_nfo:
-                    logger.warn(f"无法生成电视剧NFO文件：{meta.name}")
+                # 当前为集文件，重新识别季集
+                file_meta = MetaInfoPath(filepath)
+                if not file_meta.begin_episode:
+                    logger.warn(f"{filepath.name} 无法识别文件集数！")
+                    return
+                file_mediainfo = self.recognize_media(meta=file_meta)
+                if not file_mediainfo:
+                    logger.warn(f"{filepath.name} 无法识别文件媒体信息！")
+                    return
+                # 获取集的nfo文件
+                episode_nfo = self.meta_nfo(meta=file_meta, mediainfo=file_mediainfo,
+                                            season=file_meta.begin_season, episode=file_meta.begin_episode)
+                if not episode_nfo:
+                    logger.warn(f"{filepath.name} nfo生成失败！")
                    return
                # 写入到临时目录
                nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo"
-                nfo_path.write_bytes(tv_nfo)
-                # 上传NFO文件
+                nfo_path.write_bytes(episode_nfo)
+                # 上传NFO文件，到文件当前目录下
+                logger.info(f"上传NFO文件：{nfo_path.name} ...")
                __upload_file(storage, fileitem.parent_fileid, nfo_path)
-            else:
-                # 根目录
-                tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo)
-                if not tv_nfo:
-                    logger.warn(f"无法生成电视剧NFO文件：{meta.name}")
-                    return
-                # 写入nfo到根目录
-                nfo_path = settings.TEMP_PATH / f"tvshow.nfo"
-                nfo_path.write_bytes(tv_nfo)
-                # 上传NFO文件
-                __upload_file(storage, fileitem.fileid, nfo_path)
-                # 递归刮削目录内的文件和子目录
-                files = __list_files(storage, fileitem.fileid)
+                logger.info(f"{nfo_path.name} 上传成功")
+            elif meta.begin_season:
+                # 当前为季的目录，处理目录内的文件
+                files = __list_files(_storage=storage, _fileid=fileitem.fileid,
+                                     _drive_id=fileitem.drive_id, _path=fileitem.path)
                for file in files:
                    self.scrape_metadata_online(storage=storage, fileitem=file,
-                                                meta=meta, mediainfo=mediainfo)
+                                                meta=meta, mediainfo=mediainfo,
+                                                init_folder=False)
+                # 生成季的nfo和图片
+                if init_folder:
+                    # 季nfo
+                    season_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo, season=meta.begin_season)
+                    if not season_nfo:
+                        logger.warn(f"无法生成电视剧季nfo文件：{meta.name}")
+                        return
+                    # 写入nfo到根目录
+                    nfo_path = settings.TEMP_PATH / "season.nfo"
+                    nfo_path.write_bytes(season_nfo)
+                    # 上传NFO文件
+                    logger.info(f"上传NFO文件：{nfo_path.name} ...")
+                    __upload_file(storage, fileitem.fileid, nfo_path)
+                    logger.info(f"{nfo_path.name} 上传成功")
+                    # TMDB季poster图片
+                    sea_seq = str(meta.begin_season).rjust(2, '0')
+                    # 查询季剧详情
+                    seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
+                                                season=meta.begin_season)
+                    if not seasoninfo:
+                        logger.warn(f"无法获取 {mediainfo.title_year} 第{meta.begin_season}季 的媒体信息！")
+                        return
+                    if seasoninfo.get("poster_path"):
+                        # 下载图片
+                        ext = Path(seasoninfo.get('poster_path')).suffix
+                        url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
+                        image_path = filepath.parent.with_name(f"season{sea_seq}-poster{ext}")
+                        __save_image(url, image_path)
+                        # 上传图片文件到当前目录
+                        logger.info(f"上传图片文件：{image_path.name} ...")
+                        __upload_file(storage, fileitem.fileid, image_path)
+                        logger.info(f"{image_path.name} 上传成功")
+                    # 季的其它图片
+                    for attr_name, attr_value in vars(mediainfo).items():
+                        if attr_value \
+                                and attr_name.startswith("season") \
+                                and not attr_name.endswith("poster_path") \
+                                and attr_value \
+                                and isinstance(attr_value, str) \
+                                and attr_value.startswith("http"):
+                            image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                            image_path = filepath.parent.with_name(image_name)
+                            __save_image(attr_value, image_path)
+                            # 上传图片文件到当前目录
+                            logger.info(f"上传图片文件：{image_path.name} ...")
+                            __upload_file(storage, fileitem.fileid, image_path)
+                            logger.info(f"{image_path.name} 上传成功")
+            else:
+                # 当前为根目录，处理目录内的文件
+                files = __list_files(_storage=storage, _fileid=fileitem.fileid,
+                                     _drive_id=fileitem.drive_id, _path=fileitem.path)
+                for file in files:
+                    self.scrape_metadata_online(storage=storage, fileitem=file,
+                                                meta=meta, mediainfo=mediainfo,
+                                                init_folder=False)
+                # 生成根目录的nfo和图片
+                if init_folder:
+                    tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo)
+                    if not tv_nfo:
+                        logger.warn(f"无法生成电视剧nfo文件：{meta.name}")
+                        return
+                    # 写入nfo到根目录
+                    nfo_path = settings.TEMP_PATH / "tvshow.nfo"
+                    nfo_path.write_bytes(tv_nfo)
+                    # 上传NFO文件
+                    logger.info(f"上传NFO文件：{nfo_path.name} ...")
+                    __upload_file(storage, fileitem.fileid, nfo_path)
+                    logger.info(f"{nfo_path.name} 上传成功")
+                    # 生成根目录图片
+                    for attr_name, attr_value in vars(mediainfo).items():
+                        if attr_name \
+                                and attr_name.endswith("_path") \
+                                and not attr_name.startswith("season") \
+                                and attr_value \
+                                and isinstance(attr_value, str) \
+                                and attr_value.startswith("http"):
+                            image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
+                            image_path = filepath.parent.with_name(image_name)
+                            __save_image(attr_value, image_path)
+                            # 上传图片文件到当前目录
+                            logger.info(f"上传图片文件：{image_path.name} ...")
+                            __upload_file(storage, fileitem.fileid, image_path)
+                            logger.info(f"{image_path.name} 上传成功")
+
+        logger.info(f"{filepath.name} 刮削完成")
--- a/app/helper/aliyun.py
+++ b/app/helper/aliyun.py
@@ -69,7 +69,11 @@ class AliyunHelper:
        if res is None:
            logger.warn("无法连接到阿里云盘！")
            return
-        result = res.json()
+        try:
+            result = res.json()
+        except Exception as err:
+            logger.error(f"解析阿里云盘返回数据失败：{str(err)}")
+            return
        code = result.get("code")
        message = result.get("message")
        display_message = result.get("display_message")
@@ -336,7 +340,7 @@ class AliyunHelper:
                    fileid=parent_file_id,
                    drive_id=params.get("resourceDriveId"),
                    parent_fileid="root",
-                    type="folder",
+                    type="dir",
                    path="/资源库/",
                    name="资源库"
                ),
@@ -344,7 +348,7 @@ class AliyunHelper:
                    fileid=parent_file_id,
                    drive_id=params.get("backDriveId"),
                    parent_fileid="root",
-                    type="folder",
+                    type="dir",
                    path="/备份盘/",
                    name="备份盘"
                )
@@ -386,8 +390,8 @@ class AliyunHelper:
        return [schemas.FileItem(
            fileid=fileinfo.get("file_id"),
            parent_fileid=fileinfo.get("parent_file_id"),
-            type="file",
-            path=f"{path}{fileinfo.get('name')}",
+            type="dir" if fileinfo.get("type") == "folder" else "file",
+            path=f"{path}{fileinfo.get('name')}" + ("/" if fileinfo.get("type") == "folder" else ""),
            name=fileinfo.get("name"),
            size=fileinfo.get("size"),
            extension=fileinfo.get("file_extension"),
@@ -472,7 +476,7 @@ class AliyunHelper:
                fileid=result.get("file_id"),
                drive_id=result.get("drive_id"),
                parent_fileid=result.get("parent_file_id"),
-                type=result.get("type"),
+                type="file",
                name=result.get("name"),
                size=result.get("size"),
                extension=result.get("file_extension"),
--- a/app/helper/u115.py
+++ b/app/helper/u115.py
@@ -150,7 +150,7 @@ class U115Helper(metaclass=Singleton):
                fileid=item.file_id,
                parent_fileid=item.parent_id,
                type="dir" if item.is_dir else "file",
-                path=f"{path}{item.name}" + "/" if item.is_dir else "",
+                path=f"{path}{item.name}" + ("/" if item.is_dir else ""),
                name=item.name,
                size=item.size,
                extension=Path(item.name).suffix[1:],
--- a/app/modules/douban/scraper.py
+++ b/app/modules/douban/scraper.py
@@ -136,7 +136,8 @@ class DoubanScraper:
        :param file_path: 电影文件路径
        """
        # 开始生成XML
-        logger.info(f"正在生成电影NFO文件：{file_path.name}")
+        if file_path:
+            logger.info(f"正在生成电影NFO文件：{file_path.name}")
        doc = minidom.Document()
        root = DomUtils.add_node(doc, doc, "movie")
        # 公共部分
--- a/app/modules/themoviedb/init.py
+++ b/app/modules/themoviedb/init.py
@@ -216,14 +216,18 @@ class TheMovieDbModule(_ModuleBase):
                                      tmdbid=info.get("id"))
        return info

-    def tmdb_info(self, tmdbid: int, mtype: MediaType) -> Optional[dict]:
+    def tmdb_info(self, tmdbid: int, mtype: MediaType, season: int = None) -> Optional[dict]:
        """
        获取TMDB信息
        :param tmdbid: int
        :param mtype:  媒体类型
+        :param season:  季号
        :return: TVDB信息
        """
-        return self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
+        if not season:
+            return self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
+        else:
+            return self.tmdb.get_tv_season_detail(tmdbid=tmdbid, season=season)

    def media_category(self) -> Optional[Dict[str, list]]:
        """
--- a/app/modules/themoviedb/scraper.py
+++ b/app/modules/themoviedb/scraper.py
@@ -305,7 +305,8 @@ class TmdbScraper:
        :param season: 季号
        :param season_path: 电视剧季的目录
        """
-        logger.info(f"正在生成季NFO文件：{season_path.name}")
+        if season_path:
+            logger.info(f"正在生成季NFO文件：{season_path.name}")
        doc = minidom.Document()
        root = DomUtils.add_node(doc, doc, "season")
        # 简介
@@ -343,7 +344,8 @@ class TmdbScraper:
        :param file_path: 集文件的路径
        """
        # 开始生成集的信息
-        logger.info(f"正在生成剧集NFO文件：{file_path.name}")
+        if file_path:
+            logger.info(f"正在生成剧集NFO文件：{file_path.name}")
        doc = minidom.Document()
        root = DomUtils.add_node(doc, doc, "episodedetails")
        # TMDBID