fix scrape api

This commit is contained in:
jxxghp 2024-06-21 19:19:10 +08:00
parent f9f4d97a51
commit e0a251b339
8 changed files with 198 additions and 53 deletions

View File

@ -97,30 +97,31 @@ def search(title: str,
return result[(page - 1) * count:page * count] return result[(page - 1) * count:page * count]
@router.get("/scrape", summary="刮削媒体信息", response_model=schemas.Response) @router.post("/scrape/{storage}", summary="刮削媒体信息", response_model=schemas.Response)
def scrape(fileitem: schemas.FileItem, def scrape(fileitem: schemas.FileItem,
storage: str = "local", storage: str = "local",
_: schemas.TokenPayload = Depends(verify_token)) -> Any: _: schemas.TokenPayload = Depends(verify_token)) -> Any:
""" """
刮削媒体信息 刮削媒体信息
""" """
if not fileitem: if not fileitem or not fileitem.path:
return schemas.Response(success=False, message="刮削路径无效") return schemas.Response(success=False, message="刮削路径无效")
chain = MediaChain() chain = MediaChain()
# 识别媒体信息 # 识别媒体信息
meta = MetaInfoPath(fileitem.path) scrape_path = Path(fileitem.path)
meta = MetaInfoPath(scrape_path)
mediainfo = chain.recognize_media(meta) mediainfo = chain.recognize_media(meta)
if not mediainfo: if not mediainfo:
return schemas.Response(success=False, message="刮削失败,无法识别媒体信息") return schemas.Response(success=False, message="刮削失败,无法识别媒体信息")
if storage == "local": if storage == "local":
scrape_path = Path(fileitem.path)
if not scrape_path.exists(): if not scrape_path.exists():
return schemas.Response(success=False, message="刮削路径不存在") return schemas.Response(success=False, message="刮削路径不存在")
# 刮削 # 刮削本地
chain.scrape_metadata(path=scrape_path, mediainfo=mediainfo, transfer_type=settings.TRANSFER_TYPE) chain.scrape_metadata(path=scrape_path, mediainfo=mediainfo, transfer_type=settings.TRANSFER_TYPE)
else: else:
if not fileitem.fileid: if not fileitem.fileid:
return schemas.Response(success=False, message="刮削文件ID无效") return schemas.Response(success=False, message="刮削文件ID无效")
# 刮削在线
chain.scrape_metadata_online(storage=storage, fileitem=fileitem, meta=meta, mediainfo=mediainfo) chain.scrape_metadata_online(storage=storage, fileitem=fileitem, meta=meta, mediainfo=mediainfo)
return schemas.Response(success=True, message="刮削完成") return schemas.Response(success=True, message="刮削完成")

View File

@ -231,14 +231,15 @@ class ChainBase(metaclass=ABCMeta):
""" """
return self.run_module("tvdb_info", tvdbid=tvdbid) return self.run_module("tvdb_info", tvdbid=tvdbid)
def tmdb_info(self, tmdbid: int, mtype: MediaType) -> Optional[dict]: def tmdb_info(self, tmdbid: int, mtype: MediaType, season: int = None) -> Optional[dict]:
""" """
获取TMDB信息 获取TMDB信息
:param tmdbid: int :param tmdbid: int
:param mtype: 媒体类型 :param mtype: 媒体类型
:param season:
:return: TVDB信息 :return: TVDB信息
""" """
return self.run_module("tmdb_info", tmdbid=tmdbid, mtype=mtype) return self.run_module("tmdb_info", tmdbid=tmdbid, mtype=mtype, season=season)
def bangumi_info(self, bangumiid: int) -> Optional[dict]: def bangumi_info(self, bangumiid: int) -> Optional[dict]:
""" """

View File

@ -15,6 +15,7 @@ from app.helper.aliyun import AliyunHelper
from app.helper.u115 import U115Helper from app.helper.u115 import U115Helper
from app.log import logger from app.log import logger
from app.schemas.types import EventType, MediaType from app.schemas.types import EventType, MediaType
from app.utils.http import RequestUtils
from app.utils.singleton import Singleton from app.utils.singleton import Singleton
from app.utils.string import StringUtils from app.utils.string import StringUtils
@ -332,72 +333,203 @@ class MediaChain(ChainBase, metaclass=Singleton):
return None return None
def scrape_metadata_online(self, storage: str, fileitem: schemas.FileItem, def scrape_metadata_online(self, storage: str, fileitem: schemas.FileItem,
meta: MetaBase, mediainfo: MediaInfo): meta: MetaBase, mediainfo: MediaInfo, init_folder: bool = True):
""" """
远程刮削媒体信息网盘等 远程刮削媒体信息网盘等
""" """
def __list_files(s: str, f: str): def __list_files(_storage: str, _fileid: str, _path: str = None, _drive_id: str = None):
if s == "aliyun": if _storage == "aliyun":
return AliyunHelper().list(parent_file_id=f) return AliyunHelper().list(drive_id=_drive_id, parent_file_id=_fileid, path=_path)
if s == "u115": if _storage == "u115":
return U115Helper().list(parent_file_id=f) return U115Helper().list(parent_file_id=_fileid, path=_path)
return [] return []
def __upload_file(s: str, p: str, f: Path): def __upload_file(_storage: str, _fileid: str, _path: Path):
if s == "aliyun": if _storage == "aliyun":
return AliyunHelper().upload(parent_file_id=p, file_path=f) return AliyunHelper().upload(parent_file_id=_fileid, file_path=_path)
if s == "u115": if _storage == "u115":
return U115Helper().upload(parent_file_id=p, file_path=f) return U115Helper().upload(parent_file_id=_fileid, file_path=_path)
def __save_image(u: str, f: Path):
"""
下载图片并保存
"""
try:
logger.info(f"正在下载{f.stem}图片:{u} ...")
r = RequestUtils(proxies=settings.PROXY).get_res(url=u)
if r:
f.write_bytes(r.content)
else:
logger.info(f"{f.stem}图片下载失败,请检查网络连通性!")
except Exception as err:
logger.error(f"{f.stem}图片下载失败:{str(err)}")
if storage not in ["aliyun", "u115"]: if storage not in ["aliyun", "u115"]:
logger.warn(f"不支持的存储类型:{storage}") logger.warn(f"不支持的存储类型:{storage}")
return return
# 当前文件路径
filepath = Path(fileitem.path) filepath = Path(fileitem.path)
if fileitem.type == "file" \
and (not filepath.suffix or filepath.suffix.lower() not in settings.RMT_MEDIAEXT):
return
logger.info(f"开始刮削:{filepath} ...")
if mediainfo.type == MediaType.MOVIE: if mediainfo.type == MediaType.MOVIE:
# 电影
if fileitem.type == "file": if fileitem.type == "file":
# 电影文件 # 电影文件
logger.info(f"正在生成电影nfo{mediainfo.title_year} - {filepath.name}")
movie_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo) movie_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo)
if not movie_nfo: if not movie_nfo:
logger.warn(f"无法生成电影NFO文件{meta.name}") logger.warn(f"{filepath.name} nfo文件生成失败")
return return
# 写入到临时目录 # 写入到临时目录
nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo" nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo"
nfo_path.write_bytes(movie_nfo) nfo_path.write_bytes(movie_nfo)
# 上传NFO文件 # 上传NFO文件
logger.info(f"上传NFO文件{nfo_path.name} ...")
__upload_file(storage, fileitem.parent_fileid, nfo_path) __upload_file(storage, fileitem.parent_fileid, nfo_path)
logger.info(f"{nfo_path.name} 上传成功")
else: else:
# 电影目录 # 电影目录
files = __list_files(storage, fileitem.fileid) files = __list_files(_storage=storage, _fileid=fileitem.fileid,
_drive_id=fileitem.drive_id, _path=fileitem.path)
for file in files: for file in files:
self.scrape_metadata_online(storage=storage, fileitem=file, self.scrape_metadata_online(storage=storage, fileitem=file,
meta=meta, mediainfo=mediainfo) meta=meta, mediainfo=mediainfo,
init_folder=False)
# 生成图片文件和上传
if init_folder:
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.endswith("_path") \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
# 写入nfo到根目录
image_path = settings.TEMP_PATH / image_name
__save_image(attr_value, image_path)
# 上传图片文件到当前目录
logger.info(f"上传图片文件:{image_path.name} ...")
__upload_file(storage, fileitem.fileid, image_path)
logger.info(f"{image_path.name} 上传成功")
else: else:
# 电视剧 # 电视剧
if fileitem.type == "file": if fileitem.type == "file":
# 电视剧文件 # 当前为集文件,重新识别季集
tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo, season=meta.begin_season, episode=meta.begin_episode) file_meta = MetaInfoPath(filepath)
if not tv_nfo: if not file_meta.begin_episode:
logger.warn(f"无法生成电视剧NFO文件{meta.name}") logger.warn(f"{filepath.name} 无法识别文件集数!")
return
file_mediainfo = self.recognize_media(meta=file_meta)
if not file_mediainfo:
logger.warn(f"{filepath.name} 无法识别文件媒体信息!")
return
# 获取集的nfo文件
episode_nfo = self.meta_nfo(meta=file_meta, mediainfo=file_mediainfo,
season=file_meta.begin_season, episode=file_meta.begin_episode)
if not episode_nfo:
logger.warn(f"{filepath.name} nfo生成失败")
return return
# 写入到临时目录 # 写入到临时目录
nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo" nfo_path = settings.TEMP_PATH / f"{filepath.stem}.nfo"
nfo_path.write_bytes(tv_nfo) nfo_path.write_bytes(episode_nfo)
# 上传NFO文件 # 上传NFO文件到文件当前目录下
logger.info(f"上传NFO文件{nfo_path.name} ...")
__upload_file(storage, fileitem.parent_fileid, nfo_path) __upload_file(storage, fileitem.parent_fileid, nfo_path)
else: logger.info(f"{nfo_path.name} 上传成功")
# 根目录 elif meta.begin_season:
tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo) # 当前为季的目录,处理目录内的文件
if not tv_nfo: files = __list_files(_storage=storage, _fileid=fileitem.fileid,
logger.warn(f"无法生成电视剧NFO文件{meta.name}") _drive_id=fileitem.drive_id, _path=fileitem.path)
return
# 写入nfo到根目录
nfo_path = settings.TEMP_PATH / f"tvshow.nfo"
nfo_path.write_bytes(tv_nfo)
# 上传NFO文件
__upload_file(storage, fileitem.fileid, nfo_path)
# 递归刮削目录内的文件和子目录
files = __list_files(storage, fileitem.fileid)
for file in files: for file in files:
self.scrape_metadata_online(storage=storage, fileitem=file, self.scrape_metadata_online(storage=storage, fileitem=file,
meta=meta, mediainfo=mediainfo) meta=meta, mediainfo=mediainfo,
init_folder=False)
# 生成季的nfo和图片
if init_folder:
# 季nfo
season_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo, season=meta.begin_season)
if not season_nfo:
logger.warn(f"无法生成电视剧季nfo文件{meta.name}")
return
# 写入nfo到根目录
nfo_path = settings.TEMP_PATH / "season.nfo"
nfo_path.write_bytes(season_nfo)
# 上传NFO文件
logger.info(f"上传NFO文件{nfo_path.name} ...")
__upload_file(storage, fileitem.fileid, nfo_path)
logger.info(f"{nfo_path.name} 上传成功")
# TMDB季poster图片
sea_seq = str(meta.begin_season).rjust(2, '0')
# 查询季剧详情
seasoninfo = self.tmdb_info(tmdbid=mediainfo.tmdb_id, mtype=MediaType.TV,
season=meta.begin_season)
if not seasoninfo:
logger.warn(f"无法获取 {mediainfo.title_year}{meta.begin_season}季 的媒体信息!")
return
if seasoninfo.get("poster_path"):
# 下载图片
ext = Path(seasoninfo.get('poster_path')).suffix
url = f"https://{settings.TMDB_IMAGE_DOMAIN}/t/p/original{seasoninfo.get('poster_path')}"
image_path = filepath.parent.with_name(f"season{sea_seq}-poster{ext}")
__save_image(url, image_path)
# 上传图片文件到当前目录
logger.info(f"上传图片文件:{image_path.name} ...")
__upload_file(storage, fileitem.fileid, image_path)
logger.info(f"{image_path.name} 上传成功")
# 季的其它图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_value \
and attr_name.startswith("season") \
and not attr_name.endswith("poster_path") \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_path = filepath.parent.with_name(image_name)
__save_image(attr_value, image_path)
# 上传图片文件到当前目录
logger.info(f"上传图片文件:{image_path.name} ...")
__upload_file(storage, fileitem.fileid, image_path)
logger.info(f"{image_path.name} 上传成功")
else:
# 当前为根目录,处理目录内的文件
files = __list_files(_storage=storage, _fileid=fileitem.fileid,
_drive_id=fileitem.drive_id, _path=fileitem.path)
for file in files:
self.scrape_metadata_online(storage=storage, fileitem=file,
meta=meta, mediainfo=mediainfo,
init_folder=False)
# 生成根目录的nfo和图片
if init_folder:
tv_nfo = self.meta_nfo(meta=meta, mediainfo=mediainfo)
if not tv_nfo:
logger.warn(f"无法生成电视剧nfo文件{meta.name}")
return
# 写入nfo到根目录
nfo_path = settings.TEMP_PATH / "tvshow.nfo"
nfo_path.write_bytes(tv_nfo)
# 上传NFO文件
logger.info(f"上传NFO文件{nfo_path.name} ...")
__upload_file(storage, fileitem.fileid, nfo_path)
logger.info(f"{nfo_path.name} 上传成功")
# 生成根目录图片
for attr_name, attr_value in vars(mediainfo).items():
if attr_name \
and attr_name.endswith("_path") \
and not attr_name.startswith("season") \
and attr_value \
and isinstance(attr_value, str) \
and attr_value.startswith("http"):
image_name = attr_name.replace("_path", "") + Path(attr_value).suffix
image_path = filepath.parent.with_name(image_name)
__save_image(attr_value, image_path)
# 上传图片文件到当前目录
logger.info(f"上传图片文件:{image_path.name} ...")
__upload_file(storage, fileitem.fileid, image_path)
logger.info(f"{image_path.name} 上传成功")
logger.info(f"{filepath.name} 刮削完成")

View File

@ -69,7 +69,11 @@ class AliyunHelper:
if res is None: if res is None:
logger.warn("无法连接到阿里云盘!") logger.warn("无法连接到阿里云盘!")
return return
result = res.json() try:
result = res.json()
except Exception as err:
logger.error(f"解析阿里云盘返回数据失败:{str(err)}")
return
code = result.get("code") code = result.get("code")
message = result.get("message") message = result.get("message")
display_message = result.get("display_message") display_message = result.get("display_message")
@ -336,7 +340,7 @@ class AliyunHelper:
fileid=parent_file_id, fileid=parent_file_id,
drive_id=params.get("resourceDriveId"), drive_id=params.get("resourceDriveId"),
parent_fileid="root", parent_fileid="root",
type="folder", type="dir",
path="/资源库/", path="/资源库/",
name="资源库" name="资源库"
), ),
@ -344,7 +348,7 @@ class AliyunHelper:
fileid=parent_file_id, fileid=parent_file_id,
drive_id=params.get("backDriveId"), drive_id=params.get("backDriveId"),
parent_fileid="root", parent_fileid="root",
type="folder", type="dir",
path="/备份盘/", path="/备份盘/",
name="备份盘" name="备份盘"
) )
@ -386,8 +390,8 @@ class AliyunHelper:
return [schemas.FileItem( return [schemas.FileItem(
fileid=fileinfo.get("file_id"), fileid=fileinfo.get("file_id"),
parent_fileid=fileinfo.get("parent_file_id"), parent_fileid=fileinfo.get("parent_file_id"),
type="file", type="dir" if fileinfo.get("type") == "folder" else "file",
path=f"{path}{fileinfo.get('name')}", path=f"{path}{fileinfo.get('name')}" + ("/" if fileinfo.get("type") == "folder" else ""),
name=fileinfo.get("name"), name=fileinfo.get("name"),
size=fileinfo.get("size"), size=fileinfo.get("size"),
extension=fileinfo.get("file_extension"), extension=fileinfo.get("file_extension"),
@ -472,7 +476,7 @@ class AliyunHelper:
fileid=result.get("file_id"), fileid=result.get("file_id"),
drive_id=result.get("drive_id"), drive_id=result.get("drive_id"),
parent_fileid=result.get("parent_file_id"), parent_fileid=result.get("parent_file_id"),
type=result.get("type"), type="file",
name=result.get("name"), name=result.get("name"),
size=result.get("size"), size=result.get("size"),
extension=result.get("file_extension"), extension=result.get("file_extension"),

View File

@ -150,7 +150,7 @@ class U115Helper(metaclass=Singleton):
fileid=item.file_id, fileid=item.file_id,
parent_fileid=item.parent_id, parent_fileid=item.parent_id,
type="dir" if item.is_dir else "file", type="dir" if item.is_dir else "file",
path=f"{path}{item.name}" + "/" if item.is_dir else "", path=f"{path}{item.name}" + ("/" if item.is_dir else ""),
name=item.name, name=item.name,
size=item.size, size=item.size,
extension=Path(item.name).suffix[1:], extension=Path(item.name).suffix[1:],

View File

@ -136,7 +136,8 @@ class DoubanScraper:
:param file_path: 电影文件路径 :param file_path: 电影文件路径
""" """
# 开始生成XML # 开始生成XML
logger.info(f"正在生成电影NFO文件{file_path.name}") if file_path:
logger.info(f"正在生成电影NFO文件{file_path.name}")
doc = minidom.Document() doc = minidom.Document()
root = DomUtils.add_node(doc, doc, "movie") root = DomUtils.add_node(doc, doc, "movie")
# 公共部分 # 公共部分

View File

@ -216,14 +216,18 @@ class TheMovieDbModule(_ModuleBase):
tmdbid=info.get("id")) tmdbid=info.get("id"))
return info return info
def tmdb_info(self, tmdbid: int, mtype: MediaType) -> Optional[dict]: def tmdb_info(self, tmdbid: int, mtype: MediaType, season: int = None) -> Optional[dict]:
""" """
获取TMDB信息 获取TMDB信息
:param tmdbid: int :param tmdbid: int
:param mtype: 媒体类型 :param mtype: 媒体类型
:param season: 季号
:return: TVDB信息 :return: TVDB信息
""" """
return self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid) if not season:
return self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
else:
return self.tmdb.get_tv_season_detail(tmdbid=tmdbid, season=season)
def media_category(self) -> Optional[Dict[str, list]]: def media_category(self) -> Optional[Dict[str, list]]:
""" """

View File

@ -305,7 +305,8 @@ class TmdbScraper:
:param season: 季号 :param season: 季号
:param season_path: 电视剧季的目录 :param season_path: 电视剧季的目录
""" """
logger.info(f"正在生成季NFO文件{season_path.name}") if season_path:
logger.info(f"正在生成季NFO文件{season_path.name}")
doc = minidom.Document() doc = minidom.Document()
root = DomUtils.add_node(doc, doc, "season") root = DomUtils.add_node(doc, doc, "season")
# 简介 # 简介
@ -343,7 +344,8 @@ class TmdbScraper:
:param file_path: 集文件的路径 :param file_path: 集文件的路径
""" """
# 开始生成集的信息 # 开始生成集的信息
logger.info(f"正在生成剧集NFO文件{file_path.name}") if file_path:
logger.info(f"正在生成剧集NFO文件{file_path.name}")
doc = minidom.Document() doc = minidom.Document()
root = DomUtils.add_node(doc, doc, "episodedetails") root = DomUtils.add_node(doc, doc, "episodedetails")
# TMDBID # TMDBID