fix LibraryScraper

This commit is contained in:
jxxghp 2023-08-03 19:10:27 +08:00
parent 1125cfa6ee
commit 10a240e581
2 changed files with 209 additions and 9 deletions

13
app/helper/nfo.py Normal file
View File

@ -0,0 +1,13 @@
import xml.etree.ElementTree as ET
from pathlib import Path


class NfoReader:
    """Minimal reader for Kodi/Emby-style .nfo metadata files (plain XML)."""

    def __init__(self, xml_file_path: Path):
        # Parse eagerly so subsequent lookups are cheap; ET.parse raises on
        # missing files or malformed XML, which callers are expected to handle.
        self.xml_file_path = xml_file_path
        self.tree = ET.parse(xml_file_path)
        self.root = self.tree.getroot()

    def get_element_value(self, element_path):
        """Return the text of the first element matching *element_path*, or None."""
        node = self.root.find(element_path)
        if node is None:
            return None
        return node.text

View File

@ -1,3 +1,4 @@
from pathlib import Path
from threading import Event
from typing import List, Tuple, Dict, Any
@ -5,8 +6,13 @@ from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from app.core.config import settings
from app.core.context import MediaInfo
from app.core.metainfo import MetaInfo
from app.helper.nfo import NfoReader
from app.log import logger
from app.plugins import _PluginBase
from app.schemas import MediaType
from app.utils.system import SystemUtils
class LibraryScraper(_PluginBase):
@ -38,8 +44,8 @@ class LibraryScraper(_PluginBase):
# 限速开关
_enabled = False
_cron = None
_scraper_paths = None
_exclude_paths = None
_scraper_paths = ""
_exclude_paths = ""
# 退出事件
_event = Event()
@ -59,8 +65,12 @@ class LibraryScraper(_PluginBase):
self._scheduler = BackgroundScheduler(timezone=settings.TZ)
if self._cron:
logger.info(f"媒体库刮削服务启动,周期:{self._cron}")
try:
self._scheduler.add_job(self.__libraryscraper,
CronTrigger.from_crontab(self._cron))
except Exception as e:
logger.error(f"媒体库刮削服务启动失败,原因:{e}")
self.systemmessage.put(f"媒体库刮削服务启动失败,原因:{e}")
else:
logger.info(f"媒体库刮削服务启动周期每7天")
self._scheduler.add_job(self.__libraryscraper,
@ -81,7 +91,105 @@ class LibraryScraper(_PluginBase):
pass
def get_form(self) -> Tuple[List[dict], Dict[str, Any]]:
    """
    Build the plugin's configuration form.

    :return: tuple of (Vuetify component tree, default values keyed by the
             form model names bound below: enabled / cron / scraper_paths /
             exclude_paths)
    """
    return [
        {
            'component': 'VForm',
            'content': [
                {
                    'component': 'VRow',
                    'content': [
                        {
                            'component': 'VCol',
                            'props': {
                                'cols': 12,
                                'md': 6
                            },
                            'content': [
                                {
                                    'component': 'VSwitch',
                                    'props': {
                                        'model': 'enabled',
                                        'label': '启用插件',
                                    }
                                }
                            ]
                        }
                    ]
                },
                {
                    'component': 'VRow',
                    'content': [
                        {
                            'component': 'VCol',
                            'props': {
                                'cols': 12,
                                'md': 6
                            },
                            'content': [
                                {
                                    'component': 'VTextField',
                                    'props': {
                                        'model': 'cron',
                                        'label': '执行周期',
                                        'placeholder': '5位cron表达式留空自动'
                                    }
                                }
                            ]
                        }
                    ]
                },
                {
                    'component': 'VRow',
                    'content': [
                        {
                            'component': 'VCol',
                            'props': {
                                'cols': 12
                            },
                            'content': [
                                {
                                    'component': 'VTextarea',
                                    'props': {
                                        'model': 'scraper_paths',
                                        'label': '削刮路径',
                                        'rows': 5,
                                        'placeholder': '每一行一个目录'
                                    }
                                }
                            ]
                        }
                    ]
                },
                {
                    'component': 'VRow',
                    'content': [
                        {
                            'component': 'VCol',
                            'props': {
                                'cols': 12
                            },
                            'content': [
                                {
                                    'component': 'VTextarea',
                                    'props': {
                                        'model': 'exclude_paths',
                                        'label': '排除路径',
                                        'rows': 2,
                                        'placeholder': '每一行一个目录'
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ], {
        "enabled": False,
        "cron": "0 0 */7 * *",
        "scraper_paths": "",
        # Fixed: was "err_hosts" (copied from another plugin) — the form above
        # binds "exclude_paths", so that is the key that needs a default.
        "exclude_paths": ""
    }
def get_page(self) -> List[dict]:
    """
    Build the plugin's detail-page UI; this plugin provides none.
    """
    pass
@ -90,17 +198,96 @@ class LibraryScraper(_PluginBase):
"""
开始刮削媒体库
"""
if not self._scraper_paths:
return
# 已选择的目录
logger.info(f"开始刮削媒体库:{self._scraper_path} ...")
for path in self._scraper_path:
paths = self._scraper_paths.split("\n")
for path in paths:
if not path:
continue
if not Path(path).exists():
logger.warning(f"媒体库刮削路径不存在:{path}")
continue
logger.info(f"开始刮削媒体库:{path} ...")
if self._event.is_set():
logger.info(f"媒体库刮削服务停止")
return
# TODO 刮削目录
# 刮削目录
self.__scrape_dir(Path(path))
logger.info(f"媒体库刮削完成")
def __scrape_dir(self, path: Path):
    """
    Scrape metadata for every media file found under one library directory.

    :param path: library directory to scan
    """
    # Drop blank lines from the exclude setting: a blank entry would become
    # Path("") (i.e. "."), which could spuriously match relative paths and
    # exclude everything. Also guard against the setting being None/empty.
    exclude_paths = [p for p in (self._exclude_paths or "").split("\n") if p]
    # All media files under the directory, filtered by configured extensions
    files = SystemUtils.list_files_with_extensions(path, settings.RMT_MEDIAEXT)
    for file in files:
        # Skip anything that lives inside an excluded directory
        if any(file.is_relative_to(Path(exclude_path)) for exclude_path in exclude_paths):
            logger.debug(f"{file} 在排除目录中,跳过 ...")
            continue
        # Recognise title/season/episode etc. from the file name
        meta_info = MetaInfo(file.name)
        # Prefer a tmdbid recorded in a local nfo file over name matching
        tmdbid = None
        if meta_info.type == MediaType.MOVIE:
            # Movie: movie.nfo next to the file, else a nfo named after the file
            movie_nfo = file.parent / "movie.nfo"
            if movie_nfo.exists():
                tmdbid = self.__get_tmdbid_from_nfo(movie_nfo)
            file_nfo = file.with_suffix(".nfo")
            if not tmdbid and file_nfo.exists():
                tmdbid = self.__get_tmdbid_from_nfo(file_nfo)
        else:
            # TV: tvshow.nfo two levels up (show root above the season folder)
            tv_nfo = file.parent.parent / "tvshow.nfo"
            if tv_nfo.exists():
                tmdbid = self.__get_tmdbid_from_nfo(tv_nfo)
        if tmdbid:
            logger.info(f"读取到本地nfo文件的tmdbid{tmdbid}")
            # Recognise by the id taken from the nfo
            mediainfo: MediaInfo = self.chain.recognize_media(tmdbid=tmdbid, mtype=meta_info.type)
        else:
            # Fall back to recognising by the parsed file-name metadata
            mediainfo: MediaInfo = self.chain.recognize_media(meta=meta_info)
        if not mediainfo:
            logger.warn(f"未识别到媒体信息:{file}")
            continue
        # NOTE(review): the library directory (`path`) is passed here rather
        # than the individual file — confirm scrape_metadata expects that.
        self.chain.scrape_metadata(path=path, mediainfo=mediainfo)
@staticmethod
def __get_tmdbid_from_nfo(file_path: Path):
    """
    Extract the TMDB id from a local .nfo (XML) file.

    :param file_path: path to the nfo file
    :return: tmdbid as a string, or None if absent
    """
    if not file_path:
        return None
    # Different scrapers capitalise the uniqueid "type" attribute differently,
    # and some older files use a bare <tmdbid> tag — try them all in order.
    xpaths = [
        "uniqueid[@type='Tmdb']",
        "uniqueid[@type='tmdb']",
        "uniqueid[@type='TMDB']",
        "tmdbid"
    ]
    reader = NfoReader(file_path)
    for xpath in xpaths:
        try:
            tmdbid = reader.get_element_value(xpath)
            if tmdbid:
                return tmdbid
        except Exception as err:
            # Best-effort lookup: a malformed entry must not abort the scan,
            # but record it via the module logger instead of printing to stdout
            logger.debug(f"{err}")
    return None
def stop_service(self):
"""
退出插件