From 05f1a24199e9dcfd87062c5320c7ff09192b71cb Mon Sep 17 00:00:00 2001 From: jxxghp Date: Thu, 9 Nov 2023 17:32:26 +0800 Subject: [PATCH] =?UTF-8?q?feat=20=E6=94=AF=E6=8C=81=E8=B1=86=E7=93=A3?= =?UTF-8?q?=E5=81=9A=E4=B8=BA=E8=AF=86=E5=88=AB=E6=BA=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +- app/api/endpoints/douban.py | 14 -- app/api/endpoints/download.py | 22 +-- app/api/endpoints/media.py | 40 ++--- app/api/endpoints/search.py | 40 +++-- app/chain/__init__.py | 37 +++-- app/chain/douban.py | 50 ------- app/chain/download.py | 5 +- app/chain/media.py | 82 ++++++++++- app/chain/search.py | 12 +- app/chain/subscribe.py | 66 +++++---- app/chain/tmdb.py | 6 +- app/chain/transfer.py | 29 ++-- app/core/config.py | 8 +- app/core/context.py | 23 ++- app/core/meta/metabase.py | 3 + app/core/metainfo.py | 9 +- app/db/mediaserver_oper.py | 4 +- app/db/subscribe_oper.py | 1 + app/modules/douban/__init__.py | 186 ++++++++++++++++++----- app/modules/douban/apiv2.py | 6 + app/modules/douban/douban_cache.py | 228 +++++++++++++++++++++++++++++ app/modules/fanart/__init__.py | 10 +- app/modules/themoviedb/__init__.py | 37 ++++- tests/test_recognize.py | 6 +- 25 files changed, 690 insertions(+), 239 deletions(-) create mode 100644 app/modules/douban/douban_cache.py diff --git a/README.md b/README.md index c43b6043..0118a7d4 100644 --- a/README.md +++ b/README.md @@ -78,9 +78,11 @@ MoviePilot需要配套下载器和媒体服务器配合使用。 - **TMDB_API_DOMAIN:** TMDB API地址,默认`api.themoviedb.org`,也可配置为`api.tmdb.org`或其它中转代理服务地址,能连通即可 - **TMDB_IMAGE_DOMAIN:** TMDB图片地址,默认`image.tmdb.org`,可配置为其它中转代理以加速TMDB图片显示,如:`static-mdb.v.geilijiasu.com` - **WALLPAPER:** 登录首页电影海报,`tmdb`/`bing`,默认`tmdb` +- **SEARCH_SOURCE:** 媒体信息搜索来源,`themoviedb`/`douban`,默认`themoviedb` +- **RECOGNIZE_SOURCE:** 媒体信息识别来源,`themoviedb`/`douban`,默认`themoviedb` +- **SCRAP_SOURCE:** 刮削元数据及图片使用的数据源,`themoviedb`/`douban`,默认`themoviedb` --- - **SCRAP_METADATA:** 刮削入库的媒体文件,`true`/`false`,默认`true` -- **SCRAP_SOURCE:** 刮削元数据及图片使用的数据源,`themoviedb`/`douban`,默认`themoviedb` - **SCRAP_FOLLOW_TMDB:** 新增已入库媒体是否跟随TMDB信息变化,`true`/`false`,默认`true` --- - **❗TRANSFER_TYPE:** 整理转移方式,支持`link`/`copy`/`move`/`softlink`/`rclone_copy`/`rclone_move` **注意:在`link`和`softlink`转移方式下,转移后的文件会继承源文件的权限掩码,不受`UMASK`影响;rclone需要自行映射rclone配置目录到容器中或在容器内完成rclone配置,节点名称必须为:`MP`** @@ -100,7 +102,6 @@ MoviePilot需要配套下载器和媒体服务器配合使用。 - **SUBSCRIBE_MODE:** 订阅模式,`rss`/`spider`,默认`spider`,`rss`模式通过定时刷新RSS来匹配订阅(RSS地址会自动获取,也可手动维护),对站点压力小,同时可设置订阅刷新周期,24小时运行,但订阅和下载通知不能过滤和显示免费,推荐使用rss模式。 - **SUBSCRIBE_RSS_INTERVAL:** RSS订阅模式刷新时间间隔(分钟),默认`30`分钟,不能小于5分钟。 - **SUBSCRIBE_SEARCH:** 订阅搜索,`true`/`false`,默认`false`,开启后会每隔24小时对所有订阅进行全量搜索,以补齐缺失剧集(一般情况下正常订阅即可,订阅搜索只做为兜底,会增加站点压力,不建议开启)。 -- **SEARCH_SOURCE:** 媒体信息搜索来源,`themoviedb`/`douban`,默认`themoviedb` - **AUTO_DOWNLOAD_USER:** 远程交互搜索时自动择优下载的用户ID,多个用户使用,分割,未设置需要选择资源或者回复`0` --- - **OCR_HOST:** OCR识别服务器地址,格式:`http(s)://ip:port`,用于识别站点验证码实现自动登录获取Cookie等,不配置默认使用内建服务器`https://movie-pilot.org`,可使用 [这个镜像](https://hub.docker.com/r/jxxghp/moviepilot-ocr) 自行搭建。 diff --git a/app/api/endpoints/douban.py b/app/api/endpoints/douban.py index 6128e5bf..a7974b58 100644 --- a/app/api/endpoints/douban.py +++ b/app/api/endpoints/douban.py @@ -28,20 +28,6 @@ def douban_img(imgurl: str) -> Any: return None -@router.get("/recognize/{doubanid}", summary="豆瓣ID识别", response_model=schemas.Context) -def recognize_doubanid(doubanid: str, - _: schemas.TokenPayload = Depends(verify_token)) -> Any: - """ - 根据豆瓣ID识别媒体信息 - """ - # 识别媒体信息 - context = DoubanChain().recognize_by_doubanid(doubanid=doubanid) - if context: - return context.to_dict() - else: - return schemas.Context() - - @router.get("/showing", summary="豆瓣正在热映", response_model=List[schemas.MediaInfo]) def movie_showing(page: int = 1, count: int = 30, diff --git a/app/api/endpoints/download.py b/app/api/endpoints/download.py index 262ec67f..b2267074 100644 --- a/app/api/endpoints/download.py +++ b/app/api/endpoints/download.py @@ -3,14 +3,13 @@ from typing import Any, List from fastapi import APIRouter, Depends, HTTPException from app import schemas -from app.db.models.user import User -from app.db.userauth import get_current_active_user -from app.chain.douban import DoubanChain from app.chain.download import DownloadChain from app.chain.media import MediaChain from app.core.context import MediaInfo, Context, TorrentInfo from app.core.metainfo import MetaInfo from app.core.security import verify_token +from app.db.models.user import User +from app.db.userauth import get_current_active_user from app.schemas import NotExistMediaInfo, MediaType router = APIRouter() @@ -61,20 +60,13 @@ def exists(media_in: schemas.MediaInfo, 查询缺失媒体信息 """ # 媒体信息 - mediainfo = MediaInfo() meta = MetaInfo(title=media_in.title) - if media_in.tmdb_id: - mediainfo.from_dict(media_in.dict()) - elif media_in.douban_id: - context = DoubanChain().recognize_by_doubanid(doubanid=media_in.douban_id) - if context: - mediainfo = context.media_info - meta = context.meta_info + mtype = MediaType(media_in.type) if media_in.type else None + if media_in.tmdb_id or media_in.douban_id: + mediainfo = MediaChain().recognize_media(meta=meta, mtype=mtype, + tmdbid=media_in.tmdb_id, doubanid=media_in.douban_id) else: - context = MediaChain().recognize_by_title(title=f"{media_in.title} {media_in.year}") - if context: - mediainfo = context.media_info - meta = context.meta_info + mediainfo = MediaChain().recognize_by_title(title=f"{media_in.title} {media_in.year}") # 查询缺失信息 if not mediainfo or not mediainfo.tmdb_id: raise HTTPException(status_code=404, detail="媒体信息不存在") diff --git a/app/api/endpoints/media.py b/app/api/endpoints/media.py index f2cd8a9e..2a871f2d 100644 --- a/app/api/endpoints/media.py +++ b/app/api/endpoints/media.py @@ -4,10 +4,8 @@ from fastapi import APIRouter, Depends from sqlalchemy.orm import Session from app import schemas -from app.chain.douban import DoubanChain from app.chain.media import MediaChain -from app.chain.tmdb import TmdbChain -from app.core.context import MediaInfo +from app.core.config import settings from app.core.metainfo import MetaInfo from app.core.security import verify_token from app.db import get_db @@ -81,26 +79,30 @@ def exists(title: str = None, @router.get("/{mediaid}", summary="查询媒体详情", response_model=schemas.MediaInfo) -def tmdb_info(mediaid: str, type_name: str, - _: schemas.TokenPayload = Depends(verify_token)) -> Any: +def media_info(mediaid: str, type_name: str, + _: schemas.TokenPayload = Depends(verify_token)) -> Any: """ 根据媒体ID查询themoviedb或豆瓣媒体信息,type_name: 电影/电视剧 """ mtype = MediaType(type_name) + tmdbid, doubanid = None, None if mediaid.startswith("tmdb:"): - result = TmdbChain().tmdb_info(int(mediaid[5:]), mtype) - return MediaInfo(tmdb_info=result).to_dict() + tmdbid = int(mediaid[5:]) elif mediaid.startswith("douban:"): - # 查询豆瓣信息 - doubaninfo = DoubanChain().douban_info(doubanid=mediaid[7:]) - if not doubaninfo: - return schemas.MediaInfo() - result = DoubanChain().recognize_by_doubaninfo(doubaninfo) - if result: - # TMDB - return result.media_info.to_dict() - else: - # 豆瓣 - return MediaInfo(douban_info=doubaninfo).to_dict() - else: + doubanid = mediaid[7:] + if not tmdbid and not doubanid: return schemas.MediaInfo() + if settings.RECOGNIZE_SOURCE == "themoviedb": + if not tmdbid and doubanid: + tmdbinfo = MediaChain().get_tmdbinfo_by_doubanid(doubanid=doubanid, mtype=mtype) + if tmdbinfo: + tmdbid = tmdbinfo.get("id") + else: + if not doubanid and tmdbid: + doubaninfo = MediaChain().get_doubaninfo_by_tmdbid(tmdbid=tmdbid, mtype=mtype) + if doubaninfo: + doubanid = doubaninfo.get("id") + mediainfo = MediaChain().recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype) + if mediainfo: + return mediainfo.to_dict() + return schemas.MediaInfo() diff --git a/app/api/endpoints/search.py b/app/api/endpoints/search.py index 61816d0c..9820ba5e 100644 --- a/app/api/endpoints/search.py +++ b/app/api/endpoints/search.py @@ -3,8 +3,9 @@ from typing import List, Any from fastapi import APIRouter, Depends from app import schemas -from app.chain.douban import DoubanChain +from app.chain.media import MediaChain from app.chain.search import SearchChain +from app.core.config import settings from app.core.security import verify_token from app.schemas.types import MediaType @@ -21,27 +22,36 @@ async def search_latest(_: schemas.TokenPayload = Depends(verify_token)) -> Any: @router.get("/media/{mediaid}", summary="精确搜索资源", response_model=List[schemas.Context]) -def search_by_tmdbid(mediaid: str, - mtype: str = None, - area: str = "title", - _: schemas.TokenPayload = Depends(verify_token)) -> Any: +def search_by_id(mediaid: str, + mtype: str = None, + area: str = "title", + _: schemas.TokenPayload = Depends(verify_token)) -> Any: """ 根据TMDBID/豆瓣ID精确搜索站点资源 tmdb:/douban:/ """ + torrents = [] + if mtype: + mtype = MediaType(mtype) if mediaid.startswith("tmdb:"): tmdbid = int(mediaid.replace("tmdb:", "")) - if mtype: - mtype = MediaType(mtype) - torrents = SearchChain().search_by_tmdbid(tmdbid=tmdbid, mtype=mtype, area=area) + if settings.RECOGNIZE_SOURCE == "douban": + # 通过TMDBID识别豆瓣ID + doubaninfo = MediaChain().get_doubaninfo_by_tmdbid(tmdbid=tmdbid, mtype=mtype) + if doubaninfo: + torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"), + mtype=mtype, area=area) + else: + torrents = SearchChain().search_by_id(tmdbid=tmdbid, mtype=mtype, area=area) elif mediaid.startswith("douban:"): doubanid = mediaid.replace("douban:", "") - # 识别豆瓣信息 - context = DoubanChain().recognize_by_doubanid(doubanid) - if not context or not context.media_info or not context.media_info.tmdb_id: - return [] - torrents = SearchChain().search_by_tmdbid(tmdbid=context.media_info.tmdb_id, - mtype=context.media_info.type, - area=area) + if settings.RECOGNIZE_SOURCE == "themoviedb": + # 通过豆瓣ID识别TMDBID + tmdbinfo = MediaChain().get_tmdbinfo_by_doubanid(doubanid=doubanid, mtype=mtype) + if tmdbinfo: + torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"), + mtype=mtype, area=area) + else: + torrents = SearchChain().search_by_id(doubanid=doubanid, mtype=mtype, area=area) else: return [] return [torrent.to_dict() for torrent in torrents] diff --git a/app/chain/__init__.py b/app/chain/__init__.py index b95bbd38..bfbe846a 100644 --- a/app/chain/__init__.py +++ b/app/chain/__init__.py @@ -107,28 +107,34 @@ class ChainBase(metaclass=ABCMeta): # 中止继续执行 break except Exception as err: - logger.error(f"运行模块 {method} 出错:{module.__class__.__name__} - {str(err)}\n{traceback.print_exc()}") + logger.error( + f"运行模块 {method} 出错:{module.__class__.__name__} - {str(err)}\n{traceback.print_exc()}") return result def recognize_media(self, meta: MetaBase = None, mtype: MediaType = None, - tmdbid: int = None) -> Optional[MediaInfo]: + tmdbid: int = None, + doubanid: str = None) -> Optional[MediaInfo]: """ 识别媒体信息 :param meta: 识别的元数据 :param mtype: 识别的媒体类型,与tmdbid配套 :param tmdbid: tmdbid + :param doubanid: 豆瓣ID :return: 识别的媒体信息,包括剧集信息 """ + # 识别用名中含指定信息情形 + if not mtype and meta and meta.type in [MediaType.TV, MediaType.MOVIE]: + mtype = meta.type if not tmdbid and hasattr(meta, "tmdbid"): - # 识别用名中含指定信息情形 tmdbid = meta.tmdbid - if not mtype and meta.type in [MediaType.TV, MediaType.MOVIE]: - mtype = meta.type - return self.run_module("recognize_media", meta=meta, mtype=mtype, tmdbid=tmdbid) + if not doubanid and hasattr(meta, "doubanid"): + doubanid = meta.doubanid + return self.run_module("recognize_media", meta=meta, mtype=mtype, + tmdbid=tmdbid, doubanid=doubanid) def match_doubaninfo(self, name: str, imdbid: str = None, - mtype: str = None, year: str = None, season: int = None) -> Optional[dict]: + mtype: MediaType = None, year: str = None, season: int = None) -> Optional[dict]: """ 搜索和匹配豆瓣信息 :param name: 标题 @@ -140,6 +146,18 @@ class ChainBase(metaclass=ABCMeta): return self.run_module("match_doubaninfo", name=name, imdbid=imdbid, mtype=mtype, year=year, season=season) + def match_tmdbinfo(self, name: str, mtype: MediaType = None, + year: str = None, season: int = None) -> Optional[dict]: + """ + 搜索和匹配TMDB信息 + :param name: 标题 + :param mtype: 类型 + :param year: 年份 + :param season: 季 + """ + return self.run_module("match_tmdbinfo", name=name, + mtype=mtype, year=year, season=season) + def obtain_images(self, mediainfo: MediaInfo) -> Optional[MediaInfo]: """ 补充抓取媒体信息图片 @@ -164,13 +182,14 @@ class ChainBase(metaclass=ABCMeta): image_prefix=image_prefix, image_type=image_type, season=season, episode=episode) - def douban_info(self, doubanid: str) -> Optional[dict]: + def douban_info(self, doubanid: str, mtype: MediaType = None) -> Optional[dict]: """ 获取豆瓣信息 :param doubanid: 豆瓣ID + :param mtype: 媒体类型 :return: 豆瓣信息 """ - return self.run_module("douban_info", doubanid=doubanid) + return self.run_module("douban_info", doubanid=doubanid, mtype=mtype) def tvdb_info(self, tvdbid: int) -> Optional[dict]: """ diff --git a/app/chain/douban.py b/app/chain/douban.py index 5605f089..77102b1c 100644 --- a/app/chain/douban.py +++ b/app/chain/douban.py @@ -1,10 +1,6 @@ from typing import Optional, List from app.chain import ChainBase -from app.core.context import Context -from app.core.context import MediaInfo -from app.core.metainfo import MetaInfo -from app.log import logger from app.schemas import MediaType from app.utils.singleton import Singleton @@ -14,52 +10,6 @@ class DoubanChain(ChainBase, metaclass=Singleton): 豆瓣处理链,单例运行 """ - def recognize_by_doubanid(self, doubanid: str) -> Optional[Context]: - """ - 根据豆瓣ID识别媒体信息 - """ - logger.info(f'开始识别媒体信息,豆瓣ID:{doubanid} ...') - # 查询豆瓣信息 - doubaninfo = self.douban_info(doubanid=doubanid) - if not doubaninfo: - logger.warn(f'未查询到豆瓣信息,豆瓣ID:{doubanid}') - return None - return self.recognize_by_doubaninfo(doubaninfo) - - def recognize_by_doubaninfo(self, doubaninfo: dict) -> Optional[Context]: - """ - 根据豆瓣信息识别媒体信息 - """ - # 优先使用原标题匹配 - season_meta = None - if doubaninfo.get("original_title"): - meta = MetaInfo(title=doubaninfo.get("original_title")) - season_meta = MetaInfo(title=doubaninfo.get("title")) - # 合并季 - meta.begin_season = season_meta.begin_season - else: - meta = MetaInfo(title=doubaninfo.get("title")) - # 年份 - if doubaninfo.get("year"): - meta.year = doubaninfo.get("year") - # 处理类型 - if isinstance(doubaninfo.get('media_type'), MediaType): - meta.type = doubaninfo.get('media_type') - else: - meta.type = MediaType.MOVIE if doubaninfo.get("type") == "movie" else MediaType.TV - # 使用原标题识别媒体信息 - mediainfo = self.recognize_media(meta=meta, mtype=meta.type) - if not mediainfo: - if season_meta and season_meta.name != meta.name: - # 使用主标题识别媒体信息 - mediainfo = self.recognize_media(meta=season_meta, mtype=season_meta.type) - if not mediainfo: - logger.warn(f'{meta.name} 未识别到TMDB媒体信息') - return Context(meta_info=meta, media_info=MediaInfo(douban_info=doubaninfo)) - logger.info(f'识别到媒体信息:{mediainfo.type.value} {mediainfo.title_year} {meta.season}') - mediainfo.set_douban_info(doubaninfo) - return Context(meta_info=meta, media_info=mediainfo) - def movie_top250(self, page: int = 1, count: int = 30) -> List[dict]: """ 获取豆瓣电影TOP250 diff --git a/app/chain/download.py b/app/chain/download.py index a2146f07..b326c4b7 100644 --- a/app/chain/download.py +++ b/app/chain/download.py @@ -682,6 +682,7 @@ class DownloadChain(ChainBase): if mediainfo.type == MediaType.MOVIE: # 电影 itemid = self.mediaserver.get_item_id(mtype=mediainfo.type.value, + title=mediainfo.title, tmdbid=mediainfo.tmdb_id) exists_movies: Optional[ExistMediaInfo] = self.media_exists(mediainfo=mediainfo, itemid=itemid) if exists_movies: @@ -692,7 +693,8 @@ class DownloadChain(ChainBase): if not mediainfo.seasons: # 补充媒体信息 mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type, - tmdbid=mediainfo.tmdb_id) + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) if not mediainfo: logger.error(f"媒体信息识别失败!") return False, {} @@ -701,6 +703,7 @@ class DownloadChain(ChainBase): return False, {} # 电视剧 itemid = self.mediaserver.get_item_id(mtype=mediainfo.type.value, + title=mediainfo.title, tmdbid=mediainfo.tmdb_id, season=mediainfo.season) # 媒体库已存在的剧集 diff --git a/app/chain/media.py b/app/chain/media.py index 1a508224..c94e277f 100644 --- a/app/chain/media.py +++ b/app/chain/media.py @@ -14,7 +14,6 @@ from app.schemas.types import EventType, MediaType from app.utils.singleton import Singleton from app.utils.string import StringUtils - recognize_lock = Lock() @@ -27,7 +26,7 @@ class MediaChain(ChainBase, metaclass=Singleton): # 临时识别结果 {title, name, year, season, episode} recognize_temp: Optional[dict] = None - def recognize_by_title(self, title: str, subtitle: str = None) -> Optional[Context]: + def recognize_by_title(self, title: str, subtitle: str = None) -> Optional[MediaInfo]: """ 根据主副标题识别媒体信息 """ @@ -43,13 +42,13 @@ class MediaChain(ChainBase, metaclass=Singleton): mediainfo = self.recognize_help(title=title, org_meta=metainfo) if not mediainfo: logger.warn(f'{title} 未识别到媒体信息') - return Context(meta_info=metainfo) + return None # 识别成功 logger.info(f'{title} 识别到媒体信息:{mediainfo.type.value} {mediainfo.title_year}') # 更新媒体图片 self.obtain_images(mediainfo=mediainfo) # 返回上下文 - return Context(meta_info=metainfo, media_info=mediainfo) + return mediainfo def recognize_help(self, title: str, org_meta: MetaBase) -> Optional[MediaInfo]: """ @@ -190,3 +189,78 @@ class MediaChain(ChainBase, metaclass=Singleton): logger.info(f"{content} 搜索到 {len(medias)} 条相关媒体信息") # 识别的元数据,媒体信息列表 return meta, medias + + def get_tmdbinfo_by_doubanid(self, doubanid: str, mtype: MediaType = None) -> Optional[dict]: + """ + 根据豆瓣ID获取TMDB信息 + """ + tmdbinfo = None + doubaninfo = self.douban_info(doubanid=doubanid, mtype=mtype) + if doubaninfo: + # 优先使用原标题匹配 + season_meta = None + if doubaninfo.get("original_title"): + meta = MetaInfo(title=doubaninfo.get("original_title")) + season_meta = MetaInfo(title=doubaninfo.get("title")) + # 合并季 + meta.begin_season = season_meta.begin_season + else: + meta = MetaInfo(title=doubaninfo.get("title")) + # 年份 + if doubaninfo.get("year"): + meta.year = doubaninfo.get("year") + # 处理类型 + if isinstance(doubaninfo.get('media_type'), MediaType): + meta.type = doubaninfo.get('media_type') + else: + meta.type = MediaType.MOVIE if doubaninfo.get("type") == "movie" else MediaType.TV + # 使用原标题识别TMDB媒体信息 + tmdbinfo = self.match_tmdbinfo( + name=meta.name, + year=meta.year, + mtype=mtype or meta.type, + season=meta.begin_season + ) + if not tmdbinfo: + if season_meta and season_meta.name != meta.name: + # 使用主标题识别媒体信息 + tmdbinfo = self.match_tmdbinfo( + name=season_meta.name, + year=meta.year, + mtype=mtype or meta.type, + season=meta.begin_season + ) + return tmdbinfo + + def get_doubaninfo_by_tmdbid(self, tmdbid: int, + mtype: MediaType = None, season: int = None) -> Optional[dict]: + """ + 根据TMDBID获取豆瓣信息 + """ + tmdbinfo = self.tmdb_info(tmdbid=tmdbid, mtype=mtype) + if tmdbinfo: + # 名称 + name = tmdbinfo.get("title") or tmdbinfo.get("name") + # 年份 + year = None + if tmdbinfo.get('release_date'): + year = tmdbinfo['release_date'][:4] + elif tmdbinfo.get('seasons') and season: + for seainfo in tmdbinfo['seasons']: + # 季 + season_number = seainfo.get("season_number") + if not season_number: + continue + air_date = seainfo.get("air_date") + if air_date and season_number == season: + year = air_date[:4] + break + # IMDBID + imdbid = tmdbinfo.get("external_ids", {}).get("imdb_id") + return self.match_doubaninfo( + name=name, + year=year, + mtype=mtype, + imdbid=imdbid + ) + return None diff --git a/app/chain/search.py b/app/chain/search.py index 301e2ecc..85050184 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -31,14 +31,16 @@ class SearchChain(ChainBase): self.systemconfig = SystemConfigOper() self.torrenthelper = TorrentHelper() - def search_by_tmdbid(self, tmdbid: int, mtype: MediaType = None, area: str = "title") -> List[Context]: + def search_by_id(self, tmdbid: int = None, doubanid: str = None, + mtype: MediaType = None, area: str = "title") -> List[Context]: """ - 根据TMDB ID搜索资源,精确匹配,但不不过滤本地存在的资源 + 根据TMDBID/豆瓣ID搜索资源,精确匹配,但不不过滤本地存在的资源 :param tmdbid: TMDB ID + :param doubanid: 豆瓣 ID :param mtype: 媒体,电影 or 电视剧 :param area: 搜索范围,title or imdbid """ - mediainfo = self.recognize_media(tmdbid=tmdbid, mtype=mtype) + mediainfo = self.recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype) if not mediainfo: logger.error(f'{tmdbid} 媒体信息识别失败!') return [] @@ -96,7 +98,8 @@ class SearchChain(ChainBase): # 补充媒体信息 if not mediainfo.names: mediainfo: MediaInfo = self.recognize_media(mtype=mediainfo.type, - tmdbid=mediainfo.tmdb_id) + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) if not mediainfo: logger.error(f'媒体信息识别失败!') return [] @@ -154,6 +157,7 @@ class SearchChain(ChainBase): if mediainfo: self.progress.start(ProgressKey.Search) logger.info(f'开始匹配,总 {_total} 个资源 ...') + logger.info(f"标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}") self.progress.update(value=0, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search) for torrent in torrents: _count += 1 diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index 3bf96939..efa44021 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -6,10 +6,11 @@ from datetime import datetime from typing import Dict, List, Optional, Union, Tuple from app.chain import ChainBase -from app.chain.douban import DoubanChain from app.chain.download import DownloadChain +from app.chain.media import MediaChain from app.chain.search import SearchChain from app.chain.torrents import TorrentsChain +from app.core.config import settings from app.core.context import TorrentInfo, Context, MediaInfo from app.core.meta import MetaBase from app.core.metainfo import MetaInfo @@ -51,29 +52,32 @@ class SubscribeChain(ChainBase): 识别媒体信息并添加订阅 """ logger.info(f'开始添加订阅,标题:{title} ...') - metainfo = None mediainfo = None - if not tmdbid and doubanid: - # 将豆瓣信息转换为TMDB信息 - context = DoubanChain().recognize_by_doubanid(doubanid) - if context: - metainfo = context.meta_info - mediainfo = context.media_info + metainfo = MetaInfo(title) + if year: + metainfo.year = year + if mtype: + metainfo.type = mtype + if season: + metainfo.type = MediaType.TV + metainfo.begin_season = season + # 识别媒体信息 + if settings.RECOGNIZE_SOURCE == "themoviedb": + # TMDB识别模式 + if not tmdbid and doubanid: + # 将豆瓣信息转换为TMDB信息 + tmdbinfo = MediaChain().get_tmdbinfo_by_doubanid(doubanid=doubanid, mtype=mtype) + if tmdbinfo: + mediainfo = MediaInfo(tmdb_info=tmdbinfo) + else: + # 识别TMDB信息 + mediainfo = self.recognize_media(meta=metainfo, mtype=mtype, tmdbid=tmdbid) else: - # 识别元数据 - metainfo = MetaInfo(title) - if year: - metainfo.year = year - if mtype: - metainfo.type = mtype - if season: - metainfo.type = MediaType.TV - metainfo.begin_season = season - # 识别媒体信息 - mediainfo = self.recognize_media(meta=metainfo, mtype=mtype, tmdbid=tmdbid) + # 豆瓣识别模式 + mediainfo = self.recognize_media(meta=metainfo, mtype=mtype, doubanid=doubanid) # 识别失败 - if not mediainfo or not metainfo or not mediainfo.tmdb_id: - logger.warn(f'未识别到媒体信息,标题:{title},tmdbid:{tmdbid}') + if not mediainfo: + logger.warn(f'未识别到媒体信息,标题:{title},tmdbid:{tmdbid},doubanid:{doubanid}') return None, "未识别到媒体信息" # 更新媒体图片 self.obtain_images(mediainfo=mediainfo) @@ -86,16 +90,17 @@ class SubscribeChain(ChainBase): if not mediainfo.seasons: # 补充媒体信息 mediainfo = self.recognize_media(mtype=mediainfo.type, - tmdbid=mediainfo.tmdb_id) + tmdbid=mediainfo.tmdb_id, + doubanid=mediainfo.douban_id) if not mediainfo: logger.error(f"媒体信息识别失败!") return None, "媒体信息识别失败" if not mediainfo.seasons: - logger.error(f"媒体信息中没有季集信息,标题:{title},tmdbid:{tmdbid}") + logger.error(f"媒体信息中没有季集信息,标题:{title},tmdbid:{tmdbid},doubanid:{doubanid}") return None, "媒体信息中没有季集信息" total_episode = len(mediainfo.seasons.get(season) or []) if not total_episode: - logger.error(f'未获取到总集数,标题:{title},tmdbid:{tmdbid}') + logger.error(f'未获取到总集数,标题:{title},tmdbid:{tmdbid}, doubanid:{doubanid}') return None, f"未获取到第 {season} 季的总集数" kwargs.update({ 'total_episode': total_episode @@ -106,8 +111,7 @@ class SubscribeChain(ChainBase): 'lack_episode': kwargs.get('total_episode') }) # 添加订阅 - sid, err_msg = self.subscribeoper.add(mediainfo, doubanid=doubanid, - season=season, username=username, **kwargs) + sid, err_msg = self.subscribeoper.add(mediainfo, season=season, username=username, **kwargs) if not sid: logger.error(f'{mediainfo.title_year} {err_msg}') if not exist_ok and message: @@ -179,7 +183,9 @@ class SubscribeChain(ChainBase): meta.begin_season = subscribe.season or None meta.type = MediaType(subscribe.type) # 识别媒体信息 - mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, tmdbid=subscribe.tmdbid) + mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, + tmdbid=subscribe.tmdbid, + doubanid=subscribe.doubanid) if not mediainfo: logger.warn(f'未识别到媒体信息,标题:{subscribe.name},tmdbid:{subscribe.tmdbid}') continue @@ -475,7 +481,8 @@ class SubscribeChain(ChainBase): meta.begin_season = subscribe.season or None meta.type = MediaType(subscribe.type) # 识别媒体信息 - mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, tmdbid=subscribe.tmdbid) + mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, + tmdbid=subscribe.tmdbid, doubanid=subscribe.doubanid) if not mediainfo: logger.warn(f'未识别到媒体信息,标题:{subscribe.name},tmdbid:{subscribe.tmdbid}') continue @@ -661,7 +668,8 @@ class SubscribeChain(ChainBase): meta.begin_season = subscribe.season or None meta.type = MediaType(subscribe.type) # 识别媒体信息 - mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, tmdbid=subscribe.tmdbid) + mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, + tmdbid=subscribe.tmdbid, doubanid=subscribe.doubanid) if not mediainfo: logger.warn(f'未识别到媒体信息,标题:{subscribe.name},tmdbid:{subscribe.tmdbid}') continue diff --git a/app/chain/tmdb.py b/app/chain/tmdb.py index 0d49c2b1..6e00c257 100644 --- a/app/chain/tmdb.py +++ b/app/chain/tmdb.py @@ -25,17 +25,21 @@ class TmdbChain(ChainBase, metaclass=Singleton): :param page: 页码 :return: 媒体信息列表 """ + if settings.RECOGNIZE_SOURCE != "themoviedb": + return None return self.run_module("tmdb_discover", mtype=mtype, sort_by=sort_by, with_genres=with_genres, with_original_language=with_original_language, page=page) - def tmdb_trending(self, page: int = 1) -> List[dict]: + def tmdb_trending(self, page: int = 1) -> Optional[List[dict]]: """ TMDB流行趋势 :param page: 第几页 :return: TMDB信息列表 """ + if settings.RECOGNIZE_SOURCE != "themoviedb": + return None return self.run_module("tmdb_trending", page=page) def tmdb_seasons(self, tmdbid: int) -> List[schemas.TmdbSeason]: diff --git a/app/chain/transfer.py b/app/chain/transfer.py index eef4b1ee..5c86a2b8 100644 --- a/app/chain/transfer.py +++ b/app/chain/transfer.py @@ -66,7 +66,8 @@ class TransferChain(ChainBase): mtype = MediaType(downloadhis.type) # 按TMDBID识别 mediainfo = self.recognize_media(mtype=mtype, - tmdbid=downloadhis.tmdbid) + tmdbid=downloadhis.tmdbid, + doubanid=downloadhis.doubanid) else: # 非MoviePilot下载的任务,按文件识别 mediainfo = None @@ -449,7 +450,7 @@ class TransferChain(ChainBase): def args_error(): self.post_message(Notification(channel=channel, - title="请输入正确的命令格式:/redo [id] [tmdbid]|[类型]," + title="请输入正确的命令格式:/redo [id] [tmdbid/豆瓣id]|[类型]," "[id]历史记录编号", userid=userid)) if not arg_str: @@ -464,31 +465,32 @@ class TransferChain(ChainBase): if not logid.isdigit(): args_error() return - # TMDB ID - tmdb_strs = arg_strs[1].split('|') - tmdbid = tmdb_strs[0] + # TMDBID/豆瓣ID + id_strs = arg_strs[1].split('|') + media_id = id_strs[0] if not logid.isdigit(): args_error() return # 类型 - type_str = tmdb_strs[1] if len(tmdb_strs) > 1 else None + type_str = id_strs[1] if len(id_strs) > 1 else None if not type_str or type_str not in [MediaType.MOVIE.value, MediaType.TV.value]: args_error() return state, errmsg = self.re_transfer(logid=int(logid), - mtype=MediaType(type_str), tmdbid=int(tmdbid)) + mtype=MediaType(type_str), + mediaid=media_id) if not state: self.post_message(Notification(channel=channel, title="手动整理失败", text=errmsg, userid=userid)) return - def re_transfer(self, logid: int, - mtype: MediaType = None, tmdbid: int = None) -> Tuple[bool, str]: + def re_transfer(self, logid: int, mtype: MediaType = None, + mediaid: str = None) -> Tuple[bool, str]: """ 根据历史记录,重新识别转移,只支持简单条件 :param logid: 历史记录ID :param mtype: 媒体类型 - :param tmdbid: TMDB ID + :param mediaid: TMDB ID/豆瓣ID """ # 查询历史记录 history: TransferHistory = self.transferhis.get(logid) @@ -501,13 +503,14 @@ class TransferChain(ChainBase): return False, f"源目录不存在:{src_path}" dest_path = Path(history.dest) if history.dest else None # 查询媒体信息 - if mtype and tmdbid: - mediainfo = self.recognize_media(mtype=mtype, tmdbid=tmdbid) + if mtype and mediaid: + mediainfo = self.recognize_media(mtype=mtype, tmdbid=int(mediaid) if str(mediaid).isdigit() else None, + doubanid=mediaid) else: meta = MetaInfoPath(src_path) mediainfo = self.recognize_media(meta=meta) if not mediainfo: - return False, f"未识别到媒体信息,类型:{mtype.value},tmdbid:{tmdbid}" + return False, f"未识别到媒体信息,类型:{mtype.value},id:{mediaid}" # 重新执行转移 logger.info(f"{src_path.name} 识别为:{mediainfo.title_year}") # 更新媒体图片 diff --git a/app/core/config.py b/app/core/config.py index 19275123..f703b078 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -43,14 +43,16 @@ class Settings(BaseSettings): WALLPAPER: str = "tmdb" # 网络代理 IP:PORT PROXY_HOST: str = None - # 媒体信息搜索来源 + # 媒体识别来源 themoviedb/douban + RECOGNIZE_SOURCE: str = "themoviedb" + # 媒体信息搜索来源 themoviedb/douban SEARCH_SOURCE: str = "themoviedb" + # 刮削来源 themoviedb/douban + SCRAP_SOURCE: str = "themoviedb" # 刮削入库的媒体文件 SCRAP_METADATA: bool = True # 新增已入库媒体是否跟随TMDB信息变化 SCRAP_FOLLOW_TMDB: bool = True - # 刮削来源 - SCRAP_SOURCE: str = "themoviedb" # TMDB图片地址 TMDB_IMAGE_DOMAIN: str = "image.tmdb.org" # TMDB API地址 diff --git a/app/core/context.py b/app/core/context.py index a7bdbbbf..e7687ce4 100644 --- a/app/core/context.py +++ b/app/core/context.py @@ -414,7 +414,6 @@ class MediaInfo: # 豆瓣ID self.douban_id = str(info.get("id")) # 类型 - if not self.type: if isinstance(info.get('media_type'), MediaType): self.type = info.get('media_type') @@ -422,10 +421,10 @@ class MediaInfo: self.type = MediaType.MOVIE if info.get("type") == "movie" else MediaType.TV # 标题 if not self.title: - self.title = info.get("title") # 识别标题中的季 - meta = MetaInfo(self.title) + meta = MetaInfo(info.get("title")) self.season = meta.begin_season + self.title = meta.name # 原语种标题 if not self.original_title: self.original_title = info.get("original_title") @@ -472,14 +471,22 @@ class MediaInfo: self.actors = info.get("actors") or [] # 别名 if not self.names: - self.names = info.get("aka") or [] + akas = info.get("aka") + if akas: + self.names = [re.sub(r'\([港台豆友译名]+\)', "", aka) for aka in akas] # 剧集 if self.type == MediaType.TV and not self.seasons: meta = MetaInfo(info.get("title")) - if meta.begin_season: - episodes_count = info.get("episodes_count") - if episodes_count: - self.seasons[meta.begin_season] = list(range(1, episodes_count + 1)) + season = meta.begin_season or 1 + episodes_count = info.get("episodes_count") + if episodes_count: + self.seasons[season] = list(range(1, episodes_count + 1)) + # 季年份 + if self.type == MediaType.TV and not self.season_years: + season = self.season or 1 + self.season_years = { + season: self.year + } # 剩余属性赋值 for key, value in info.items(): if not hasattr(self, key): diff --git a/app/core/meta/metabase.py b/app/core/meta/metabase.py index 116b0203..89ca1d6e 100644 --- a/app/core/meta/metabase.py +++ b/app/core/meta/metabase.py @@ -59,6 +59,9 @@ class MetaBase(object): audio_encode: Optional[str] = None # 应用的识别词信息 apply_words: Optional[List[str]] = None + # 附加信息 + tmdbid: int = None + doubanid: str = None # 副标题解析 _subtitle_flag = False diff --git a/app/core/metainfo.py b/app/core/metainfo.py index cf7b5c10..45d8f20a 100644 --- a/app/core/metainfo.py +++ b/app/core/metainfo.py @@ -36,6 +36,8 @@ def MetaInfo(title: str, subtitle: str = None) -> MetaBase: # 修正媒体信息 if metainfo.get('tmdbid'): meta.tmdbid = metainfo['tmdbid'] + if metainfo.get('doubanid'): + meta.tmdbid = metainfo['doubanid'] if metainfo.get('type'): meta.type = metainfo['type'] if metainfo.get('begin_season'): @@ -93,6 +95,7 @@ def find_metainfo(title: str) -> Tuple[str, dict]: """ metainfo = { 'tmdbid': None, + 'doubanid': None, 'type': None, 'begin_season': None, 'end_season': None, @@ -108,10 +111,14 @@ def find_metainfo(title: str) -> Tuple[str, dict]: if not results: return title, metainfo for result in results: - tmdbid = re.findall(r'(?<=tmdbid=)\d+', result) # 查找tmdbid信息 + tmdbid = re.findall(r'(?<=tmdbid=)\d+', result) if tmdbid and tmdbid[0].isdigit(): metainfo['tmdbid'] = tmdbid[0] + # 查找豆瓣id信息 + doubanid = re.findall(r'(?<=doubanid=)\d+', result) + if doubanid and doubanid[0].isdigit(): + metainfo['doubanid'] = doubanid[0] # 查找媒体类型 mtype = re.findall(r'(?<=type=)\d+', result) if mtype: diff --git a/app/db/mediaserver_oper.py b/app/db/mediaserver_oper.py index 6eb8764c..d76a5313 100644 --- a/app/db/mediaserver_oper.py +++ b/app/db/mediaserver_oper.py @@ -39,10 +39,12 @@ class MediaServerOper(DbOper): # 优先按TMDBID查 item = MediaServerItem.exist_by_tmdbid(self._db, tmdbid=kwargs.get("tmdbid"), mtype=kwargs.get("mtype")) - else: + elif kwargs.get("title"): # 按标题、类型、年份查 item = MediaServerItem.exists_by_title(self._db, title=kwargs.get("title"), mtype=kwargs.get("mtype"), year=kwargs.get("year")) + else: + return None if not item: return None diff --git a/app/db/subscribe_oper.py b/app/db/subscribe_oper.py index 4f7501db..9cda995f 100644 --- a/app/db/subscribe_oper.py +++ b/app/db/subscribe_oper.py @@ -23,6 +23,7 @@ class SubscribeOper(DbOper): tmdbid=mediainfo.tmdb_id, imdbid=mediainfo.imdb_id, tvdbid=mediainfo.tvdb_id, + doubanid=mediainfo.douban_id, poster=mediainfo.get_poster_image(), backdrop=mediainfo.get_backdrop_image(), vote=mediainfo.vote_average, diff --git a/app/modules/douban/__init__.py b/app/modules/douban/__init__.py index c7bb1ba9..42b3573b 100644 --- a/app/modules/douban/__init__.py +++ b/app/modules/douban/__init__.py @@ -9,6 +9,7 @@ from app.core.metainfo import MetaInfo from app.log import logger from app.modules import _ModuleBase from app.modules.douban.apiv2 import DoubanApi +from app.modules.douban.douban_cache import DoubanCache from app.modules.douban.scraper import DoubanScraper from app.schemas.types import MediaType from app.utils.common import retry @@ -18,10 +19,12 @@ from app.utils.system import SystemUtils class DoubanModule(_ModuleBase): doubanapi: DoubanApi = None scraper: DoubanScraper = None + cache: DoubanCache = None def init_module(self) -> None: self.doubanapi = DoubanApi() self.scraper = DoubanScraper() + self.cache = DoubanCache() def stop(self): pass @@ -29,10 +32,87 @@ class DoubanModule(_ModuleBase): def init_setting(self) -> Tuple[str, Union[str, bool]]: pass - def douban_info(self, doubanid: str) -> Optional[dict]: + def recognize_media(self, meta: MetaBase = None, + mtype: MediaType = None, + doubanid: str = None, + **kwargs) -> Optional[MediaInfo]: + """ + 识别媒体信息 + :param meta: 识别的元数据 + :param mtype: 识别的媒体类型,与doubanid配套 + :param doubanid: 豆瓣ID + :return: 识别的媒体信息,包括剧集信息 + """ + if settings.RECOGNIZE_SOURCE != "douban": + return None + + if not meta: + cache_info = {} + else: + if mtype: + meta.type = mtype + cache_info = self.cache.get(meta) + if not cache_info: + # 缓存没有或者强制不使用缓存 + if doubanid: + # 直接查询详情 + info = self.douban_info(doubanid=doubanid, mtype=mtype or meta.type) + elif meta: + if meta.begin_season: + logger.info(f"正在识别 {meta.name} 第{meta.begin_season}季 ...") + else: + logger.info(f"正在识别 {meta.name} ...") + # 匹配豆瓣信息 + match_info = self.match_doubaninfo(name=meta.name, + mtype=mtype or meta.type, + year=meta.year, + season=meta.begin_season) + if match_info: + # 匹配到豆瓣信息 + info = self.douban_info( + doubanid=match_info.get("id"), + mtype=mtype or meta.type + ) + else: + logger.info(f"{meta.name if meta else doubanid} 未匹配到豆瓣媒体信息") + return None + else: + logger.error("识别媒体信息时未提供元数据或豆瓣ID") + return None + # 保存到缓存 + if meta: + self.cache.update(meta, info) + else: + # 使用缓存信息 + if cache_info.get("title"): + logger.info(f"{meta.name} 使用豆瓣识别缓存:{cache_info.get('title')}") + info = self.douban_info(mtype=cache_info.get("type"), + doubanid=cache_info.get("id")) + else: + logger.info(f"{meta.name} 使用豆瓣识别缓存:无法识别") + info = None + + if info: + # 赋值TMDB信息并返回 + mediainfo = MediaInfo(douban_info=info) + if meta: + logger.info(f"{meta.name} 豆瓣识别结果:{mediainfo.type.value} " + f"{mediainfo.title_year} " + f"{mediainfo.douban_id}") + else: + logger.info(f"{doubanid} 豆瓣识别结果:{mediainfo.type.value} " + f"{mediainfo.title_year}") + return mediainfo + else: + logger.info(f"{meta.name if meta else doubanid} 未匹配到豆瓣媒体信息") + + return None + + def douban_info(self, doubanid: str, mtype: MediaType = None) -> Optional[dict]: """ 获取豆瓣信息 :param doubanid: 豆瓣ID + :param mtype: 媒体类型 :return: 豆瓣信息 """ """ @@ -300,22 +380,40 @@ class DoubanModule(_ModuleBase): "interest_cmt_earlier_tip_desc": "该短评的发布时间早于公开上映时间,作者可能通过其他渠道提前观看,请谨慎参考。其评分将不计入总评分。" } """ + + def __douban_tv(): + """ + 获取豆瓣剧集信息 + """ + info = self.doubanapi.tv_detail(doubanid) + if info: + celebrities = self.doubanapi.tv_celebrities(doubanid) + if celebrities: + info["directors"] = celebrities.get("directors") + info["actors"] = celebrities.get("actors") + return info + + def __douban_movie(): + """ + 获取豆瓣电影信息 + """ + info = self.doubanapi.movie_detail(doubanid) + if info: + celebrities = self.doubanapi.movie_celebrities(doubanid) + if celebrities: + info["directors"] = celebrities.get("directors") + info["actors"] = celebrities.get("actors") + return info + if not doubanid: return None logger.info(f"开始获取豆瓣信息:{doubanid} ...") - douban_info = self.doubanapi.movie_detail(doubanid) - if douban_info: - celebrities = self.doubanapi.movie_celebrities(doubanid) - if celebrities: - douban_info["directors"] = celebrities.get("directors") - douban_info["actors"] = celebrities.get("actors") + if mtype == MediaType.TV: + return __douban_tv() + elif mtype == MediaType.MOVIE: + return __douban_movie() else: - douban_info = self.doubanapi.tv_detail(doubanid) - celebrities = self.doubanapi.tv_celebrities(doubanid) - if douban_info and celebrities: - douban_info["directors"] = celebrities.get("directors") - douban_info["actors"] = celebrities.get("actors") - return douban_info + return __douban_movie() or __douban_tv() def douban_discover(self, mtype: MediaType, sort: str, tags: str, page: int = 1, count: int = 30) -> Optional[List[dict]]: @@ -407,12 +505,12 @@ class DoubanModule(_ModuleBase): @retry(Exception, 5, 3, 3, logger=logger) def match_doubaninfo(self, name: str, imdbid: str = None, - mtype: str = None, year: str = None, season: int = None) -> dict: + mtype: MediaType = None, year: str = None, season: int = None) -> dict: """ 搜索和匹配豆瓣信息 :param name: 名称 :param imdbid: IMDB ID - :param mtype: 类型 电影/电视剧 + :param mtype: 类型 :param year: 年份 :param season: 季号 """ @@ -441,7 +539,7 @@ class DoubanModule(_ModuleBase): type_name = item_obj.get("type_name") if type_name not in [MediaType.TV.value, MediaType.MOVIE.value]: continue - if mtype and mtype != type_name: + if mtype and mtype.value != type_name: continue if mtype == MediaType.TV and not season: season = 1 @@ -486,17 +584,20 @@ class DoubanModule(_ModuleBase): meta = MetaInfo(path.stem) if not meta.name: return - # 根据名称查询豆瓣数据 - doubaninfo = self.match_doubaninfo(name=mediainfo.title, - imdbid=mediainfo.imdb_id, - mtype=mediainfo.type.value, - year=mediainfo.year, - season=meta.begin_season) - if not doubaninfo: - logger.warn(f"未找到 {mediainfo.title} 的豆瓣信息") - return # 查询豆瓣详情 - doubaninfo = self.douban_info(doubaninfo.get("id")) + if not mediainfo.douban_id: + # 根据名称查询豆瓣数据 + doubaninfo = self.match_doubaninfo(name=mediainfo.title, + imdbid=mediainfo.imdb_id, + mtype=mediainfo.type, + year=mediainfo.year) + if not doubaninfo: + logger.warn(f"未找到 {mediainfo.title} 的豆瓣信息") + return + doubaninfo = self.douban_info(doubanid=doubaninfo.get("id"), mtype=mediainfo.type) + else: + doubaninfo = self.douban_info(doubanid=mediainfo.douban_id, + mtype=mediainfo.type) # 刮削路径 scrape_path = path / path.name self.scraper.gen_scraper_files(meta=meta, @@ -513,17 +614,21 @@ class DoubanModule(_ModuleBase): meta = MetaInfo(file.stem) if not meta.name: continue - # 根据名称查询豆瓣数据 - doubaninfo = self.match_doubaninfo(name=mediainfo.title, - imdbid=mediainfo.imdb_id, - mtype=mediainfo.type.value, - year=mediainfo.year, - season=meta.begin_season) - if not doubaninfo: - logger.warn(f"未找到 {mediainfo.title} 的豆瓣信息") - break - # 查询豆瓣详情 - doubaninfo = self.douban_info(doubaninfo.get("id")) + if not mediainfo.douban_id: + # 根据名称查询豆瓣数据 + doubaninfo = self.match_doubaninfo(name=mediainfo.title, + imdbid=mediainfo.imdb_id, + mtype=mediainfo.type, + year=mediainfo.year, + season=meta.begin_season) + if not doubaninfo: + logger.warn(f"未找到 {mediainfo.title} 的豆瓣信息") + break + # 查询豆瓣详情 + doubaninfo = self.douban_info(doubanid=doubaninfo.get("id"), mtype=mediainfo.type) + else: + doubaninfo = self.douban_info(doubanid=mediainfo.douban_id, + mtype=mediainfo.type) # 刮削 self.scraper.gen_scraper_files(meta=meta, mediainfo=MediaInfo(douban_info=doubaninfo), @@ -532,3 +637,10 @@ class DoubanModule(_ModuleBase): except Exception as e: logger.error(f"刮削文件 {file} 失败,原因:{str(e)}") logger.info(f"{path} 刮削完成") + + def clear_cache(self): + """ + 清除缓存 + """ + self.doubanapi.clear_cache() + self.cache.clear() diff --git a/app/modules/douban/apiv2.py b/app/modules/douban/apiv2.py index 3998ac1d..1c0b076a 100644 --- a/app/modules/douban/apiv2.py +++ b/app/modules/douban/apiv2.py @@ -427,6 +427,12 @@ class DoubanApi(metaclass=Singleton): return self.__invoke(self._urls["doulist_items"] % subject_id, start=start, count=count, _ts=ts) + def clear_cache(self): + """ + 清空LRU缓存 + """ + self.__invoke.cache_clear() + def __del__(self): if self._session: self._session.close() diff --git a/app/modules/douban/douban_cache.py b/app/modules/douban/douban_cache.py new file mode 100644 index 00000000..c595abc4 --- /dev/null +++ b/app/modules/douban/douban_cache.py @@ -0,0 +1,228 @@ +import pickle +import random +import time +from pathlib import Path +from threading import RLock +from typing import Optional + +from app.core.config import settings +from app.core.meta import MetaBase +from app.utils.singleton import Singleton +from app.schemas.types import MediaType + +lock = RLock() + +CACHE_EXPIRE_TIMESTAMP_STR = "cache_expire_timestamp" +EXPIRE_TIMESTAMP = settings.CACHE_CONF.get('meta') + + +class DoubanCache(metaclass=Singleton): + """ + 豆瓣缓存数据 + { + "id": '', + "title": '', + "year": '', + "type": MediaType + } + """ + _meta_data: dict = {} + # 缓存文件路径 + _meta_path: Path = None + # TMDB缓存过期 + _tmdb_cache_expire: bool = True + + def __init__(self): + self._meta_path = settings.TEMP_PATH / "__douban_cache__" + self._meta_data = self.__load(self._meta_path) + + def clear(self): + """ + 清空所有TMDB缓存 + """ + with lock: + self._meta_data = {} + + @staticmethod + def __get_key(meta: MetaBase) -> str: + """ + 获取缓存KEY + """ + return f"[{meta.type.value if meta.type else '未知'}]{meta.name}-{meta.year}-{meta.begin_season}" + + def get(self, meta: MetaBase): + """ + 根据KEY值获取缓存值 + """ + key = self.__get_key(meta) + with lock: + info: dict = self._meta_data.get(key) + if info: + expire = info.get(CACHE_EXPIRE_TIMESTAMP_STR) + if not expire or int(time.time()) < expire: + info[CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP + self._meta_data[key] = info + elif expire and self._tmdb_cache_expire: + self.delete(key) + return info or {} + + def delete(self, key: str) -> dict: + """ + 删除缓存信息 + @param key: 缓存key + @return: 被删除的缓存内容 + """ + with lock: + return self._meta_data.pop(key, None) + + def delete_by_doubanid(self, doubanid: str) -> None: + """ + 清空对应豆瓣ID的所有缓存记录,以强制更新TMDB中最新的数据 + """ + for key in list(self._meta_data): + if self._meta_data.get(key, {}).get("id") == doubanid: + with lock: + self._meta_data.pop(key) + + def delete_unknown(self) -> None: + """ + 清除未识别的缓存记录,以便重新搜索TMDB + """ + for key in list(self._meta_data): + if self._meta_data.get(key, {}).get("id") == "0": + with lock: + self._meta_data.pop(key) + + def modify(self, key: str, title: str) -> dict: + """ + 删除缓存信息 + @param key: 缓存key + @param title: 标题 + @return: 被修改后缓存内容 + """ + with lock: + if self._meta_data.get(key): + self._meta_data[key]['title'] = title + self._meta_data[key][CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP + return self._meta_data.get(key) + + @staticmethod + def __load(path: Path) -> dict: + """ + 从文件中加载缓存 + """ + try: + if path.exists(): + with open(path, 'rb') as f: + data = pickle.load(f) + return data + return {} + except Exception as e: + print(str(e)) + return {} + + def update(self, meta: MetaBase, info: dict) -> None: + """ + 新增或更新缓存条目 + """ + with lock: + if info: + # 缓存标题 + cache_title = info.get("title") \ + if info.get("media_type") == MediaType.MOVIE else info.get("name") + # 缓存年份 + cache_year = info.get('release_date') \ + if info.get("media_type") == MediaType.MOVIE else info.get('first_air_date') + if cache_year: + cache_year = cache_year[:4] + # 类型 + if isinstance(info.get('media_type'), MediaType): + mtype = info.get('media_type') + else: + mtype = MediaType.MOVIE if info.get("type") == "movie" else MediaType.TV + # 海报 + poster_path = info.get("pic", {}).get("large") + if not poster_path and info.get("cover_url"): + poster_path = info.get("cover_url") + if not poster_path and info.get("cover"): + poster_path = info.get("cover").get("url") + self._meta_data[self.__get_key(meta)] = { + "id": info.get("id"), + "type": mtype, + "year": cache_year, + "title": cache_title, + "poster_path": poster_path, + CACHE_EXPIRE_TIMESTAMP_STR: int(time.time()) + EXPIRE_TIMESTAMP + } + elif info is not None: + # None时不缓存,此时代表网络错误,允许重复请求 + self._meta_data[self.__get_key(meta)] = {'id': "0"} + + def save(self, force: bool = False) -> None: + """ + 保存缓存数据到文件 + """ + + meta_data = self.__load(self._meta_path) + new_meta_data = {k: v for k, v in self._meta_data.items() if v.get("id")} + + if not force \ + and not self._random_sample(new_meta_data) \ + and meta_data.keys() == new_meta_data.keys(): + return + + with open(self._meta_path, 'wb') as f: + pickle.dump(new_meta_data, f, pickle.HIGHEST_PROTOCOL) + + def _random_sample(self, new_meta_data: dict) -> bool: + """ + 采样分析是否需要保存 + """ + ret = False + if len(new_meta_data) < 25: + keys = list(new_meta_data.keys()) + for k in keys: + info = new_meta_data.get(k) + expire = info.get(CACHE_EXPIRE_TIMESTAMP_STR) + if not expire: + ret = True + info[CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP + elif int(time.time()) >= expire: + ret = True + if self._tmdb_cache_expire: + new_meta_data.pop(k) + else: + count = 0 + keys = random.sample(sorted(new_meta_data.keys()), 25) + for k in keys: + info = new_meta_data.get(k) + expire = info.get(CACHE_EXPIRE_TIMESTAMP_STR) + if not expire: + ret = True + info[CACHE_EXPIRE_TIMESTAMP_STR] = int(time.time()) + EXPIRE_TIMESTAMP + elif int(time.time()) >= expire: + ret = True + if self._tmdb_cache_expire: + new_meta_data.pop(k) + count += 1 + if count >= 5: + ret |= self._random_sample(new_meta_data) + return ret + + def get_title(self, key: str) -> Optional[str]: + """ + 获取缓存的标题 + """ + cache_media_info = self._meta_data.get(key) + if not cache_media_info or not cache_media_info.get("id"): + return None + return cache_media_info.get("title") + + def set_title(self, key: str, cn_title: str) -> None: + """ + 重新设置缓存标题 + """ + cache_media_info = self._meta_data.get(key) + if not cache_media_info: + return + self._meta_data[key]['title'] = cn_title diff --git a/app/modules/fanart/__init__.py b/app/modules/fanart/__init__.py index ec914d65..7bacc7ed 100644 --- a/app/modules/fanart/__init__.py +++ b/app/modules/fanart/__init__.py @@ -326,17 +326,19 @@ class FanartModule(_ModuleBase): :param mediainfo: 识别的媒体信息 :return: 更新后的媒体信息 """ + if not mediainfo.tmdb_id and not mediainfo.tvdb_id: + return None if mediainfo.type == MediaType.MOVIE: result = self.__request_fanart(mediainfo.type, mediainfo.tmdb_id) else: if mediainfo.tvdb_id: result = self.__request_fanart(mediainfo.type, mediainfo.tvdb_id) else: - logger.info(f"{mediainfo.title_year} 没有tvdbid,无法获取Fanart图片") - return + logger.info(f"{mediainfo.title_year} 没有tvdbid,无法获取fanart图片") + return None if not result or result.get('status') == 'error': - logger.warn(f"没有获取到 {mediainfo.title_year} 的Fanart图片数据") - return + logger.warn(f"没有获取到 {mediainfo.title_year} 的fanart图片数据") + return None # 获取所有图片 for name, images in result.items(): if not images: diff --git a/app/modules/themoviedb/__init__.py b/app/modules/themoviedb/__init__.py index def52a95..b46b289d 100644 --- a/app/modules/themoviedb/__init__.py +++ b/app/modules/themoviedb/__init__.py @@ -43,7 +43,8 @@ class TheMovieDbModule(_ModuleBase): def recognize_media(self, meta: MetaBase = None, mtype: MediaType = None, - tmdbid: int = None) -> Optional[MediaInfo]: + tmdbid: int = None, + **kwargs) -> Optional[MediaInfo]: """ 识别媒体信息 :param meta: 识别的元数据 @@ -51,6 +52,9 @@ class TheMovieDbModule(_ModuleBase): :param tmdbid: tmdbid :return: 识别的媒体信息,包括剧集信息 """ + if settings.RECOGNIZE_SOURCE != "themoviedb": + return None + if not meta: cache_info = {} else: @@ -112,11 +116,11 @@ class TheMovieDbModule(_ModuleBase): else: # 使用缓存信息 if cache_info.get("title"): - logger.info(f"{meta.name} 使用识别缓存:{cache_info.get('title')}") + logger.info(f"{meta.name} 使用TMDB识别缓存:{cache_info.get('title')}") info = self.tmdb.get_info(mtype=cache_info.get("type"), tmdbid=cache_info.get("id")) else: - logger.info(f"{meta.name} 使用识别缓存:无法识别") + logger.info(f"{meta.name} 使用TMDB识别缓存:无法识别") info = None if info: @@ -129,11 +133,11 @@ class TheMovieDbModule(_ModuleBase): mediainfo = MediaInfo(tmdb_info=info) mediainfo.set_category(cat) if meta: - logger.info(f"{meta.name} 识别结果:{mediainfo.type.value} " + logger.info(f"{meta.name} TMDB识别结果:{mediainfo.type.value} " f"{mediainfo.title_year} " f"{mediainfo.tmdb_id}") else: - logger.info(f"{tmdbid} 识别结果:{mediainfo.type.value} " + logger.info(f"{tmdbid} TMDB识别结果:{mediainfo.type.value} " f"{mediainfo.title_year}") # 补充剧集年份 @@ -143,10 +147,31 @@ class TheMovieDbModule(_ModuleBase): mediainfo.season_years = episode_years return mediainfo else: - logger.info(f"{meta.name if meta else tmdbid} 未匹配到媒体信息") + logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息") return None + def match_doubaninfo(self, name: str, mtype: MediaType = None, + year: str = None, season: int = None) -> dict: + """ + 搜索和匹配TMDB信息 + :param name: 名称 + :param mtype: 类型 + :param year: 年份 + :param season: 季号 + """ + # 搜索 + logger.info(f"开始使用 名称:{name}、年份:{year} 匹配TMDB信息 ...") + info = self.tmdb.match(name=name, + year=year, + mtype=mtype, + season_year=year, + season_number=season) + if info and not info.get("genres"): + info = self.tmdb.get_info(mtype=info.get("media_type"), + tmdbid=info.get("id")) + return info + def tmdb_info(self, tmdbid: int, mtype: MediaType) -> Optional[dict]: """ 获取TMDB信息 diff --git a/tests/test_recognize.py b/tests/test_recognize.py index 8db3eb1b..1d7a9317 100644 --- a/tests/test_recognize.py +++ b/tests/test_recognize.py @@ -15,7 +15,7 @@ class RecognizeTest(TestCase): pass def test_recognize(self): - result = MediaChain().recognize_by_title(title="我和我的祖国 2019") - self.assertEqual(result.media_info.tmdb_id, 612845) - exists = DownloadChain().get_no_exists_info(MetaInfo("我和我的祖国 2019"), result.media_info) + media_info = MediaChain().recognize_by_title(title="我和我的祖国 2019") + self.assertEqual(media_info.tmdb_id, 612845) + exists = DownloadChain().get_no_exists_info(MetaInfo("我和我的祖国 2019"), media_info) self.assertTrue(exists[0])