fix anime match

This commit is contained in:
jxxghp 2024-04-09 13:20:28 +08:00
parent 1ed511034c
commit ac87c778f4
6 changed files with 101 additions and 73 deletions

View File

@ -21,7 +21,7 @@ async def search_latest(_: schemas.TokenPayload = Depends(verify_token)) -> Any:
return [torrent.to_dict() for torrent in torrents] return [torrent.to_dict() for torrent in torrents]
@router.get("/media/{mediaid}", summary="精确搜索资源", response_model=List[schemas.Context]) @router.get("/media/{mediaid}", summary="精确搜索资源", response_model=schemas.Response)
def search_by_id(mediaid: str, def search_by_id(mediaid: str,
mtype: str = None, mtype: str = None,
area: str = "title", area: str = "title",
@ -40,6 +40,8 @@ def search_by_id(mediaid: str,
if doubaninfo: if doubaninfo:
torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"), torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"),
mtype=mtype, area=area) mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到豆瓣媒体信息")
else: else:
torrents = SearchChain().search_by_id(tmdbid=tmdbid, mtype=mtype, area=area) torrents = SearchChain().search_by_id(tmdbid=tmdbid, mtype=mtype, area=area)
elif mediaid.startswith("douban:"): elif mediaid.startswith("douban:"):
@ -50,6 +52,8 @@ def search_by_id(mediaid: str,
if tmdbinfo: if tmdbinfo:
torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"), torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"),
mtype=mtype, area=area) mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到TMDB媒体信息")
else: else:
torrents = SearchChain().search_by_id(doubanid=doubanid, mtype=mtype, area=area) torrents = SearchChain().search_by_id(doubanid=doubanid, mtype=mtype, area=area)
elif mediaid.startswith("bangumi:"): elif mediaid.startswith("bangumi:"):
@ -60,15 +64,23 @@ def search_by_id(mediaid: str,
if tmdbinfo: if tmdbinfo:
torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"), torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"),
mtype=mtype, area=area) mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到TMDB媒体信息")
else: else:
# 通过BangumiID识别豆瓣ID # 通过BangumiID识别豆瓣ID
doubaninfo = MediaChain().get_doubaninfo_by_bangumiid(bangumiid=bangumiid) doubaninfo = MediaChain().get_doubaninfo_by_bangumiid(bangumiid=bangumiid)
if doubaninfo: if doubaninfo:
torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"), torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"),
mtype=mtype, area=area) mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到豆瓣媒体信息")
else: else:
return [] return schemas.Response(success=False, message="未知的媒体ID")
return [torrent.to_dict() for torrent in torrents]
if not torrents:
return schemas.Response(success=False, message="未搜索到任何资源")
else:
return schemas.Response(success=True, data=[torrent.to_dict() for torrent in torrents])
@router.get("/title", summary="模糊搜索资源", response_model=List[schemas.TorrentInfo]) @router.get("/title", summary="模糊搜索资源", response_model=List[schemas.TorrentInfo])

View File

@ -195,14 +195,11 @@ class MediaChain(ChainBase, metaclass=Singleton):
doubaninfo = self.douban_info(doubanid=doubanid, mtype=mtype) doubaninfo = self.douban_info(doubanid=doubanid, mtype=mtype)
if doubaninfo: if doubaninfo:
# 优先使用原标题匹配 # 优先使用原标题匹配
season_meta = None
if doubaninfo.get("original_title"): if doubaninfo.get("original_title"):
meta = MetaInfo(title=doubaninfo.get("original_title"))
season_meta = MetaInfo(title=doubaninfo.get("title"))
# 合并季
meta.begin_season = season_meta.begin_season
else:
meta = MetaInfo(title=doubaninfo.get("title")) meta = MetaInfo(title=doubaninfo.get("title"))
meta_org = MetaInfo(title=doubaninfo.get("original_title"))
else:
meta_org = meta = MetaInfo(title=doubaninfo.get("title"))
# 年份 # 年份
if doubaninfo.get("year"): if doubaninfo.get("year"):
meta.year = doubaninfo.get("year") meta.year = doubaninfo.get("year")
@ -211,22 +208,19 @@ class MediaChain(ChainBase, metaclass=Singleton):
meta.type = doubaninfo.get('media_type') meta.type = doubaninfo.get('media_type')
else: else:
meta.type = MediaType.MOVIE if doubaninfo.get("type") == "movie" else MediaType.TV meta.type = MediaType.MOVIE if doubaninfo.get("type") == "movie" else MediaType.TV
# 使用原标题识别TMDB媒体信息 # 匹配TMDB信息
tmdbinfo = self.match_tmdbinfo( meta_names = list(dict.fromkeys([k for k in [meta_org.name,
name=meta.name, meta.cn_name,
year=meta.year, meta.en_name] if k]))
mtype=mtype or meta.type, for name in meta_names:
season=meta.begin_season tmdbinfo = self.match_tmdbinfo(
) name=name,
if not tmdbinfo: year=meta.year,
if season_meta and season_meta.name != meta.name: mtype=mtype or meta.type,
# 使用主标题识别媒体信息 season=meta.begin_season
tmdbinfo = self.match_tmdbinfo( )
name=season_meta.name, if tmdbinfo:
year=meta.year, break
mtype=mtype or meta.type,
season=meta.begin_season
)
return tmdbinfo return tmdbinfo
def get_tmdbinfo_by_bangumiid(self, bangumiid: int) -> Optional[dict]: def get_tmdbinfo_by_bangumiid(self, bangumiid: int) -> Optional[dict]:
@ -236,23 +230,29 @@ class MediaChain(ChainBase, metaclass=Singleton):
bangumiinfo = self.bangumi_info(bangumiid=bangumiid) bangumiinfo = self.bangumi_info(bangumiid=bangumiid)
if bangumiinfo: if bangumiinfo:
# 优先使用原标题匹配 # 优先使用原标题匹配
if bangumiinfo.get("name"): if bangumiinfo.get("name_cn"):
meta = MetaInfo(title=bangumiinfo.get("name")) meta = MetaInfo(title=bangumiinfo.get("name"))
meta_cn = MetaInfo(title=bangumiinfo.get("name_cn"))
else: else:
meta = MetaInfo(title=bangumiinfo.get("name_cn")) meta_cn = meta = MetaInfo(title=bangumiinfo.get("name"))
# 年份 # 年份
release_date = bangumiinfo.get("date") or bangumiinfo.get("air_date") release_date = bangumiinfo.get("date") or bangumiinfo.get("air_date")
if release_date: if release_date:
year = release_date[:4] year = release_date[:4]
else: else:
year = None year = None
# 使用名称识别TMDB媒体信息 # 识别TMDB媒体信息
return self.match_tmdbinfo( meta_names = list(dict.fromkeys([k for k in [meta_cn.name,
name=meta.name, meta.name] if k]))
year=year, for name in meta_names:
mtype=MediaType.TV, tmdbinfo = self.match_tmdbinfo(
season=meta.begin_season name=name,
) year=year,
mtype=MediaType.TV,
season=meta.begin_season
)
if tmdbinfo:
return tmdbinfo
return None return None
def get_doubaninfo_by_tmdbid(self, tmdbid: int, def get_doubaninfo_by_tmdbid(self, tmdbid: int,

View File

@ -201,39 +201,39 @@ class SearchChain(ChainBase):
} - {""} } - {""}
# 媒体标题、原标题 # 媒体标题、原标题
media_titles = { media_titles = {
StringUtils.clear_upper(mediainfo.title), StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title) StringUtils.clear_upper(mediainfo.original_title)
} - {None} } - {""}
# 译名、别名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names}
# 比对标题和原语种标题 # 比对标题和原语种标题
if meta_names.intersection(media_titles): if meta_names.intersection(media_titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}') logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue continue
# 在标题中判断是否存在标题与原语种标题 # 比对别名和译名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 标题拆分
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+', titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
torrent.title)] torrent.title) if t]
if meta_names.intersection(titles): # 在标题中判断是否存在标题、原语种标题、别名、译名
if meta_names.intersection(titles) or media_names.intersection(titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}' logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'标题:{torrent.title}') f'标题:{torrent.title}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue continue
# 在副标题中判断是否存在标题与原语种标题 # 在副标题中判断是否存在标题、原语种标题、别名、译名
if torrent.description: if torrent.description:
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+', subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
torrent.description)} torrent.description) if t}
if meta_names.intersection(subtitles): if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}' logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'副标题:{torrent.description}') f'副标题:{torrent.description}')
_match_torrents.append(torrent) _match_torrents.append(torrent)
continue continue
# 比对别名和译名
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 未匹配 # 未匹配
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配') logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配')
# 匹配完成 # 匹配完成

View File

@ -32,8 +32,6 @@ class MetaAnime(MetaBase):
if anitopy_info: if anitopy_info:
# 名称 # 名称
name = anitopy_info.get("anime_title") name = anitopy_info.get("anime_title")
if name and name.find("/") != -1:
name = name.split("/")[-1].strip()
if not name or name in self._anime_no_words or (len(name) < 5 and not StringUtils.is_chinese(name)): if not name or name in self._anime_no_words or (len(name) < 5 and not StringUtils.is_chinese(name)):
anitopy_info = anitopy.parse("[ANIME]" + title) anitopy_info = anitopy.parse("[ANIME]" + title)
if anitopy_info: if anitopy_info:
@ -44,23 +42,41 @@ class MetaAnime(MetaBase):
name = name_match.group(1).strip() name = name_match.group(1).strip()
# 拆份中英文名称 # 拆份中英文名称
if name: if name:
lastword_type = "" _split_flag = True
for word in name.split(): # 按/拆分中英文
if not word: if name.find("/") != -1:
continue names = name.split("/")
if word.endswith(']'): if StringUtils.is_chinese(names[0]):
word = word[:-1] self.cn_name = names[0]
if word.isdigit(): if len(names) > 1:
if lastword_type == "cn": self.en_name = names[1]
self.cn_name = "%s %s" % (self.cn_name or "", word) _split_flag = False
elif lastword_type == "en": elif StringUtils.is_chinese(names[-1]):
self.en_name = "%s %s" % (self.en_name or "", word) self.cn_name = names[-1]
elif StringUtils.is_chinese(word): if len(names) > 1:
self.cn_name = "%s %s" % (self.cn_name or "", word) self.en_name = names[0]
lastword_type = "cn" _split_flag = False
else: else:
self.en_name = "%s %s" % (self.en_name or "", word) name = names[-1]
lastword_type = "en" # 拆分中英文
if _split_flag:
lastword_type = ""
for word in name.split():
if not word:
continue
if word.endswith(']'):
word = word[:-1]
if word.isdigit():
if lastword_type == "cn":
self.cn_name = "%s %s" % (self.cn_name or "", word)
elif lastword_type == "en":
self.en_name = "%s %s" % (self.en_name or "", word)
elif StringUtils.is_chinese(word):
self.cn_name = "%s %s" % (self.cn_name or "", word)
lastword_type = "cn"
else:
self.en_name = "%s %s" % (self.en_name or "", word)
lastword_type = "en"
if self.cn_name: if self.cn_name:
_, self.cn_name, _, _, _, _ = StringUtils.get_keyword(self.cn_name) _, self.cn_name, _, _, _, _ = StringUtils.get_keyword(self.cn_name)
if self.cn_name: if self.cn_name:

View File

@ -86,8 +86,8 @@ class DoubanModule(_ModuleBase):
info = self.douban_info(doubanid=doubanid, mtype=mtype or meta.type) info = self.douban_info(doubanid=doubanid, mtype=mtype or meta.type)
elif meta: elif meta:
info = {} info = {}
# 使用中英文名分别识别 # 使用中英文名分别识别,去重去空,但要保持顺序
names = {meta.cn_name, meta.en_name} - {None} names = list(dict.fromkeys([k for k in [meta.cn_name, meta.en_name] if k]))
for name in names: for name in names:
if meta.begin_season: if meta.begin_season:
logger.info(f"正在识别 {name}{meta.begin_season}季 ...") logger.info(f"正在识别 {name}{meta.begin_season}季 ...")

View File

@ -96,8 +96,8 @@ class TheMovieDbModule(_ModuleBase):
info = self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid) info = self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
elif meta: elif meta:
info = {} info = {}
# 使用中英文名分别识别 # 使用中英文名分别识别,去重去空,但要保持顺序
names = {meta.cn_name, meta.en_name} - {None} names = list(dict.fromkeys([k for k in [meta.cn_name, meta.en_name] if k]))
for name in names: for name in names:
if meta.begin_season: if meta.begin_season:
logger.info(f"正在识别 {name}{meta.begin_season}季 ...") logger.info(f"正在识别 {name}{meta.begin_season}季 ...")