fix anime match

This commit is contained in:
jxxghp 2024-04-09 13:20:28 +08:00
parent 1ed511034c
commit ac87c778f4
6 changed files with 101 additions and 73 deletions

View File

@@ -21,7 +21,7 @@ async def search_latest(_: schemas.TokenPayload = Depends(verify_token)) -> Any:
return [torrent.to_dict() for torrent in torrents]
@router.get("/media/{mediaid}", summary="精确搜索资源", response_model=List[schemas.Context])
@router.get("/media/{mediaid}", summary="精确搜索资源", response_model=schemas.Response)
def search_by_id(mediaid: str,
mtype: str = None,
area: str = "title",
@@ -40,6 +40,8 @@ def search_by_id(mediaid: str,
if doubaninfo:
torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"),
mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到豆瓣媒体信息")
else:
torrents = SearchChain().search_by_id(tmdbid=tmdbid, mtype=mtype, area=area)
elif mediaid.startswith("douban:"):
@@ -50,6 +52,8 @@ def search_by_id(mediaid: str,
if tmdbinfo:
torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"),
mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到TMDB媒体信息")
else:
torrents = SearchChain().search_by_id(doubanid=doubanid, mtype=mtype, area=area)
elif mediaid.startswith("bangumi:"):
@@ -60,15 +64,23 @@ def search_by_id(mediaid: str,
if tmdbinfo:
torrents = SearchChain().search_by_id(tmdbid=tmdbinfo.get("id"),
mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到TMDB媒体信息")
else:
# 通过BangumiID识别豆瓣ID
doubaninfo = MediaChain().get_doubaninfo_by_bangumiid(bangumiid=bangumiid)
if doubaninfo:
torrents = SearchChain().search_by_id(doubanid=doubaninfo.get("id"),
mtype=mtype, area=area)
else:
return schemas.Response(success=False, message="未识别到豆瓣媒体信息")
else:
return []
return [torrent.to_dict() for torrent in torrents]
return schemas.Response(success=False, message="未知的媒体ID")
if not torrents:
return schemas.Response(success=False, message="未搜索到任何资源")
else:
return schemas.Response(success=True, data=[torrent.to_dict() for torrent in torrents])
@router.get("/title", summary="模糊搜索资源", response_model=List[schemas.TorrentInfo])

View File

@@ -195,14 +195,11 @@ class MediaChain(ChainBase, metaclass=Singleton):
doubaninfo = self.douban_info(doubanid=doubanid, mtype=mtype)
if doubaninfo:
# 优先使用原标题匹配
season_meta = None
if doubaninfo.get("original_title"):
meta = MetaInfo(title=doubaninfo.get("original_title"))
season_meta = MetaInfo(title=doubaninfo.get("title"))
# 合并季
meta.begin_season = season_meta.begin_season
else:
meta = MetaInfo(title=doubaninfo.get("title"))
meta_org = MetaInfo(title=doubaninfo.get("original_title"))
else:
meta_org = meta = MetaInfo(title=doubaninfo.get("title"))
# 年份
if doubaninfo.get("year"):
meta.year = doubaninfo.get("year")
@@ -211,22 +208,19 @@ class MediaChain(ChainBase, metaclass=Singleton):
meta.type = doubaninfo.get('media_type')
else:
meta.type = MediaType.MOVIE if doubaninfo.get("type") == "movie" else MediaType.TV
# 使用原标题识别TMDB媒体信息
tmdbinfo = self.match_tmdbinfo(
name=meta.name,
year=meta.year,
mtype=mtype or meta.type,
season=meta.begin_season
)
if not tmdbinfo:
if season_meta and season_meta.name != meta.name:
# 使用主标题识别媒体信息
tmdbinfo = self.match_tmdbinfo(
name=season_meta.name,
year=meta.year,
mtype=mtype or meta.type,
season=meta.begin_season
)
# 匹配TMDB信息
meta_names = list(dict.fromkeys([k for k in [meta_org.name,
meta.cn_name,
meta.en_name] if k]))
for name in meta_names:
tmdbinfo = self.match_tmdbinfo(
name=name,
year=meta.year,
mtype=mtype or meta.type,
season=meta.begin_season
)
if tmdbinfo:
break
return tmdbinfo
def get_tmdbinfo_by_bangumiid(self, bangumiid: int) -> Optional[dict]:
@@ -236,23 +230,29 @@ class MediaChain(ChainBase, metaclass=Singleton):
bangumiinfo = self.bangumi_info(bangumiid=bangumiid)
if bangumiinfo:
# 优先使用原标题匹配
if bangumiinfo.get("name"):
if bangumiinfo.get("name_cn"):
meta = MetaInfo(title=bangumiinfo.get("name"))
meta_cn = MetaInfo(title=bangumiinfo.get("name_cn"))
else:
meta = MetaInfo(title=bangumiinfo.get("name_cn"))
meta_cn = meta = MetaInfo(title=bangumiinfo.get("name"))
# 年份
release_date = bangumiinfo.get("date") or bangumiinfo.get("air_date")
if release_date:
year = release_date[:4]
else:
year = None
# 使用名称识别TMDB媒体信息
return self.match_tmdbinfo(
name=meta.name,
year=year,
mtype=MediaType.TV,
season=meta.begin_season
)
# 识别TMDB媒体信息
meta_names = list(dict.fromkeys([k for k in [meta_cn.name,
meta.name] if k]))
for name in meta_names:
tmdbinfo = self.match_tmdbinfo(
name=name,
year=year,
mtype=MediaType.TV,
season=meta.begin_season
)
if tmdbinfo:
return tmdbinfo
return None
def get_doubaninfo_by_tmdbid(self, tmdbid: int,

View File

@@ -201,39 +201,39 @@ class SearchChain(ChainBase):
} - {""}
# 媒体标题、原标题
media_titles = {
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
} - {None}
# 译名、别名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names}
StringUtils.clear_upper(mediainfo.title),
StringUtils.clear_upper(mediainfo.original_title)
} - {""}
# 比对标题和原语种标题
if meta_names.intersection(media_titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 在标题中判断是否存在标题与原语种标题
# 比对别名和译名
media_names = {StringUtils.clear_upper(name) for name in mediainfo.names if name}
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 标题拆分
titles = [StringUtils.clear_upper(t) for t in re.split(r'[\s/【】.\[\]\-]+',
torrent.title)]
if meta_names.intersection(titles):
torrent.title) if t]
# 在标题中判断是否存在标题、原语种标题、别名、译名
if meta_names.intersection(titles) or media_names.intersection(titles):
logger.info(f'{mediainfo.title} 通过标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'标题:{torrent.title}')
_match_torrents.append(torrent)
continue
# 在副标题中判断是否存在标题与原语种标题
# 在副标题中判断是否存在标题、原语种标题、别名、译名
if torrent.description:
subtitles = {StringUtils.clear_upper(t) for t in re.split(r'[\s/|]+',
torrent.description)}
if meta_names.intersection(subtitles):
torrent.description) if t}
if meta_names.intersection(subtitles) or media_names.intersection(subtitles):
logger.info(f'{mediainfo.title} 通过副标题匹配到资源:{torrent.site_name} - {torrent.title}'
f'副标题:{torrent.description}')
_match_torrents.append(torrent)
continue
# 比对别名和译名
if media_names:
if meta_names.intersection(media_names):
logger.info(f'{mediainfo.title} 通过别名或译名匹配到资源:{torrent.site_name} - {torrent.title}')
_match_torrents.append(torrent)
continue
# 未匹配
logger.warn(f'{torrent.site_name} - {torrent.title} 标题不匹配')
# 匹配完成

View File

@@ -32,8 +32,6 @@ class MetaAnime(MetaBase):
if anitopy_info:
# 名称
name = anitopy_info.get("anime_title")
if name and name.find("/") != -1:
name = name.split("/")[-1].strip()
if not name or name in self._anime_no_words or (len(name) < 5 and not StringUtils.is_chinese(name)):
anitopy_info = anitopy.parse("[ANIME]" + title)
if anitopy_info:
@@ -44,23 +42,41 @@ class MetaAnime(MetaBase):
name = name_match.group(1).strip()
# 拆份中英文名称
if name:
lastword_type = ""
for word in name.split():
if not word:
continue
if word.endswith(']'):
word = word[:-1]
if word.isdigit():
if lastword_type == "cn":
self.cn_name = "%s %s" % (self.cn_name or "", word)
elif lastword_type == "en":
self.en_name = "%s %s" % (self.en_name or "", word)
elif StringUtils.is_chinese(word):
self.cn_name = "%s %s" % (self.cn_name or "", word)
lastword_type = "cn"
_split_flag = True
# 按/拆分中英文
if name.find("/") != -1:
names = name.split("/")
if StringUtils.is_chinese(names[0]):
self.cn_name = names[0]
if len(names) > 1:
self.en_name = names[1]
_split_flag = False
elif StringUtils.is_chinese(names[-1]):
self.cn_name = names[-1]
if len(names) > 1:
self.en_name = names[0]
_split_flag = False
else:
self.en_name = "%s %s" % (self.en_name or "", word)
lastword_type = "en"
name = names[-1]
# 拆分中英文
if _split_flag:
lastword_type = ""
for word in name.split():
if not word:
continue
if word.endswith(']'):
word = word[:-1]
if word.isdigit():
if lastword_type == "cn":
self.cn_name = "%s %s" % (self.cn_name or "", word)
elif lastword_type == "en":
self.en_name = "%s %s" % (self.en_name or "", word)
elif StringUtils.is_chinese(word):
self.cn_name = "%s %s" % (self.cn_name or "", word)
lastword_type = "cn"
else:
self.en_name = "%s %s" % (self.en_name or "", word)
lastword_type = "en"
if self.cn_name:
_, self.cn_name, _, _, _, _ = StringUtils.get_keyword(self.cn_name)
if self.cn_name:

View File

@@ -86,8 +86,8 @@ class DoubanModule(_ModuleBase):
info = self.douban_info(doubanid=doubanid, mtype=mtype or meta.type)
elif meta:
info = {}
# 使用中英文名分别识别
names = {meta.cn_name, meta.en_name} - {None}
# 使用中英文名分别识别,去重去空,但要保持顺序
names = list(dict.fromkeys([k for k in [meta.cn_name, meta.en_name] if k]))
for name in names:
if meta.begin_season:
logger.info(f"正在识别 {name}{meta.begin_season}季 ...")

View File

@@ -96,8 +96,8 @@ class TheMovieDbModule(_ModuleBase):
info = self.tmdb.get_info(mtype=mtype, tmdbid=tmdbid)
elif meta:
info = {}
# 使用中英文名分别识别
names = {meta.cn_name, meta.en_name} - {None}
# 使用中英文名分别识别,去重去空,但要保持顺序
names = list(dict.fromkeys([k for k in [meta.cn_name, meta.en_name] if k]))
for name in names:
if meta.begin_season:
logger.info(f"正在识别 {name}{meta.begin_season}季 ...")