init
This commit is contained in:
3
app/core/meta/__init__.py
Normal file
3
app/core/meta/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .metabase import MetaBase
|
||||
from .metavideo import MetaVideo
|
||||
from .metaanime import MetaAnime
|
BIN
app/core/meta/__pycache__/__init__.cpython-310.pyc
Normal file
BIN
app/core/meta/__pycache__/__init__.cpython-310.pyc
Normal file
Binary file not shown.
BIN
app/core/meta/__pycache__/metaanime.cpython-310.pyc
Normal file
BIN
app/core/meta/__pycache__/metaanime.cpython-310.pyc
Normal file
Binary file not shown.
BIN
app/core/meta/__pycache__/metabase.cpython-310.pyc
Normal file
BIN
app/core/meta/__pycache__/metabase.cpython-310.pyc
Normal file
Binary file not shown.
BIN
app/core/meta/__pycache__/metavideo.cpython-310.pyc
Normal file
BIN
app/core/meta/__pycache__/metavideo.cpython-310.pyc
Normal file
Binary file not shown.
BIN
app/core/meta/__pycache__/release_groups.cpython-310.pyc
Normal file
BIN
app/core/meta/__pycache__/release_groups.cpython-310.pyc
Normal file
Binary file not shown.
218
app/core/meta/metaanime.py
Normal file
218
app/core/meta/metaanime.py
Normal file
@@ -0,0 +1,218 @@
|
||||
import re
|
||||
import zhconv
|
||||
import anitopy
|
||||
from app.core.meta.metabase import MetaBase
|
||||
from app.core.meta.release_groups import ReleaseGroupsMatcher
|
||||
from app.utils.string import StringUtils
|
||||
from app.utils.types import MediaType
|
||||
|
||||
|
||||
class MetaAnime(MetaBase):
    """
    Recognise anime release names.

    The title is pre-cleaned by ``__prepare_title`` and then handed to
    ``anitopy.parse``; the fields anitopy returns are copied onto the
    ``MetaBase`` attributes (names, year, season, episode, resolution,
    release group, codecs).
    """
    # anitopy "titles" that are really format tags and must not be used as a name
    _anime_no_words = ['CHS&CHT', 'MP4', 'GB MP4', 'WEB-DL']
    # Season/episode markers (S01, S01-S02, EP01, E01-E12, ...) stripped from names
    _name_nostring_re = r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}"

    def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
        """
        Parse ``title`` (and optionally ``subtitle``) as an anime release.

        :param title: release/file name; nothing is parsed when it is empty
        :param subtitle: optional secondary text searched for season/episode
        :param isfile: stored as-is by the base class
        """
        super().__init__(title, subtitle, isfile)
        if not title:
            return
        # Any failure below is reported on stdout and leaves the object with
        # whatever was recognised up to that point (best effort by design).
        try:
            original_title = title
            # The release group is removed by the pre-processing, so keep a
            # parse of the untouched title to read it from later.
            anitopy_info_origin = anitopy.parse(title)
            title = self.__prepare_title(title)
            anitopy_info = anitopy.parse(title)
            if anitopy_info:
                # ---- name ----
                name = anitopy_info.get("anime_title")
                if name and name.find("/") != -1:
                    name = name.split("/")[-1].strip()
                if self.__is_unusable_name(name):
                    # Retry with a fake leading group so anitopy does not
                    # mistake the first bracket for the release group.
                    anitopy_info = anitopy.parse("[ANIME]" + title)
                    if anitopy_info:
                        name = anitopy_info.get("anime_title")
                if self.__is_unusable_name(name):
                    # Last resort: content of the first [...] in the title
                    name_match = re.search(r'\[(.+?)]', title)
                    if name_match and name_match.group(1):
                        name = name_match.group(1).strip()
                # Split the name into its Chinese and non-Chinese words
                if name:
                    lastword_type = ""
                    for word in name.split():
                        if word.endswith(']'):
                            word = word[:-1]
                        if word.isdigit():
                            # A number belongs to whichever name preceded it
                            if lastword_type == "cn":
                                self.cn_name = "%s %s" % (self.cn_name or "", word)
                            elif lastword_type == "en":
                                self.en_name = "%s %s" % (self.en_name or "", word)
                        elif StringUtils.is_chinese(word):
                            self.cn_name = "%s %s" % (self.cn_name or "", word)
                            lastword_type = "cn"
                        else:
                            self.en_name = "%s %s" % (self.en_name or "", word)
                            lastword_type = "en"
                if self.cn_name:
                    _, self.cn_name, _, _, _, _ = StringUtils.get_keyword(self.cn_name)
                    if self.cn_name:
                        self.cn_name = re.sub(r'%s' % self._name_nostring_re, '', self.cn_name,
                                              flags=re.IGNORECASE).strip()
                        # Normalise to Simplified Chinese
                        self.cn_name = zhconv.convert(self.cn_name, "zh-hans")
                if self.en_name:
                    self.en_name = re.sub(r'%s' % self._name_nostring_re, '', self.en_name,
                                          flags=re.IGNORECASE).strip().title()
                    self._name = StringUtils.str_title(self.en_name)
                # ---- year ----
                year = anitopy_info.get("anime_year")
                if str(year).isdigit():
                    self.year = str(year)
                # ---- season ----
                begin_season, end_season = self.__first_last(anitopy_info.get("anime_season"))
                if begin_season:
                    self.begin_season = int(begin_season)
                    if end_season and int(end_season) != self.begin_season:
                        self.end_season = int(end_season)
                        self.total_seasons = (self.end_season - self.begin_season) + 1
                    else:
                        self.total_seasons = 1
                    self.type = MediaType.TV
                # ---- episode ----
                begin_episode, end_episode = self.__first_last(anitopy_info.get("episode_number"))
                if begin_episode:
                    try:
                        self.begin_episode = int(begin_episode)
                        if end_episode and int(end_episode) != self.begin_episode:
                            self.end_episode = int(end_episode)
                            self.total_episodes = (self.end_episode - self.begin_episode) + 1
                        else:
                            self.total_episodes = 1
                    except Exception as err:
                        # Non-numeric episode text: drop the episode info only
                        print(str(err))
                        self.begin_episode = None
                        self.end_episode = None
                    self.type = MediaType.TV
                # ---- media type (only when season/episode did not decide it) ----
                if not self.type:
                    anime_type = anitopy_info.get('anime_type')
                    if isinstance(anime_type, list):
                        anime_type = anime_type[0]
                    if anime_type and anime_type.upper() == "TV":
                        self.type = MediaType.TV
                    else:
                        self.type = MediaType.MOVIE
                # ---- resolution ----
                self.resource_pix = anitopy_info.get("video_resolution")
                if isinstance(self.resource_pix, list):
                    self.resource_pix = self.resource_pix[0]
                if self.resource_pix:
                    if re.search(r'x', self.resource_pix, re.IGNORECASE):
                        # "1920x1080" -> "1080p"
                        self.resource_pix = re.split(r'[Xx]', self.resource_pix)[-1] + "p"
                    else:
                        self.resource_pix = self.resource_pix.lower()
                    if str(self.resource_pix).isdigit():
                        self.resource_pix = str(self.resource_pix) + "p"
                # ---- release / fansub group ----
                self.resource_team = \
                    ReleaseGroupsMatcher().match(title=original_title) or \
                    anitopy_info_origin.get("release_group") or None
                # ---- video codec ----
                self.video_encode = anitopy_info.get("video_term")
                if isinstance(self.video_encode, list):
                    self.video_encode = self.video_encode[0]
                # ---- audio codec ----
                self.audio_encode = anitopy_info.get("audio_term")
                if isinstance(self.audio_encode, list):
                    self.audio_encode = self.audio_encode[0]
                # Season/episode expressed in Chinese text ("第2季", "全12集", ...)
                self.init_subtitle(self.org_string)
                if not self._subtitle_flag and self.subtitle:
                    self.init_subtitle(self.subtitle)
            if not self.type:
                self.type = MediaType.TV
        except Exception as e:
            print(str(e))

    def __is_unusable_name(self, name) -> bool:
        """Return True when ``name`` is empty, a known format tag, or too short to be a title."""
        return not name \
            or name in self._anime_no_words \
            or (len(name) < 5 and not StringUtils.is_chinese(name))

    @staticmethod
    def __first_last(value):
        """
        Normalise an anitopy field that may be a scalar or a list.

        :return: ``(first, last)``; ``last`` is None for a scalar or a
                 one-element list, both are None for an empty value
        """
        if isinstance(value, list):
            if not value:
                # An empty list used to raise IndexError and abort the parse
                return None, None
            if len(value) == 1:
                return value[0], None
            return value[0], value[-1]
        if value:
            return value, None
        return None, None

    @staticmethod
    def __prepare_title(title: str):
        """
        Clean a raw title before it is passed to anitopy.

        :param title: raw release name
        :return: the cleaned title (the input itself when it is empty)
        """
        if not title:
            return title
        # Full-width brackets -> ASCII brackets
        title = title.replace("【", "[").replace("】", "]").strip()
        # Cut away "xx番 / x漫 / x剧" category tags
        match = re.search(r"新番|月?番|[日美国][漫剧]", title)
        if match and match.span()[1] < len(title) - 1:
            title = re.sub(".*番.|.*[日美国][漫剧].", "", title)
        elif match:
            # Tag sits at the very end: drop the trailing bracket group.
            # rfind() returns -1 when there is no '[', and slicing with -1
            # would chop the last character, so only cut when one was found.
            bracket_pos = title.rfind('[')
            if bracket_pos != -1:
                title = title[:bracket_pos]
        # Drop a leading category bracket (动画/电影/TV/Movie/...)
        first_item = title.split(']')[0]
        if first_item and re.search(r"[动漫画纪录片电影视连续剧集日美韩中港台海外亚洲华语大陆综艺原盘高清]{2,}|TV|Animation|Movie|Documentar|Anime",
                                    zhconv.convert(first_item, "zh-hans"),
                                    re.IGNORECASE):
            title = re.sub(r"^[^]]*]", "", title).strip()
        # Remove file sizes such as "1.2GB"
        title = re.sub(r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)', "", title, flags=re.IGNORECASE)
        # "[TV 12" -> "[12"
        title = re.sub(r"\[TV\s+(\d{1,4})", r"[\1", title, flags=re.IGNORECASE)
        # "[4K]" -> "2160p"
        title = re.sub(r'\[4k]', '2160p', title, flags=re.IGNORECASE)
        # Handle "中文 / English" names inside bracket groups
        names = title.split("]")
        if len(names) > 1 and title.find("- ") == -1:
            titles = []
            for name in names:
                if not name:
                    continue
                left_char = ''
                if name.startswith('['):
                    left_char = '['
                    name = name[1:]
                if name and name.find("/") != -1:
                    # Prefer the part after the last "/", else the first part
                    if name.split("/")[-1].strip():
                        titles.append("%s%s" % (left_char, name.split("/")[-1].strip()))
                    else:
                        titles.append("%s%s" % (left_char, name.split("/")[0].strip()))
                elif name:
                    if StringUtils.is_chinese(name) and not StringUtils.is_all_chinese(name):
                        # Mixed text: keep only the non-Chinese part unless the
                        # segment carries a "[<digits>" marker
                        if not re.search(r"\[\d+", name, re.IGNORECASE):
                            name = re.sub(r'[\d|#::\-()()\u4e00-\u9fff]', '', name).strip()
                        if not name or name.strip().isdigit():
                            continue
                    if name == '[':
                        titles.append("")
                    else:
                        titles.append("%s%s" % (left_char, name.strip()))
            return "]".join(titles)
        return title
|
427
app/core/meta/metabase.py
Normal file
427
app/core/meta/metabase.py
Normal file
@@ -0,0 +1,427 @@
|
||||
from typing import Union, Optional
|
||||
|
||||
import cn2an
|
||||
import regex as re
|
||||
|
||||
from app.utils.string import StringUtils
|
||||
from app.utils.types import MediaType
|
||||
|
||||
|
||||
class MetaBase(object):
    """
    Base class holding the media information recognised from a title.

    Subclasses fill the attributes; this class provides subtitle parsing
    (Chinese season/episode expressions) and formatting helpers.
    """
    # Whether the title being processed is a file
    isfile: bool = False
    # Original title string
    org_string: Optional[str] = None
    # Subtitle / secondary description
    subtitle: Optional[str] = None
    # Media type (movie / TV); quoted so the class body does not need the
    # enum to be resolvable at definition time
    type: Optional["MediaType"] = None
    # Recognised Chinese name
    cn_name: Optional[str] = None
    # Recognised English name
    en_name: Optional[str] = None
    # Year
    year: Optional[str] = None
    # Total number of seasons
    total_seasons: int = 0
    # First recognised season (number)
    begin_season: Optional[int] = None
    # Last recognised season (number)
    end_season: Optional[int] = None
    # Total number of episodes
    total_episodes: int = 0
    # First recognised episode
    begin_episode: Optional[int] = None
    # Last recognised episode
    end_episode: Optional[int] = None
    # Partx Cd Dvd Disk Disc
    part: Optional[str] = None
    # Recognised resource type
    resource_type: Optional[str] = None
    # Recognised effect
    resource_effect: Optional[str] = None
    # Recognised resolution
    resource_pix: Optional[str] = None
    # Recognised release / fansub group
    resource_team: Optional[str] = None
    # Video codec
    video_encode: Optional[str] = None
    # Audio codec
    audio_encode: Optional[str] = None

    # Subtitle parsing: set to True once init_subtitle recognised something
    _subtitle_flag = False
    _subtitle_season_re = r"(?<![全共]\s*)[第\s]+([0-9一二三四五六七八九十S\-]+)\s*季(?!\s*[全共])"
    _subtitle_season_all_re = r"[全共]\s*([0-9一二三四五六七八九十]+)\s*季|([0-9一二三四五六七八九十]+)\s*季\s*全"
    _subtitle_episode_re = r"(?<![全共]\s*)[第\s]+([0-9一二三四五六七八九十百零EP\-]+)\s*[集话話期](?!\s*[全共])"
    _subtitle_episode_all_re = r"([0-9一二三四五六七八九十百零]+)\s*集\s*全|[全共]\s*([0-9一二三四五六七八九十百零]+)\s*[集话話期]"

    def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
        """Store the raw inputs; nothing is stored when ``title`` is empty."""
        if not title:
            return
        self.org_string = title
        self.subtitle = subtitle
        self.isfile = isfile

    def get_name(self):
        """
        Return the preferred name: an all-Chinese ``cn_name`` first, then
        ``en_name``, then any ``cn_name``; "" when none is set.
        """
        if self.cn_name and StringUtils.is_all_chinese(self.cn_name):
            return self.cn_name
        elif self.en_name:
            return self.en_name
        elif self.cn_name:
            return self.cn_name
        return ""

    @staticmethod
    def _parse_number_range(text: str):
        """
        Convert "N" or "N-M" (Arabic or Chinese numerals) to ``(begin, end)``.

        ``end`` is None when there is no "-". Conversion errors from cn2an
        or ``int`` propagate to the caller.
        """
        if text.find('-') != -1:
            parts = text.split('-')
            begin = int(cn2an.cn2an(parts[0].strip(), mode='smart'))
            end = int(cn2an.cn2an(parts[1].strip(), mode='smart'))
            return begin, end
        return int(cn2an.cn2an(text, mode='smart')), None

    def init_subtitle(self, title_text: str):
        """
        Recognise season/episode information written in Chinese
        ("第2季", "第1-3集", "全12集", "全3季", ...) inside ``title_text``.

        Values already set on the instance are not overwritten. Parsing
        stops (after printing the error) at the first number that cannot
        be converted.
        """
        if not title_text:
            return
        # Pad with spaces so the "[第\s]+" prefix can match at the edges
        title_text = f" {title_text} "
        if not re.search(r'[全第季集话話期]', title_text, re.IGNORECASE):
            return
        # 第x季
        season_str = re.search(r'%s' % self._subtitle_season_re, title_text, re.IGNORECASE)
        if season_str:
            seasons = season_str.group(1)
            if not seasons:
                return
            seasons = seasons.upper().replace("S", "").strip()
            try:
                begin_season, end_season = self._parse_number_range(seasons)
            except Exception as err:
                print(str(err))
                return
            if self.begin_season is None:
                self.begin_season = begin_season
                self.total_seasons = 1
            if self.begin_season is not None \
                    and self.end_season is None \
                    and end_season is not None \
                    and end_season != self.begin_season:
                self.end_season = end_season
                self.total_seasons = (self.end_season - self.begin_season) + 1
            self.type = MediaType.TV
            self._subtitle_flag = True
        # 第x集
        episode_str = re.search(r'%s' % self._subtitle_episode_re, title_text, re.IGNORECASE)
        if episode_str:
            episodes = episode_str.group(1)
            if not episodes:
                return
            episodes = episodes.upper().replace("E", "").replace("P", "").strip()
            try:
                begin_episode, end_episode = self._parse_number_range(episodes)
            except Exception as err:
                print(str(err))
                return
            if self.begin_episode is None:
                self.begin_episode = begin_episode
                self.total_episodes = 1
            if self.begin_episode is not None \
                    and self.end_episode is None \
                    and end_episode is not None \
                    and end_episode != self.begin_episode:
                self.end_episode = end_episode
                self.total_episodes = (self.end_episode - self.begin_episode) + 1
            self.type = MediaType.TV
            self._subtitle_flag = True
        # x集全 / 全x集
        episode_all_str = re.search(r'%s' % self._subtitle_episode_all_re, title_text, re.IGNORECASE)
        if episode_all_str:
            episode_all = episode_all_str.group(1) or episode_all_str.group(2)
            if episode_all and self.begin_episode is None:
                try:
                    self.total_episodes = int(cn2an.cn2an(episode_all.strip(), mode='smart'))
                except Exception as err:
                    print(str(err))
                    return
                self.begin_episode = None
                self.end_episode = None
                self.type = MediaType.TV
                self._subtitle_flag = True
        # 全x季 / x季全
        season_all_str = re.search(r"%s" % self._subtitle_season_all_re, title_text, re.IGNORECASE)
        if season_all_str:
            season_all = season_all_str.group(1) or season_all_str.group(2)
            if season_all and self.begin_season is None and self.begin_episode is None:
                try:
                    self.total_seasons = int(cn2an.cn2an(season_all.strip(), mode='smart'))
                except Exception as err:
                    print(str(err))
                    return
                self.begin_season = 1
                self.end_season = self.total_seasons
                self.type = MediaType.TV
                self._subtitle_flag = True

    def is_in_season(self, season: Union[list, int, str]):
        """
        Tell whether ``season`` (one value or a list) is covered by this
        item. With no season recognised the item counts as season 1.
        """
        if isinstance(season, list):
            if self.end_season is not None:
                meta_season = list(range(self.begin_season, self.end_season + 1))
            elif self.begin_season is not None:
                meta_season = [self.begin_season]
            else:
                meta_season = [1]
            return set(meta_season).issuperset(set(season))
        if self.end_season is not None:
            return self.begin_season <= int(season) <= self.end_season
        if self.begin_season is not None:
            return int(season) == self.begin_season
        return int(season) == 1

    def is_in_episode(self, episode: Union[list, int, str]):
        """Tell whether ``episode`` (one value or a list) is covered by this item."""
        if isinstance(episode, list):
            if self.end_episode is not None:
                meta_episode = list(range(self.begin_episode, self.end_episode + 1))
            else:
                meta_episode = [self.begin_episode]
            return set(meta_episode).issuperset(set(episode))
        if self.end_episode is not None:
            return self.begin_episode <= int(episode) <= self.end_episode
        return int(episode) == self.begin_episode

    def get_season_string(self):
        """
        Return "Sxx" or "Sxx-Syy". Without a recognised season: "" for
        movies, "S01" otherwise.
        """
        if self.begin_season is not None:
            begin = str(self.begin_season).rjust(2, "0")
            if self.end_season is None:
                return "S%s" % begin
            return "S%s-S%s" % (begin, str(self.end_season).rjust(2, "0"))
        if self.type == MediaType.MOVIE:
            return ""
        return "S01"

    def get_season_item(self):
        """Return "Sxx" for ``begin_season``; "" for movies, "S01" otherwise when unset."""
        if self.begin_season is not None:
            return "S%s" % str(self.begin_season).rjust(2, "0")
        if self.type == MediaType.MOVIE:
            return ""
        return "S01"

    def get_season_seq(self):
        """Return ``begin_season`` as a string; "" for movies, "1" otherwise when unset."""
        if self.begin_season is not None:
            return str(self.begin_season)
        if self.type == MediaType.MOVIE:
            return ""
        return "1"

    def get_season_list(self):
        """Return the covered seasons as a list; [] for movies, [1] otherwise when unset."""
        if self.begin_season is None:
            if self.type == MediaType.MOVIE:
                return []
            return [1]
        if self.end_season is not None:
            return list(range(self.begin_season, self.end_season + 1))
        return [self.begin_season]

    def set_season(self, sea: Union[list, int, str]):
        """
        Overwrite the season range. A list sets first/last element as
        begin/end; a scalar sets a single season. Non-numeric input is ignored.
        """
        if not sea:
            return
        if isinstance(sea, list):
            if len(sea) == 1 and str(sea[0]).isdigit():
                self.begin_season = int(sea[0])
                self.end_season = None
            elif len(sea) > 1 and str(sea[0]).isdigit() and str(sea[-1]).isdigit():
                self.begin_season = int(sea[0])
                self.end_season = int(sea[-1])
        elif str(sea).isdigit():
            self.begin_season = int(sea)
            self.end_season = None

    def set_episode(self, ep: Union[list, int, str]):
        """
        Overwrite the episode range. A list sets first/last element as
        begin/end; a scalar sets a single episode. Non-numeric input is ignored.
        """
        if not ep:
            return
        if isinstance(ep, list):
            if len(ep) == 1 and str(ep[0]).isdigit():
                self.begin_episode = int(ep[0])
                self.end_episode = None
            elif len(ep) > 1 and str(ep[0]).isdigit() and str(ep[-1]).isdigit():
                self.begin_episode = int(ep[0])
                self.end_episode = int(ep[-1])
        elif str(ep).isdigit():
            self.begin_episode = int(ep)
            self.end_episode = None

    def get_episode_string(self):
        """Return "Exx" or "Exx-Eyy"; "" when no episode is known."""
        if self.begin_episode is None:
            return ""
        begin = str(self.begin_episode).rjust(2, "0")
        if self.end_episode is None:
            return "E%s" % begin
        return "E%s-E%s" % (begin, str(self.end_episode).rjust(2, "0"))

    def get_episode_list(self):
        """Return the covered episodes as a list; [] when no episode is known."""
        if self.begin_episode is None:
            return []
        if self.end_episode is not None:
            return list(range(self.begin_episode, self.end_episode + 1))
        return [self.begin_episode]

    def get_episode_items(self):
        """
        Return every covered episode side by side ("E01E02E03"), used for
        files that contain several episodes; "" when no episode is known.
        """
        episodes = self.get_episode_list()
        if not episodes:
            # Without this guard the result would be a dangling "E"
            return ""
        return "E%s" % "E".join(str(episode).rjust(2, '0') for episode in episodes)

    def get_episode_seqs(self):
        """Return "N" or "N-M" for the covered episodes; "" when none is known."""
        episodes = self.get_episode_list()
        if not episodes:
            return ""
        if len(episodes) == 1:
            return str(episodes[0])
        return "%s-%s" % (episodes[0], episodes[-1])

    def get_episode_seq(self):
        """Return the first covered episode as a string; "" when none is known."""
        episodes = self.get_episode_list()
        if episodes:
            return str(episodes[0])
        return ""

    def get_season_episode_string(self):
        """Return the season and episode strings joined by a space; "" for movies."""
        if self.type == MediaType.MOVIE:
            return ""
        seaion = self.get_season_string()
        episode = self.get_episode_string()
        if seaion and episode:
            return "%s %s" % (seaion, episode)
        elif seaion:
            return "%s" % seaion
        elif episode:
            return "%s" % episode
        return ""

    def get_resource_type_string(self):
        """
        Return resource type, effect and resolution, each prefixed by a
        space (the result is intentionally not stripped, as before).
        """
        ret_string = ""
        if self.resource_type:
            ret_string = f"{ret_string} {self.resource_type}"
        if self.resource_effect:
            ret_string = f"{ret_string} {self.resource_effect}"
        if self.resource_pix:
            ret_string = f"{ret_string} {self.resource_pix}"
        return ret_string

    def get_edtion_string(self):
        """Return resource type and effect (no resolution), stripped."""
        ret_string = ""
        if self.resource_type:
            ret_string = f"{ret_string} {self.resource_type}"
        if self.resource_effect:
            ret_string = f"{ret_string} {self.resource_effect}"
        return ret_string.strip()

    def get_resource_team_string(self):
        """Return the release / fansub group, or "" when unknown."""
        if self.resource_team:
            return self.resource_team
        return ""

    def get_video_encode_string(self):
        """Return the video codec, or "" when unknown."""
        return self.video_encode or ""

    def get_audio_encode_string(self):
        """Return the audio codec, or "" when unknown."""
        return self.audio_encode or ""
|
557
app/core/meta/metavideo.py
Normal file
557
app/core/meta/metavideo.py
Normal file
@@ -0,0 +1,557 @@
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.meta.metabase import MetaBase
|
||||
from app.core.meta.release_groups import ReleaseGroupsMatcher
|
||||
from app.utils.string import StringUtils
|
||||
from app.utils.tokens import Tokens
|
||||
from app.utils.types import MediaType
|
||||
|
||||
|
||||
class MetaVideo(MetaBase):
    """
    Recognise movies and TV series from a release name.
    """
    # ---- parser state flags ----
    _stop_name_flag = False
    _stop_cnname_flag = False
    _last_token = ""
    _last_token_type = ""
    _continue_flag = True
    _unknown_name_str = ""
    _source = ""
    _effect = []

    # ---- regular expressions ----
    _season_re = r"S(\d{2})|^S(\d{1,2})$|S(\d{1,2})E"
    _episode_re = r"EP?(\d{2,4})$|^EP?(\d{1,4})$|^S\d{1,2}EP?(\d{1,4})$|S\d{2}EP?(\d{2,4})"
    _part_re = r"(^PART[0-9ABI]{0,2}$|^CD[0-9]{0,2}$|^DVD[0-9]{0,2}$|^DISK[0-9]{0,2}$|^DISC[0-9]{0,2}$)"
    _roman_numerals = r"^(?=[MDCLXVI])M*(C[MD]|D?C{0,3})(X[CL]|L?X{0,3})(I[XV]|V?I{0,3})$"
    _source_re = r"^BLURAY$|^HDTV$|^UHDTV$|^HDDVD$|^WEBRIP$|^DVDRIP$|^BDRIP$|^BLU$|^WEB$|^BD$|^HDRip$"
    _effect_re = r"^REMUX$|^UHD$|^SDR$|^HDR\d*$|^DOLBY$|^DOVI$|^DV$|^3D$|^REPACK$"
    # Source and effect patterns combined into a single alternation
    _resources_type_re = "|".join((_source_re, _effect_re))
    # A leading "[...]" group
    _name_no_begin_re = r"^\[.+?]"
    _name_no_chinese_re = r".*版|.*字幕"
    # Season/episode marker words
    _name_se_words = ['共', '第', '季', '集', '话', '話', '期']
    # Noise removed from recognised names
    _name_nostring_re = (
        r"^PTS|^JADE|^AOD|^CHC|^[A-Z]{1,4}TV[\-0-9UVHDK]*"
        r"|HBO$|\s+HBO|\d{1,2}th|\d{1,2}bit|NETFLIX|AMAZON|IMAX|^3D|\s+3D|^BBC\s+|\s+BBC|BBC$|DISNEY\+?|XXX|\s+DC$"
        r"|[第\s共]+[0-9一二三四五六七八九十\-\s]+季"
        r"|[第\s共]+[0-9一二三四五六七八九十百零\-\s]+[集话話]"
        r"|连载|日剧|美剧|电视剧|动画片|动漫|欧美|西德|日韩|超高清|高清|蓝光|翡翠台|梦幻天堂·龙网|★?\d*月?新番"
        r"|最终季|合集|[多中国英葡法俄日韩德意西印泰台港粤双文语简繁体特效内封官译外挂]+字幕|版本|出品|台版|港版|\w+字幕组"
        r"|未删减版|UNCUT$|UNRATE$|WITH EXTRAS$|RERIP$|SUBBED$|PROPER$|REPACK$|SEASON$|EPISODE$|Complete$|Extended$|Extended Version$"
        r"|S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}"
        r"|CD[\s.]*[1-9]|DVD[\s.]*[1-9]|DISK[\s.]*[1-9]|DISC[\s.]*[1-9]"
        r"|[248]K|\d{3,4}[PIX]+"
        r"|CD[\s.]*[1-9]|DVD[\s.]*[1-9]|DISK[\s.]*[1-9]|DISC[\s.]*[1-9]"
    )
    _resources_pix_re = r"^[SBUHD]*(\d{3,4}[PI]+)|\d{3,4}X(\d{3,4})"
    _resources_pix_re2 = r"(^[248]+K)"
    _video_encode_re = r"^[HX]26[45]$|^AVC$|^HEVC$|^VC\d?$|^MPEG\d?$|^Xvid$|^DivX$|^HDR\d*$"
    _audio_encode_re = r"^DTS\d?$|^DTSHD$|^DTSHDMA$|^Atmos$|^TrueHD\d?$|^AC3$|^\dAudios?$|^DDP\d?$|^DD\d?$|^LPCM\d?$|^AAC\d?$|^FLAC\d?$|^HD\d?$|^MA\d?$"
|
||||
|
||||
def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
    """
    Parse ``title`` token by token and fill the media attributes.

    :param title: release/file name; nothing is parsed when it is empty
    :param subtitle: optional secondary text (season/episode, DIY marker)
    :param isfile: stored as-is by the base class
    """
    super().__init__(title, subtitle, isfile)
    if not title:
        return
    original_title = title
    # Per-instance state (the class-level defaults are shared)
    self._source = ""
    self._effect = []

    # A media file whose stem is a short pure number is just an episode number
    as_path = Path(title)
    stem = as_path.stem
    if as_path.suffix.lower() in settings.RMT_MEDIAEXT and stem.isdigit() and len(stem) < 5:
        self.begin_episode = int(stem)
        self.type = MediaType.TV
        return

    # Drop the first leading [...] group
    title = re.sub(r'%s' % self._name_no_begin_re, "", title, count=1)
    # Collapse "xxxx-xxxx" year ranges to the first year (common on season packs)
    title = re.sub(r'([\s.]+)(\d{4})-(\d{4})', r'\1\2', title)
    # Remove file sizes
    title = re.sub(r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)', "", title, flags=re.IGNORECASE)
    # Remove full dates (year-month-day)
    title = re.sub(r'\d{4}[\s._-]\d{1,2}[\s._-]\d{1,2}', "", title)

    # Tokenise
    tokens = Tokens(title)
    self.tokens = tokens

    # Recognisers tried in this order after the "part" check; as soon as one
    # of them clears _continue_flag the rest are skipped for this token.
    recognisers = (
        self.__init_name,            # title
        self.__init_year,            # year
        self.__init_resource_pix,    # resolution
        self.__init_season,          # season
        self.__init_episode,         # episode
        self.__init_resource_type,   # source / effect
        self.__init_video_encode,    # video codec
        self.__init_audio_encode,    # audio codec
    )
    token = tokens.get_next()
    while token:
        self.__init_part(token)
        for recognise in recognisers:
            if not self._continue_flag:
                break
            recognise(token)
        # Advance until the tokens run out
        token = tokens.get_next()
        self._continue_flag = True

    # Assemble the quality strings
    if self._effect:
        self._effect.reverse()
        self.resource_effect = " ".join(self._effect)
    if self._source:
        self.resource_type = self._source.strip()

    # Mark DIY Blu-ray discs
    if self.resource_type and "BluRay" in self.resource_type:
        diy_in_subtitle = bool(self.subtitle) and re.search(r'D[Ii]Y', self.subtitle) is not None
        if diy_in_subtitle or re.search(r'-D[Ii]Y@', original_title) is not None:
            self.resource_type = f"{self.resource_type} DIY"

    # Season/episode written in Chinese text: title first, then subtitle
    self.init_subtitle(self.org_string)
    if not self._subtitle_flag and self.subtitle:
        self.init_subtitle(self.subtitle)

    # Default to movie when nothing decided the type
    if not self.type:
        self.type = MediaType.MOVIE

    # Strip noise from the names; short pure-number names are discarded
    self.cn_name = self.__fix_name(self.cn_name)
    self.en_name = StringUtils.str_title(self.__fix_name(self.en_name))

    # A bare "PART" without a number carries no information
    if self.part and self.part.upper() == "PART":
        self.part = None

    # Release / fansub group
    self.resource_team = ReleaseGroupsMatcher().match(title=original_title) or None
|
||||
|
||||
def __fix_name(self, name: str):
    """
    Remove noise words from ``name`` and collapse whitespace.

    A name that ends up as a pure number below 1800, while nothing else
    (year, season, resolution, source, codecs) was recognised, is treated
    as an episode number and None is returned instead.

    :param name: recognised name, may be empty/None (returned unchanged)
    :return: the cleaned name, or None when it was only an episode number
    """
    if not name:
        return name
    cleaned = re.sub(r'%s' % self._name_nostring_re, '', name, flags=re.IGNORECASE)
    cleaned = re.sub(r'\s+', ' ', cleaned.strip())

    if not (cleaned.isdigit() and int(cleaned) < 1800):
        return cleaned
    other_info_found = any((
        self.year,
        self.begin_season,
        self.resource_pix,
        self.resource_type,
        self.audio_encode,
        self.video_encode,
    ))
    if other_info_found:
        return cleaned

    number = int(cleaned)
    if self.begin_episode is None:
        # No episode yet: the number becomes the episode
        self.begin_episode = number
        return None
    if self.is_in_episode(number) and not self.begin_season:
        # The number merely repeats an already known episode
        return None
    return cleaned
|
||||
|
||||
def __init_name(self, token: str):
    """
    Feed one token to the name recogniser.

    Updates ``cn_name`` / ``en_name`` and the parser state flags
    (``_stop_name_flag``, ``_continue_flag``, ``_last_token_type``,
    ``_unknown_name_str``).

    :param token: current token; ignored when empty
    """
    if not token:
        return
    # Reclaim a number that was parked earlier because its role was unclear
    if self._unknown_name_str:
        if not self.cn_name:
            if not self.en_name:
                self.en_name = self._unknown_name_str
            elif self._unknown_name_str != self.year:
                self.en_name = "%s %s" % (self.en_name, self._unknown_name_str)
            self._last_token_type = "enname"
        self._unknown_name_str = ""
    if self._stop_name_flag:
        return
    if token.upper() == "AKA":
        # Everything after "AKA" is an alternative title: stop collecting
        self._continue_flag = False
        self._stop_name_flag = True
        return
    if token in self._name_se_words:
        self._last_token_type = 'name_se_words'
        return
    if StringUtils.is_chinese(token):
        # Chinese text is taken as the title directly (attached digits or
        # Latin letters are kept); later Chinese tokens are not used.
        self._last_token_type = "cnname"
        if not self.cn_name:
            self.cn_name = token
        elif not self._stop_cnname_flag:
            # _name_se_words is a list of marker words: test them as
            # substrings. Formatting the list into a regex would also
            # match the quotes, commas and spaces of its repr.
            has_se_word = any(word in token for word in self._name_se_words)
            if not re.search("%s" % self._name_no_chinese_re, token, flags=re.IGNORECASE) \
                    and not has_se_word:
                self.cn_name = "%s %s" % (self.cn_name, token)
            self._stop_cnname_flag = True
        return

    is_roman_digit = re.search(self._roman_numerals, token)
    # Arabic or Roman numeral
    if token.isdigit() or is_roman_digit:
        # A number right after a season/episode marker word is not a name
        if self._last_token_type == 'name_se_words':
            return
        if self.get_name():
            # A number with a leading 0 after the name is most likely an episode
            if token.startswith('0'):
                return
            # str.isdigit() accepts characters int() rejects (e.g. "²")
            if token.isdigit():
                try:
                    int(token)
                except ValueError:
                    return
            # After a Chinese name, a number that is not a year is most likely an episode
            if not is_roman_digit \
                    and self._last_token_type == "cnname" \
                    and int(token) < 1900:
                return
            if (token.isdigit() and len(token) < 4) or is_roman_digit:
                # Short number or Roman numeral: append to the current name
                if self._last_token_type == "cnname":
                    self.cn_name = "%s %s" % (self.cn_name, token)
                elif self._last_token_type == "enname":
                    self.en_name = "%s %s" % (self.en_name, token)
                self._continue_flag = False
            elif token.isdigit() and len(token) == 4:
                # Four digits: could be a year, part of the title, or an
                # episode - park it until the next token decides
                if not self._unknown_name_str:
                    self._unknown_name_str = token
        else:
            # First number before any name: remember it
            if not self._unknown_name_str:
                self._unknown_name_str = token
    elif re.search(r"%s" % self._season_re, token, re.IGNORECASE):
        # Season token
        if self.en_name and re.search(r"SEASON$", self.en_name, re.IGNORECASE):
            # The name ends with "Season", so that word belongs to the title;
            # the trailing space keeps the later "SEASON$" clean-up from removing it
            self.en_name += ' '
        self._stop_name_flag = True
        return
    elif re.search(r"%s" % self._episode_re, token, re.IGNORECASE) \
            or re.search(r"(%s)" % self._resources_type_re, token, re.IGNORECASE) \
            or re.search(r"%s" % self._resources_pix_re, token, re.IGNORECASE):
        # Episode, source, resolution: the name is over
        self._stop_name_flag = True
        return
    else:
        # Skip file extensions; lower-case the token (not the format
        # string) so "MKV" is recognised as well as "mkv"
        if ".%s" % token.lower() in settings.RMT_MEDIAEXT:
            return
        # Latin word (possibly with digits): append to the English name
        if self.en_name:
            self.en_name = "%s %s" % (self.en_name, token)
        else:
            self.en_name = token
        self._last_token_type = "enname"
|
||||
|
||||
def __init_part(self, token: str):
    """
    Detect a part marker in the current token and record it in ``self.part``.

    Nothing is done until a name has been collected and at least one of
    year, season, episode, resolution or resource type is already known.
    When ``token`` matches ``self._part_re``, the first capture group becomes
    the part (unless one is already stored). If the token reported by
    ``self.tokens.cur()`` is a single digit, a two-digit number starting with
    ``0``, or one of A/B/C/I/II/III, it is appended to the part and
    ``self.tokens.get_next()`` is called (presumably to consume it).

    :param token: the token currently being examined
    """
    if not self.get_name():
        return
    # A part marker is only trusted after some other piece of metadata was seen.
    has_anchor = (self.year
                  or self.begin_season
                  or self.begin_episode
                  or self.resource_pix
                  or self.resource_type)
    if not has_anchor:
        return
    matched = re.search(r"%s" % self._part_re, token, re.IGNORECASE)
    if not matched:
        return
    if not self.part:
        self.part = matched.group(1)
    following = self.tokens.cur()
    if following:
        is_short_number = following.isdigit() and (
            len(following) == 1
            or len(following) == 2 and following.startswith('0')
        )
        if is_short_number or following.upper() in ['A', 'B', 'C', 'I', 'II', 'III']:
            # The index belongs to the part marker, e.g. "Part" + "1".
            self.part = "%s%s" % (self.part, following)
            self.tokens.get_next()
    self._last_token_type = "part"
    self._continue_flag = False
    self._stop_name_flag = False
|
||||
|
||||
def __init_year(self, token: str):
    """
    Recognise a release year in the current token.

    A token is accepted only when a name has already been collected and the
    token is exactly four digits whose value lies strictly between 1900 and
    2050. If a year was already stored, that earlier value is appended to the
    English name (or, failing that, the Chinese name) before being replaced,
    presumably because it belonged to the title.

    :param token: the token currently being examined
    """
    if not self.get_name():
        return
    if len(token) != 4 or not token.isdigit():
        return
    # str.isdigit() also accepts characters (e.g. superscript digits) that
    # int() rejects, so make sure the token is a real number.
    try:
        year_value = int(token)
    except ValueError:
        return
    if not 1900 < year_value < 2050:
        return
    if self.year:
        if self.en_name:
            self.en_name = "%s %s" % (self.en_name.strip(), self.year)
        elif self.cn_name:
            self.cn_name = "%s %s" % (self.cn_name, self.year)
    elif self.en_name and re.search(r"SEASON$", self.en_name, re.IGNORECASE):
        # A year follows an English name ending in "Season": "Season" is part
        # of the title and must not be stripped later as a noise word.
        self.en_name += ' '
    self.year = token
    self._last_token_type = "year"
    self._continue_flag = False
    self._stop_name_flag = True
|
||||
|
||||
def __init_resource_pix(self, token: str):
    """
    Recognise the video resolution in the current token.

    ``self._resources_pix_re`` is tried first; when it yields nothing,
    ``self._resources_pix_re2`` is used as a fallback. The first value found
    is stored lower-cased in ``self.resource_pix`` unless one is already set.
    A purely numeric stored value gets a ``p`` suffix.

    :param token: the token currently being examined
    """
    if not self.get_name():
        return
    found = re.findall(r"%s" % self._resources_pix_re, token, re.IGNORECASE)
    if not found:
        # Fallback pattern; its first capture group holds the resolution.
        fallback = re.search(r"%s" % self._resources_pix_re2, token, re.IGNORECASE)
        if fallback:
            self._last_token_type = "pix"
            self._continue_flag = False
            self._stop_name_flag = True
            if not self.resource_pix:
                self.resource_pix = fallback.group(1).lower()
        return

    self._last_token_type = "pix"
    self._continue_flag = False
    self._stop_name_flag = True
    candidate = None
    for hit in found:
        if isinstance(hit, tuple):
            # Several capture groups: take the first one that matched.
            first_group = next((grp for grp in hit if grp), None)
            if first_group:
                candidate = first_group
        else:
            candidate = hit
        if candidate and not self.resource_pix:
            self.resource_pix = candidate.lower()
            break
    current = self.resource_pix
    if current and current.isdigit() and current[-1] not in 'kpi':
        # Bare number such as "1080": normalise to "1080p".
        self.resource_pix = "%sp" % current
|
||||
|
||||
def __init_season(self, token: str):
    """
    Recognise season information in the current token.

    Three cases are handled:

    * ``token`` matches ``self._season_re``: every match updates
      ``begin_season`` / ``end_season`` / ``total_seasons`` and the media
      type becomes TV. For files (``self.isfile``) a multi-season range is
      discarded and the total falls back to 1.
    * ``token`` is a number of fewer than three digits directly after a
      literal ``SEASON`` token: it becomes the begin season.
    * ``token`` is the literal word ``SEASON`` and no season is known yet:
      this is remembered so the following number can be picked up.

    :param token: the token currently being examined
    """
    hits = re.findall(r"%s" % self._season_re, token, re.IGNORECASE)
    if hits:
        self._last_token_type = "season"
        self.type = MediaType.TV
        self._stop_name_flag = True
        self._continue_flag = True
        for hit in hits:
            if isinstance(hit, tuple):
                # Several capture groups: use the first numeric one.
                digits = next((grp for grp in hit if grp and str(grp).isdigit()), None)
                if not digits:
                    break
                season_no = int(digits)
            else:
                season_no = int(hit)
            if self.begin_season is None:
                self.begin_season = season_no
                self.total_seasons = 1
                continue
            if season_no <= self.begin_season:
                continue
            self.end_season = season_no
            self.total_seasons = (self.end_season - self.begin_season) + 1
            if self.isfile and self.total_seasons > 1:
                # For a file, the season range is dropped again.
                self.end_season = None
                self.total_seasons = 1
        return

    if not token.isdigit():
        if token.upper() == "SEASON" and self.begin_season is None:
            # Remember the keyword; the next numeric token is the season.
            self._last_token_type = "SEASON"
        return

    # isdigit() may accept characters that int() rejects.
    try:
        number = int(token)
    except ValueError:
        return
    if self._last_token_type == "SEASON" \
            and self.begin_season is None \
            and len(token) < 3:
        self.begin_season = number
        self.total_seasons = 1
        self._last_token_type = "season"
        self._stop_name_flag = True
        self._continue_flag = False
        self.type = MediaType.TV
|
||||
|
||||
def __init_episode(self, token: str):
    """
    Recognise episode information in the current token.

    * ``token`` matches ``self._episode_re``: every match updates
      ``begin_episode`` / ``end_episode`` / ``total_episodes`` and the media
      type becomes TV. For files (``self.isfile``) a range of more than two
      episodes is discarded and the total falls back to 1.
    * ``token`` is a plain number: depending on the parser state it is taken
      as the end of an episode range, as the first episode, or as the number
      following a literal ``EPISODE`` token.
    * ``token`` is the literal word ``EPISODE``: this is remembered so the
      following number can be picked up.

    :param token: the token currently being examined
    """
    hits = re.findall(r"%s" % self._episode_re, token, re.IGNORECASE)
    if hits:
        self._last_token_type = "episode"
        self._continue_flag = False
        self._stop_name_flag = True
        self.type = MediaType.TV
        for hit in hits:
            if isinstance(hit, tuple):
                # Several capture groups: use the first numeric one.
                digits = next((grp for grp in hit if grp and str(grp).isdigit()), None)
                if not digits:
                    break
                episode_no = int(digits)
            else:
                episode_no = int(hit)
            if self.begin_episode is None:
                self.begin_episode = episode_no
                self.total_episodes = 1
                continue
            if episode_no <= self.begin_episode:
                continue
            self.end_episode = episode_no
            self.total_episodes = (self.end_episode - self.begin_episode) + 1
            if self.isfile and self.total_episodes > 2:
                # For a file, a range of more than two episodes is dropped.
                self.end_episode = None
                self.total_episodes = 1
        return

    if not token.isdigit():
        if token.upper() == "EPISODE":
            # Remember the keyword; the next numeric token is the episode.
            self._last_token_type = "EPISODE"
        return

    # isdigit() may accept characters that int() rejects.
    try:
        number = int(token)
    except ValueError:
        return
    width = len(token)
    if self.begin_episode is not None \
            and self.end_episode is None \
            and width < 5 \
            and number > self.begin_episode \
            and self._last_token_type == "episode":
        # A larger number right after an episode closes the range.
        self.end_episode = number
        self.total_episodes = (self.end_episode - self.begin_episode) + 1
        if self.isfile and self.total_episodes > 2:
            self.end_episode = None
            self.total_episodes = 1
        self._continue_flag = False
        self.type = MediaType.TV
    elif self.begin_episode is None \
            and 1 < width < 4 \
            and self._last_token_type != "year" \
            and self._last_token_type != "videoencode" \
            and token != self._unknown_name_str:
        # A bare 2-3 digit number that is not right after a year or a video
        # codec and is not the remembered unknown-name number.
        self.begin_episode = number
        self.total_episodes = 1
        self._last_token_type = "episode"
        self._continue_flag = False
        self._stop_name_flag = True
        self.type = MediaType.TV
    elif self._last_token_type == "EPISODE" \
            and self.begin_episode is None \
            and width < 5:
        # Number directly following the literal word "EPISODE".
        self.begin_episode = number
        self.total_episodes = 1
        self._last_token_type = "episode"
        self._continue_flag = False
        self._stop_name_flag = True
        self.type = MediaType.TV
|
||||
|
||||
def __init_resource_type(self, token):
    """
    Recognise the resource source (e.g. WEB-DL, BluRay) and effect tags.

    A token matching ``self._source_re`` is stored as the source unless one
    is already set. The split spellings ``WEB`` + ``DL`` and ``BLU`` + ``RAY``
    as well as ``WEBDL`` are normalised to ``WEB-DL`` / ``BluRay``. Any other
    token is checked against ``self._effect_re`` and, when it matches, added
    once to ``self._effect``.

    :param token: the token currently being examined
    """
    if not self.get_name():
        return

    source_hit = re.search(r"(%s)" % self._source_re, token, re.IGNORECASE)
    if source_hit:
        self._last_token_type = "source"
        self._continue_flag = False
        self._stop_name_flag = True
        if not self._source:
            self._source = source_hit.group(1)
            self._last_token = self._source.upper()
        return

    upper_token = token.upper()
    # Second half of a source name that was split into two tokens.
    if upper_token == "DL" \
            and self._last_token_type == "source" \
            and self._last_token == "WEB":
        self._source = "WEB-DL"
        self._continue_flag = False
        return
    if upper_token == "RAY" \
            and self._last_token_type == "source" \
            and self._last_token == "BLU":
        self._source = "BluRay"
        self._continue_flag = False
        return
    if upper_token == "WEBDL":
        self._source = "WEB-DL"
        self._continue_flag = False
        return

    effect_hit = re.search(r"(%s)" % self._effect_re, token, re.IGNORECASE)
    if not effect_hit:
        return
    self._last_token_type = "effect"
    self._continue_flag = False
    self._stop_name_flag = True
    tag = effect_hit.group(1)
    if tag not in self._effect:
        self._effect.append(tag)
    self._last_token = tag.upper()
|
||||
|
||||
def __init_video_encode(self, token: str):
    """
    Recognise the video codec in the current token.

    Nothing is done until a name has been collected and at least one of
    year, resolution, resource type, season or episode is already known.
    Handles codecs matched by ``self._video_encode_re``, codecs split over
    two tokens (``H``/``x`` + ``264``/``265``, ``VC``/``MPEG`` + number) and
    the ``10bit`` marker, which is combined with the codec name.

    :param token: the token currently being examined
    """
    if not self.get_name():
        return
    if not self.year \
            and not self.resource_pix \
            and not self.resource_type \
            and not self.begin_season \
            and not self.begin_episode:
        return
    re_res = re.search(r"(%s)" % self._video_encode_re, token, re.IGNORECASE)
    if re_res:
        self._continue_flag = False
        self._stop_name_flag = True
        self._last_token_type = "videoencode"
        codec = re_res.group(1).upper()
        if not self.video_encode:
            self.video_encode = codec
            self._last_token = self.video_encode
        elif self.video_encode == "10bit":
            # "10bit" was seen before the codec: put the codec in front.
            self.video_encode = f"{codec} 10bit"
            self._last_token = codec
    elif token.upper() in ('H', 'X'):
        # First half of a split codec name; "H" is kept upper case and
        # "x" lower case so the result reads "H264" / "x264".
        self._continue_flag = False
        self._stop_name_flag = True
        self._last_token_type = "videoencode"
        self._last_token = token.upper() if token.upper() == "H" else token.lower()
    elif token in ("264", "265") \
            and self._last_token_type == "videoencode" \
            and self._last_token in ('H', 'X', 'x'):
        # The branch above stores a lone x/X as lower-case 'x', so 'x' must
        # be accepted here; otherwise "x 264" could never be combined.
        self.video_encode = "%s%s" % (self._last_token, token)
    elif token.isdigit() \
            and self._last_token_type == "videoencode" \
            and self._last_token in ('VC', 'MPEG'):
        self.video_encode = "%s%s" % (self._last_token, token)
    elif token.upper() == "10BIT":
        self._last_token_type = "videoencode"
        if not self.video_encode:
            self.video_encode = "10bit"
        else:
            self.video_encode = f"{self.video_encode} 10bit"
|
||||
|
||||
def __init_audio_encode(self, token: str):
    """
    Recognise the audio codec in the current token.

    Nothing is done until a name has been collected and at least one of
    year, resolution, resource type, season or episode is already known.
    Tokens matching ``self._audio_encode_re`` are accumulated into
    ``self.audio_encode`` (joined with ``-`` after ``DTS``, otherwise with a
    space). Numeric tokens directly after an audio codec are appended as
    channel numbers, e.g. ``5`` then ``1`` becomes ``5.1``.

    :param token: the token currently being examined
    """
    if not self.get_name():
        return
    has_anchor = (self.year
                  or self.resource_pix
                  or self.resource_type
                  or self.begin_season
                  or self.begin_episode)
    if not has_anchor:
        return

    hit = re.search(r"(%s)" % self._audio_encode_re, token, re.IGNORECASE)
    if hit:
        codec = hit.group(1)
        self._continue_flag = False
        self._stop_name_flag = True
        self._last_token_type = "audioencode"
        self._last_token = codec.upper()
        if not self.audio_encode:
            self.audio_encode = codec
        elif self.audio_encode.upper() == "DTS":
            # "DTS" followed by another tag is written with a hyphen.
            self.audio_encode = "%s-%s" % (self.audio_encode, codec)
        else:
            self.audio_encode = "%s %s" % (self.audio_encode, codec)
        return

    if not (token.isdigit() and self._last_token_type == "audioencode"):
        return
    current = self.audio_encode
    if current:
        if self._last_token.isdigit():
            # Second channel number: "5" + "1" -> "5.1".
            self.audio_encode = "%s.%s" % (current, token)
        elif current[-1].isdigit():
            # Codec ending in a digit, e.g. "DDP5" + "1" -> "DDP 5.1".
            self.audio_encode = "%s %s.%s" % (current[:-1], current[-1], token)
        else:
            self.audio_encode = "%s %s" % (current, token)
    self._last_token = token
|
111
app/core/meta/release_groups.py
Normal file
111
app/core/meta/release_groups.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import regex as re
|
||||
|
||||
from app.utils.singleton import Singleton
|
||||
|
||||
|
||||
class ReleaseGroupsMatcher(metaclass=Singleton):
    """
    Recognise release groups and subtitle groups in a resource title.
    """
    # All built-in group patterns joined with "|"; built once in __init__.
    __release_groups: str = None
    # Extra user-defined group patterns (regex alternation), see update_custom().
    custom_release_groups: str = None
    # Separator used to join several matched groups; "@" when unset.
    custom_separator: str = None
    # Built-in regex fragments, keyed by a site/category label.
    RELEASE_GROUPS: dict = {
        "0ff": ['FF(?:(?:A|WE)B|CD|E(?:DU|B)|TV)'],
        "1pt": [],
        "52pt": [],
        "audiences": ['Audies', 'AD(?:Audio|E(?:|book)|Music|Web)'],
        "azusa": [],
        "beitai": ['BeiTai'],
        "btschool": ['Bts(?:CHOOL|HD|PAD|TV)', 'Zone'],
        "carpt": ['CarPT'],
        "chdbits": ['CHD(?:|Bits|PAD|(?:|HK)TV|WEB)', 'StBOX', 'OneHD', 'Lee', 'xiaopie'],
        "discfan": [],
        "dragonhd": [],
        "eastgame": ['(?:(?:iNT|(?:HALFC|Mini(?:S|H|FH)D))-|)TLF'],
        "filelist": [],
        "gainbound": ['(?:DG|GBWE)B'],
        "hares": ['Hares(?:|(?:M|T)V|Web)'],
        "hd4fans": [],
        "hdarea": ['HDA(?:pad|rea|TV)', 'EPiC'],
        "hdatmos": [],
        "hdbd": [],
        "hdchina": ['HDC(?:|hina|TV)', 'k9611', 'tudou', 'iHD'],
        "hddolby": ['D(?:ream|BTV)', '(?:HD|QHstudI)o'],
        "hdfans": ['beAst(?:|TV)'],
        "hdhome": ['HDH(?:|ome|Pad|TV|WEB)'],
        "hdpt": ['HDPT(?:|Web)'],
        "hdsky": ['HDS(?:|ky|TV|Pad|WEB)', 'AQLJ'],
        "hdtime": [],
        "HDU": [],
        "hdvideo": [],
        "hdzone": ['HDZ(?:|one)'],
        "hhanclub": ['HHWEB'],
        "hitpt": [],
        "htpt": ['HTPT'],
        "iptorrents": [],
        "joyhd": [],
        "keepfrds": ['FRDS', 'Yumi', 'cXcY'],
        "lemonhd": ['L(?:eague(?:(?:C|H)D|(?:M|T)V|NF|WEB)|HD)', 'i18n', 'CiNT'],
        "mteam": ['MTeam(?:|TV)', 'MPAD'],
        "nanyangpt": [],
        "nicept": [],
        "oshen": [],
        "ourbits": ['Our(?:Bits|TV)', 'FLTTH', 'Ao', 'PbK', 'MGs', 'iLove(?:HD|TV)'],
        "piggo": ['PiGo(?:NF|(?:H|WE)B)'],
        "ptchina": [],
        "pterclub": ['PTer(?:|DIY|Game|(?:M|T)V|WEB)'],
        "pthome": ['PTH(?:|Audio|eBook|music|ome|tv|WEB)'],
        "ptmsg": [],
        "ptsbao": ['PTsbao', 'OPS', 'F(?:Fans(?:AIeNcE|BD|D(?:VD|IY)|TV|WEB)|HDMv)', 'SGXT'],
        "pttime": [],
        "putao": ['PuTao'],
        "soulvoice": [],
        "springsunday": ['CMCT(?:|V)'],
        "sharkpt": ['Shark(?:|WEB|DIY|TV|MV)'],
        "tccf": [],
        "tjupt": ['TJUPT'],
        "totheglory": ['TTG', 'WiKi', 'NGB', 'DoA', '(?:ARi|ExRE)N'],
        "U2": [],
        "ultrahd": [],
        # NOTE: "rollHD" previously carried a trailing space inside the
        # alternation, which forced a space to be consumed before the
        # look-ahead and kept "-TrollHD" at the end of a title (or before
        # ".") from ever matching.
        "others": ['B(?:MDru|eyondHD|TN)', 'C(?:fandora|trlhd|MRG)', 'DON', 'EVO', 'FLUX', 'HONE(?:|yG)',
                   'N(?:oGroup|T(?:b|G))', 'PandaMoon', 'SMURF', 'T(?:EPES|aengoo|rollHD)'],
        "anime": ['ANi', 'HYSUB', 'KTXP', 'LoliHouse', 'MCE', 'Nekomoe kissaten', '(?:Lilith|NC)-Raws', '织梦字幕组']
    }

    def __init__(self):
        # Flatten every per-site list into one regex alternation.
        self.__release_groups = '|'.join(
            release_group
            for site_groups in self.RELEASE_GROUPS.values()
            for release_group in site_groups
        )

    def match(self, title: str = None, groups: str = None):
        """
        Find the release/subtitle groups mentioned in a title.

        :param title: resource title or file name
        :param groups: regex alternation of group names to look for; when
                       empty, the built-in groups plus any custom groups are used
        :return: the distinct matched groups in order of appearance, joined
                 with the custom separator (``@`` by default); ``""`` when
                 the title is empty or nothing matches
        """
        if not title:
            return ""
        if not groups:
            if self.custom_release_groups:
                groups = f"{self.__release_groups}|{self.custom_release_groups}"
            else:
                groups = self.__release_groups
        # The trailing space lets a group at the very end of the title
        # satisfy the look-ahead below.
        title = f"{title} "
        groups_re = re.compile(r"(?<=[-@\[£【&])(?:%s)(?=[@.\s\]\[】&])" % groups, re.I)
        # A group may be found several times; keep the first occurrence of
        # each and preserve the order.
        unique_groups = list(dict.fromkeys(groups_re.findall(title)))
        separator = self.custom_separator or "@"
        return separator.join(unique_groups)

    def update_custom(self, release_groups: str = None, separator: str = None):
        """
        Update the custom release/subtitle groups and the custom separator.

        :param release_groups: regex alternation of additional group names
        :param separator: separator used to join several matched groups
        """
        self.custom_release_groups = release_groups
        self.custom_separator = separator
|
Reference in New Issue
Block a user