import traceback
from dataclasses import dataclass, asdict
from typing import Union, Optional, List, Self

import cn2an
import regex as re

from app.log import logger
from app.utils.string import StringUtils
from app.schemas.types import MediaType


@dataclass
class MetaBase(object):
    """
    Base class for recognised media information.

    Holds the fields filled in while a title is being recognised (names,
    year, season/episode range, resource attributes) and offers formatted
    views of them through properties.
    """
    # Whether the item being processed is a file
    isfile: bool = False
    # Original title string (before recognition-word processing)
    title: str = ""
    # String used for recognition (after recognition-word processing)
    org_string: Optional[str] = None
    # Subtitle
    subtitle: Optional[str] = None
    # Media type: movie or TV series
    type: MediaType = MediaType.UNKNOWN
    # Recognised Chinese name
    cn_name: Optional[str] = None
    # Recognised English name
    en_name: Optional[str] = None
    # Year
    year: Optional[str] = None
    # Total number of seasons
    total_season: int = 0
    # Recognised first season (number)
    begin_season: Optional[int] = None
    # Recognised last season (number)
    end_season: Optional[int] = None
    # Total number of episodes
    total_episode: int = 0
    # Recognised first episode
    begin_episode: Optional[int] = None
    # Recognised last episode
    end_episode: Optional[int] = None
    # Partx Cd Dvd Disk Disc
    part: Optional[str] = None
    # Recognised resource type
    resource_type: Optional[str] = None
    # Recognised effect
    resource_effect: Optional[str] = None
    # Recognised resolution
    resource_pix: Optional[str] = None
    # Recognised release group / subtitle group
    resource_team: Optional[str] = None
    # Recognised custom placeholder
    customization: Optional[str] = None
    # Video codec
    video_encode: Optional[str] = None
    # Audio codec
    audio_encode: Optional[str] = None
    # Recognition words that were applied
    apply_words: Optional[List[str]] = None
    # Extra information (both default to None, hence Optional)
    tmdbid: Optional[int] = None
    doubanid: Optional[str] = None

    # Subtitle parsing. These names carry no annotation, so the dataclass
    # machinery treats them as plain class attributes, not as fields.
    _subtitle_flag = False
    _title_episodel_re = r"Episode\s+(\d{1,4})"
    # NOTE(review): the reviewed copy of this file was truncated between
    # `_subtitle_season_re = r"(?` and the body of the `name` getter. The five
    # patterns below and the getter's header are reconstructed to match how
    # init_subtitle() consumes their capture groups -- confirm them against
    # version control before merging.
    _subtitle_season_re = r"(?<![全共]\s*)[第\s]+([0-9一二三四五六七八九十S\-]+)\s*季(?!\s*[全共])"
    _subtitle_season_all_re = r"[全共]\s*([0-9一二三四五六七八九十]+)\s*季|([0-9一二三四五六七八九十]+)\s*季\s*全"
    _subtitle_episode_re = r"(?<![全共]\s*)[第\s]+([0-9一二三四五六七八九十百零EP\-]+)\s*[集话話期幕](?!\s*[全共])"
    _subtitle_episode_between_re = r"[第\s]+([0-9一二三四五六七八九十百零]+)\s*[集话話期幕]?\s*-\s*第?\s*([0-9一二三四五六七八九十百零]+)\s*[集话話期幕]"
    _subtitle_episode_all_re = r"([0-9一二三四五六七八九十百零]+)\s*集\s*全|[全共]\s*([0-9一二三四五六七八九十百零]+)\s*[集话話期幕]"

    @property
    def name(self) -> str:
        """
        Return the display name.

        An all-Chinese ``cn_name`` wins; otherwise ``en_name``; otherwise
        whatever ``cn_name`` holds; otherwise an empty string.
        """
        if self.cn_name and StringUtils.is_all_chinese(self.cn_name):
            return self.cn_name
        elif self.en_name:
            return self.en_name
        elif self.cn_name:
            return self.cn_name
        return ""

    @name.setter
    def name(self, name: str):
        """
        Set the name: an all-Chinese value goes to ``cn_name``; anything else
        goes to ``en_name`` and clears ``cn_name``.
        """
        if StringUtils.is_all_chinese(name):
            self.cn_name = name
        else:
            self.en_name = name
            self.cn_name = None

    @staticmethod
    def _to_int(text: str) -> int:
        """
        Convert a stripped Arabic/Chinese numeral string to int via cn2an's
        'smart' mode. Any conversion error propagates to the caller.
        """
        return int(cn2an.cn2an(text.strip(), mode='smart'))

    def init_subtitle(self, title_text: str):
        """
        Recognise season/episode information from a subtitle-like string.

        Values already present on the instance are not overwritten; on a
        successful recognition ``type`` becomes ``MediaType.TV`` and
        ``_subtitle_flag`` is set. Parse failures are logged at debug level
        and abort the recognition.

        :param title_text: text to scan; nothing happens when it is empty
        """
        if not title_text:
            return
        # The text is padded with a space on each side before matching.
        title_text = f" {title_text} "
        # Search once and reuse the match object.
        episode_match = re.search(self._title_episodel_re, title_text, re.IGNORECASE)
        if episode_match:
            try:
                episode = int(episode_match.group(1))
            except Exception as err:
                logger.debug(f'识别集失败:{str(err)} - {traceback.format_exc()}')
                return
            if episode >= 10000:
                return
            if self.begin_episode is None:
                self.begin_episode = episode
                self.total_episode = 1
            self.type = MediaType.TV
            self._subtitle_flag = True
        elif re.search(r'[全第季集话話期幕]', title_text, re.IGNORECASE):
            # "all N seasons" forms
            season_all_match = re.search(self._subtitle_season_all_re, title_text, re.IGNORECASE)
            if season_all_match:
                # The pattern has two alternatives; use whichever group matched.
                season_all = season_all_match.group(1) or season_all_match.group(2)
                if season_all and self.begin_season is None and self.begin_episode is None:
                    try:
                        self.total_season = self._to_int(season_all)
                    except Exception as err:
                        logger.debug(f'识别季失败:{str(err)} - {traceback.format_exc()}')
                        return
                    self.begin_season = 1
                    self.end_season = self.total_season
                    self.type = MediaType.TV
                    self._subtitle_flag = True
                return
            # "season N" / "season N-M"
            season_match = re.search(self._subtitle_season_re, title_text, re.IGNORECASE)
            if season_match:
                seasons = season_match.group(1)
                if not seasons:
                    return
                seasons = seasons.upper().replace("S", "").strip()
                try:
                    end_season = None
                    if seasons.find('-') != -1:
                        season_parts = seasons.split('-')
                        begin_season = self._to_int(season_parts[0])
                        if len(season_parts) > 1:
                            end_season = self._to_int(season_parts[1])
                    else:
                        begin_season = self._to_int(seasons)
                except Exception as err:
                    logger.debug(f'识别季失败:{str(err)} - {traceback.format_exc()}')
                    return
                # Season numbers above 100 are rejected.
                if begin_season and begin_season > 100:
                    return
                if end_season and end_season > 100:
                    return
                if self.begin_season is None:
                    self.begin_season = begin_season
                    self.total_season = 1
                if self.begin_season is not None \
                        and self.end_season is None \
                        and end_season is not None \
                        and end_season != self.begin_season:
                    self.end_season = end_season
                    self.total_season = (self.end_season - self.begin_season) + 1
                self.type = MediaType.TV
                self._subtitle_flag = True
                # No return here: episode recognition below still runs.
            # "episode N-M" / "episode N - episode M"
            episode_between_match = re.search(self._subtitle_episode_between_re, title_text, re.IGNORECASE)
            if episode_between_match:
                episodes = episode_between_match.groups()
                if not episodes:
                    return
                try:
                    begin_episode = self._to_int(episodes[0])
                    end_episode = self._to_int(episodes[1])
                except Exception as err:
                    logger.debug(f'识别集失败:{str(err)} - {traceback.format_exc()}')
                    return
                # Episode numbers of 10000 and above are rejected.
                if begin_episode and begin_episode >= 10000:
                    return
                if end_episode and end_episode >= 10000:
                    return
                if self.begin_episode is None:
                    self.begin_episode = begin_episode
                    self.total_episode = 1
                if self.begin_episode is not None \
                        and self.end_episode is None \
                        and end_episode != self.begin_episode:
                    self.end_episode = end_episode
                    self.total_episode = (self.end_episode - self.begin_episode) + 1
                self.type = MediaType.TV
                self._subtitle_flag = True
                return
            # "episode N"
            episode_match = re.search(self._subtitle_episode_re, title_text, re.IGNORECASE)
            if episode_match:
                episodes = episode_match.group(1)
                if not episodes:
                    return
                episodes = episodes.upper().replace("E", "").replace("P", "").strip()
                try:
                    end_episode = None
                    if episodes.find('-') != -1:
                        episode_parts = episodes.split('-')
                        begin_episode = self._to_int(episode_parts[0])
                        if len(episode_parts) > 1:
                            end_episode = self._to_int(episode_parts[1])
                    else:
                        begin_episode = self._to_int(episodes)
                except Exception as err:
                    logger.debug(f'识别集失败:{str(err)} - {traceback.format_exc()}')
                    return
                if begin_episode and begin_episode >= 10000:
                    return
                if end_episode and end_episode >= 10000:
                    return
                if self.begin_episode is None:
                    self.begin_episode = begin_episode
                    self.total_episode = 1
                if self.begin_episode is not None \
                        and self.end_episode is None \
                        and end_episode is not None \
                        and end_episode != self.begin_episode:
                    self.end_episode = end_episode
                    self.total_episode = (self.end_episode - self.begin_episode) + 1
                self.type = MediaType.TV
                self._subtitle_flag = True
                return
            # "all N episodes" forms
            episode_all_match = re.search(self._subtitle_episode_all_re, title_text, re.IGNORECASE)
            if episode_all_match:
                episode_all = episode_all_match.group(1) or episode_all_match.group(2)
                if episode_all and self.begin_episode is None:
                    try:
                        self.total_episode = self._to_int(episode_all)
                    except Exception as err:
                        logger.debug(f'识别集失败:{str(err)} - {traceback.format_exc()}')
                        return
                    # Only the total is known; the range stays unset.
                    self.begin_episode = None
                    self.end_episode = None
                    self.type = MediaType.TV
                    self._subtitle_flag = True
                return

    @property
    def season(self) -> str:
        """
        Return the season string: "Sxx" or "Sxx-Sxx" for a range.

        With no season set, a TV item yields "S01" and anything else "".
        """
        if self.begin_season is not None:
            begin_text = str(self.begin_season).rjust(2, "0")
            if self.end_season is None:
                return "S%s" % begin_text
            return "S%s-S%s" % (begin_text, str(self.end_season).rjust(2, "0"))
        if self.type == MediaType.TV:
            return "S01"
        return ""

    @property
    def sea(self) -> str:
        """
        Return the same value as ``season`` when a first season is set,
        otherwise an empty string (no "S01" fallback).
        """
        if self.begin_season is not None:
            return self.season
        return ""

    @property
    def season_seq(self) -> str:
        """
        Return ``begin_season`` as a string; with no season set, a TV item
        yields "1" and anything else "".
        """
        if self.begin_season is not None:
            return str(self.begin_season)
        if self.type == MediaType.TV:
            return "1"
        return ""

    @property
    def season_list(self) -> List[int]:
        """
        Return the seasons as a list; with no season set, a TV item yields
        [1] and anything else [].
        """
        if self.begin_season is None:
            return [1] if self.type == MediaType.TV else []
        if self.end_season is not None:
            return list(range(self.begin_season, self.end_season + 1))
        return [self.begin_season]

    @property
    def episode(self) -> str:
        """
        Return the episode string: "Exx" or "Exx-Exx" for a range, "" when no
        first episode is set.
        """
        if self.begin_episode is None:
            return ""
        begin_text = str(self.begin_episode).rjust(2, "0")
        if self.end_episode is None:
            return "E%s" % begin_text
        return "E%s-E%s" % (begin_text, str(self.end_episode).rjust(2, "0"))

    @property
    def episode_list(self) -> List[int]:
        """
        Return the episodes as a list, [] when no first episode is set.
        """
        if self.begin_episode is None:
            return []
        if self.end_episode is not None:
            return list(range(self.begin_episode, self.end_episode + 1))
        return [self.begin_episode]

    @property
    def episodes(self) -> str:
        """
        Return the episodes in juxtaposed form (e.g. "E01E02"), used for
        single files that contain several episodes.
        """
        # NOTE(review): with no episode set this yields a bare "E" --
        # confirm that callers expect that.
        return "E%s" % "E".join(str(episode).rjust(2, '0') for episode in self.episode_list)

    @property
    def episode_seqs(self) -> str:
        """
        Return the episode numbers without prefix: "N" for one episode,
        "first-last" for several, "" for none.
        """
        episodes = self.episode_list
        if not episodes:
            return ""
        if len(episodes) == 1:
            return str(episodes[0])
        return "%s-%s" % (episodes[0], episodes[-1])

    @property
    def episode_seq(self) -> str:
        """
        Return the first episode number as a string, "" when there is none.
        """
        episodes = self.episode_list
        if episodes:
            return str(episodes[0])
        return ""

    @property
    def season_episode(self) -> str:
        """
        Return the combined season/episode string for TV items, "" otherwise.
        """
        if self.type == MediaType.TV:
            season_text = self.season
            episode_text = self.episode
            if season_text and episode_text:
                return "%s %s" % (season_text, episode_text)
            elif season_text:
                return "%s" % season_text
            elif episode_text:
                return "%s" % episode_text
            else:
                return ""
        return ""

    @property
    def resource_term(self) -> str:
        """
        Return resource type, effect and resolution joined by spaces.

        Each present part is prefixed with a space and the result is not
        stripped, so a non-empty result starts with a space.
        """
        ret_string = ""
        if self.resource_type:
            ret_string = f"{ret_string} {self.resource_type}"
        if self.resource_effect:
            ret_string = f"{ret_string} {self.resource_effect}"
        if self.resource_pix:
            ret_string = f"{ret_string} {self.resource_pix}"
        return ret_string

    @property
    def edition(self) -> str:
        """
        Return resource type and effect joined by a space (no resolution),
        stripped of surrounding whitespace.
        """
        ret_string = ""
        if self.resource_type:
            ret_string = f"{ret_string} {self.resource_type}"
        if self.resource_effect:
            ret_string = f"{ret_string} {self.resource_effect}"
        return ret_string.strip()

    @property
    def release_group(self) -> str:
        """
        Return the release group / subtitle group, "" when unset.
        """
        return self.resource_team or ""

    @property
    def video_term(self) -> str:
        """
        Return the video codec, "" when unset.
        """
        return self.video_encode or ""

    @property
    def audio_term(self) -> str:
        """
        Return the audio codec, "" when unset.
        """
        return self.audio_encode or ""

    def is_in_season(self, season: Union[list, int, str]) -> bool:
        """
        Tell whether the given season(s) fall within this item's seasons.

        A list must be entirely contained; a scalar is converted with int().
        With no first season set, the item is treated as season 1.
        """
        if isinstance(season, list):
            if self.end_season is not None:
                meta_season = list(range(self.begin_season, self.end_season + 1))
            elif self.begin_season is not None:
                meta_season = [self.begin_season]
            else:
                meta_season = [1]
            return set(meta_season).issuperset(set(season))
        if self.end_season is not None:
            return self.begin_season <= int(season) <= self.end_season
        if self.begin_season is not None:
            return int(season) == self.begin_season
        return int(season) == 1

    def is_in_episode(self, episode: Union[list, int, str]) -> bool:
        """
        Tell whether the given episode(s) fall within this item's episodes.

        A list must be entirely contained; a scalar is converted with int().
        """
        if isinstance(episode, list):
            if self.end_episode is not None:
                meta_episode = list(range(self.begin_episode, self.end_episode + 1))
            else:
                meta_episode = [self.begin_episode]
            return set(meta_episode).issuperset(set(episode))
        if self.end_episode is not None:
            return self.begin_episode <= int(episode) <= self.end_episode
        return int(episode) == self.begin_episode

    def set_season(self, sea: Union[list, int, str]):
        """
        Update the season range.

        A one-element list or a scalar sets a single season; a longer list
        sets first and last element as the range. Falsy or non-digit input
        is ignored.
        """
        if not sea:
            return
        if isinstance(sea, list):
            if len(sea) == 1 and str(sea[0]).isdigit():
                self.begin_season = int(sea[0])
                self.end_season = None
            elif len(sea) > 1 and str(sea[0]).isdigit() and str(sea[-1]).isdigit():
                self.begin_season = int(sea[0])
                self.end_season = int(sea[-1])
        elif str(sea).isdigit():
            self.begin_season = int(sea)
            self.end_season = None

    def set_episode(self, ep: Union[list, int, str]):
        """
        Update the episode range.

        A one-element list or a scalar sets a single episode; a longer list
        sets first and last element as the range and recomputes
        ``total_episode``. Falsy or non-digit input is ignored.
        """
        if not ep:
            return
        if isinstance(ep, list):
            if len(ep) == 1 and str(ep[0]).isdigit():
                self.begin_episode = int(ep[0])
                self.end_episode = None
            elif len(ep) > 1 and str(ep[0]).isdigit() and str(ep[-1]).isdigit():
                self.begin_episode = int(ep[0])
                self.end_episode = int(ep[-1])
                self.total_episode = (self.end_episode - self.begin_episode) + 1
        elif str(ep).isdigit():
            self.begin_episode = int(ep)
            self.end_episode = None

    def set_episodes(self, begin: int, end: int):
        """
        Set first and last episode; a falsy argument leaves the matching
        field untouched. ``total_episode`` is recomputed when both ends are
        truthy afterwards.
        """
        if begin:
            self.begin_episode = begin
        if end:
            self.end_episode = end
        if self.begin_episode and self.end_episode:
            self.total_episode = (self.end_episode - self.begin_episode) + 1

    def merge(self, meta: Self):
        """
        Merge another meta into this one: every field that is still unset
        here is filled from ``meta``.
        """
        # Type
        if self.type == MediaType.UNKNOWN \
                and meta.type != MediaType.UNKNOWN:
            self.type = meta.type
        # Name
        if not self.name:
            self.cn_name = meta.cn_name
            self.en_name = meta.en_name
        # Year
        if not self.year:
            self.year = meta.year
        # Season (TV only)
        if self.type == MediaType.TV and not self.begin_season:
            self.begin_season = meta.begin_season
            self.end_season = meta.end_season
            self.total_season = meta.total_season
        # Episode (TV only)
        if self.type == MediaType.TV and not self.begin_episode:
            self.begin_episode = meta.begin_episode
            self.end_episode = meta.end_episode
            self.total_episode = meta.total_episode
        # Resource type
        if not self.resource_type:
            self.resource_type = meta.resource_type
        # Resolution
        if not self.resource_pix:
            self.resource_pix = meta.resource_pix
        # Release group / subtitle group
        if not self.resource_team:
            self.resource_team = meta.resource_team
        # Custom placeholder
        if not self.customization:
            self.customization = meta.customization
        # Effect
        if not self.resource_effect:
            self.resource_effect = meta.resource_effect
        # Video codec
        if not self.video_encode:
            self.video_encode = meta.video_encode
        # Audio codec
        if not self.audio_encode:
            self.audio_encode = meta.audio_encode
        # Part
        if not self.part:
            self.part = meta.part

    def to_dict(self):
        """
        Convert to a dict: the dataclass fields, with ``type`` replaced by the
        enum's value, plus the computed ``season_episode``, ``edition`` and
        ``name``.
        """
        dicts = asdict(self)
        dicts["type"] = self.type.value if self.type else None
        dicts["season_episode"] = self.season_episode
        dicts["edition"] = self.edition
        dicts["name"] = self.name
        return dicts