diff --git a/app/chain/message.py b/app/chain/message.py index a53bf3ed..e83dc3b5 100644 --- a/app/chain/message.py +++ b/app/chain/message.py @@ -151,6 +151,7 @@ class MessageChain(ChainBase): f"{self._current_meta.sea} 媒体库中已存在", userid=userid)) return + # 添加订阅,状态为N self.subscribechain.add(title=mediainfo.title, year=mediainfo.year, mtype=mediainfo.type, @@ -182,7 +183,7 @@ class MessageChain(ChainBase): else: # 未完成下载 logger.info(f'{self._current_media.title_year} 未下载未完整,添加订阅 ...') - # 添加订阅 + # 添加订阅,状态为R self.subscribechain.add(title=self._current_media.title, year=self._current_media.year, mtype=self._current_media.type, @@ -190,7 +191,8 @@ class MessageChain(ChainBase): season=self._current_meta.begin_season, channel=channel, userid=userid, - username=username) + username=username, + state="R") else: # 下载种子 context: Context = cache_list[int(text) - 1] @@ -203,13 +205,13 @@ class MessageChain(ChainBase): if not cache_data: # 没有缓存 self.post_message(Notification( - channel=channel, title="输入有误!", userid=userid)) + channel=channel, title="输入有误!", userid=userid)) return if self._current_page == 0: # 第一页 self.post_message(Notification( - channel=channel, title="已经是第一页了!", userid=userid)) + channel=channel, title="已经是第一页了!", userid=userid)) return cache_type: str = cache_data.get('type') cache_list: list = cache_data.get('items') @@ -242,7 +244,7 @@ class MessageChain(ChainBase): if not cache_data: # 没有缓存 self.post_message(Notification( - channel=channel, title="输入有误!", userid=userid)) + channel=channel, title="输入有误!", userid=userid)) return cache_type: str = cache_data.get('type') cache_list: list = cache_data.get('items') @@ -253,7 +255,7 @@ class MessageChain(ChainBase): if not cache_list: # 没有数据 self.post_message(Notification( - channel=channel, title="已经是最后一页了!", userid=userid)) + channel=channel, title="已经是最后一页了!", userid=userid)) return else: if cache_type == "Torrent": @@ -282,12 +284,12 @@ class MessageChain(ChainBase): # 识别 if not meta.name: 
self.post_message(Notification( - channel=channel, title="无法识别输入内容!", userid=userid)) + channel=channel, title="无法识别输入内容!", userid=userid)) return # 开始搜索 if not medias: self.post_message(Notification( - channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid)) + channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid)) return logger.info(f"搜索到 {len(medias)} 条相关媒体信息") # 记录当前状态 diff --git a/app/core/meta/metaanime.py b/app/core/meta/metaanime.py index 606022df..98816aaf 100644 --- a/app/core/meta/metaanime.py +++ b/app/core/meta/metaanime.py @@ -2,7 +2,7 @@ import re import zhconv import anitopy from app.core.meta.metabase import MetaBase -from app.core.meta.release_groups import ReleaseGroupsMatcher +from app.core.meta.releasegroup import ReleaseGroupsMatcher from app.utils.string import StringUtils from app.schemas.types import MediaType diff --git a/app/core/meta/metabase.py b/app/core/meta/metabase.py index 6721e218..a4440256 100644 --- a/app/core/meta/metabase.py +++ b/app/core/meta/metabase.py @@ -15,7 +15,9 @@ class MetaBase(object): """ # 是否处理的文件 isfile: bool = False - # 原字符串 + # 原标题字符串 + title: str = "" + # 识别用字符串 org_string: Optional[str] = None # 副标题 subtitle: Optional[str] = None @@ -53,6 +55,8 @@ class MetaBase(object): video_encode: Optional[str] = None # 音频编码 audio_encode: Optional[str] = None + # 应用的识别词信息 + apply_words: Optional[List[str]] = None # 副标题解析 _subtitle_flag = False diff --git a/app/core/meta/metavideo.py b/app/core/meta/metavideo.py index 8ce94706..fa2feddb 100644 --- a/app/core/meta/metavideo.py +++ b/app/core/meta/metavideo.py @@ -3,7 +3,7 @@ from pathlib import Path from app.core.config import settings from app.core.meta.metabase import MetaBase -from app.core.meta.release_groups import ReleaseGroupsMatcher +from app.core.meta.releasegroup import ReleaseGroupsMatcher from app.utils.string import StringUtils from app.utils.tokens import Tokens from app.schemas.types import MediaType diff --git 
a/app/core/meta/release_groups.py b/app/core/meta/releasegroup.py similarity index 86% rename from app/core/meta/release_groups.py rename to app/core/meta/releasegroup.py index b360d3d4..0d49d709 100644 --- a/app/core/meta/release_groups.py +++ b/app/core/meta/releasegroup.py @@ -1,5 +1,7 @@ import regex as re +from app.db.systemconfig_oper import SystemConfigOper +from app.schemas.types import SystemConfigKey from app.utils.singleton import Singleton @@ -8,8 +10,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton): 识别制作组、字幕组 """ __release_groups: str = None - custom_release_groups: str = None - custom_separator: str = None + # 内置组 RELEASE_GROUPS: dict = { "0ff": ['FF(?:(?:A|WE)B|CD|E(?:DU|B)|TV)'], "1pt": [], @@ -74,6 +75,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton): } def __init__(self): + self.systemconfig = SystemConfigOper() release_groups = [] for site_groups in self.RELEASE_GROUPS.values(): for release_group in site_groups: @@ -89,8 +91,10 @@ class ReleaseGroupsMatcher(metaclass=Singleton): if not title: return "" if not groups: - if self.custom_release_groups: - groups = f"{self.__release_groups}|{self.custom_release_groups}" + # 自定义组 + custom_release_groups = self.systemconfig.get(SystemConfigKey.CustomReleaseGroups) + if custom_release_groups: + groups = f"{self.__release_groups}|{custom_release_groups}" else: groups = self.__release_groups title = f"{title} " @@ -100,12 +104,4 @@ class ReleaseGroupsMatcher(metaclass=Singleton): for item in re.findall(groups_re, title): if item not in unique_groups: unique_groups.append(item) - separator = self.custom_separator or "@" - return separator.join(unique_groups) - - def update_custom(self, release_groups: str = None, separator: str = None): - """ - 更新自定义制作组/字幕组,自定义分隔符 - """ - self.custom_release_groups = release_groups - self.custom_separator = separator + return "@".join(unique_groups) diff --git a/app/core/meta/words.py b/app/core/meta/words.py new file mode 100644 index 00000000..b3480ceb --- 
/dev/null
+++ b/app/core/meta/words.py
@@ -0,0 +1,132 @@
+from typing import List, Tuple
+
+import cn2an
+import regex as re
+
+from app.db.systemconfig_oper import SystemConfigOper
+from app.log import logger
+from app.schemas.types import SystemConfigKey
+from app.utils.singleton import Singleton
+
+
+class WordsMatcher(metaclass=Singleton):
+    """
+    Pre-processes titles with the custom identifier words stored in the system config
+    """
+
+    def __init__(self):
+        self.systemconfig = SystemConfigOper()
+
+    def prepare(self, title: str) -> Tuple[str, List[str]]:
+        """
+        Pre-process the title with every custom identifier word, three formats are supported
+        1: blocked word
+        2: replaced word => replacement
+        3: front anchor <> back anchor >> offset expression (EP)
+        :param title: title to pre-process
+        :return: processed title, identifier words that were applied
+        """
+        applied_words = []
+        # Read the custom identifier words
+        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
+        for word in words:
+            if not word:
+                continue
+            try:
+                if " => " in word:
+                    # Replacement word
+                    strings = word.split(" => ")
+                    title, message, state = self.__replace_regex(title, strings[0], strings[1])
+                elif " >> " in word and " <> " in word:
+                    # Episode offset
+                    strings = word.split(" <> ")
+                    offsets = strings[1].split(" >> ")
+                    # The back anchor is only the part before " >> ", not the whole right-hand side
+                    title, message, state = self.__episode_offset(title, strings[0], offsets[0],
+                                                                  offsets[1])
+                else:
+                    # Blocked word
+                    title, message, state = self.__replace_regex(title, word, "")
+
+                if state:
+                    applied_words.append(word)
+                elif message:
+                    # An empty message only means the word did not match, which is not an error
+                    logger.error(f"自定义识别词替换失败:{message}")
+            except Exception as err:
+                logger.error(f"自定义识别词替换失败:{err}")
+
+        return title, applied_words
+
+    @staticmethod
+    def __replace_regex(title: str, replaced: str, replace: str) -> Tuple[str, str, bool]:
+        """
+        Regex replacement
+        :param title: title to process
+        :param replaced: regex to search for
+        :param replace: replacement passed to re.sub
+        :return: title, error message, whether the regex matched and was replaced
+        """
+        try:
+            if not re.search(replaced, title):
+                return title, "", False
+            return re.sub(replaced, replace, title), "", True
+        except Exception as err:
+            # The message is returned so that the caller reports it
+            return title, str(err), False
+
+    @staticmethod
+    def __episode_offset(title: str, front: str, back: str, offset: str) -> Tuple[str, str, bool]:
+        """
+        Episode offset
+        :param title: title to process
+        :param front: regex that must appear before the episode number
+        :param back: regex that must appear after the episode number
+        :param offset: expression in which EP stands for the episode number
+        :return: title, error message, whether the offset was applied
+        """
+        try:
+            if back and not re.search(back, title):
+                return title, "", False
+            if front and not re.search(front, title):
+                return title, "", False
+            offset_word_info_re = re.compile(r'(?<=%s.*?)[0-9一二三四五六七八九十]+(?=.*?%s)' % (front, back))
+            episode_nums_str = re.findall(offset_word_info_re, title)
+            if not episode_nums_str:
+                return title, "", False
+            # Original number text -> (shifted number, shifted number text)
+            episode_offsets = {}
+            offset_order_flag = False
+            for episode_num_str in episode_nums_str:
+                episode_num_int = int(cn2an.cn2an(episode_num_str, "smart"))
+                offset_caculate = offset.replace("EP", str(episode_num_int))
+                # NOTE(security): eval() runs the configured expression as Python code, make sure only
+                # trusted users can edit the identifier words or replace it with an arithmetic parser
+                episode_num_offset_int = int(eval(offset_caculate))
+                # Shift to a smaller number
+                if episode_num_int > episode_num_offset_int:
+                    offset_order_flag = True
+                # Shift to a larger number
+                elif episode_num_int < episode_num_offset_int:
+                    offset_order_flag = False
+                if not episode_num_str.isdigit():
+                    # The original is a Chinese numeral, convert the result back to Chinese
+                    episode_num_offset_str = cn2an.an2cn(episode_num_offset_int, "low")
+                elif episode_num_str.startswith("0"):
+                    # Keep the zero-padded width, e.g. 09 + 1 = 10 instead of 010
+                    episode_num_offset_str = str(episode_num_offset_int).zfill(len(episode_num_str))
+                else:
+                    episode_num_offset_str = str(episode_num_offset_int)
+                episode_offsets[episode_num_str] = (episode_num_offset_int, episode_num_offset_str)
+            # Order by numeric value, not by text ("9" > "10" as text): ascending when the numbers shrink,
+            # descending otherwise, so a number just written is less likely to be replaced again
+            episode_nums_list = sorted(episode_offsets.items(), key=lambda x: x[1][0],
+                                       reverse=not offset_order_flag)
+            for episode_num_str, (_, episode_num_offset_str) in episode_nums_list:
+                episode_offset_re = re.compile(
+                    r'(?<=%s.*?)%s(?=.*?%s)' % (front, episode_num_str, back))
+                title = re.sub(episode_offset_re, episode_num_offset_str, title)
+            return title, "", True
+        except Exception as err:
+            # The message is returned so that the caller reports it
+            return title, str(err), False
diff --git a/app/core/metainfo.py b/app/core/metainfo.py
index 331e677a..6a583b92 100644
--- a/app/core/metainfo.py
+++ b/app/core/metainfo.py
@@ -3,27 +3,37 @@ from pathlib import Path import regex as re from app.core.config import settings -from app.core.meta import MetaAnime, MetaVideo +from app.core.meta import MetaAnime,
MetaVideo, MetaBase +from app.core.meta.words import WordsMatcher -def MetaInfo(title: str, subtitle: str = None): +def MetaInfo(title: str, subtitle: str = None) -> MetaBase: """ 媒体整理入口,根据名称和副标题,判断是哪种类型的识别,返回对应对象 :param title: 标题、种子名、文件名 :param subtitle: 副标题、描述 :return: MetaAnime、MetaVideo """ - + # 原标题 + org_title = title + # 预处理标题 + title, apply_words = WordsMatcher().prepare(title) # 判断是否处理文件 if title and Path(title).suffix.lower() in settings.RMT_MEDIAEXT: isfile = True else: isfile = False + # 识别 + meta = MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile) + # 记录原标题 + meta.title = org_title + # 记录使用的识别词 + meta.apply_words = apply_words or [] - return MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile) + return meta -def is_anime(name: str): +def is_anime(name: str) -> bool: """ 判断是否为动漫 :param name: 名称 diff --git a/app/modules/words/__init__.py b/app/modules/words/__init__.py deleted file mode 100644 index 0fb2cd8a..00000000 --- a/app/modules/words/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Tuple, Union - -from app.modules import _ModuleBase - - -class WordsModule(_ModuleBase): - """ - 字幕下载模块 - """ - - def init_module(self) -> None: - pass - - def init_setting(self) -> Tuple[str, Union[str, bool]]: - pass - - def stop(self) -> None: - pass - - def prepare_recognize(self, title: str, - subtitle: str = None) -> Tuple[str, str]: - """ - 处理各类特别命名,以便识别 - :param title: 标题 - :param subtitle: 副标题 - :return: 处理后的标题、副标题,该方法可被多个模块同时处理 - """ - pass diff --git a/app/schemas/types.py b/app/schemas/types.py index 6dd360b0..a37c58ec 100644 --- a/app/schemas/types.py +++ b/app/schemas/types.py @@ -42,6 +42,10 @@ class SystemConfigKey(Enum): TorrentsPriority = "TorrentsPriority" # 通知消息渠道设置 NotificationChannels = "NotificationChannels" + # 自定义制作组/字幕组 + CustomReleaseGroups = "CustomReleaseGroups" + # 自定义识别词 + CustomIdentifiers = "CustomIdentifiers" # 站点框架