feat 支持自定义词表
This commit is contained in:
parent
5f613874db
commit
2b9c4b081e
@ -151,6 +151,7 @@ class MessageChain(ChainBase):
|
|||||||
f"{self._current_meta.sea} 媒体库中已存在",
|
f"{self._current_meta.sea} 媒体库中已存在",
|
||||||
userid=userid))
|
userid=userid))
|
||||||
return
|
return
|
||||||
|
# 添加订阅,状态为N
|
||||||
self.subscribechain.add(title=mediainfo.title,
|
self.subscribechain.add(title=mediainfo.title,
|
||||||
year=mediainfo.year,
|
year=mediainfo.year,
|
||||||
mtype=mediainfo.type,
|
mtype=mediainfo.type,
|
||||||
@ -182,7 +183,7 @@ class MessageChain(ChainBase):
|
|||||||
else:
|
else:
|
||||||
# 未完成下载
|
# 未完成下载
|
||||||
logger.info(f'{self._current_media.title_year} 未下载未完整,添加订阅 ...')
|
logger.info(f'{self._current_media.title_year} 未下载未完整,添加订阅 ...')
|
||||||
# 添加订阅
|
# 添加订阅,状态为R
|
||||||
self.subscribechain.add(title=self._current_media.title,
|
self.subscribechain.add(title=self._current_media.title,
|
||||||
year=self._current_media.year,
|
year=self._current_media.year,
|
||||||
mtype=self._current_media.type,
|
mtype=self._current_media.type,
|
||||||
@ -190,7 +191,8 @@ class MessageChain(ChainBase):
|
|||||||
season=self._current_meta.begin_season,
|
season=self._current_meta.begin_season,
|
||||||
channel=channel,
|
channel=channel,
|
||||||
userid=userid,
|
userid=userid,
|
||||||
username=username)
|
username=username,
|
||||||
|
state="R")
|
||||||
else:
|
else:
|
||||||
# 下载种子
|
# 下载种子
|
||||||
context: Context = cache_list[int(text) - 1]
|
context: Context = cache_list[int(text) - 1]
|
||||||
@ -203,13 +205,13 @@ class MessageChain(ChainBase):
|
|||||||
if not cache_data:
|
if not cache_data:
|
||||||
# 没有缓存
|
# 没有缓存
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title="输入有误!", userid=userid))
|
channel=channel, title="输入有误!", userid=userid))
|
||||||
return
|
return
|
||||||
|
|
||||||
if self._current_page == 0:
|
if self._current_page == 0:
|
||||||
# 第一页
|
# 第一页
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title="已经是第一页了!", userid=userid))
|
channel=channel, title="已经是第一页了!", userid=userid))
|
||||||
return
|
return
|
||||||
cache_type: str = cache_data.get('type')
|
cache_type: str = cache_data.get('type')
|
||||||
cache_list: list = cache_data.get('items')
|
cache_list: list = cache_data.get('items')
|
||||||
@ -242,7 +244,7 @@ class MessageChain(ChainBase):
|
|||||||
if not cache_data:
|
if not cache_data:
|
||||||
# 没有缓存
|
# 没有缓存
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title="输入有误!", userid=userid))
|
channel=channel, title="输入有误!", userid=userid))
|
||||||
return
|
return
|
||||||
cache_type: str = cache_data.get('type')
|
cache_type: str = cache_data.get('type')
|
||||||
cache_list: list = cache_data.get('items')
|
cache_list: list = cache_data.get('items')
|
||||||
@ -253,7 +255,7 @@ class MessageChain(ChainBase):
|
|||||||
if not cache_list:
|
if not cache_list:
|
||||||
# 没有数据
|
# 没有数据
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title="已经是最后一页了!", userid=userid))
|
channel=channel, title="已经是最后一页了!", userid=userid))
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
if cache_type == "Torrent":
|
if cache_type == "Torrent":
|
||||||
@ -282,12 +284,12 @@ class MessageChain(ChainBase):
|
|||||||
# 识别
|
# 识别
|
||||||
if not meta.name:
|
if not meta.name:
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title="无法识别输入内容!", userid=userid))
|
channel=channel, title="无法识别输入内容!", userid=userid))
|
||||||
return
|
return
|
||||||
# 开始搜索
|
# 开始搜索
|
||||||
if not medias:
|
if not medias:
|
||||||
self.post_message(Notification(
|
self.post_message(Notification(
|
||||||
channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid))
|
channel=channel, title=f"{meta.name} 没有找到对应的媒体信息!", userid=userid))
|
||||||
return
|
return
|
||||||
logger.info(f"搜索到 {len(medias)} 条相关媒体信息")
|
logger.info(f"搜索到 {len(medias)} 条相关媒体信息")
|
||||||
# 记录当前状态
|
# 记录当前状态
|
||||||
|
@ -2,7 +2,7 @@ import re
|
|||||||
import zhconv
|
import zhconv
|
||||||
import anitopy
|
import anitopy
|
||||||
from app.core.meta.metabase import MetaBase
|
from app.core.meta.metabase import MetaBase
|
||||||
from app.core.meta.release_groups import ReleaseGroupsMatcher
|
from app.core.meta.releasegroup import ReleaseGroupsMatcher
|
||||||
from app.utils.string import StringUtils
|
from app.utils.string import StringUtils
|
||||||
from app.schemas.types import MediaType
|
from app.schemas.types import MediaType
|
||||||
|
|
||||||
|
@ -15,7 +15,9 @@ class MetaBase(object):
|
|||||||
"""
|
"""
|
||||||
# 是否处理的文件
|
# 是否处理的文件
|
||||||
isfile: bool = False
|
isfile: bool = False
|
||||||
# 原字符串
|
# 原标题字符串
|
||||||
|
title: str = ""
|
||||||
|
# 识别用字符串
|
||||||
org_string: Optional[str] = None
|
org_string: Optional[str] = None
|
||||||
# 副标题
|
# 副标题
|
||||||
subtitle: Optional[str] = None
|
subtitle: Optional[str] = None
|
||||||
@ -53,6 +55,8 @@ class MetaBase(object):
|
|||||||
video_encode: Optional[str] = None
|
video_encode: Optional[str] = None
|
||||||
# 音频编码
|
# 音频编码
|
||||||
audio_encode: Optional[str] = None
|
audio_encode: Optional[str] = None
|
||||||
|
# 应用的识别词信息
|
||||||
|
apply_words: Optional[List[str]] = None
|
||||||
|
|
||||||
# 副标题解析
|
# 副标题解析
|
||||||
_subtitle_flag = False
|
_subtitle_flag = False
|
||||||
|
@ -3,7 +3,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.meta.metabase import MetaBase
|
from app.core.meta.metabase import MetaBase
|
||||||
from app.core.meta.release_groups import ReleaseGroupsMatcher
|
from app.core.meta.releasegroup import ReleaseGroupsMatcher
|
||||||
from app.utils.string import StringUtils
|
from app.utils.string import StringUtils
|
||||||
from app.utils.tokens import Tokens
|
from app.utils.tokens import Tokens
|
||||||
from app.schemas.types import MediaType
|
from app.schemas.types import MediaType
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
|
from app.db.systemconfig_oper import SystemConfigOper
|
||||||
|
from app.schemas.types import SystemConfigKey
|
||||||
from app.utils.singleton import Singleton
|
from app.utils.singleton import Singleton
|
||||||
|
|
||||||
|
|
||||||
@ -8,8 +10,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
|
|||||||
识别制作组、字幕组
|
识别制作组、字幕组
|
||||||
"""
|
"""
|
||||||
__release_groups: str = None
|
__release_groups: str = None
|
||||||
custom_release_groups: str = None
|
# 内置组
|
||||||
custom_separator: str = None
|
|
||||||
RELEASE_GROUPS: dict = {
|
RELEASE_GROUPS: dict = {
|
||||||
"0ff": ['FF(?:(?:A|WE)B|CD|E(?:DU|B)|TV)'],
|
"0ff": ['FF(?:(?:A|WE)B|CD|E(?:DU|B)|TV)'],
|
||||||
"1pt": [],
|
"1pt": [],
|
||||||
@ -74,6 +75,7 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.systemconfig = SystemConfigOper()
|
||||||
release_groups = []
|
release_groups = []
|
||||||
for site_groups in self.RELEASE_GROUPS.values():
|
for site_groups in self.RELEASE_GROUPS.values():
|
||||||
for release_group in site_groups:
|
for release_group in site_groups:
|
||||||
@ -89,8 +91,10 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
|
|||||||
if not title:
|
if not title:
|
||||||
return ""
|
return ""
|
||||||
if not groups:
|
if not groups:
|
||||||
if self.custom_release_groups:
|
# 自定义组
|
||||||
groups = f"{self.__release_groups}|{self.custom_release_groups}"
|
custom_release_groups = self.systemconfig.get(SystemConfigKey.CustomReleaseGroups)
|
||||||
|
if custom_release_groups:
|
||||||
|
groups = f"{self.__release_groups}|{custom_release_groups}"
|
||||||
else:
|
else:
|
||||||
groups = self.__release_groups
|
groups = self.__release_groups
|
||||||
title = f"{title} "
|
title = f"{title} "
|
||||||
@ -100,12 +104,4 @@ class ReleaseGroupsMatcher(metaclass=Singleton):
|
|||||||
for item in re.findall(groups_re, title):
|
for item in re.findall(groups_re, title):
|
||||||
if item not in unique_groups:
|
if item not in unique_groups:
|
||||||
unique_groups.append(item)
|
unique_groups.append(item)
|
||||||
separator = self.custom_separator or "@"
|
return "@".join(unique_groups)
|
||||||
return separator.join(unique_groups)
|
|
||||||
|
|
||||||
def update_custom(self, release_groups: str = None, separator: str = None):
|
|
||||||
"""
|
|
||||||
更新自定义制作组/字幕组,自定义分隔符
|
|
||||||
"""
|
|
||||||
self.custom_release_groups = release_groups
|
|
||||||
self.custom_separator = separator
|
|
118
app/core/meta/words.py
Normal file
118
app/core/meta/words.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
import cn2an
|
||||||
|
import regex as re
|
||||||
|
|
||||||
|
from app.db.systemconfig_oper import SystemConfigOper
|
||||||
|
from app.log import logger
|
||||||
|
from app.schemas.types import SystemConfigKey
|
||||||
|
from app.utils.singleton import Singleton
|
||||||
|
|
||||||
|
|
||||||
|
class WordsMatcher(metaclass=Singleton):
    """
    Pre-process titles with the user-defined identifier words read from the
    system configuration (``SystemConfigKey.CustomIdentifiers``).
    """

    def __init__(self):
        self.systemconfig = SystemConfigOper()

    def prepare(self, title: str) -> Tuple[str, List[str]]:
        """
        Apply every configured identifier word to the title.

        Three word formats are supported:
          1. ``blocked``                          - the pattern is removed from the title
          2. ``replaced => replacement``          - regex replacement
          3. ``front <> back >> offset``          - episode offset; ``offset`` is an
             expression in which ``EP`` stands for the episode number found
             between the ``front`` and ``back`` anchors

        :param title: the title to pre-process
        :return: the processed title and the list of words that were actually applied
        """
        apply_words = []
        # Read the custom identifier words
        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
        for word in words:
            if not word:
                continue
            try:
                if " => " in word:
                    # Replacement word
                    strings = word.split(" => ")
                    title, message, state = self.__replace_regex(title, strings[0], strings[1])
                elif " >> " in word and " <> " in word:
                    # Episode offset
                    strings = word.split(" <> ")
                    offsets = strings[1].split(" >> ")
                    # The back anchor is the part before " >> "; strings[1] still
                    # carries the offset expression and would never match the title.
                    title, message, state = self.__episode_offset(title, strings[0], offsets[0],
                                                                  offsets[1])
                else:
                    # Blocked word
                    title, message, state = self.__replace_regex(title, word, "")
            except Exception as err:
                # A malformed word must not abort the processing of the remaining words
                logger.error(f"自定义识别词替换失败:{err}")
                continue

            if state:
                apply_words.append(word)
            elif message:
                # An empty message only means "word did not match", which is not an error
                logger.error(f"自定义识别词替换失败:{message}")

        return title, apply_words

    @staticmethod
    def __replace_regex(title: str, replaced: str, replace: str) -> Tuple[str, str, bool]:
        """
        Regex replacement.

        :param title: the title to process
        :param replaced: regex pattern to look for
        :param replace: replacement template
        :return: (title, error message, applied flag); the title is returned
                 unchanged when the pattern does not match or an error occurs
        """
        try:
            if not re.search(replaced, title):
                return title, "", False
            return re.sub(replaced, replace, title), "", True
        except Exception as err:
            # The caller logs the returned message
            return title, str(err), False

    @staticmethod
    def __episode_offset(title: str, front: str, back: str, offset: str) -> Tuple[str, str, bool]:
        """
        Episode offset.

        Every Arabic or Chinese number located after ``front`` and before
        ``back`` is replaced by the value of ``offset`` evaluated with ``EP``
        substituted by that number.

        :param title: the title to process
        :param front: regex pattern of the front anchor (may be empty)
        :param back: regex pattern of the back anchor (may be empty)
        :param offset: offset expression containing ``EP``, e.g. ``EP+12``
        :return: (title, error message, applied flag); the title is returned
                 unchanged when nothing matches or an error occurs
        """
        try:
            if back and not re.search(back, title):
                return title, "", False
            if front and not re.search(front, title):
                return title, "", False
            # Variable-length look-behind: relies on the `regex` module imported as `re`
            episode_re = re.compile(r'(?<=%s.*?)[0-9一二三四五六七八九十]+(?=.*?%s)' % (front, back))

            def shift_episode(match) -> str:
                episode_num_str = match.group(0)
                episode_num_int = int(cn2an.cn2an(episode_num_str, "smart"))
                offset_calculate = offset.replace("EP", str(episode_num_int))
                # NOTE(security): eval() executes the configured expression as Python code;
                # the identifier words must only come from trusted administrators.
                episode_num_offset_int = int(eval(offset_calculate))
                # Chinese numerals are converted back to Chinese numerals
                if not episode_num_str.isdigit():
                    return cn2an.an2cn(episode_num_offset_int, "low")
                # Arabic numerals keep the original zero-padded width ("09" + 1 -> "10")
                if episode_num_str.startswith("0"):
                    return str(episode_num_offset_int).zfill(len(episode_num_str))
                return str(episode_num_offset_int)

            # Replace all numbers in a single pass so an already shifted number is never
            # shifted again and digits inside a longer number are never touched.
            new_title, replaced_count = episode_re.subn(shift_episode, title)
            if not replaced_count:
                return title, "", False
            return new_title, "", True
        except Exception as err:
            # The caller logs the returned message
            return title, str(err), False
|
@ -3,27 +3,37 @@ from pathlib import Path
|
|||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.meta import MetaAnime, MetaVideo
|
from app.core.meta import MetaAnime, MetaVideo, MetaBase
|
||||||
|
from app.core.meta.words import WordsMatcher
|
||||||
|
|
||||||
|
|
||||||
def MetaInfo(title: str, subtitle: str = None):
|
def MetaInfo(title: str, subtitle: str = None) -> MetaBase:
|
||||||
"""
|
"""
|
||||||
媒体整理入口,根据名称和副标题,判断是哪种类型的识别,返回对应对象
|
媒体整理入口,根据名称和副标题,判断是哪种类型的识别,返回对应对象
|
||||||
:param title: 标题、种子名、文件名
|
:param title: 标题、种子名、文件名
|
||||||
:param subtitle: 副标题、描述
|
:param subtitle: 副标题、描述
|
||||||
:return: MetaAnime、MetaVideo
|
:return: MetaAnime、MetaVideo
|
||||||
"""
|
"""
|
||||||
|
# 原标题
|
||||||
|
org_title = title
|
||||||
|
# 预处理标题
|
||||||
|
title, apply_words = WordsMatcher().prepare(title)
|
||||||
# 判断是否处理文件
|
# 判断是否处理文件
|
||||||
if title and Path(title).suffix.lower() in settings.RMT_MEDIAEXT:
|
if title and Path(title).suffix.lower() in settings.RMT_MEDIAEXT:
|
||||||
isfile = True
|
isfile = True
|
||||||
else:
|
else:
|
||||||
isfile = False
|
isfile = False
|
||||||
|
# 识别
|
||||||
|
meta = MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile)
|
||||||
|
# 记录原标题
|
||||||
|
meta.title = org_title
|
||||||
|
# 记录使用的识别词
|
||||||
|
meta.apply_words = apply_words or []
|
||||||
|
|
||||||
return MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile)
|
return meta
|
||||||
|
|
||||||
|
|
||||||
def is_anime(name: str):
|
def is_anime(name: str) -> bool:
|
||||||
"""
|
"""
|
||||||
判断是否为动漫
|
判断是否为动漫
|
||||||
:param name: 名称
|
:param name: 名称
|
||||||
|
@ -1,28 +0,0 @@
|
|||||||
from typing import Tuple, Union
|
|
||||||
|
|
||||||
from app.modules import _ModuleBase
|
|
||||||
|
|
||||||
|
|
||||||
class WordsModule(_ModuleBase):
|
|
||||||
"""
|
|
||||||
字幕下载模块
|
|
||||||
"""
|
|
||||||
|
|
||||||
def init_module(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def init_setting(self) -> Tuple[str, Union[str, bool]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def stop(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def prepare_recognize(self, title: str,
|
|
||||||
subtitle: str = None) -> Tuple[str, str]:
|
|
||||||
"""
|
|
||||||
处理各类特别命名,以便识别
|
|
||||||
:param title: 标题
|
|
||||||
:param subtitle: 副标题
|
|
||||||
:return: 处理后的标题、副标题,该方法可被多个模块同时处理
|
|
||||||
"""
|
|
||||||
pass
|
|
@ -42,6 +42,10 @@ class SystemConfigKey(Enum):
|
|||||||
TorrentsPriority = "TorrentsPriority"
|
TorrentsPriority = "TorrentsPriority"
|
||||||
# 通知消息渠道设置
|
# 通知消息渠道设置
|
||||||
NotificationChannels = "NotificationChannels"
|
NotificationChannels = "NotificationChannels"
|
||||||
|
# 自定义制作组/字幕组
|
||||||
|
CustomReleaseGroups = "CustomReleaseGroups"
|
||||||
|
# 自定义识别词
|
||||||
|
CustomIdentifiers = "CustomIdentifiers"
|
||||||
|
|
||||||
|
|
||||||
# 站点框架
|
# 站点框架
|
||||||
|
Loading…
x
Reference in New Issue
Block a user