feat 支持自定义词表
This commit is contained in:
118
app/core/meta/words.py
Normal file
118
app/core/meta/words.py
Normal file
@ -0,0 +1,118 @@
|
||||
from typing import List, Tuple
|
||||
|
||||
import cn2an
|
||||
import regex as re
|
||||
|
||||
from app.db.systemconfig_oper import SystemConfigOper
|
||||
from app.log import logger
|
||||
from app.schemas.types import SystemConfigKey
|
||||
from app.utils.singleton import Singleton
|
||||
|
||||
|
||||
class WordsMatcher(metaclass=Singleton):
    """
    Applies the custom identifier words stored in the system configuration
    (``SystemConfigKey.CustomIdentifiers``) to a title.
    """

    def __init__(self):
        self.systemconfig = SystemConfigOper()

    def prepare(self, title: str) -> Tuple[str, List[str]]:
        """
        Pre-process a title with the configured custom identifier words.

        Each configured word uses one of three formats:
          1. ``blocked``                        - matches are removed from the title
          2. ``replaced => replacement``        - regex replacement
          3. ``front <> back >> offset(EP)``    - episode offset between two anchors,
             where ``EP`` in the offset expression stands for the episode number

        :param title: the title to process
        :return: ``(processed_title, applied_words)`` where ``applied_words``
                 lists the configured words that actually changed/matched the title
        """
        applied_words = []
        # Read the custom identifier words from the system configuration
        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
        for word in words:
            if not word:
                continue
            try:
                if " => " in word:
                    # Replacement word
                    strings = word.split(" => ")
                    title, message, state = self.__replace_regex(title, strings[0], strings[1])
                elif " >> " in word and " <> " in word:
                    # Episode offset: "front <> back >> offset"
                    strings = word.split(" <> ")
                    offsets = strings[1].split(" >> ")
                    # The back anchor is the part before " >> "; passing the
                    # unsplit strings[1] would include the offset expression.
                    title, message, state = self.__episode_offset(title, strings[0], offsets[0],
                                                                  offsets[1])
                else:
                    # Blocked word
                    title, message, state = self.__replace_regex(title, word, "")

                if state:
                    applied_words.append(word)
                elif message:
                    # Only a non-empty message signals a real failure; a word
                    # that simply does not match the title is not an error.
                    logger.error(f"自定义识别词替换失败:{message}")
            except Exception as err:
                # Best effort: a malformed word must not prevent the others from applying
                logger.error(f"自定义识别词替换失败:{err}")

        return title, applied_words

    @staticmethod
    def __replace_regex(title: str, replaced: str, replace: str) -> Tuple[str, str, bool]:
        """
        Regex replacement.

        :param title: the title to process
        :param replaced: regex pattern to look for
        :param replace: replacement template passed to ``re.sub``
        :return: ``(title, error_message, applied)``; ``applied`` is False when the
                 pattern does not match (empty message) or when an error occurred
                 (message holds the error text)
        """
        try:
            if not re.search(replaced, title):
                return title, "", False
            return re.sub(replaced, replace, title), "", True
        except Exception as err:
            return title, str(err), False

    @staticmethod
    def __episode_offset(title: str, front: str, back: str, offset: str) -> Tuple[str, str, bool]:
        """
        Episode offset.

        Every run of Arabic digits / Chinese numerals located after ``front`` and
        before ``back`` is replaced by the result of the ``offset`` expression,
        in which ``EP`` is substituted with the episode number.

        :param title: the title to process
        :param front: regex for the anchor before the episode number (may be empty)
        :param back: regex for the anchor after the episode number (may be empty)
        :param offset: arithmetic expression containing ``EP``, e.g. ``EP+12``
        :return: ``(title, error_message, applied)``; on no match or error the
                 original title is returned with ``applied`` False
        """
        try:
            if back and not re.search(back, title):
                return title, "", False
            if front and not re.search(front, title):
                return title, "", False
            # Variable-length lookbehind: relies on the `regex` module (imported as re)
            episode_re = re.compile(r'(?<=%s.*?)[0-9一二三四五六七八九十]+(?=.*?%s)' % (front, back))

            def shift(match) -> str:
                """Return the offset episode text for one matched episode number."""
                old_str = match.group(0)
                old_int = int(cn2an.cn2an(old_str, "smart"))
                # NOTE(security): eval() runs the configured offset expression as
                # Python code. Only safe while custom words come from trusted
                # administrators; consider a restricted arithmetic evaluator.
                new_int = int(eval(offset.replace("EP", str(old_int))))
                if not old_str.isdigit():
                    # Original was a Chinese numeral: convert back to Chinese
                    return cn2an.an2cn(new_int, "low")
                if old_str.startswith("0"):
                    # Keep the original zero-padded width (e.g. 09 -> 10, 01 -> 02)
                    return str(new_int).zfill(len(old_str))
                return str(new_int)

            # Single pass over the original title: each number is rewritten
            # exactly once, so a new value can never be replaced again and a
            # short number cannot match inside a longer one.
            new_title, count = episode_re.subn(shift, title)
            if not count:
                return title, "", False
            return new_title, "", True
        except Exception as err:
            return title, str(err), False
|
Reference in New Issue
Block a user