This commit is contained in:
jxxghp
2023-08-29 12:22:14 +08:00
parent d0a92531ac
commit c138cda735
2 changed files with 23 additions and 13 deletions

View File

@@ -270,7 +270,7 @@ class MessageChain(ChainBase):
elif text.startswith("#") \ elif text.startswith("#") \
or re.search(r"^请[问帮你]", text) \ or re.search(r"^请[问帮你]", text) \
or re.search(r"[?]$", text) \ or re.search(r"[?]$", text) \
or StringUtils.count_words(text) > 10 \ or StringUtils.count_words(text) > 15 \
or text.find("继续") != -1: or text.find("继续") != -1:
# 聊天 # 聊天
content = text content = text

View File

@@ -418,21 +418,31 @@ class StringUtils:
return curr + format(amount, ",") return curr + format(amount, ",")
@staticmethod @staticmethod
def count_words(text: str) -> int:
    """
    Count the words in a string that may mix Chinese and English.

    Every Chinese character in the range U+4E00..U+9FA5 counts as one word,
    and every maximal run of ASCII letters counts as one word. Digits,
    whitespace and punctuation are not counted; they only act as separators
    between English words.

    :param text: the string to measure
    :return: number of Chinese characters plus number of English words;
             0 for an empty or falsy input
    """
    if not text:
        return 0
    # The two alternatives match disjoint character sets, so one left-to-right
    # scan yields exactly one match per Chinese character and one per English
    # word. No post-filtering is needed: neither alternative can match digits
    # or whitespace.
    return len(re.findall(r"[\u4e00-\u9fa5]|[a-zA-Z]+", text))
@staticmethod @staticmethod
def split_text(text: str, max_length: int) -> Generator: def split_text(text: str, max_length: int) -> Generator: