Merge pull request #168 from zuyu/regex-fix

Fixed regex SyntaxWarnings
2026-01-12 00:06:33 +08:00 · 2024-06-30 17:16:55 +08:00
parent bc2a3e9539 3e08da6beb
commit ab4c6e1aa2
3 changed files with 41 additions and 43 deletions
--- a/uilib/utils.py
+++ b/uilib/utils.py
@@ -31,7 +31,7 @@ def get_parameter(request, param, default, cast_type):

 # 数字转为英文读法
 def num_to_english(num):
-    
+
    num_str = str(num)
    # English representations for numbers 0-9
    english_digits = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
@@ -41,29 +41,29 @@ def num_to_english(num):
    need_and = False  # Indicates whether 'and' needs to be added
    part = []  # Stores each group of 4 digits
    is_first_part = True  # Indicates if it is the first part for not adding 'and' at the beginning
-    
+
    # Split the number into 3-digit groups
    while num_str:
        part.append(num_str[-3:])
        num_str = num_str[:-3]
-    
+
    part.reverse()
-    
+
    for i, p in enumerate(part):
        p_str = ""
        digit_len = len(p)
        if int(p) == 0 and i < len(part) - 1:
            continue
-        
+
        hundreds_digit = int(p) // 100 if digit_len == 3 else None
        tens_digit = int(p) % 100 if digit_len >= 2 else int(p[0] if digit_len == 1 else p[1])
-        
+
        # Process hundreds
        if hundreds_digit is not None and hundreds_digit != 0:
            p_str += english_digits[hundreds_digit] + " hundred"
            if tens_digit != 0:
                p_str += " and "
-        
+
        # Process tens and ones
        if 10 < tens_digit < 20:  # Teens exception
            teen_map = {
@@ -79,17 +79,17 @@ def num_to_english(num):
                p_str += tens_map[tens_val] + (" " + english_digits[ones_val] if ones_val != 0 else "")
            elif tens_digit != 0 and tens_val < 2:  # When tens_digit is in [1, 9]
                p_str += english_digits[tens_digit]
-        
+
        if p_str and not is_first_part and need_and:
            result += " and "
        result += p_str
        if i < len(part) - 1 and int(p) != 0:
            result += " " + big_units[len(part) - i - 1] + ", "
-        
+
        is_first_part = False
        if int(p) != 0:
            need_and = True
-    
+
    return result.capitalize()


@@ -120,8 +120,8 @@ def num2text(text):
    text = re.sub(r'((?:\d+\.)?\d+)\s*/\s*(\d+)', fraction_to_words, text)

    # 取出数字 number_list= [('1000200030004000.123', '1000200030004000', '123'), ('23425', '23425', '')]
-    number_list=re.findall('((\d+)(?:\.(\d+))?%?)',text)
-    if len(number_list)>0:            
+    number_list=re.findall(r'((\d+)(?:\.(\d+))?%?)', text)
+    if len(number_list)>0:
        #dc= ('1000200030004000.123', '1000200030004000', '123','')
        for m,dc in enumerate(number_list):
            if len(dc[1])>16:
@@ -133,14 +133,14 @@ def num2text(text):
                int_text=f' the pronunciation of  {int_text}'
            text=text.replace(dc[0],int_text)

-        
+
    return text.replace('1',' one ').replace('2',' two ').replace('3',' three ').replace('4',' four ').replace('5',' five ').replace('6',' six ').replace('7','seven').replace('8',' eight ').replace('9',' nine ').replace('0',' zero ').replace('=',' equals ')



 # 中英文数字转换为文字，特殊符号处理
 def split_text(text_list):
-    
+
    tx = TextNormalizer()
    haserror=False
    result=[]
@@ -177,12 +177,12 @@ def split_text_by_punctuation(text):
    min_length = 150
    punctuation_marks = "。？！，、；：”’》」』）】…—"
    english_punctuation = ".?!,:;)}…"
-    
+
    # 结果列表
    result = []
    # 起始位置
    pos = 0
-    
+
    # 遍历文本中的每个字符
    text_length=len(text)
    for i, char in enumerate(text):
@@ -196,11 +196,11 @@ def split_text_by_punctuation(text):
                # 更新起始位置到当前标点的下一个字符
                pos = i+1
    #print(f'{pos=},{len(text)=}')
-    
+
    # 如果剩余文本长度超过120或没有更多标点符号可以进行分割，将剩余的文本作为一个分段添加到结果列表
    if pos < len(text):
        result.append(text[pos:])
-    
+
    return result


@@ -223,17 +223,17 @@ def ClearWav(directory):
            print(f"文件删除错误 {file_path}, 报错信息: {e}")
            return False, str(e)
    return True, "所有wav文件已被删除."
-    
-# 保存音色    
+
+# 保存音色
 # 参考 https://github.com/craii/ChatTTS_WebUI/blob/main/utils.py
-def save_speaker(name, tensor):   
+def save_speaker(name, tensor):
    try:
        df = pd.DataFrame({"speaker": [float(i) for i in tensor]})
        df.to_csv(f"{SPEAKER_DIR}/{name}.csv", index=False, header=False)
    except Exception as e:
        print(e)
-        
-        
+
+
 # 加载音色
 # 参考 https://github.com/craii/ChatTTS_WebUI/blob/main/utils.py
 def load_speaker(name):
@@ -311,4 +311,4 @@ def modelscope_status():
            return False
    except Exception as e:
        return False
-    return True
+    return True
--- a/uilib/zh_normalization/quantifier.py
+++ b/uilib/zh_normalization/quantifier.py
@@ -58,7 +58,6 @@ def replace_temperature(match) -> str:

 def replace_measure(sentence) -> str:
    for q_notation in measure_dict:
-        if q_notation in sentence and re.search(f'\d{q_notation}',sentence):
-        
+        if q_notation in sentence and re.search(rf'\d{re.escape(q_notation)}', sentence):  # rf-string: keeps \d raw while still interpolating the unit (a plain r'' matches the literal text "{q_notation}")
            sentence = sentence.replace(q_notation, measure_dict[q_notation])
    return sentence
--- a/uilib/zh_normalization/text_normlization.py
+++ b/uilib/zh_normalization/text_normlization.py
@@ -60,7 +60,7 @@ class TextNormalizer():
            text (str): The input text.
        Returns:
            List[str]: Sentences.
-            
+
            character_map = {
    "：": "，",
    "；": "，",
@@ -105,8 +105,8 @@ class TextNormalizer():
        return sentences

    def _post_replace(self, sentence: str) -> str:
-        
-    
+
+
        #sentence = sentence.replace('/', '每')
        sentence = sentence.replace('~', '至')
        sentence = sentence.replace('～', '至')
@@ -146,8 +146,8 @@ class TextNormalizer():
        sentence = sentence.replace('ψ', '普赛').replace('Ψ', '普赛')
        sentence = sentence.replace('ω', '欧米伽').replace('Ω', '欧米伽')
        sentence = sentence.replace('+', '加')
-        
-        
+
+
        # re filter special characters, have one more character "-" than line 68
        sentence = re.sub(r'[-——《》【】<=>{}()（）#&@“”^|…\\]', '', sentence)
        return sentence
@@ -161,12 +161,12 @@ class TextNormalizer():
        result = ""
        zero_flag = False  # 标记是否需要加'零'
        part = []  # 存储每4位的数字
-        
+
        # 将数字按每4位分组
        while num_str:
            part.append(num_str[-4:])
            num_str = num_str[:-4]
-        
+
        for i in range(len(part)):
            part_str = ""
            part_zero_flag = False
@@ -184,21 +184,21 @@ class TextNormalizer():
                part_str = part_str[:-1]  # 去除尾部的'零'
            if part_str:
                zero_flag = True
-            
+
            if i > 0 and not set(part[i]) <= {'0'}:  # 如果当前部分不全是0，则加上相应的大单位
                result = part_str + big_units[i] + result
            else:
                result = part_str + result
-        
+
        # 处理输入为0的情况或者去掉开头的零
        result = result.lstrip(chinese_digits[0])
        if not result:
            return chinese_digits[0]
-        
+
        return result

    def normalize_sentence(self, sentence: str) -> str:
-        
+
        # basic character conversions
        # add
        sentence = re.sub(r'(\d+)\s*[\*xX]\s*(\d+)', r'\1 乘 \2', sentence,re.I)
@@ -207,12 +207,12 @@ class TextNormalizer():
        sentence = re.sub(r'(0\d+)\-(\d{3,})', r'\1杠\2', sentence,re.I)
        sentence = sentence.replace('=', '等于')
        sentence = sentence.replace('÷','除以')
-        
+
        #sentence = re.sub(r'(\d+)\s*\-', r'\1 减', sentence)
        sentence = re.sub(r'((?:\d+\.)?\d+)\s*/\s*(\d+)', r'\2分之\1', sentence)
-        
+
        # 取出数字 number_list= [('1000200030004000.123', '1000200030004000', '123'), ('23425', '23425', '')]
-        number_list=re.findall('((\d+)(?:\.(\d+))?%?)',sentence)
+        number_list=re.findall(r'((\d+)(?:\.(\d+))?%?)', sentence)
        numtext=['零','一','二','三','四','五','六','七','八','九']
        if len(number_list)>0:
            #dc= ('1000200030004000.123', '1000200030004000', '123','')
@@ -227,8 +227,7 @@ class TextNormalizer():
                if dc[0][-1]=='%':
                    int_text=f'百分之{int_text}'
                sentence=sentence.replace(dc[0],int_text)
-        
-        
+
        sentence = tranditional_to_simplified(sentence)
        sentence = sentence.translate(F2H_ASCII_LETTERS).translate(
            F2H_DIGITS).translate(F2H_SPACE)
@@ -258,7 +257,7 @@ class TextNormalizer():
        sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence)
        sentence = RE_NUMBER.sub(replace_number, sentence)
        sentence = self._post_replace(sentence)
-        
+
        sentence = sentence.replace('[一break]','[1break]')

        return sentence