Method normalize

indextts/utils/front.py:113–143 · view source on GitHub ↗

(self, text: str)

Source from the content-addressed store, hash-verified

111	self.en_normalizer = NormalizerEn(overwrite_cache=False)
112
def normalize(self, text: str) -> str:
    """Normalize *text* for synthesis and return the normalized string.

    The interjections "嗯" and "呣" are first rewritten to "恩" and "母".
    If either ``self.zh_normalizer`` or ``self.en_normalizer`` is unset, an
    error is printed and ``""`` is returned.

    ``self.use_chinese(text)`` selects the branch:

    * Chinese: English contractions matched by
      ``TextNormalizer.ENGLISH_CONTRACTION_PATTERN`` are expanded to
      "<word> is", pinyin tones and names are saved via
      ``save_pinyin_tones`` / ``save_names``, the text is passed through
      ``self.zh_normalizer`` and the saved items are restored. The
      ``self.zh_char_rep_map`` replacements are applied last.
    * Otherwise: contractions are expanded, the text is passed through
      ``self.en_normalizer`` and ``self.char_rep_map`` is applied.

    A failure inside a normalizer is printed (traceback) rather than
    raised: the Chinese branch continues with ``""`` and the English
    branch continues with the text as it was before the ``try`` block.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")
    if not self.zh_normalizer or not self.en_normalizer:
        print("Error, text normalizer is not initialized !!!")
        return ""

    if self.use_chinese(text):
        text = re.sub(TextNormalizer.ENGLISH_CONTRACTION_PATTERN, r"\1 is", text, flags=re.IGNORECASE)
        # NOTE(review): presumably these two helpers swap pinyin-with-tone
        # tokens and personal names for placeholders so the normalizer
        # leaves them untouched -- confirm against their definitions.
        replaced_text, pinyin_list = self.save_pinyin_tones(text.rstrip())
        replaced_text, original_name_list = self.save_names(replaced_text)
        try:
            result = self.zh_normalizer.normalize(replaced_text)
        except Exception:
            # Best effort: report the failure and continue with empty text.
            result = ""
            print(traceback.format_exc())
        # Restore personal names.
        result = self.restore_names(result, original_name_list)
        # Restore pinyin tones.
        result = self.restore_pinyin_tones(result, pinyin_list)
        rep_map = self.zh_char_rep_map
    else:
        try:
            text = re.sub(TextNormalizer.ENGLISH_CONTRACTION_PATTERN, r"\1 is", text, flags=re.IGNORECASE)
            result = self.en_normalizer.normalize(text)
        except Exception:
            # Best effort: fall back to the un-normalized text.
            result = text
            print(traceback.format_exc())
        rep_map = self.char_rep_map

    # Apply the character replacement table in a single regex pass.
    # The alternatives must be joined with a bare "|": joining with "\|"
    # yields an escaped (literal) pipe, which turns the whole pattern into
    # one long literal that never matches. An empty table is skipped
    # because an empty pattern matches everywhere and the lookup of ""
    # would raise KeyError.
    if rep_map:
        pattern = re.compile("|".join(re.escape(key) for key in rep_map))
        result = pattern.sub(lambda match: rep_map[match.group()], result)
    return result
144
145	def correct_pinyin(self, pinyin: str):
146	"""

l2normFunction · 0.80

encodeMethod · 0.80

batch_encodeMethod · 0.80

front.pyFile · 0.80

forwardMethod · 0.80

use_chineseMethod · 0.95

save_pinyin_tonesMethod · 0.95

save_namesMethod · 0.95

restore_namesMethod · 0.95

restore_pinyin_tonesMethod · 0.95

no test coverage detected