(self, text: str)
| 111 | self.en_normalizer = NormalizerEn(overwrite_cache=False) |
| 112 | |
def normalize(self, text: str) -> str:
    """Normalize ``text`` with the Chinese or the English normalizer.

    The characters "嗯" and "呣" are first rewritten to "恩" and "母".
    ``self.use_chinese(text)`` then selects the branch:

    * Chinese branch: matches of ``ENGLISH_CONTRACTION_PATTERN`` are
      rewritten to ``"<group 1> is"``, pinyin tones and names are set
      aside through ``save_pinyin_tones`` / ``save_names``, the text is
      run through ``self.zh_normalizer`` and the saved pieces are put
      back. Finally every key of ``self.zh_char_rep_map`` found in the
      result is replaced by its mapped value.
    * English branch: the same contraction rewrite is applied, the text
      is run through ``self.en_normalizer`` and every key of
      ``self.char_rep_map`` found in the result is replaced by its
      mapped value.

    Args:
        text: The raw input string.

    Returns:
        The normalized string. An empty string is returned when either
        normalizer is missing. If the Chinese normalizer raises, the
        restore and replacement steps run on an empty string; if the
        English branch raises, they run on the text as it stood when
        the exception occurred. In both cases the traceback is printed.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")

    # Both normalizers must be available, regardless of the branch taken.
    if not (self.zh_normalizer and self.en_normalizer):
        print("Error, text normalizer is not initialized !!!")
        return ""

    if self.use_chinese(text):
        # NOTE(review): presumably expands English "'s" contractions;
        # confirm against the definition of ENGLISH_CONTRACTION_PATTERN.
        text = re.sub(
            TextNormalizer.ENGLISH_CONTRACTION_PATTERN,
            r"\1 is",
            text,
            flags=re.IGNORECASE,
        )
        # Set pinyin tones and names aside before running the normalizer.
        protected, saved_pinyin = self.save_pinyin_tones(text.rstrip())
        protected, saved_names = self.save_names(protected)
        try:
            normalized = self.zh_normalizer.normalize(protected)
        except Exception:
            normalized = ""
            print(traceback.format_exc())
        # Restore person names.
        normalized = self.restore_names(normalized, saved_names)
        # Restore pinyin tones.
        normalized = self.restore_pinyin_tones(normalized, saved_pinyin)
        replacements = self.zh_char_rep_map
    else:
        try:
            text = re.sub(
                TextNormalizer.ENGLISH_CONTRACTION_PATTERN,
                r"\1 is",
                text,
                flags=re.IGNORECASE,
            )
            normalized = self.en_normalizer.normalize(text)
        except Exception:
            # Fall back to the text as it stands at the point of failure.
            normalized = text
            print(traceback.format_exc())
        replacements = self.char_rep_map

    # One alternation over all (escaped) keys of the selected map; each
    # match is swapped for the value stored under the matched text.
    alternation = "|".join(map(re.escape, replacements))
    return re.sub(alternation, lambda hit: replacements[hit.group()], normalized)
| 144 | |
| 145 | def correct_pinyin(self, pinyin: str): |
| 146 | """ |
no test coverage detected