Normalizer class that converts text from written to spoken form. Useful for TTS preprocessing. Args: input_case: expected input capitalization; lang: language specifying the TN rules, by default: English; cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
| 31 | |
| 32 | |
| 33 | class Normalizer: |
| 34 | """ |
| 35 | Normalizer class that converts text from written to spoken form. |
| 36 | Useful for TTS preprocessing. |
| 37 | |
| 38 | Args: |
| 39 | input_case: expected input capitalization |
| 40 | lang: language specifying the TN rules, by default: English |
| 41 | cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache. |
| 42 | overwrite_cache: set to True to overwrite .far files |
| 43 | whitelist: path to a file with whitelist replacements |
| 44 | post_process: WFST-based post processing, e.g. to remove extra spaces added during TN. |
| 45 | Note: punct_post_process flag in normalize() supports all languages. |
| 46 | """ |
| 47 | |
| 48 | def __init__( |
| 49 | self, |
| 50 | input_case: str, |
| 51 | lang: str = "en", |
| 52 | deterministic: bool = True, |
| 53 | cache_dir: str = None, |
| 54 | overwrite_cache: bool = False, |
| 55 | whitelist: str = None, |
| 56 | lm: bool = False, |
| 57 | post_process: bool = True, |
| 58 | ): |
| 59 | assert input_case in ["lower_cased", "cased"] |
| 60 | |
| 61 | self.post_processor = None |
| 62 | |
| 63 | if lang == "en": |
| 64 | from fun_text_processing.text_normalization.en.verbalizers.verbalize_final import ( |
| 65 | VerbalizeFinalFst, |
| 66 | ) |
| 67 | from fun_text_processing.text_normalization.en.verbalizers.post_processing import ( |
| 68 | PostProcessingFst, |
| 69 | ) |
| 70 | |
| 71 | if post_process: |
| 72 | self.post_processor = PostProcessingFst( |
| 73 | cache_dir=cache_dir, overwrite_cache=overwrite_cache |
| 74 | ) |
| 75 | |
| 76 | if deterministic: |
| 77 | from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify import ( |
| 78 | ClassifyFst, |
| 79 | ) |
| 80 | else: |
| 81 | if lm: |
| 82 | from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify_lm import ( |
| 83 | ClassifyFst, |
| 84 | ) |
| 85 | else: |
| 86 | from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import ( |
| 87 | ClassifyFst, |
| 88 | ) |
| 89 | |
| 90 | elif lang == "ru": |
no outgoing calls
no test coverage detected
searching dependent graphs…