清洗VTT格式字幕(先去掉VTT头部,然后按SRT逻辑处理)
(content: str)
| 63 | |
| 64 | |
def clean_vtt(content: str) -> str:
    """Clean WebVTT subtitle text by stripping VTT-only blocks, then reuse the SRT cleaner.

    The leading ``WEBVTT`` header block (optionally preceded by a BOM) and
    every ``NOTE`` comment block are removed; the remainder is passed to
    ``clean_srt`` unchanged.

    Args:
        content: Raw text of a ``.vtt`` file, with LF or CRLF line endings.

    Returns:
        Whatever ``clean_srt`` returns for the content without the header
        and comment blocks.
    """
    # Header block: from "WEBVTT" at the very start through the first blank
    # line. A BOM may precede the signature, and CRLF files separate blocks
    # with "\r\n\r\n" rather than "\n\n".
    content = re.sub(
        r'^\ufeff?WEBVTT.*?\r?\n\r?\n', '', content, flags=re.DOTALL
    )

    # Comment blocks: "NOTE" only opens a comment when it starts a block
    # (start of input or right after a blank line) and is followed by
    # whitespace or end of input. Matching it anywhere would delete cue text
    # containing words such as "PLEASE NOTE" or "NOTEBOOK". A comment runs to
    # the next blank line, or to end of input when it is the last block.
    note_block = (
        r'(?:\A(?:\r?\n)*|(?<=\n\n)|(?<=\n\r\n))'
        r'NOTE(?=[ \t\r\n]|\Z).*?(?:\r?\n\r?\n|\Z)'
    )
    content = re.sub(note_block, '', content, flags=re.DOTALL)

    return clean_srt(content)
| 72 | |
| 73 | |
| 74 | def main(): |