()
| 72 | |
| 73 | |
def main():
    """Convert an SRT or WebVTT subtitle file into a plain-text transcript.

    Command-line usage: ``<input.srt|input.vtt> [output.txt]``. When the
    output path is omitted, the transcript is written next to the input file
    as ``<input stem>_transcript.txt``.

    Exits with status 1 when no input path is given or when the input path
    does not exist. On success the transcript is written as UTF-8 and the
    output path, character count and line count are printed.
    """
    if len(sys.argv) < 2:
        print("用法: python3 srt_to_transcript.py <input.srt|input.vtt> [output.txt]")
        sys.exit(1)

    input_path = Path(sys.argv[1])
    if not input_path.exists():
        print(f"❌ 文件不存在: {input_path}")
        sys.exit(1)

    # Default output file name: same directory, "<stem>_transcript.txt".
    if len(sys.argv) >= 3:
        output_path = Path(sys.argv[2])
    else:
        output_path = input_path.parent / f"{input_path.stem}_transcript.txt"

    # Read the file and detect its format.
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM.
    # With plain 'utf-8' the BOM stays in the text as '\ufeff', so the
    # 'WEBVTT' header check below fails for BOM-prefixed files and the BOM
    # is passed on to the cleaner.
    content = input_path.read_text(encoding='utf-8-sig')

    # A .vtt suffix or a WEBVTT header selects the VTT cleaner; anything
    # else is treated as SRT.
    if input_path.suffix.lower() == '.vtt' or content.startswith('WEBVTT'):
        transcript = clean_vtt(content)
    else:
        transcript = clean_srt(content)

    output_path.write_text(transcript, encoding='utf-8')

    # Statistics: number of characters, and number of newline-separated lines.
    word_count = len(transcript)
    line_count = transcript.count('\n') + 1
    print(f"✅ 转换完成: {output_path}")
    print(f" 字数: {word_count} 段落数: {line_count}")
| 105 | |
| 106 | |
| 107 | if __name__ == '__main__': |
no test coverage detected