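Rich transcription postprocess. Args: s: the raw transcription string; language tags (the entries of `lang_dict`) act as segment separators, and the `<|nospeech|><|Event_UNK|>` marker is rendered as "❓".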
(s)
| 434 | |
| 435 | |
def rich_transcription_postprocess(s):
    """Merge the language-tagged segments of a transcription into one string.

    Steps, in order:

    1. ``<|nospeech|><|Event_UNK|>`` is replaced with "❓".
    2. Every entry of ``lang_dict`` found in ``s`` is turned into a common
       separator and ``s`` is split on it.
    3. Each piece is passed through ``format_str_v2`` and stripped of
       surrounding spaces.
    4. The pieces are concatenated. While doing so, a piece's first character
       is dropped when it is an event symbol equal to the currently tracked
       event, and the last character of the text built so far is dropped
       when the incoming piece ends with the same emotion symbol.
    5. Every occurrence of ``"The."`` is replaced with a space and the result
       is stripped.

    Args:
        s: Raw transcription string. Presumably model output carrying
            ``<|...|>`` style tags -- TODO confirm against callers.

    Returns:
        The merged, whitespace-stripped text.
    """

    def get_emo(text):
        """Return the last character of ``text`` if in ``emo_set``, else None."""
        return text[-1] if text[-1] in emo_set else None

    def get_event(text):
        """Return the first character of ``text`` if in ``event_set``, else None."""
        return text[0] if text[0] in event_set else None

    s = s.replace("<|nospeech|><|Event_UNK|>", "❓")
    # Collapse every language tag into a single separator so one split suffices.
    for lang in lang_dict:
        s = s.replace(lang, "<|lang|>")
    segments = [format_str_v2(part).strip(" ") for part in s.split("<|lang|>")]

    # The leading space keeps new_s non-empty, so get_event/get_emo can index it
    # safely even when the first segment is empty.
    new_s = " " + segments[0]
    # NOTE(review): new_s starts with " ", so this is None unless " " is in
    # event_set; the first segment's event is therefore never tracked. Kept
    # as-is to preserve existing output -- confirm whether this is intended.
    cur_ent_event = get_event(new_s)

    for segment in segments[1:]:
        if not segment:
            continue

        event = get_event(segment)
        if event is not None and event == cur_ent_event:
            # Same event as the tracked one: drop the repeated symbol.
            segment = segment[1:]
            if not segment:
                # Nothing left to append; the tracked event stays unchanged.
                continue

        # NOTE(review): the tracked event is recomputed from the segment
        # *after* any stripping above, so it reflects the new first character.
        cur_ent_event = get_event(segment)

        emo = get_emo(segment)
        if emo is not None and emo == get_emo(new_s):
            # The incoming segment ends with the same emotion symbol; remove
            # the one already at the end of the accumulated text.
            new_s = new_s[:-1]

        new_s += segment.strip()

    # NOTE(review): presumably removes a spurious "The." artifact; the reason
    # is not visible in this block -- confirm before changing.
    new_s = new_s.replace("The.", " ")
    return new_s.strip()
searching dependent graphs…