| 5 | |
| 6 | |
def _list_outputs(output_dir, output_prefix):
    """Return the paths in *output_dir*, optionally limited to a name prefix.

    The result is sorted so that files are processed in a reproducible order.
    """
    if output_prefix is None:
        return sorted(glob.glob(output_dir + "/*"))
    return sorted(glob.glob(os.path.join(output_dir, output_prefix + "*")))


def _rank_sort_key(rank_file):
    """Sort key that orders ``*_rank<N>.jsonl`` paths by the integer ``N``."""
    name = os.path.basename(rank_file)[: -len(".jsonl")]
    rank = name.rpartition("_rank")[2]
    # Numeric ranks first, in numeric order (rank2 before rank10); anything
    # else afterwards in lexicographic order.
    if rank.isdecimal():
        return (0, int(rank), rank)
    return (1, 0, rank)


def _concat_jsonl(sources, destination):
    """Write every line of each file in *sources*, in order, to *destination*."""
    with open(destination, "w") as f_out:
        for source in sources:
            with open(source, "r") as f_in:
                for line in f_in:
                    f_out.write(line)
                    # A final line without a newline would otherwise be fused
                    # with the first record of the next file.
                    if not line.endswith("\n"):
                        f_out.write("\n")


def gather_output(
    output_dir: str = "./output",
    output_prefix: str = None,
    if_remove_rank_files: int = 0,
):
    """Merge per-rank JSONL shards, then join finished and unfinished outputs.

    Two passes are made over the files in ``output_dir``:

    1. For every ``<stem>_rank0.jsonl`` file, all ``<stem>_rank*.jsonl``
       files are concatenated, ordered by rank number, into ``<stem>.jsonl``.
    2. For every ``<stem>_finished.jsonl`` file, that file followed by
       ``<stem>_unfinished.jsonl`` (when it exists) is concatenated into
       ``<stem>_all.jsonl``.

    File kinds are recognised by the suffix of the file *name*, so the
    directory part of the path never influences which files are picked up.

    Args:
        output_dir: Directory that holds the output files.
        output_prefix: If given, only files whose name starts with this
            prefix are considered; otherwise every file in ``output_dir``.
        if_remove_rank_files: When truthy, the rank shards are deleted once
            their merged file has been written.
    """
    rank0_suffix = "_rank0.jsonl"
    finished_suffix = "_finished.jsonl"

    # Pass 1: merge the shards of each rank group.
    for output_file in _list_outputs(output_dir, output_prefix):
        if not os.path.basename(output_file).endswith(rank0_suffix):
            continue
        stem = output_file[: -len(rank0_suffix)]
        # Escape the stem so glob metacharacters in the path are taken
        # literally; only the rank number is a wildcard.
        rank_files = sorted(
            glob.glob(glob.escape(stem) + "_rank*.jsonl"),
            key=_rank_sort_key,
        )
        _concat_jsonl(rank_files, stem + ".jsonl")
        # Delete shards only after the merged file is completely written and
        # closed, so a failure mid-merge cannot lose data.
        if if_remove_rank_files:
            for rank_file in rank_files:
                os.remove(rank_file)
                print(f"Removing {rank_file}...")

    # Pass 2: list again so the files created by pass 1 are included.
    for output_file in _list_outputs(output_dir, output_prefix):
        if not os.path.basename(output_file).endswith(finished_suffix):
            continue
        stem = output_file[: -len(finished_suffix)]
        # The unfinished file may legitimately be absent; skip it then.
        parts = [
            path
            for path in (output_file, stem + "_unfinished.jsonl")
            if os.path.isfile(path)
        ]
        all_file = stem + "_all.jsonl"
        _concat_jsonl(parts, all_file)
        print("Gathering finished. Saved in {}".format(all_file))
| 49 | |
| 50 | |
| 51 | def main(): |