| 7 | |
| 8 | |
def main(
    data_path: str = "./test.jsonl",
    threshold: int = -1,
    random: int = 0,
    log_path: str = 'inspect_jsonl.txt',
    random_rate: float = 0.5,
):
    """Dump samples from a JSON Lines file into a log for manual inspection.

    The keys of the first record are logged once as a header. Each selected
    record is then logged key by key. When a record has a string ``code``
    field, its character and line counts are logged as well.

    Args:
        data_path: Path to the JSONL file; one JSON object per line.
        threshold: Maximum number of samples to log. Values <= 0 disable
            the limit.
        random: Truthy to enable random sub-sampling of the records.
        log_path: File handed to ``Logger`` as ``log_file``.
        random_rate: Probability of keeping a record when ``random`` is
            enabled (1.0 keeps everything, 0.0 keeps nothing).

    Raises:
        OSError: If ``data_path`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # NOTE(review): Logger is defined outside this block; presumably
    # log_mode="file" routes all output to log_path -- confirm.
    logger = Logger(__name__, log_file=log_path, log_mode="file", disable_formatter=True)

    n = 0  # number of samples logged so far
    keys_logged = False
    with open(data_path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            # Blank lines (e.g. a trailing newline at EOF) are not records;
            # skip them instead of crashing in json.loads.
            if not line.strip():
                continue

            obj = None
            if not keys_logged:
                logger.info("Data has the following keys")
                obj = json.loads(line)
                logger.info(obj.keys())
                keys_logged = True

            # `>=` so that exactly `threshold` samples are logged; the
            # previous `>` comparison let one extra sample through.
            if threshold > 0 and n >= threshold:
                break

            # A uniform draw in [0, 1) makes the keep probability exactly
            # `random_rate`.
            if random and np.random.random() >= random_rate:
                continue

            # Reuse the record already parsed for the key header, if any.
            if obj is None:
                obj = json.loads(line)
            n += 1
            logger.info(f"========== Sample {i} ==========")

            if 'code' in obj:
                code = obj['code']
                # Size statistics only make sense for textual code; other
                # value types are skipped explicitly rather than via a
                # blanket `except`.
                if isinstance(code, str):
                    code_splits = code.split("\n")
                    logger.info(f"Length of chars: {len(code)}, length of lines: {len(code_splits)}.")

            for j, (k, value) in enumerate(obj.items()):
                logger.info(f"** Key {j}: {k} **")
                logger.info(value)
    print(f"Log saved in {log_path}")
| 43 | |
| 44 | |
| 45 | if __name__ == "__main__": |