()
| 32 | |
| 33 | |
def main():
    """Run the evaluation harness and, on rank 0, log and persist the results.

    The model and ``neox_args`` come from ``setup_for_inference_or_eval``
    (called with ``use_cache=False``) and are handed to ``run_eval_harness``
    together with ``forward_step``, the configured ``eval_tasks`` and a fixed
    ``bootstrap_iters`` of 10000.

    On the process whose ``neox_args.rank`` is 0 the function then:

    * calls ``init_wandb`` and sends every entry of ``results["results"]``
      to ``tb_wandb_log`` under an ``eval/`` key (dict-valued entries are
      flattened to ``eval/<outer key>_<inner key>``),
    * pretty-prints the full results object,
    * writes the results as indented JSON to a timestamped
      ``eval_results_<MM-DD-YYYY-HH-MM-SS>.json`` file, prefixed with
      ``neox_args.eval_results_prefix`` when that value is truthy.
    """
    model, neox_args = setup_for_inference_or_eval(use_cache=False)
    results = run_eval_harness(
        model,
        forward_step,
        neox_args,
        eval_tasks=neox_args.eval_tasks,
        bootstrap_iters=10000,
    )

    # NOTE(review): the source this was reconstructed from had lost its
    # indentation; the rank-0 guard is assumed to cover logging, printing and
    # writing the results file -- confirm against the original layout.
    if neox_args.rank != 0:
        return

    init_wandb(neox_args=neox_args)

    def log_metric(name, value):
        # Single place that forwards one metric under the "eval/" namespace.
        tb_wandb_log(
            f"eval/{name}",
            value,
            neox_args.iteration,
            use_wandb=neox_args.use_wandb,
        )

    # log to wandb
    for task_name, task_metrics in results["results"].items():
        if not isinstance(task_metrics, dict):
            log_metric(task_name, task_metrics)
            continue
        for metric_name, metric_value in task_metrics.items():
            log_metric("_".join([task_name, metric_name]), metric_value)

    pprint(results)

    timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
    results_path = f"eval_results_{timestamp}.json"
    if neox_args.eval_results_prefix:
        results_path = f"{neox_args.eval_results_prefix}_{results_path}"
    with open(results_path, "w") as f:
        json.dump(results, f, indent=4)
| 72 | |
| 73 | |
| 74 | if __name__ == "__main__": |
no test coverage detected