Function main

evaluate.py:34–71 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

32
33
def main():
    """Run the evaluation harness and, on rank 0, log and save the results.

    Obtains the model and arguments from ``setup_for_inference_or_eval``
    (called with ``use_cache=False``), evaluates ``neox_args.eval_tasks``
    through ``run_eval_harness`` with 10000 bootstrap iterations, and then,
    on the process whose ``neox_args.rank`` is 0 only:

    * initialises wandb,
    * logs every entry of ``results["results"]`` under an ``eval/`` tag
      (nested dicts are flattened to ``eval/<key>_<subkey>``),
    * pretty-prints the full results, and
    * dumps them to a timestamped ``eval_results_*.json`` file, optionally
      prefixed with ``neox_args.eval_results_prefix``.
    """
    model, neox_args = setup_for_inference_or_eval(use_cache=False)
    results = run_eval_harness(
        model,
        forward_step,
        neox_args,
        eval_tasks=neox_args.eval_tasks,
        bootstrap_iters=10000,
    )

    # NOTE(review): everything below is assumed to belong to the rank-0
    # branch (logging, printing and the JSON dump) - confirm against the
    # original indentation.
    if neox_args.rank != 0:
        return

    init_wandb(neox_args=neox_args)

    # Log to wandb: one scalar per top-level entry, or one per sub-entry
    # when the entry is itself a dict.
    for task_name, task_result in results["results"].items():
        if not isinstance(task_result, dict):
            tb_wandb_log(
                f"eval/{task_name}",
                task_result,
                neox_args.iteration,
                use_wandb=neox_args.use_wandb,
            )
            continue

        for metric_name, metric_value in task_result.items():
            tag = "_".join((task_name, metric_name))
            tb_wandb_log(
                f"eval/{tag}",
                metric_value,
                neox_args.iteration,
                use_wandb=neox_args.use_wandb,
            )

    pprint(results)

    # Build the output filename from the current local time.
    timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
    results_path = f"eval_results_{timestamp}.json"
    if neox_args.eval_results_prefix:
        results_path = f"{neox_args.eval_results_prefix}_{results_path}"

    with open(results_path, "w") as out_file:
        json.dump(results, out_file, indent=4)
72
73
74	if __name__ == "__main__":

evaluate.pyFile · 0.70

setup_for_inference_or_evalFunction · 0.90

run_eval_harnessFunction · 0.90

init_wandbFunction · 0.90

tb_wandb_logFunction · 0.90

no test coverage detected