hub / github.com/karpathy/nanochat / run_generative_eval

Function run_generative_eval

scripts/chat_eval.py:29–81 · view source on GitHub ↗

(task_object, tokenizer, model, engine, num_samples, max_new_tokens, temperature, top_k, max_problems=None)

Source from the content-addressed store, hash-verified

27	# Generative evaluation loop (we go one problem at a time, sample, evaluate)
28
def run_generative_eval(task_object, tokenizer, model, engine, num_samples, max_new_tokens, temperature, top_k, max_problems=None):
    """Evaluate a generative task by sampling completions one problem at a time.

    Problems are strided across ranks: each rank handles indices
    ``ddp_rank, ddp_rank + ddp_world_size, ...``. For every problem,
    ``num_samples`` completions are generated, and the problem counts as passed
    if at least one completion satisfies ``task_object.evaluate``.

    Args:
        task_object: Indexable task. ``len()`` gives the number of problems,
            ``task_object[i]`` yields a conversation, and
            ``evaluate(conversation, completion)`` returns a truthy value on success.
        tokenizer: Provides ``render_for_completion(conversation)`` and
            ``decode(tokens)``.
        model: Only queried for ``get_device()``, to place the reduction tensors.
        engine: Provides ``generate_batch(...)``; the first element of its return
            value is iterated as one token sequence per sample.
        num_samples: Forwarded to ``engine.generate_batch`` as ``num_samples``.
        max_new_tokens: Forwarded to ``engine.generate_batch`` as ``max_tokens``.
        temperature: Forwarded to ``engine.generate_batch``.
        top_k: Forwarded to ``engine.generate_batch``.
        max_problems: Optional cap on the number of problems; ``None`` evaluates
            every problem in ``task_object``.

    Returns:
        The fraction of evaluated problems that passed (summed across ranks when
        ``get_dist_info()`` reports a distributed run), or ``0.0`` when no
        problems were evaluated.
    """
    # The local rank is not needed here, so it is discarded.
    ddp, ddp_rank, _, ddp_world_size = get_dist_info()
    device = model.get_device()

    num_problems = len(task_object) if max_problems is None else min(len(task_object), max_problems)

    # Run the evaluation
    num_passed, total = 0, 0
    for i in range(ddp_rank, num_problems, ddp_world_size):
        conversation = task_object[i]

        # Tokenize the prompt
        encoded_prompt = tokenizer.render_for_completion(conversation)
        # Get the completions
        results, _ = engine.generate_batch(
            encoded_prompt,
            num_samples=num_samples,
            max_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
        )
        # Decode the completions as text; each result is sliced past the
        # prompt length so only the newly generated tokens are decoded.
        prefix_length = len(encoded_prompt)
        completions = [tokenizer.decode(result_tokens[prefix_length:]) for result_tokens in results]
        # Evaluate success criteria (every completion is evaluated; one success is enough)
        outcomes = [task_object.evaluate(conversation, completion) for completion in completions]
        passed = any(outcomes)

        # Keep stats
        total += 1
        num_passed += int(passed)

        # Logging (overwrite the same line in the console).
        # total >= 1 here, so the division is safe.
        print(f"\r\033[KRank {ddp_rank} | {num_passed}/{total} ({100*num_passed/total:.2f}%)", end='', flush=True)

    # Finish the in-place progress line with a newline before final summary
    print()

    # Aggregate results across all ranks. Every rank must reach these
    # collectives, including ranks that processed zero problems.
    if ddp:
        num_passed_tensor = torch.tensor([num_passed], dtype=torch.long, device=device)
        total_tensor = torch.tensor([total], dtype=torch.long, device=device)
        dist.all_reduce(num_passed_tensor, op=dist.ReduceOp.SUM)
        dist.all_reduce(total_tensor, op=dist.ReduceOp.SUM)
        num_passed = num_passed_tensor.item()
        total = total_tensor.item()

    print0("=" * 50)
    if total == 0:
        # Nothing was evaluated (empty task or max_problems <= 0): report zero
        # accuracy instead of raising ZeroDivisionError.
        print0("Final: 0/0 (0.00%)")
        return 0.0
    print0(f"Final: {num_passed}/{total} ({100*num_passed/total:.2f}%)")

    # Return the accuracy
    return num_passed / total
82
83	# -----------------------------------------------------------------------------
84	# Categorical evaluation loop

Callers 1

run_chat_eval — Function · 0.85

Calls 7

get_dist_info — Function · 0.90

print0 — Function · 0.90

render_for_completion — Method · 0.80

generate_batch — Method · 0.80

get_device — Method · 0.45

decode — Method · 0.45

evaluate — Method · 0.45

Tested by

no test coverage detected