hub / github.com/karpathy/nanochat / main

Function main

scripts/base_eval.py:178–300 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

176	# Main
177
178	def main():
179	parser = argparse.ArgumentParser(description="Base model evaluation")
180	parser.add_argument('--eval', type=str, default='core,bpb,sample', help='Comma-separated evaluations to run: core,bpb,sample (default: all)')
181	parser.add_argument('--hf-path', type=str, default=None, help='HuggingFace model path (e.g. openai-community/gpt2-xl)')
182	parser.add_argument('--model-tag', type=str, default=None, help='nanochat model tag to identify the checkpoint directory')
183	parser.add_argument('--step', type=int, default=None, help='Model step to load (default = last)')
184	parser.add_argument('--max-per-task', type=int, default=-1, help='Max examples per CORE task (-1 = all)')
185	parser.add_argument('--device-batch-size', type=int, default=32, help='Per-device batch size for BPB evaluation')
186	parser.add_argument('--split-tokens', type=int, default=40*524288, help='Number of tokens to evaluate per split for BPB')
187	parser.add_argument('--device-type', type=str, default='', help='cuda\|cpu\|mps (empty = autodetect)')
188	args = parser.parse_args()
189
190	# Parse evaluation modes
191	eval_modes = set(mode.strip() for mode in args.eval.split(','))
192	valid_modes = {'core', 'bpb', 'sample'}
193	invalid = eval_modes - valid_modes
194	if invalid:
195	parser.error(f"Invalid eval modes: {invalid}. Valid: {valid_modes}")
196
197	# Distributed / precision setup
198	device_type = autodetect_device_type() if args.device_type == '' else args.device_type
199	ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type)
200	# Load model and tokenizer
201	is_hf_model = args.hf_path is not None
202	if is_hf_model:
203	model, tokenizer = load_hf_model(args.hf_path, device)
204	sequence_len = model.max_seq_len or 1024
205	token_bytes = get_hf_token_bytes(tokenizer, device=device)
206	model_name = args.hf_path
207	model_slug = args.hf_path.replace("/", "-")
208	else:
209	model, tokenizer, meta = load_model("base", device, phase="eval", model_tag=args.model_tag, step=args.step)
210	sequence_len = meta["model_config"]["sequence_len"]
211	token_bytes = get_token_bytes(device=device)
212	model_name = f"base_model (step {meta['step']})"
213	model_slug = f"base_model_{meta['step']:06d}"
214
215	print0(f"Evaluating model: {model_name}")
216	print0(f"Eval modes: {', '.join(sorted(eval_modes))}")
217
218	# Results to log
219	core_results = None
220	bpb_results = {}
221	samples = []
222	unconditioned_samples = []
223
224	# --- Sampling ---
225	if 'sample' in eval_modes and not is_hf_model:
226	print0("\n" + "="*80)
227	print0("Model Samples")
228	print0("="*80)
229	if ddp_rank == 0:
230	prompts = [
231	"The capital of France is",
232	"The chemical symbol of gold is",
233	"If yesterday was Friday, then tomorrow will be",
234	"The opposite of hot is",
235	"The planets of the solar system are:",

Callers 1

base_eval.py (File) · 0.85

Calls 15

generate_batch (Method) · 0.95

autodetect_device_type (Function) · 0.90

compute_init (Function) · 0.90

load_model (Function) · 0.90

get_token_bytes (Function) · 0.90

print0 (Function) · 0.90

Engine (Class) · 0.90

tokenizing_distributed_data_loader_bos_bestfit (Function) · 0.90

evaluate_bpb (Function) · 0.90

get_base_dir (Function) · 0.90

compute_cleanup (Function) · 0.90

load_hf_model (Function) · 0.85

Tested by

no test coverage detected