MCPcopy
hub / github.com/policy-gradient/GRPO-Zero / evaluate

Function evaluate

train.py:20–51  ·  view source on GitHub ↗
(model, tokenizer, device, dtype, config)

Source from the content-addressed store, hash-verified

18
19
def evaluate(model, tokenizer, device, dtype, config):
    """Return the mean answer reward of ``model`` on the test split.

    Builds the ``"test"`` split of ``CountdownTasksDataset``, generates one
    answer per question with ``rollout`` and averages the
    ``"answer_reward"`` entry of every episode's ``reward_info``.

    Args:
        model: Model handed to ``rollout`` for generation.
        tokenizer: Tokenizer used by both the dataset and ``rollout``.
        device: Device for the DataLoader generator and for ``rollout``.
        dtype: Dtype forwarded to ``rollout``.
        config: Nested mapping; reads ``config["data"]["path"]``,
            ``config["data"]["test_size"]``,
            ``config["training"]["batch_size"]`` and
            ``config["training"]["max_gen_len"]``.

    Returns:
        ``np.mean`` over the collected answer rewards. If the dataloader
        yields no episodes this is ``np.mean([])`` (NaN with a NumPy
        warning).
    """
    test_dataset = CountdownTasksDataset(
        data_path=config["data"]["path"],
        tokenizer=tokenizer,
        split="test",
        test_size=config["data"]["test_size"],
    )
    generator = torch.Generator(device=device)
    # We reduce the batch size by half as we want to
    # generate twice as long trajectories. Clamp to 1: with a training
    # batch size of 1 the halved value would be 0, which DataLoader
    # rejects with a ValueError.
    eval_batch_size = max(1, config["training"]["batch_size"] // 2)
    dataloader = DataLoader(
        test_dataset,
        shuffle=False,
        collate_fn=CountdownTasksDataset.collate_fn,
        generator=generator,
        batch_size=eval_batch_size,
        drop_last=False,
    )
    success = []
    for batch in dataloader:
        episodes = rollout(
            model=model,
            tokenizer=tokenizer,
            batch=batch,
            # Evaluation allows trajectories twice as long as training.
            max_gen_len=config["training"]["max_gen_len"] * 2,
            num_answer_per_question=1,
            reward_function=reward_function,
            device=device,
            dtype=dtype,
        )
        success.extend(
            episode.reward_info["answer_reward"] for episode in episodes
        )
    return np.mean(success)
52
53
54def main(config_path: str):

Callers 1

mainFunction · 0.85

Calls 2

rolloutFunction · 0.90

Tested by

no test coverage detected