MCPcopy Index your code
hub / github.com/evalplus/evalplus / script

Function script

evalplus/syncheck.py:24–108  ·  view source on GitHub ↗
(
    samples: str, dataset: str, nsample_check: int = None, verbose: bool = False
)

Source from the content-addressed store, hash-verified

22
23
24def script(
25 samples: str, dataset: str, nsample_check: int = None, verbose: bool = False
26):
27 # List[Dict{"task_id", "solution"}]
28 solutions = load_solutions(samples)
29
30 if dataset == "humaneval":
31 from evalplus.data import get_human_eval_plus
32
33 dataset = get_human_eval_plus()
34 dataset_name = "HumanEval"
35 elif dataset == "mbpp":
36 from evalplus.data import get_mbpp_plus
37
38 dataset = get_mbpp_plus()
39 dataset_name = "Mbpp"
40
41 print(colored(f"Dataset: {dataset_name}", "blue"))
42
43 id2solutions = {}
44 for solution in solutions:
45 task_id = solution["task_id"]
46 if task_id not in id2solutions:
47 id2solutions[task_id] = []
48 if "solution" not in solution:
49 assert "completion" in solution, "solution or completion must exist!"
50 solution["solution"] = dataset[task_id]["prompt"] + solution["completion"]
51 id2solutions[task_id].append(solution)
52
53 print(colored("==============================", "blue"))
54 print(colored(" ::: Checking completeness... ", "blue"))
55 print(colored(" ::::: All tasks complete? ", "blue"))
56 ndone = 0
57
58 task_ids = dataset.keys()
59 ntask = len(task_ids)
60 for task_id in task_ids:
61 if task_id not in id2solutions:
62 print(colored(f" ⚠️ {task_id} is missing!", "red"))
63 continue
64 nfiles = len(id2solutions[task_id])
65
66 if nsample_check is None or nfiles <= nsample_check:
67 ndone += 1
68 continue
69
70 print(
71 colored(
72 f" ⚠️ {task_id} only has {nfiles} samples! But {nsample_check} are expected.",
73 "red",
74 )
75 )
76
77 # check if there is enough number of samples here.
78 if nsample_check is not None:
79 if ntask != ndone:
80 ntbd = ntask - ndone
81 print(colored(f" ::::: ⚠️ {ntbd}/{ntask} tasks incomplete!", "red"))

Callers

nothing calls this directly

Calls 4

load_solutions · Function · 0.90
get_human_eval_plus · Function · 0.90
get_mbpp_plus · Function · 0.90
syntax_check · Function · 0.85

Tested by

no test coverage detected