hub / github.com/evalplus/evalplus / perf_worker

Function perf_worker

evalplus/evalperf.py:140–288 · view source on GitHub ↗

(
    task_id: str,
    ptask: Dict,  # EvalPerf data
    ret_dict: Dict,
    lazy_evaluation: bool,
    max_profile: int,
)

Source retrieved from the content-addressed store and hash-verified.

138
139
140	def perf_worker(
141	task_id: str,
142	ptask: Dict, # EvalPerf data
143	ret_dict: Dict,
144	lazy_evaluation: bool,
145	max_profile: int,
146	):
147	rich.print(f"{task_id}: Started")
148	start_time = time.time()
149
150	######################### Profiling Setup #########################
151	n_reference = len(ptask["reference"])
152	entry_point = ptask["entry_point"]
153	pe_input = (
154	mbpp_deserialize_inputs(task_id, ptask["pe_input"])[0]
155	if task_id.startswith("Mbpp/")
156	else ptask["pe_input"][0]
157	)
158	####################################################################
159
160	####################################################################
161	############### Lazily profile reference solutions #################
162	####################################################################
163	cache_ref_num_inst = [None] * n_reference
164
165	def get_avg_ref_profile(idx, check_order=True) -> Optional[Tuple]:
166	nonlocal cache_ref_num_inst
167
168	assert (
169	idx < n_reference - 1
170	and cache_ref_num_inst[idx + 1] is not None
171	or idx == n_reference - 1
172	), f"Calling get_avg_ref_profile({idx}) before get_avg_ref_profile({idx+1}) is called, is not allowed! {n_reference = }"
173
174	if cache_ref_num_inst[idx] is not None:
175	return cache_ref_num_inst[idx], ptask["scores"][idx]
176
177	evaluation_time = PERF_EVAL_TIMEOUT_SECOND
178	ref_solution = ptask["reference"][idx]
179	for _ in range(2): # at most retry twice
180	profiles = profile(
181	ref_solution,
182	entry_point,
183	[pe_input],
184	timeout_second_per_test=evaluation_time,
185	)
186
187	# Bad thing#1: timeout / failure happens
188	if are_profiles_broken(profiles):
189	print(f"{task_id}: [WARNING] Error in ref: {profiles}")
190	rich.print(Syntax(ref_solution, "python"))
191	print(f"{task_id}: Retrying w/ +10s timeout...")
192	evaluation_time += 10
193	else:
194	break
195
196	avg_profile = mean(profiles)
197	# Bad thing#2: if the current #instruction is faster than that of i+1

Callers

No direct callers of this function were detected.

Calls 6

mbpp_deserialize_inputsFunction · 0.90

profileFunction · 0.90

are_profiles_brokenFunction · 0.90

get_avg_ref_profileFunction · 0.85

not_noneFunction · 0.85

table_printFunction · 0.85

Tested by

no test coverage detected