(
task_id: str,
ptask: Dict, # EvalPerf data
ret_dict: Dict,
lazy_evaluation: bool,
max_profile: int,
)
| 138 | |
| 139 | |
| 140 | def perf_worker( |
| 141 | task_id: str, |
| 142 | ptask: Dict, # EvalPerf data |
| 143 | ret_dict: Dict, |
| 144 | lazy_evaluation: bool, |
| 145 | max_profile: int, |
| 146 | ): |
| 147 | rich.print(f"{task_id}: Started") |
| 148 | start_time = time.time() |
| 149 | |
| 150 | ######################### Profiling Setup ######################### |
| 151 | n_reference = len(ptask["reference"]) |
| 152 | entry_point = ptask["entry_point"] |
| 153 | pe_input = ( |
| 154 | mbpp_deserialize_inputs(task_id, ptask["pe_input"])[0] |
| 155 | if task_id.startswith("Mbpp/") |
| 156 | else ptask["pe_input"][0] |
| 157 | ) |
| 158 | #################################################################### |
| 159 | |
| 160 | #################################################################### |
| 161 | ############### Lazily profile reference solutions ################# |
| 162 | #################################################################### |
| 163 | cache_ref_num_inst = [None] * n_reference |
| 164 | |
| 165 | def get_avg_ref_profile(idx, check_order=True) -> Optional[Tuple]: |
| 166 | nonlocal cache_ref_num_inst |
| 167 | |
| 168 | assert ( |
| 169 | idx < n_reference - 1 |
| 170 | and cache_ref_num_inst[idx + 1] is not None |
| 171 | or idx == n_reference - 1 |
| 172 | ), f"Calling get_avg_ref_profile({idx}) before get_avg_ref_profile({idx+1}) is called, is not allowed! {n_reference = }" |
| 173 | |
| 174 | if cache_ref_num_inst[idx] is not None: |
| 175 | return cache_ref_num_inst[idx], ptask["scores"][idx] |
| 176 | |
| 177 | evaluation_time = PERF_EVAL_TIMEOUT_SECOND |
| 178 | ref_solution = ptask["reference"][idx] |
| 179 | for _ in range(2): # at most retry twice |
| 180 | profiles = profile( |
| 181 | ref_solution, |
| 182 | entry_point, |
| 183 | [pe_input], |
| 184 | timeout_second_per_test=evaluation_time, |
| 185 | ) |
| 186 | |
| 187 | # Bad thing#1: timeout / failure happens |
| 188 | if are_profiles_broken(profiles): |
| 189 | print(f"{task_id}: [WARNING] Error in ref: {profiles}") |
| 190 | rich.print(Syntax(ref_solution, "python")) |
| 191 | print(f"{task_id}: Retrying w/ +10s timeout...") |
| 192 | evaluation_time += 10 |
| 193 | else: |
| 194 | break |
| 195 | |
| 196 | avg_profile = mean(profiles) |
| 197 | # Bad thing#2: if the current #instruction is faster than that of i+1 |
Nothing calls `perf_worker` directly.
No test coverage was detected for `perf_worker`.