(
model: str,
dataset: str,
root: str = "evalplus_results",
bs: Optional[int] = None,
n_samples: int = 1,
temperature: float = 0.0,
resume: bool = True,
greedy: bool = False,
id_range: List = None,
version: str = "default",
backend: str = "vllm",
force_base_prompt: bool = False,
base_url: str = None,
tp: int = 1,
evalperf_type: str = None, # For EvalPerf
jsonl_fmt: bool = True,
attn_implementation: str = "eager",
trust_remote_code: bool = False,
dtype: str = "bfloat16",
)
| 125 | |
| 126 | |
| 127 | def run_codegen( |
| 128 | model: str, |
| 129 | dataset: str, |
| 130 | root: str = "evalplus_results", |
| 131 | bs: Optional[int] = None, |
| 132 | n_samples: int = 1, |
| 133 | temperature: float = 0.0, |
| 134 | resume: bool = True, |
| 135 | greedy: bool = False, |
| 136 | id_range: List = None, |
| 137 | version: str = "default", |
| 138 | backend: str = "vllm", |
| 139 | force_base_prompt: bool = False, |
| 140 | base_url: str = None, |
| 141 | tp: int = 1, |
| 142 | evalperf_type: str = None, # For EvalPerf |
| 143 | jsonl_fmt: bool = True, |
| 144 | attn_implementation: str = "eager", |
| 145 | trust_remote_code: bool = False, |
| 146 | dtype: str = "bfloat16", |
| 147 | ): |
| 148 | assert dataset in ["humaneval", "mbpp", "evalperf"], f"Invalid dataset {dataset}" |
| 149 | assert evalperf_type is None or evalperf_type in [ |
| 150 | "instruct", |
| 151 | "perf-instruct", |
| 152 | "perf-CoT", |
| 153 | ] |
| 154 | |
| 155 | if greedy and (temperature != 0 or bs != 1 or n_samples != 1): |
| 156 | temperature = 0.0 |
| 157 | bs = 1 |
| 158 | n_samples = 1 |
| 159 | print("Greedy decoding ON (--greedy): setting bs=1, n_samples=1, temperature=0") |
| 160 | |
| 161 | if id_range is not None: |
| 162 | assert len(id_range) == 2, "id_range must be a list of length 2" |
| 163 | assert id_range[0] < id_range[1], "id_range must be increasing" |
| 164 | id_range = tuple(id_range) |
| 165 | |
| 166 | if bs is None: |
| 167 | bs = min(n_samples, 32) |
| 168 | print(f"Setting batch size to {bs}") |
| 169 | |
| 170 | # Make project dir |
| 171 | os.makedirs(root, exist_ok=True) |
| 172 | # Make dataset dir |
| 173 | os.makedirs(os.path.join(root, dataset), exist_ok=True) |
| 174 | |
| 175 | # Model instructions |
| 176 | instruction_prefix = "Please provide a self-contained Python script that solves the following problem in a markdown code block:" |
| 177 | response_prefix = "Below is a Python script with a self-contained function that solves the problem and passes corresponding tests:" |
| 178 | |
| 179 | if evalperf_type == "perf-instruct": |
| 180 | instruction_prefix = "Please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:" |
| 181 | response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:" |
| 182 | elif evalperf_type == "perf-CoT": |
| 183 | instruction_prefix = "Think step by step: please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:" |
| 184 | response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:" |
no test coverage detected