hub / github.com/evalplus/evalplus / run_codegen

Function run_codegen

evalplus/codegen.py:127–232 · view source on GitHub ↗

(
    model: str,
    dataset: str,
    root: str = "evalplus_results",
    bs: Optional[int] = None,
    n_samples: int = 1,
    temperature: float = 0.0,
    resume: bool = True,
    greedy: bool = False,
    id_range: List = None,
    version: str = "default",
    backend: str = "vllm",
    force_base_prompt: bool = False,
    base_url: str = None,
    tp: int = 1,
    evalperf_type: str = None,  # For EvalPerf
    jsonl_fmt: bool = True,
    attn_implementation: str = "eager",
    trust_remote_code: bool = False,
    dtype: str = "bfloat16",
)

Source from the content-addressed store, hash-verified

125
126
127	def run_codegen(
128	model: str,
129	dataset: str,
130	root: str = "evalplus_results",
131	bs: Optional[int] = None,
132	n_samples: int = 1,
133	temperature: float = 0.0,
134	resume: bool = True,
135	greedy: bool = False,
136	id_range: List = None,
137	version: str = "default",
138	backend: str = "vllm",
139	force_base_prompt: bool = False,
140	base_url: str = None,
141	tp: int = 1,
142	evalperf_type: str = None, # For EvalPerf
143	jsonl_fmt: bool = True,
144	attn_implementation: str = "eager",
145	trust_remote_code: bool = False,
146	dtype: str = "bfloat16",
147	):
148	assert dataset in ["humaneval", "mbpp", "evalperf"], f"Invalid dataset {dataset}"
149	assert evalperf_type is None or evalperf_type in [
150	"instruct",
151	"perf-instruct",
152	"perf-CoT",
153	]
154
155	if greedy and (temperature != 0 or bs != 1 or n_samples != 1):
156	temperature = 0.0
157	bs = 1
158	n_samples = 1
159	print("Greedy decoding ON (--greedy): setting bs=1, n_samples=1, temperature=0")
160
161	if id_range is not None:
162	assert len(id_range) == 2, "id_range must be a list of length 2"
163	assert id_range[0] < id_range[1], "id_range must be increasing"
164	id_range = tuple(id_range)
165
166	if bs is None:
167	bs = min(n_samples, 32)
168	print(f"Setting batch size to {bs}")
169
170	# Make project dir
171	os.makedirs(root, exist_ok=True)
172	# Make dataset dir
173	os.makedirs(os.path.join(root, dataset), exist_ok=True)
174
175	# Model instructions
176	instruction_prefix = "Please provide a self-contained Python script that solves the following problem in a markdown code block:"
177	response_prefix = "Below is a Python script with a self-contained function that solves the problem and passes corresponding tests:"
178
179	if evalperf_type == "perf-instruct":
180	instruction_prefix = "Please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:"
181	response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:"
182	elif evalperf_type == "perf-CoT":
183	instruction_prefix = "Think step by step: please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:"
184	response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:"

Callers 2

script · Function · 0.90

evaluate · Function · 0.90

Calls 2

make_model · Function · 0.90

codegen · Function · 0.85

Tested by

no test coverage detected