(
target_path: str,
model: DecoderBase,
dataset: str,
greedy=False,
n_samples=1,
id_range=None,
version="default",
resume=True,
)
| 9 | |
| 10 | |
| 11 | def codegen( |
| 12 | target_path: str, |
| 13 | model: DecoderBase, |
| 14 | dataset: str, |
| 15 | greedy=False, |
| 16 | n_samples=1, |
| 17 | id_range=None, |
| 18 | version="default", |
| 19 | resume=True, |
| 20 | ): |
| 21 | task2nexist = {} |
| 22 | if resume and target_path.endswith(".jsonl") and os.path.isfile(target_path): |
| 23 | with open(target_path, "r") as f: |
| 24 | for line in f: |
| 25 | if not line.strip(): |
| 26 | continue |
| 27 | task_id = json.loads(line)["task_id"] |
| 28 | task2nexist[task_id] = task2nexist.get(task_id, 0) + 1 |
| 29 | |
| 30 | if target_path.endswith(".jsonl"): |
| 31 | raw_target_path = target_path.replace(".jsonl", ".raw.jsonl") |
| 32 | else: |
| 33 | raw_target_path = target_path + ".raw" |
| 34 | os.makedirs(target_path, exist_ok=True) |
| 35 | |
| 36 | print(f"Sanitized code outputs will be saved to {target_path}") |
| 37 | print(f"Raw outputs will be saved to {raw_target_path}") |
| 38 | |
| 39 | with progress(dataset) as p: |
| 40 | if dataset == "humaneval": |
| 41 | dataset = get_human_eval_plus(version=version) |
| 42 | elif dataset == "mbpp": |
| 43 | dataset = get_mbpp_plus(version=version) |
| 44 | elif dataset == "evalperf": |
| 45 | original_dataset = {**get_human_eval_plus(), **get_mbpp_plus()} |
| 46 | dataset = {k: original_dataset[k] for k in get_evalperf_data()} |
| 47 | assert id_range is None, "id_range not supported for evalperf" |
| 48 | else: |
| 49 | raise ValueError(f"Invalid dataset {dataset}") |
| 50 | |
| 51 | for task_id, task in p.track(dataset.items()): |
| 52 | if id_range is not None: |
| 53 | id_num = int(task_id.split("/")[1]) |
| 54 | low, high = id_range |
| 55 | if id_num < low or id_num >= high: |
| 56 | p.console.print(f"Skipping {task_id} as it is not in {id_range}") |
| 57 | continue |
| 58 | |
| 59 | if not target_path.endswith(".jsonl"): |
| 60 | p_name = task_id.replace("/", "_") |
| 61 | os.makedirs(os.path.join(target_path, p_name), exist_ok=True) |
| 62 | task2nexist[task_id] = len( |
| 63 | [ |
| 64 | f |
| 65 | for f in os.listdir(os.path.join(target_path, p_name)) |
| 66 | if f.endswith(".py") |
| 67 | ] |
| 68 | ) |
no test coverage detected