(sample, problems, dataset_type, language_type, generation_mode)
| 41 | |
| 42 | |
def process_test(sample, problems, dataset_type, language_type, generation_mode):
    """Assemble the complete test program source for one generated sample.

    The generated code is concatenated with the task prompt, the task's
    tests and any language-specific import/setup preamble.

    Args:
        sample: Mapping with at least "task_id" and "generation"; for the
            "mbpp" dataset it must also carry "prompt".
        problems: Mapping from task id to the problem record. For
            "humanevalx" the record provides "prompt" and "test" (plus
            "import" and "test_setup" for Go); for "mbpp" it provides
            "test_list", "challenge_test_list" and "test_setup_code".
        dataset_type: Either "humanevalx" or "mbpp".
        language_type: For "humanevalx", one of "python", "cpp", "java",
            "js"/"javascript", "go" or "rust". Not consulted for "mbpp".
        generation_mode: Not used by this function; kept so existing
            callers keep working.

    Returns:
        The test program as a single string.

    Raises:
        ValueError: If ``dataset_type`` or ``language_type`` is not one of
            the supported values (previously this surfaced as an
            UnboundLocalError on the final return).
        KeyError: If ``sample`` or ``problems`` lacks a required key.
    """
    if dataset_type == "humanevalx":
        task_id = sample["task_id"]
        prompt = problems[task_id]["prompt"]
        test = problems[task_id]["test"]
        code = sample["generation"]

        # Pre-process for different languages
        if language_type == "python":
            test_setup = "\n".join(IMPORT_HELPER["python"]) + "\n"
            test_string = test_setup + prompt + code + "\n" + test + "\n"
        elif language_type == "cpp":
            # Only prepend the helper lines the prompt does not already contain.
            test_set_up = "".join(
                s + "\n" for s in IMPORT_HELPER["cpp"] if s not in prompt
            )
            test_string = test_set_up + "\n" + prompt + code + "\n" + test
        elif language_type in ("java", "js", "javascript"):
            test_string = prompt + code + "\n" + test
        elif language_type == "go":
            # The task's own import text is stripped from the prompt; the
            # task-provided test_setup is placed at the top instead.
            import_string = problems[task_id]["import"]
            prompt = prompt.replace(import_string, "")
            test_setup = problems[task_id]["test_setup"]

            # Add helper packages that test_setup does not mention but the
            # generated code appears to use (detected by "<last segment>.").
            other_pkgs = []
            for pkg in IMPORT_HELPER["go"]:
                if pkg in test_setup:
                    continue
                if pkg.split("/")[-1] + "." in code:
                    other_pkgs.append(f'"{pkg}"')

            if other_pkgs:
                import_other_pkgs = (
                    "import (\n" + " ".join([p + "\n" for p in other_pkgs]) + ")"
                )
                test_string = (
                    test_setup + "\n" + import_other_pkgs + "\n"
                    + prompt + code + "\n" + test
                )
            else:
                test_string = test_setup + "\n" + prompt + code + "\n" + test
        elif language_type == "rust":
            # An empty main is placed ahead of the prompt, code and tests.
            main = "\nfn main(){ \n } \n"
            test_string = main + prompt + code + test
        else:
            raise ValueError(
                f"Unsupported language_type for humanevalx: {language_type!r}"
            )
    elif dataset_type == "mbpp":
        task_id = sample["task_id"]
        prompt = sample["prompt"]
        test = (
            "\n".join(problems[task_id]["test_list"])
            + "\n"
            + "\n".join(problems[task_id]["challenge_test_list"])
        )
        code = sample["generation"]
        test_setup = "\n".join(IMPORT_HELPER["python"]) + "\n"
        test_string = (
            test_setup + "\n" + prompt + code + "\n"
            + problems[task_id]["test_setup_code"] + "\n" + test + "\n"
        )
    else:
        raise ValueError(f"Unsupported dataset_type: {dataset_type!r}")

    return test_string
| 92 | |
| 93 | |
| 94 | def evaluate_functional_correctness( |
no outgoing calls
no test coverage detected