(
data_file_src: str = None,
data_file_tgt: str = None,
lang_src: str = None,
lang_tgt: str = None,
dataset_type: str = "humanevalx",
)
| 255 | |
| 256 | |
| 257 | def read_translation_dataset( |
| 258 | data_file_src: str = None, |
| 259 | data_file_tgt: str = None, |
| 260 | lang_src: str = None, |
| 261 | lang_tgt: str = None, |
| 262 | dataset_type: str = "humanevalx", |
| 263 | ) -> Dict: |
| 264 | if "humanevalx" in dataset_type.lower(): |
| 265 | dataset_src = {task["task_id"]: task for task in stream_jsonl(data_file_src)} |
| 266 | dataset_tgt = {task["task_id"].split("/")[-1]: task for task in stream_jsonl(data_file_tgt)} |
| 267 | for k, sample in dataset_src.items(): |
| 268 | prompt = "code translation\n" |
| 269 | if lang_src == "cpp": |
| 270 | prompt += "C++:\n" |
| 271 | elif lang_src == "js": |
| 272 | prompt += "JavaScript:\n" |
| 273 | else: |
| 274 | prompt += f"{lang_src}:\n".capitalize() |
| 275 | prompt += dataset_src[k]["declaration"] + "\n" + dataset_src[k]["canonical_solution"].rstrip() + "\n" |
| 276 | if lang_tgt == "cpp": |
| 277 | prompt += "C++:\n" |
| 278 | elif lang_tgt == "js": |
| 279 | prompt += "JavaScript:\n" |
| 280 | else: |
| 281 | prompt += f"{lang_tgt}:\n".capitalize() |
| 282 | prompt += dataset_tgt[k.split("/")[-1]]["declaration"] |
| 283 | dataset_src[k]["prompt"] = prompt |
| 284 | else: |
| 285 | raise f"Dataset: {dataset_type} not supported." |
| 286 | |
| 287 | return dataset_src |
| 288 | |
| 289 | |
| 290 | def process_extra_prompt( |
nothing calls this directly
no test coverage detected