MCPcopy Index your code
hub / github.com/zai-org/CodeGeeX2 / read_translation_dataset

Function read_translation_dataset

evaluation/utils.py:257–287  ·  view source on GitHub ↗
(
    data_file_src: str = None,
    data_file_tgt: str = None,
    lang_src: str = None,
    lang_tgt: str = None,
    dataset_type: str = "humanevalx",
)

Source from the content-addressed store, hash-verified

255
256
def read_translation_dataset(
    data_file_src: str = None,
    data_file_tgt: str = None,
    lang_src: str = None,
    lang_tgt: str = None,
    dataset_type: str = "humanevalx",
) -> Dict:
    """Load a source-language dataset and attach a code-translation prompt.

    Every record read from ``data_file_src`` gets a new ``"prompt"`` entry
    laid out as::

        code translation
        <source language header>
        <source declaration>
        <source canonical solution, trailing whitespace stripped>
        <target language header>
        <target declaration>

    Source and target records are paired by the last ``/``-separated
    component of their ``"task_id"``.

    Args:
        data_file_src: Path passed to ``stream_jsonl`` for the source
            records; each needs ``"task_id"``, ``"declaration"`` and
            ``"canonical_solution"``.
        data_file_tgt: Path passed to ``stream_jsonl`` for the target
            records; each needs ``"task_id"`` and ``"declaration"``.
        lang_src: Source language tag (``"cpp"`` and ``"js"`` get special
            display names; anything else is capitalized as-is).
        lang_tgt: Target language tag, same handling as ``lang_src``.
        dataset_type: Dataset family; must contain ``"humanevalx"``
            (case-insensitive).

    Returns:
        The source records keyed by their full ``"task_id"``, each with the
        ``"prompt"`` entry added.

    Raises:
        ValueError: If ``dataset_type`` is not a supported dataset.
        KeyError: If a source task has no matching target task.
    """

    def language_header(lang):
        # "cpp" and "js" are abbreviations whose display names cannot be
        # produced by simple capitalization.
        if lang == "cpp":
            return "C++:\n"
        if lang == "js":
            return "JavaScript:\n"
        return f"{lang}:\n".capitalize()

    if "humanevalx" not in dataset_type.lower():
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so the message actually reaches the caller.
        raise ValueError(f"Dataset: {dataset_type} not supported.")

    dataset_src = {task["task_id"]: task for task in stream_jsonl(data_file_src)}
    # Target tasks are keyed by the trailing id only, so that ids which differ
    # in their prefix still line up with the source tasks.
    dataset_tgt = {
        task["task_id"].split("/")[-1]: task for task in stream_jsonl(data_file_tgt)
    }

    # The headers depend only on the language arguments: build them once.
    header_src = language_header(lang_src)
    header_tgt = language_header(lang_tgt)

    for task_id, sample in dataset_src.items():
        target = dataset_tgt[task_id.split("/")[-1]]
        sample["prompt"] = (
            "code translation\n"
            + header_src
            + sample["declaration"]
            + "\n"
            + sample["canonical_solution"].rstrip()
            + "\n"
            + header_tgt
            + target["declaration"]
        )

    return dataset_src
288
289
290def process_extra_prompt(

Callers

nothing calls this directly

Calls 1

stream_jsonlFunction · 0.85

Tested by

no test coverage detected