()
| 70 | |
| 71 | |
def test_math_data_processor():
    """Check that the math processor exposes each expected answer as ground truth.

    Builds a two-row in-memory dataset, wraps it in ``AllTaskProcessedDataset``
    with ``math_data_processor``, and asserts that each processed row carries
    its ``expected_answer`` under ``extra_env_info["ground_truth"]``.
    """
    records = [
        {"problem": "problem1", "expected_answer": "answer1"},
        {"problem": "problem2", "expected_answer": "answer2"},
    ]
    source_rows = Dataset.from_list(records)

    tokenizer_config = TokenizerConfig(
        name="Qwen/Qwen2.5-Math-1.5B-Instruct",
        chat_template="default",
    )
    tok = get_tokenizer(tokenizer_config)

    # No prompt templates are supplied; the spec only names the task.
    task_spec = TaskDataSpec(
        task_name="math",
        prompt_file=None,
        system_prompt_file=None,
    )

    processed = AllTaskProcessedDataset(
        dataset=source_rows,
        tokenizer=tok,
        default_task_data_spec=task_spec,
        task_data_processors=math_data_processor,
        max_seq_length=128,
    )

    # Rows are checked in input order, mirroring the order of `records`.
    for index, expected in enumerate(("answer1", "answer2")):
        assert processed[index]["extra_env_info"]["ground_truth"] == expected
| 103 | |
| 104 | |
| 105 | @pytest.mark.hf_gated |
nothing calls this directly
no test coverage detected