(self, in_memory)
| 1072 | assert_arrow_metadata_are_synced_with_dataset_features(dset) |
| 1073 | |
| 1074 | def test_map(self, in_memory): |
| 1075 | # standard |
| 1076 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 1077 | with self._create_dummy_dataset(in_memory, tmp_dir) as dset: |
| 1078 | self.assertDictEqual(dset.features, Features({"filename": Value("string")})) |
| 1079 | fingerprint = dset._fingerprint |
| 1080 | with dset.map( |
| 1081 | lambda x: {"name": x["filename"][:-2], "id": int(x["filename"].split("_")[-1])} |
| 1082 | ) as dset_test: |
| 1083 | self.assertEqual(len(dset_test), 30) |
| 1084 | self.assertDictEqual(dset.features, Features({"filename": Value("string")})) |
| 1085 | self.assertDictEqual( |
| 1086 | dset_test.features, |
| 1087 | Features({"filename": Value("string"), "name": Value("string"), "id": Value("int64")}), |
| 1088 | ) |
| 1089 | self.assertListEqual(dset_test["id"][:], list(range(30))) |
| 1090 | self.assertNotEqual(dset_test._fingerprint, fingerprint) |
| 1091 | assert_arrow_metadata_are_synced_with_dataset_features(dset_test) |
| 1092 | |
| 1093 | # no transform |
| 1094 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 1095 | with self._create_dummy_dataset(in_memory, tmp_dir) as dset: |
| 1096 | fingerprint = dset._fingerprint |
| 1097 | with dset.map(lambda x: None) as dset_test: |
| 1098 | self.assertEqual(len(dset_test), 30) |
| 1099 | self.assertEqual(dset_test._fingerprint, fingerprint) |
| 1100 | assert_arrow_metadata_are_synced_with_dataset_features(dset_test) |
| 1101 | |
| 1102 | # with indices |
| 1103 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 1104 | with self._create_dummy_dataset(in_memory, tmp_dir) as dset: |
| 1105 | with dset.map( |
| 1106 | lambda x, i: {"name": x["filename"][:-2], "id": i}, with_indices=True |
| 1107 | ) as dset_test_with_indices: |
| 1108 | self.assertEqual(len(dset_test_with_indices), 30) |
| 1109 | self.assertDictEqual(dset.features, Features({"filename": Value("string")})) |
| 1110 | self.assertDictEqual( |
| 1111 | dset_test_with_indices.features, |
| 1112 | Features({"filename": Value("string"), "name": Value("string"), "id": Value("int64")}), |
| 1113 | ) |
| 1114 | self.assertListEqual(dset_test_with_indices["id"][:], list(range(30))) |
| 1115 | assert_arrow_metadata_are_synced_with_dataset_features(dset_test_with_indices) |
| 1116 | |
| 1117 | # interrupted |
| 1118 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 1119 | with self._create_dummy_dataset(in_memory, tmp_dir) as dset: |
| 1120 | |
| 1121 | def func(x, i): |
| 1122 | if i == 4: |
| 1123 | raise KeyboardInterrupt() |
| 1124 | return {"name": x["filename"][:-2], "id": i} |
| 1125 | |
| 1126 | tmp_file = os.path.join(tmp_dir, "test.arrow") |
| 1127 | self.assertRaises( |
| 1128 | KeyboardInterrupt, |
| 1129 | dset.map, |
| 1130 | function=func, |
| 1131 | with_indices=True, |
nothing calls this directly
no test coverage detected