MCPcopy
hub / github.com/huggingface/datasets / test_map

Method test_map

tests/test_arrow_dataset.py:1074–1184  ·  view source on GitHub ↗
(self, in_memory)

Source from the content-addressed store, hash-verified

1072 assert_arrow_metadata_are_synced_with_dataset_features(dset)
1073
1074 def test_map(self, in_memory):
1075 # standard
1076 with tempfile.TemporaryDirectory() as tmp_dir:
1077 with self._create_dummy_dataset(in_memory, tmp_dir) as dset:
1078 self.assertDictEqual(dset.features, Features({"filename": Value("string")}))
1079 fingerprint = dset._fingerprint
1080 with dset.map(
1081 lambda x: {"name": x["filename"][:-2], "id": int(x["filename"].split("_")[-1])}
1082 ) as dset_test:
1083 self.assertEqual(len(dset_test), 30)
1084 self.assertDictEqual(dset.features, Features({"filename": Value("string")}))
1085 self.assertDictEqual(
1086 dset_test.features,
1087 Features({"filename": Value("string"), "name": Value("string"), "id": Value("int64")}),
1088 )
1089 self.assertListEqual(dset_test["id"][:], list(range(30)))
1090 self.assertNotEqual(dset_test._fingerprint, fingerprint)
1091 assert_arrow_metadata_are_synced_with_dataset_features(dset_test)
1092
1093 # no transform
1094 with tempfile.TemporaryDirectory() as tmp_dir:
1095 with self._create_dummy_dataset(in_memory, tmp_dir) as dset:
1096 fingerprint = dset._fingerprint
1097 with dset.map(lambda x: None) as dset_test:
1098 self.assertEqual(len(dset_test), 30)
1099 self.assertEqual(dset_test._fingerprint, fingerprint)
1100 assert_arrow_metadata_are_synced_with_dataset_features(dset_test)
1101
1102 # with indices
1103 with tempfile.TemporaryDirectory() as tmp_dir:
1104 with self._create_dummy_dataset(in_memory, tmp_dir) as dset:
1105 with dset.map(
1106 lambda x, i: {"name": x["filename"][:-2], "id": i}, with_indices=True
1107 ) as dset_test_with_indices:
1108 self.assertEqual(len(dset_test_with_indices), 30)
1109 self.assertDictEqual(dset.features, Features({"filename": Value("string")}))
1110 self.assertDictEqual(
1111 dset_test_with_indices.features,
1112 Features({"filename": Value("string"), "name": Value("string"), "id": Value("int64")}),
1113 )
1114 self.assertListEqual(dset_test_with_indices["id"][:], list(range(30)))
1115 assert_arrow_metadata_are_synced_with_dataset_features(dset_test_with_indices)
1116
1117 # interrupted
1118 with tempfile.TemporaryDirectory() as tmp_dir:
1119 with self._create_dummy_dataset(in_memory, tmp_dir) as dset:
1120
1121 def func(x, i):
1122 if i == 4:
1123 raise KeyboardInterrupt()
1124 return {"name": x["filename"][:-2], "id": i}
1125
1126 tmp_file = os.path.join(tmp_dir, "test.arrow")
1127 self.assertRaises(
1128 KeyboardInterrupt,
1129 dset.map,
1130 function=func,
1131 with_indices=True,

Callers

nothing calls this directly

Calls 8

_create_dummy_dataset (Method) · 0.95
Features (Class) · 0.90
Value (Class) · 0.90
split (Method) · 0.80
exists (Method) · 0.80
map (Method) · 0.45
set_format (Method) · 0.45

Tested by

no test coverage detected