MCPcopy
hub / github.com/huggingface/datasets / test_flatten

Method test_flatten

tests/test_arrow_dataset.py:938–998  ·  view source on GitHub ↗
(self, in_memory)

Source from the content-addressed store, hash-verified

936 del repeated_dset
937
938 def test_flatten(self, in_memory):
939 with tempfile.TemporaryDirectory() as tmp_dir:
940 with Dataset.from_dict(
941 {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10},
942 features=Features({"a": {"b": {"c": List(Value("string"))}}, "foo": Value("int64")}),
943 ) as dset:
944 with self._to(in_memory, tmp_dir, dset) as dset:
945 fingerprint = dset._fingerprint
946 with dset.flatten() as dset:
947 self.assertListEqual(sorted(dset.column_names), ["a.b.c", "foo"])
948 self.assertListEqual(sorted(dset.features.keys()), ["a.b.c", "foo"])
949 self.assertDictEqual(
950 dset.features, Features({"a.b.c": List(Value("string")), "foo": Value("int64")})
951 )
952 self.assertNotEqual(dset._fingerprint, fingerprint)
953 assert_arrow_metadata_are_synced_with_dataset_features(dset)
954
955 with tempfile.TemporaryDirectory() as tmp_dir:
956 with Dataset.from_dict(
957 {"a": [{"en": "Thank you", "fr": "Merci"}] * 10, "foo": [1] * 10},
958 features=Features({"a": Translation(languages=["en", "fr"]), "foo": Value("int64")}),
959 ) as dset:
960 with self._to(in_memory, tmp_dir, dset) as dset:
961 fingerprint = dset._fingerprint
962 with dset.flatten() as dset:
963 self.assertListEqual(sorted(dset.column_names), ["a.en", "a.fr", "foo"])
964 self.assertListEqual(sorted(dset.features.keys()), ["a.en", "a.fr", "foo"])
965 self.assertDictEqual(
966 dset.features,
967 Features({"a.en": Value("string"), "a.fr": Value("string"), "foo": Value("int64")}),
968 )
969 self.assertNotEqual(dset._fingerprint, fingerprint)
970 assert_arrow_metadata_are_synced_with_dataset_features(dset)
971
972 with tempfile.TemporaryDirectory() as tmp_dir:
973 with Dataset.from_dict(
974 {"a": [{"en": "the cat", "fr": ["le chat", "la chatte"], "de": "die katze"}] * 10, "foo": [1] * 10},
975 features=Features(
976 {
977 "a": TranslationVariableLanguages(languages=["en", "fr", "de"]),
978 "foo": Value("int64"),
979 }
980 ),
981 ) as dset:
982 with self._to(in_memory, tmp_dir, dset) as dset:
983 fingerprint = dset._fingerprint
984 with dset.flatten() as dset:
985 self.assertListEqual(sorted(dset.column_names), ["a.language", "a.translation", "foo"])
986 self.assertListEqual(sorted(dset.features.keys()), ["a.language", "a.translation", "foo"])
987 self.assertDictEqual(
988 dset.features,
989 Features(
990 {
991 "a.language": List(Value("string")),
992 "a.translation": List(Value("string")),
993 "foo": Value("int64"),
994 }
995 ),

Callers

nothing calls this directly

Calls 9

_to (Method) · 0.95
Features (Class) · 0.90
List (Class) · 0.90
Value (Class) · 0.90
Translation (Class) · 0.90
from_dict (Method) · 0.45
flatten (Method) · 0.45

Tested by

no test coverage detected