(self, in_memory)
| 936 | del repeated_dset |
| 937 | |
| 938 | def test_flatten(self, in_memory): |
| 939 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 940 | with Dataset.from_dict( |
| 941 | {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10}, |
| 942 | features=Features({"a": {"b": {"c": List(Value("string"))}}, "foo": Value("int64")}), |
| 943 | ) as dset: |
| 944 | with self._to(in_memory, tmp_dir, dset) as dset: |
| 945 | fingerprint = dset._fingerprint |
| 946 | with dset.flatten() as dset: |
| 947 | self.assertListEqual(sorted(dset.column_names), ["a.b.c", "foo"]) |
| 948 | self.assertListEqual(sorted(dset.features.keys()), ["a.b.c", "foo"]) |
| 949 | self.assertDictEqual( |
| 950 | dset.features, Features({"a.b.c": List(Value("string")), "foo": Value("int64")}) |
| 951 | ) |
| 952 | self.assertNotEqual(dset._fingerprint, fingerprint) |
| 953 | assert_arrow_metadata_are_synced_with_dataset_features(dset) |
| 954 | |
| 955 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 956 | with Dataset.from_dict( |
| 957 | {"a": [{"en": "Thank you", "fr": "Merci"}] * 10, "foo": [1] * 10}, |
| 958 | features=Features({"a": Translation(languages=["en", "fr"]), "foo": Value("int64")}), |
| 959 | ) as dset: |
| 960 | with self._to(in_memory, tmp_dir, dset) as dset: |
| 961 | fingerprint = dset._fingerprint |
| 962 | with dset.flatten() as dset: |
| 963 | self.assertListEqual(sorted(dset.column_names), ["a.en", "a.fr", "foo"]) |
| 964 | self.assertListEqual(sorted(dset.features.keys()), ["a.en", "a.fr", "foo"]) |
| 965 | self.assertDictEqual( |
| 966 | dset.features, |
| 967 | Features({"a.en": Value("string"), "a.fr": Value("string"), "foo": Value("int64")}), |
| 968 | ) |
| 969 | self.assertNotEqual(dset._fingerprint, fingerprint) |
| 970 | assert_arrow_metadata_are_synced_with_dataset_features(dset) |
| 971 | |
| 972 | with tempfile.TemporaryDirectory() as tmp_dir: |
| 973 | with Dataset.from_dict( |
| 974 | {"a": [{"en": "the cat", "fr": ["le chat", "la chatte"], "de": "die katze"}] * 10, "foo": [1] * 10}, |
| 975 | features=Features( |
| 976 | { |
| 977 | "a": TranslationVariableLanguages(languages=["en", "fr", "de"]), |
| 978 | "foo": Value("int64"), |
| 979 | } |
| 980 | ), |
| 981 | ) as dset: |
| 982 | with self._to(in_memory, tmp_dir, dset) as dset: |
| 983 | fingerprint = dset._fingerprint |
| 984 | with dset.flatten() as dset: |
| 985 | self.assertListEqual(sorted(dset.column_names), ["a.language", "a.translation", "foo"]) |
| 986 | self.assertListEqual(sorted(dset.features.keys()), ["a.language", "a.translation", "foo"]) |
| 987 | self.assertDictEqual( |
| 988 | dset.features, |
| 989 | Features( |
| 990 | { |
| 991 | "a.language": List(Value("string")), |
| 992 | "a.translation": List(Value("string")), |
| 993 | "foo": Value("int64"), |
| 994 | } |
| 995 | ), |
nothing calls this directly
no test coverage detected