(self, in_memory)
| 858 | del dset1, dset2, dset3 |
| 859 | |
def test_concatenate_pickle(self, in_memory):
    """Check that datasets and their concatenation survive pickling.

    Three datasets are created from dicts, moved through ``self._to`` (the
    third one with the opposite ``in_memory`` setting so sources are mixed),
    reordered with ``select``, pickled one by one, concatenated, and pickled
    once more. Some tables are swapped for ``Unpicklable`` beforehand --
    presumably a stand-in that cannot be pickled, so the round trip must not
    serialize those tables (confirm against its definition).

    Args:
        in_memory: test parameter; forwarded to ``self._to`` and
            ``select(keep_in_memory=...)`` and used to pick the expected
            number of cache files.
    """
    ids_a = {"id": [0, 1, 2] * 2}
    ids_b = {"id": [3, 4, 5] * 2}
    ids_c = {"id": [6, 7], "foo": ["bar", "bar"]}
    info_a = DatasetInfo(description="Dataset1")
    info_b = DatasetInfo(description="Dataset2")

    def pickle_roundtrip(obj):
        # Serialize and immediately deserialize the object.
        return pickle.loads(pickle.dumps(obj))

    with tempfile.TemporaryDirectory() as tmp_dir:

        def indices_path(file_name):
            # An indices cache file is only requested when not in memory.
            return None if in_memory else os.path.join(tmp_dir, file_name)

        ds_a = Dataset.from_dict(ids_a, info=info_a)
        ds_b = Dataset.from_dict(ids_b, info=info_b)
        ds_c = Dataset.from_dict(ids_c)
        schema = ds_a.data.schema

        # Mix in-memory and on-disk datasets: the third dataset gets the
        # opposite setting from the first two.
        ds_a, ds_b = self._to(in_memory, tmp_dir, ds_a, ds_b)
        ds_c = self._to(not in_memory, tmp_dir, ds_c)

        # Reverse the first rows of each dataset through an indices mapping.
        ds_a, ds_b, ds_c = (
            ds.select(
                order,
                keep_in_memory=in_memory,
                indices_cache_file_name=indices_path(file_name),
            )
            for ds, order, file_name in (
                (ds_a, [2, 1, 0], "i1.arrow"),
                (ds_b, [2, 1, 0], "i2.arrow"),
                (ds_c, [1, 0], "i3.arrow"),
            )
        )

        ds_c = ds_c.rename_column("foo", "new_foo")
        ds_c = ds_c.remove_columns("new_foo")

        # Replace the table of whichever datasets went through the
        # ``not in_memory`` path of ``self._to`` above.
        if not in_memory:
            ds_a._data.table, ds_b._data.table = Unpicklable(schema=schema), Unpicklable(schema=schema)
        else:
            ds_c._data.table = Unpicklable(schema=schema)

        ds_a, ds_b, ds_c = [pickle_roundtrip(ds) for ds in (ds_a, ds_b, ds_c)]

        with concatenate_datasets([ds_c, ds_b, ds_a]) as merged:
            if not in_memory:
                merged._data.table = Unpicklable(schema=schema)
            with pickle_roundtrip(merged) as merged:
                sizes = (len(ds_a), len(ds_b), len(ds_c))
                self.assertTupleEqual(sizes, (3, 3, 2))
                self.assertEqual(len(merged), sum(sizes))
                self.assertListEqual(merged["id"][:], list(range(7, -1, -1)))
                # in_memory = True: 1 cache file for the third dataset
                # in_memory = False: 2 cache files for the first and second
                # datasets, and 1 cache file for i1.arrow
                expected_cache_files = 1 if in_memory else 3
                self.assertEqual(len(merged.cache_files), expected_cache_files)
                self.assertEqual(merged.info.description, "Dataset2\n\nDataset1")
        # Drop the dataset references while the temporary directory still
        # exists (placement inferred; SOURCE's indentation is lost).
        del ds_a, ds_b, ds_c
| 911 | |
| 912 | def test_repeat(self, in_memory): |
| 913 | with tempfile.TemporaryDirectory() as tmp_dir: |
nothing calls this directly
no test coverage detected