(self)
| 818 | class TestToDaskDataFrame: |
@pytest.mark.xfail(reason="https://github.com/dask/dask/issues/11584")
def test_to_dask_dataframe(self):
    """Check that a 1-D Dataset converts to the expected dask DataFrame.

    Two conversions are exercised: ``set_index=True``, compared against a
    frame indexed by "t", and ``set_index=False``, compared against the
    same frame with "t" reset into an ordinary column.
    """
    floats = np.random.randn(10)
    small_ints = np.arange(10, dtype="uint8")
    labels = list("abcdefghij")

    # Only variable "a" is wrapped with da.from_array (chunks of 4);
    # "b" and the "t" coordinate are passed in as plain in-memory values.
    chunked_floats = da.from_array(floats, chunks=4)
    ds = Dataset(
        {
            "a": ("t", chunked_floats),
            "b": ("t", small_ints),
            "t": ("t", labels),
        }
    )

    label_index = pd.Index(labels, name="t")
    expected_pd = pd.DataFrame({"a": floats, "b": small_ints}, index=label_index)

    # Case 1: the "t" dimension is used as the DataFrame index.
    expected_indexed = dd.from_pandas(expected_pd, chunksize=4)
    indexed = ds.to_dask_dataframe(set_index=True)
    # The result must be a dask DataFrame.
    assert isinstance(indexed, dd.DataFrame)
    # Compare the computed frames with assert_frame_equal.
    assert_frame_equal(indexed.compute(), expected_indexed.compute())

    # Case 2: no index is set, so "t" is expected as a regular column.
    flat_pd = expected_pd.reset_index(drop=False)
    expected_flat = dd.from_pandas(flat_pd, chunksize=4)
    flat = ds.to_dask_dataframe(set_index=False)
    assert isinstance(flat, dd.DataFrame)
    assert_frame_equal(flat.compute(), expected_flat.compute())
| 848 | |
| 849 | @pytest.mark.xfail( |
| 850 | reason="Currently pandas with pyarrow installed will return a `string[pyarrow]` type, " |
nothing calls this directly
no test coverage detected