(self)
| 5280 | assert_identical(expected, actual) |
| 5281 | |
| 5282 | def test_to_and_from_dataframe(self) -> None: |
| 5283 | x = np.random.randn(10) |
| 5284 | y = np.random.randn(10) |
| 5285 | t = list("abcdefghij") |
| 5286 | cat = pd.Categorical(["a", "b"] * 5) |
| 5287 | ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t), "cat": ("t", cat)}) |
| 5288 | expected = pd.DataFrame( |
| 5289 | np.array([x, y]).T, columns=["a", "b"], index=pd.Index(t, name="t") |
| 5290 | ) |
| 5291 | expected["cat"] = cat |
| 5292 | actual = ds.to_dataframe() |
| 5293 | # use the .equals method to check all DataFrame metadata |
| 5294 | assert expected.equals(actual), (expected, actual) |
| 5295 | |
| 5296 | # verify coords are included |
| 5297 | actual = ds.set_coords("b").to_dataframe() |
| 5298 | assert expected.equals(actual), (expected, actual) |
| 5299 | |
| 5300 | # check roundtrip |
| 5301 | assert_identical(ds, Dataset.from_dataframe(actual)) |
| 5302 | assert isinstance(ds["cat"].variable.data.dtype, pd.CategoricalDtype) |
| 5303 | # test a case with a MultiIndex |
| 5304 | w = np.random.randn(2, 3) |
| 5305 | cat = pd.Categorical(["a", "a", "c"]) |
| 5306 | ds = Dataset({"w": (("x", "y"), w), "cat": ("y", cat)}) |
| 5307 | ds["y"] = ("y", list("abc")) |
| 5308 | exp_index = pd.MultiIndex.from_arrays( |
| 5309 | [[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"] |
| 5310 | ) |
| 5311 | expected = pd.DataFrame( |
| 5312 | {"w": w.reshape(-1), "cat": pd.Categorical(["a", "a", "c", "a", "a", "c"])}, |
| 5313 | index=exp_index, |
| 5314 | ) |
| 5315 | actual = ds.to_dataframe() |
| 5316 | assert expected.equals(actual) |
| 5317 | |
| 5318 | # check roundtrip |
| 5319 | # from_dataframe cannot tell that "cat" depends only on "y", so it broadcasts it across both "x" and "y"; make "cat" 2-D here so the roundtrip comparison matches |
| 5320 | ds["cat"] = (("x", "y"), np.stack((ds["cat"].to_numpy(), ds["cat"].to_numpy()))) |
| 5321 | assert_identical(ds.assign_coords(x=[0, 1]), Dataset.from_dataframe(actual)) |
| 5322 | # Check multiindex reordering |
| 5323 | new_order = ["x", "y"] |
| 5324 | # revert broadcasting fix above for 1d arrays |
| 5325 | ds["cat"] = ("y", cat) |
| 5326 | actual = ds.to_dataframe(dim_order=new_order) |
| 5327 | assert expected.equals(actual) |
| 5328 | |
| 5329 | new_order = ["y", "x"] |
| 5330 | exp_index = pd.MultiIndex.from_arrays( |
| 5331 | [["a", "a", "b", "b", "c", "c"], [0, 1, 0, 1, 0, 1]], names=["y", "x"] |
| 5332 | ) |
| 5333 | expected = pd.DataFrame( |
| 5334 | { |
| 5335 | "w": w.transpose().reshape(-1), |
| 5336 | "cat": pd.Categorical(["a", "a", "a", "a", "c", "c"]), |
| 5337 | }, |
| 5338 | index=exp_index, |
| 5339 | ) |
nothing calls this directly
no test coverage detected