Check that the dask metadata matches the result. If metadata matches, ``x`` is passed through unchanged. A nice error is raised if metadata doesn't match. Parameters ---------- x : DataFrame, Series, or Index meta : DataFrame, Series, or Index The expected metadata that ``x`` should match
(x, meta, funcname=None, numeric_equal=True)
| 312 | |
| 313 | |
| 314 | def check_meta(x, meta, funcname=None, numeric_equal=True): |
| 315 | """Check that the dask metadata matches the result. |
| 316 | |
| 317 | If metadata matches, ``x`` is passed through unchanged. A nice error is |
| 318 | raised if metadata doesn't match. |
| 319 | |
| 320 | Parameters |
| 321 | ---------- |
| 322 | x : DataFrame, Series, or Index |
| 323 | meta : DataFrame, Series, or Index |
| 324 | The expected metadata that ``x`` should match |
| 325 | funcname : str, optional |
| 326 | The name of the function in which the metadata was specified. If |
| 327 | provided, the function name will be included in the error message to be |
| 328 | more helpful to users. |
| 329 | numeric_equal : bool, optional |
| 330 | If True, integer and floating dtypes compare equal. This is useful due |
| 331 | to pandas' implicit conversion of integer to floating upon encountering |
| 332 | missingness, which is hard to infer statically. |
| 333 | """ |
| 334 | eq_types = {"i", "f", "u"} if numeric_equal else set() |
| 335 | |
| 336 | def equal_dtypes(a, b): |
| 337 | if isinstance(a, pd.CategoricalDtype) != isinstance(b, pd.CategoricalDtype): |
| 338 | return False |
| 339 | if isinstance(a, str) and a == "-" or isinstance(b, str) and b == "-": |
| 340 | return False |
| 341 | if isinstance(a, pd.CategoricalDtype) and isinstance(b, pd.CategoricalDtype): |
| 342 | if UNKNOWN_CATEGORIES in a.categories or UNKNOWN_CATEGORIES in b.categories: |
| 343 | return True |
| 344 | return a == b |
| 345 | return (a.kind in eq_types and b.kind in eq_types) or is_dtype_equal(a, b) |
| 346 | |
| 347 | if not ( |
| 348 | is_dataframe_like(meta) or is_series_like(meta) or is_index_like(meta) |
| 349 | ) or is_dask_collection(meta): |
| 350 | raise TypeError( |
| 351 | "Expected partition to be DataFrame, Series, or " |
| 352 | f"Index, got `{typename(type(meta))}`" |
| 353 | ) |
| 354 | |
| 355 | # Notice, we use .__class__ as opposed to type() in order to support |
| 356 | # object proxies see <https://github.com/dask/dask/pull/6981> |
| 357 | if x.__class__ != meta.__class__: |
| 358 | errmsg = f"Expected partition of type `{typename(type(meta))}` but got `{typename(type(x))}`" |
| 359 | elif is_dataframe_like(meta): |
| 360 | dtypes = pd.concat([x.dtypes, meta.dtypes], axis=1, sort=True) |
| 361 | bad_dtypes = [ |
| 362 | (repr(col), a, b) |
| 363 | for col, a, b in dtypes.fillna("-").itertuples() |
| 364 | if not equal_dtypes(a, b) |
| 365 | ] |
| 366 | if bad_dtypes: |
| 367 | errmsg = "Partition type: `{}`\n{}".format( |
| 368 | typename(type(meta)), |
| 369 | asciitable(["Column", "Found", "Expected"], bad_dtypes), |
| 370 | ) |
| 371 | else: |
searching dependent graphs…