r"""Compute grouped mean aggregation. Examples: >>> import ray >>> ray.data.range(100).groupby("id").mean() # doctest: +SKIP >>> ray.data.from_items([ # doctest: +SKIP ... {"A": i % 3, "B": i, "C": i**2} # doctest: +SKIP ...
(
self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
)
| 514 | |
| 515 | @PublicAPI(api_group=CDS_API_GROUP) |
| 516 | def mean( |
| 517 | self, on: Union[str, List[str]] = None, ignore_nulls: bool = True |
| 518 | ) -> Dataset: |
| 519 | r"""Compute grouped mean aggregation. |
| 520 | |
| 521 | Examples: |
| 522 | >>> import ray |
| 523 | >>> ray.data.range(100).groupby("id").mean() # doctest: +SKIP |
| 524 | >>> (ray.data.from_items([ # doctest: +SKIP |
| 525 | ... {"A": i % 3, "B": i, "C": i**2} # doctest: +SKIP |
| 526 | ... for i in range(100)]) # doctest: +SKIP |
| 527 | ... .groupby("A") # doctest: +SKIP |
| 528 | ... .mean(["B", "C"])) # doctest: +SKIP |
| 529 | |
| 530 | Args: |
| 531 | on: A column name or a list of column names to aggregate. If ``None`` (the default), the mean is computed for each column in the dataset. |
| 532 | ignore_nulls: Whether to ignore null values. If ``True``, null |
| 533 | values will be ignored when computing the mean; if ``False``, |
| 534 | if a null value is encountered, the output will be null. |
| 535 | We consider np.nan, None, and pd.NaT to be null values. |
| 536 | Default is ``True``. |
| 537 | |
| 538 | Returns: |
| 539 | The mean result. |
| 540 | |
| 541 | For different values of ``on``, the return varies: |
| 542 | |
| 543 | - ``on=None``: a dataset containing a groupby key column, |
| 544 | ``"k"``, and a column-wise mean column for each original column |
| 545 | in the dataset. |
| 546 | - ``on=["col_1", ..., "col_n"]``: a dataset of ``n + 1`` |
| 547 | columns where the first column is the groupby key and the second |
| 548 | through ``n + 1`` columns are the results of the aggregations. |
| 549 | |
| 550 | If the groupby key is ``None``, the key column is omitted from the result. |
| 551 | """ |
| 552 | return self._aggregate_on(Mean, on, ignore_nulls=ignore_nulls) |
| 553 | |
| 554 | @PublicAPI(api_group=CDS_API_GROUP) |
| 555 | def std( |
nothing calls this directly
no test coverage detected