r"""Compute grouped sum aggregation. Examples: >>> import ray >>> ray.data.from_items([ # doctest: +SKIP ... (i % 3, i, i**2) # doctest: +SKIP ... for i in range(100)]) # doctest: +SKIP ... .groupby(lambda x: x[0] % 3)
(
self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
)
| 392 | |
@PublicAPI(api_group=CDS_API_GROUP)
def sum(
    self, on: Union[str, List[str], None] = None, ignore_nulls: bool = True
) -> Dataset:
    r"""Compute grouped sum aggregation.

    Examples:
        >>> import ray
        >>> ray.data.from_items([ # doctest: +SKIP
        ...     (i % 3, i, i**2) # doctest: +SKIP
        ...     for i in range(100)]) # doctest: +SKIP
        ...     .groupby(lambda x: x[0] % 3) # doctest: +SKIP
        ...     .sum(lambda x: x[2]) # doctest: +SKIP
        >>> ray.data.range(100).groupby("id").sum() # doctest: +SKIP
        >>> ray.data.from_items([ # doctest: +SKIP
        ...     {"A": i % 3, "B": i, "C": i**2} # doctest: +SKIP
        ...     for i in range(100)]) # doctest: +SKIP
        ...     .groupby("A") # doctest: +SKIP
        ...     .sum(["B", "C"]) # doctest: +SKIP

    Args:
        on: A column name or a list of column names to aggregate.
            Defaults to ``None``; see "Returns" for how the result
            differs in that case.
        ignore_nulls: Whether to ignore null values. If ``True``, null
            values will be ignored when computing the sum; if ``False``,
            the output will be null whenever a null value is encountered.
            We consider np.nan, None, and pd.NaT to be null values.
            Default is ``True``.

    Returns:
        The sum result.

        For different values of ``on``, the return varies:

        - ``on=None``: a dataset containing a groupby key column,
          ``"k"``, and a column-wise sum column for each original column
          in the dataset.
        - ``on=["col_1", ..., "col_n"]``: a dataset of ``n + 1``
          columns where the first column is the groupby key and the second
          through ``n + 1`` columns are the results of the aggregations.

        If the groupby key is ``None``, the key part of the return value
        is omitted.
    """
    # All of the work happens in the shared ``_aggregate_on`` helper; this
    # method only selects the ``Sum`` aggregation and forwards the options.
    return self._aggregate_on(Sum, on, ignore_nulls=ignore_nulls)
| 436 | |
| 437 | @PublicAPI(api_group=CDS_API_GROUP) |
| 438 | def min( |
nothing calls this directly
no test coverage detected