MCPcopy
hub / github.com/ray-project/ray / std

Method std

python/ray/data/grouped_data.py:555–604  ·  view source on GitHub ↗

r"""Compute grouped standard deviation aggregation. Examples: >>> import ray >>> ray.data.range(100).groupby("id").std(ddof=0) # doctest: +SKIP >>> ray.data.from_items([ # doctest: +SKIP ... {"A": i % 3, "B": i, "C": i**2} # doctest: +SKIP

(
        self,
        on: Union[str, List[str]] = None,
        ddof: int = 1,
        ignore_nulls: bool = True,
    )

Source from the content-addressed store, hash-verified

553
@PublicAPI(api_group=CDS_API_GROUP)
def std(
    self,
    on: Union[str, List[str], None] = None,
    ddof: int = 1,
    ignore_nulls: bool = True,
) -> Dataset:
    r"""Compute grouped standard deviation aggregation.

    Examples:
        >>> import ray
        >>> ray.data.range(100).groupby("id").std(ddof=0) # doctest: +SKIP
        >>> (  # doctest: +SKIP
        ...     ray.data.from_items(
        ...         [{"A": i % 3, "B": i, "C": i**2} for i in range(100)]
        ...     )
        ...     .groupby("A")
        ...     .std(["B", "C"])
        ... )

    NOTE: This uses Welford's online method for an accumulator-style
    computation of the standard deviation. This method was chosen due to
    its numerical stability, and it being computable in a single pass.
    This may give different results than NumPy, Pandas, and sklearn, which
    use a two-pass algorithm.
    See
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm

    Args:
        on: A column name or a list of column names to aggregate. If
            ``None`` (the default), the std is computed column-wise for
            each original column in the dataset.
        ddof: Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        ignore_nulls: Whether to ignore null values. If ``True``, null
            values will be ignored when computing the std; if ``False``,
            if a null value is encountered, the output will be null.
            We consider np.nan, None, and pd.NaT to be null values.
            Default is ``True``.

    Returns:
        The standard deviation result.

        For different values of ``on``, the return varies:

        - ``on=None``: a dataset containing a groupby key column,
          ``"k"``, and a column-wise std column for each original column in
          the dataset.
        - ``on=["col_1", ..., "col_n"]``: a dataset of ``n + 1``
          columns where the first column is the groupby key and the second
          through ``n + 1`` columns are the results of the aggregations.

        If groupby key is ``None`` then the key part of return is omitted.
    """
    # Delegate to the shared aggregation helper, handing it the Std
    # aggregation together with the null-handling and ddof options.
    return self._aggregate_on(Std, on, ignore_nulls=ignore_nulls, ddof=ddof)
605
606
607def _apply_udf_to_groups(

Callers

nothing calls this directly

Calls 1

_aggregate_onMethod · 0.95

Tested by

no test coverage detected