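.. note:: This implementation follows the dask.array.stats implementation of skewness and calculates skewness without taking into account a bias term for finite sample size, which corresponds to the default settings of the scipy.stats skewness calculation. However, Pandas corrects for this, so the values differ by a factor of (n * (n - 1)) ** 0.5 / (n - 2), where n is the number of samples. Further, this method currently does not support filtering out NaN values, which is again a difference to Pandas.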
(
self,
axis=0,
bias=True,
nan_policy="propagate",
numeric_only=False,
)
| 1647 | |
# NOTE(review): the docstring below is kept as a bare ``.. note::`` block
# (no summary line) because ``derived_from`` presumably merges it into the
# pandas docstring -- confirm before restructuring it.
@derived_from(pd.DataFrame)
def skew(
    self,
    axis=0,
    bias=True,
    nan_policy="propagate",
    numeric_only=False,
):
    """
    .. note::

        This implementation follows the dask.array.stats implementation
        of skewness and calculates skewness without taking into account
        a bias term for finite sample size, which corresponds to the
        default settings of the scipy.stats skewness calculation. However,
        Pandas corrects for this, so the values differ by a factor of
        (n * (n - 1)) ** 0.5 / (n - 2), where n is the number of samples.

        Further, this method currently does not support filtering out NaN
        values, which is again a difference to Pandas.

        For ``axis=0`` only ``bias=True`` and ``nan_policy="propagate"``
        are implemented; any other value raises ``NotImplementedError``.
        ``axis=None`` raises ``ValueError``.
    """
    # Guard helper; judging by its name it rejects object-dtype Series --
    # confirm against its definition.
    _raise_if_object_series(self, "skew")
    if axis is None:
        raise ValueError("`axis=None` isn't currently supported for `skew`")
    axis = self._validate_axis(axis)

    if is_dataframe_like(self):
        # Let pandas raise errors if necessary
        meta = self._meta_nonempty.skew(axis=axis, numeric_only=numeric_only)
    else:
        meta = self._meta_nonempty.skew()

    if axis == 1:
        # Row-wise skew is applied to each partition independently.
        # ``numeric_only`` is forwarded so the per-partition call agrees
        # with how ``meta`` was computed above; previously it was dropped
        # and the partitions always ran with the callee's default.
        return self.map_partitions(
            M.skew,
            meta=meta,
            axis=axis,
            numeric_only=numeric_only,
            enforce_metadata=False,
        )

    # These checks are only reached on the axis=0 path.
    if not bias:
        raise NotImplementedError("bias=False is not implemented.")
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented."
        )

    frame = self
    if frame.ndim > 1:
        # 2-d input is always narrowed to numeric/bool columns (timedelta64
        # excluded) here, independent of ``numeric_only``.
        frame = frame.select_dtypes(
            include=["number", "bool"], exclude=[np.timedelta64]
        )

    # Skewness as the ratio m3 / m2**1.5 of the order-3 and order-2 moments
    # (presumably central moments -- see ``Moment``).
    m2 = new_collection(Moment(frame, order=2))
    m3 = new_collection(Moment(frame, order=3))
    result = m3 / m2**1.5
    if result.ndim == 1:
        # Replace NaN entries with 0.0 in the 1-d result (presumably
        # the 0/0 case of constant columns -- TODO confirm).
        result = result.fillna(0.0)
    return result
| 1706 |
nothing calls this directly
no test coverage detected