MCPcopy Index your code
hub / github.com/dask/dask / sort_values

Method sort_values

dask/dataframe/dask_expr/_collection.py:3534–3607  ·  view source on GitHub ↗

Sort the dataset by a single column. Sorting a parallel dataset requires expensive shuffles and is generally not recommended. See ``set_index`` for implementation details. Parameters: by (str or list[str]): Column(s) to sort by. …

(
        self,
        by: str | list[str],
        npartitions: int | None = None,
        ascending: bool | list[bool] = True,
        na_position: Literal["first", "last"] = "last",
        partition_size: float = 128e6,
        sort_function: Callable[[pd.DataFrame], pd.DataFrame] | None = None,
        sort_function_kwargs: Mapping[str, Any] | None = None,
        upsample: float = 1.0,
        ignore_index: bool | None = False,
        shuffle_method: str | None = None,
        **options,
    )

Source from the content-addressed store, hash-verified

3532 )
3533
3534 def sort_values(
3535 self,
3536 by: str | list[str],
3537 npartitions: int | None = None,
3538 ascending: bool | list[bool] = True,
3539 na_position: Literal["first", "last"] = "last",
3540 partition_size: float = 128e6,
3541 sort_function: Callable[[pd.DataFrame], pd.DataFrame] | None = None,
3542 sort_function_kwargs: Mapping[str, Any] | None = None,
3543 upsample: float = 1.0,
3544 ignore_index: bool | None = False,
3545 shuffle_method: str | None = None,
3546 **options,
3547 ):
3548 """Sort the dataset by a single column.
3549
3550 Sorting a parallel dataset requires expensive shuffles and is generally
3551 not recommended. See ``set_index`` for implementation details.
3552
3553 Parameters
3554 ----------
3555 by: str or list[str]
3556 Column(s) to sort by.
3557 npartitions: int, None, or 'auto'
3558 The ideal number of output partitions. If None, use the same as
3559 the input. If 'auto' then decide by memory use.
3560 ascending: bool, optional
3561 Sort ascending vs. descending.
3562 Defaults to True.
3563 na_position: {'last', 'first'}, optional
3564 Puts NaNs at the beginning if 'first', puts NaN at the end if 'last'.
3565 Defaults to 'last'.
3566 sort_function: function, optional
3567 Sorting function to use when sorting underlying partitions.
3568 If None, defaults to ``M.sort_values`` (the partition library's
3569 implementation of ``sort_values``).
3570 sort_function_kwargs: dict, optional
3571 Additional keyword arguments to pass to the partition sorting function.
3572 By default, ``by``, ``ascending``, and ``na_position`` are provided.
3573
3574 Examples
3575 --------
3576 >>> df2 = df.sort_values('x') # doctest: +SKIP
3577 """
3578 if na_position not in ("first", "last"):
3579 raise ValueError("na_position must be either 'first' or 'last'")
3580 if not isinstance(by, list):
3581 by = [by]
3582 if any(not isinstance(b, str) for b in by):
3583 raise NotImplementedError(
3584 "Dataframes only support sorting by named columns which must be passed as a "
3585 "string or a list of strings.\n"
3586 f"You passed {by}"
3587 )
3588
3589 if not isinstance(ascending, bool) and not len(ascending) == len(by):
3590 raise ValueError(f"Length of {ascending=} != length of {by=}")
3591

Calls 4

new_collection · Function · 0.90
SortValues · Class · 0.90
get_specified_shuffle · Function · 0.90
any · Function · 0.85