Random sample of items Parameters ---------- n : int, optional Number of items to return is not supported by dask. Use frac instead. frac : float, optional Approximate fraction of items to return. This sampling fraction is applied to all partitions equally.
(self, n=None, frac=None, replace=False, random_state=None)
| 2576 | return func(self, *args, **kwargs) |
| 2577 | |
def sample(self, n=None, frac=None, replace=False, random_state=None):
    """Return a random sample of items.

    Parameters
    ----------
    n : int, optional
        Absolute number of items to return. Not supported by dask; use
        ``frac`` instead. A numeric ``n`` within ``[0, 1]`` is reinterpreted
        as ``frac`` (a warning is emitted); any other non-``None`` value
        raises ``ValueError``.
    frac : float, optional
        Approximate fraction of items to return. The same sampling fraction
        is applied to every partition, so this is an **approximate
        fraction**: do not expect exactly ``len(df) * frac`` items back.
        The exact count depends on how the data is partitioned (but should
        be pretty close in practice).
    replace : boolean, optional
        Sample with or without replacement. Default = False.
    random_state : int or ``np.random.RandomState``
        If an int, a new RandomState is created with this as the seed;
        otherwise values are drawn from the passed RandomState. When
        omitted, a fresh ``np.random.RandomState()`` is used.

    Raises
    ------
    ValueError
        If ``n`` is given and is not a number within ``[0, 1]``, or if no
        sampling fraction is available (``frac`` is ``None``).

    See Also
    --------
    DataFrame.random_split
    pandas.DataFrame.sample
    """
    if n is not None:
        unsupported_n = (
            "sample does not support the number of sampled items "
            "parameter, 'n'. Please use the 'frac' parameter instead."
        )
        # Anything that is not a number inside [0, 1] cannot be read as a
        # fraction, so it is rejected outright.
        if not (isinstance(n, Number) and 0 <= n <= 1):
            raise ValueError(unsupported_n)
        # A value in [0, 1] is tolerated as a stand-in for ``frac``; the
        # caller is warned and any ``frac`` that was passed is overridden.
        warnings.warn(unsupported_n)
        frac = n

    if frac is None:
        raise ValueError("frac must not be None")

    rng = np.random.RandomState() if random_state is None else random_state

    # Seed data is generated from the partition count and the RNG, then
    # handed to the Sample expression.
    sample_expr = expr.Sample(
        self,
        state_data=random_state_data(self.npartitions, rng),
        frac=frac,
        replace=replace,
    )
    return new_collection(sample_expr)
| 2625 | |
| 2626 | def _repr_data(self): |
| 2627 | raise NotImplementedError |