MCPcopy
hub / github.com/ray-project/ray / Count

Class Count

python/ray/data/aggregate.py:392–458  ·  view source on GitHub ↗

Defines count aggregation. Example: .. testcode:: import ray from ray.data.aggregate import Count ds = ray.data.range(100) # Schema: {'id': int64} ds = ds.add_column( "group_key", lambda batch: batch["id"].astype("int64") % 3 ) …

Source from the content-addressed store, hash-verified

390
391@PublicAPI
392class Count(AggregateFnV2[int, int]):
393 """Defines count aggregation.
394
395 Example:
396
397 .. testcode::
398
399 import ray
400 from ray.data.aggregate import Count
401
402 ds = ray.data.range(100)
403 # Schema: {'id': int64}
404 ds = ds.add_column(
405 "group_key", lambda batch: batch["id"].astype("int64") % 3
406 )
407 # Schema: {'id': int64, 'group_key': int64}
408
409 # Counting all rows:
410 result = ds.aggregate(Count())
411 # result: {'count()': 100}
412
413
414 # Counting all rows per group:
415 result = ds.groupby("group_key").aggregate(Count(on="id")).take_all()
416 # result: [{'group_key': 0, 'count(id)': 34},
417 # {'group_key': 1, 'count(id)': 33},
418 # {'group_key': 2, 'count(id)': 33}]
419
420
421 Args:
422 on: Optional name of the column to count values on. If None, counts rows.
423 ignore_nulls: Whether to ignore null values when counting. Only applies if
424 `on` is specified. Default is `False` which means `Count()` on a column
425 will count nulls by default. To match pandas default behavior of not counting nulls,
426 set `ignore_nulls=True`.
427 alias_name: Optional name for the resulting column.
428 """
429
430 def __init__(
431 self,
432 on: Optional[str] = None,
433 ignore_nulls: bool = False,
434 alias_name: Optional[str] = None,
435 ):
436 super().__init__(
437 alias_name if alias_name else f"count({on or ''})",
438 on=on,
439 ignore_nulls=ignore_nulls,
440 zero_factory=lambda: 0,
441 )
442
443 def aggregate_block(self, block: Block) -> int:
444 block_accessor = BlockAccessor.for_block(block)
445
446 if self._target_col_name is None:
447 # In case of global count, simply fetch number of rows
448 return block_accessor.num_rows()
449

Calls

no outgoing calls

Used in the wild — real call sites across dependent graphs

searching dependent graphs…