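Format aggregation result into stat entries. Takes the raw aggregation result and formats it into one or more stat entries. For scalar results, returns a single entry. For list results, expands into multiple indexed entries. Args: agg: The aggregator instance; value: The aggregation result value; agg_type: PyArrow type of the aggregation result. Returns: Dictionary mapping stat names to (value, type) tuples.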
(
agg: AggregateFnV2, value: Any, agg_type: pa.DataType
)
| 454 | |
| 455 | |
def _format_stats(
    agg: AggregateFnV2, value: Any, agg_type: pa.DataType
) -> Dict[str, Tuple[Any, pa.DataType]]:
    """Format aggregation result into stat entries.

    Takes the raw aggregation result and formats it into one or more stat
    entries. For scalar results, returns a single entry keyed by the
    aggregator name. For list results, expands into one entry per element,
    keyed ``"<agg_name>[<index>]"``.

    A ``None`` result is always returned as a single entry carrying the
    original ``agg_type``, even for a list-type aggregator (e.g.
    ``ignore_nulls=True`` and all nulls): without a value there is no length
    to expand over.

    Args:
        agg: The aggregator instance; only ``get_agg_name()`` is used.
        value: The aggregation result value.
        agg_type: PyArrow type of the aggregation result.

    Returns:
        Dictionary mapping stat names to (value, type) tuples. An empty list
        ``value`` yields an empty dictionary.
    """
    agg_name = agg.get_agg_name()

    # Scalars and unexpandable Nones share one path: a single entry that
    # keeps the aggregation's own type. No type inspection is needed here.
    if not isinstance(value, list):
        return {agg_name: (value, agg_type)}

    # Only the list path needs the type helper, so import it lazily here.
    from ray.data.datatype import DataType

    # Each expanded entry carries the list's element type when agg_type is a
    # list type; otherwise the declared type is reused unchanged.
    if DataType.from_arrow(agg_type).is_list_type():
        element_type = agg_type.value_type
    else:
        element_type = agg_type

    return {
        f"{agg_name}[{idx}]": (item, element_type)
        for idx, item in enumerate(value)
    }
| 502 | |
| 503 | |
| 504 | def _parse_summary_stats( |
no test coverage detected
searching dependent graphs…