Apply the given function to each row and then flatten results. Use this method if your transformation returns multiple rows for each input row. You can use either a function or a callable class to perform the transformation. For functions, Ray Data uses stateless Ray tasks. For classes, Ray Data uses stateful Ray actors.
(
self,
fn: UserDefinedFunction[
Dict[str, Any], Union[List[Dict[str, Any]], Dict[str, Any]]
],
*,
compute: Optional[ComputeStrategy] = None,
fn_args: Optional[Iterable[Any]] = None,
fn_kwargs: Optional[Dict[str, Any]] = None,
fn_constructor_args: Optional[Iterable[Any]] = None,
fn_constructor_kwargs: Optional[Dict[str, Any]] = None,
num_cpus: Optional[float] = None,
num_gpus: Optional[float] = None,
memory: Optional[float] = None,
concurrency: Optional[Union[int, Tuple[int, int], Tuple[int, int, int]]] = None,
ray_remote_args_fn: Optional[Callable[[], Dict[str, Any]]] = None,
**ray_remote_args,
)
| 1406 | |
| 1407 | @PublicAPI(api_group=BT_API_GROUP) |
| 1408 | def flat_map( |
| 1409 | self, |
| 1410 | fn: UserDefinedFunction[ |
| 1411 | Dict[str, Any], Union[List[Dict[str, Any]], Dict[str, Any]] |
| 1412 | ], |
| 1413 | *, |
| 1414 | compute: Optional[ComputeStrategy] = None, |
| 1415 | fn_args: Optional[Iterable[Any]] = None, |
| 1416 | fn_kwargs: Optional[Dict[str, Any]] = None, |
| 1417 | fn_constructor_args: Optional[Iterable[Any]] = None, |
| 1418 | fn_constructor_kwargs: Optional[Dict[str, Any]] = None, |
| 1419 | num_cpus: Optional[float] = None, |
| 1420 | num_gpus: Optional[float] = None, |
| 1421 | memory: Optional[float] = None, |
| 1422 | concurrency: Optional[Union[int, Tuple[int, int], Tuple[int, int, int]]] = None, |
| 1423 | ray_remote_args_fn: Optional[Callable[[], Dict[str, Any]]] = None, |
| 1424 | **ray_remote_args, |
| 1425 | ) -> "Dataset": |
| 1426 | """Apply the given function to each row and then flatten results. |
| 1427 | |
| 1428 | Use this method if your transformation returns multiple rows for each input |
| 1429 | row. |
| 1430 | |
| 1431 | You can use either a function or a callable class to perform the transformation. |
| 1432 | For functions, Ray Data uses stateless Ray tasks. For classes, Ray Data uses |
| 1433 | stateful Ray actors. For more information, see |
| 1434 | :ref:`Stateful Transforms <stateful_transforms>`. |
| 1435 | |
| 1436 | .. tip:: |
| 1437 | :meth:`~Dataset.map_batches` can also modify the number of rows. If your |
| 1438 | transformation is vectorized like most NumPy and pandas operations, |
| 1439 | it might be faster. |
| 1440 | |
| 1441 | .. warning:: |
| 1442 | Specifying both ``num_cpus`` and ``num_gpus`` for map tasks is experimental, |
| 1443 | and may result in scheduling or stability issues. Please |
| 1444 | `report any issues <https://github.com/ray-project/ray/issues/new/choose>`_ |
| 1445 | to the Ray team. |
| 1446 | |
| 1447 | Examples: |
| 1448 | |
| 1449 | .. testcode:: |
| 1450 | |
| 1451 | from typing import Any, Dict, List |
| 1452 | import ray |
| 1453 | |
| 1454 | def duplicate_row(row: Dict[str, Any]) -> List[Dict[str, Any]]: |
| 1455 | return [row] * 2 |
| 1456 | |
| 1457 | print( |
| 1458 | ray.data.range(3) |
| 1459 | .flat_map(duplicate_row) |
| 1460 | .take_all() |
| 1461 | ) |
| 1462 | |
| 1463 | .. testoutput:: |
| 1464 | |
| 1465 | [{'id': 0}, {'id': 0}, {'id': 1}, {'id': 1}, {'id': 2}, {'id': 2}] |