Returns tf.data.Dataset instance(s). Args: instructions (ReadInstruction, List[], Dict[]): instruction(s) to read. Instructions can be strings, in which case they are passed to the Instruction constructor as is. split_infos: the available splits for the dataset. read_config: `tfds.ReadConfig`, the input pipeline options.
(
self,
*,
instructions: Tree[splits_lib.SplitArg],
split_infos: Sequence[splits_lib.SplitInfo],
read_config: read_config_lib.ReadConfig,
shuffle_files: bool,
disable_shuffling: bool = False,
decode_fn: DecodeFn | None = None,
)
| 386 | self._file_format = file_adapters.FileFormat.from_value(file_format) |
| 387 | |
def read(
    self,
    *,
    instructions: Tree[splits_lib.SplitArg],
    split_infos: Sequence[splits_lib.SplitInfo],
    read_config: read_config_lib.ReadConfig,
    shuffle_files: bool,
    disable_shuffling: bool = False,
    decode_fn: DecodeFn | None = None,
) -> Tree[tf.data.Dataset]:
    """Builds one `tf.data.Dataset` per requested read instruction.

    Each leaf of `instructions` is looked up in a `SplitDict` built from
    `split_infos`; the file instructions of the resulting split are then
    handed to `self.read_files` together with the reading options below.

    Args:
      instructions: A single split argument, or a nested structure (list,
        dict, ...) of them. String instructions are resolved by the
        `SplitDict` lookup.
      split_infos: The splits available for the dataset.
      read_config: `tfds.ReadConfig`, the input pipeline options.
      shuffle_files: If True, input files are shuffled before being read.
      disable_shuffling: Specifies if the dataset being read has shuffling
        disabled.
      decode_fn: Optional extra processing applied to each example after
        deserialization.

    Returns:
      A single `tf.data.Dataset` when `instructions` is a single instruction;
      otherwise a structure of datasets with the same shape as `instructions`.
    """
    available_splits = splits_lib.SplitDict(split_infos=split_infos)

    # These options are identical for every leaf, so collect them once and
    # forward them unchanged to `read_files`.
    shared_options = dict(
        read_config=read_config,
        shuffle_files=shuffle_files,
        disable_shuffling=disable_shuffling,
        decode_fn=decode_fn,
    )

    def _dataset_for(split_arg: splits_lib.SplitArg) -> tf.data.Dataset:
        # Resolve the instruction to a split, then read the files it covers.
        return self.read_files(
            available_splits[split_arg].file_instructions,
            **shared_options,
        )

    # Apply the per-instruction reader to every leaf of `instructions`.
    return tree.map_structure(_dataset_for, instructions)
| 434 | |
| 435 | def read_files( |
| 436 | self, |
no outgoing calls