Encode batch into a format for Arrow. Args: batch (`dict[str, list[Any]]`): Data in a Dataset batch. Returns: `dict[str, list[Any]]`
(self, batch)
| 2178 | return [encode_nested_example(self[column_name], obj, level=1) for obj in column] |
| 2179 | |
def encode_batch(self, batch):
    """
    Encode every column of a batch into a format for Arrow.

    Args:
        batch (`dict[str, list[Any]]`):
            Data in a Dataset batch, keyed by column name.

    Returns:
        `dict[str, list[Any]]`: one entry per column of `batch`, in the same
        order, holding the encoded values of that column.

    Raises:
        ValueError: If the column names of `batch` are not exactly the keys of `self`.
    """
    # Guard clause: refuse to encode anything unless the batch columns and
    # the feature names match exactly.
    if set(batch) != set(self):
        raise ValueError(f"Column mismatch between batch {set(batch)} and features {set(self)}")

    # Each column is first converted to plain Python objects, then every value
    # is encoded with the feature registered under the column's name.
    return {
        name: [
            encode_nested_example(self[name], value, level=1)
            for value in cast_to_python_objects(values)
        ]
        for name, values in batch.items()
    }
| 2198 | |
| 2199 | def decode_example(self, example: dict, token_per_repo_id: Optional[dict[str, Union[str, bool, None]]] = None): |
| 2200 | """Decode example with custom feature decoding. |