MCPcopy
hub / github.com/huggingface/datasets / encode_batch

Method encode_batch

src/datasets/features/features.py:2180–2197  ·  view source on GitHub ↗

Encode a batch into a format for Arrow. Args: `batch` (`dict[str, list[Any]]`) — data in a Dataset batch. Returns: `dict[str, list[Any]]`.

(self, batch)

Source from the content-addressed store, hash-verified

2178 return [encode_nested_example(self[column_name], obj, level=1) for obj in column]
2179
def encode_batch(self, batch):
    """
    Encode every column of a batch into a format for Arrow.

    Args:
        batch (`dict[str, list[Any]]`):
            Data in a Dataset batch, keyed by column name.

    Returns:
        `dict[str, list[Any]]`: one list of encoded items per column.

    Raises:
        ValueError: if the batch's column names differ from the feature names.
    """
    # Guard clause: the batch must carry exactly the columns declared in the features.
    if set(batch) != set(self):
        raise ValueError(f"Column mismatch between batch {set(batch)} and features {set(self)}")
    # Each column is first converted with cast_to_python_objects, then every item
    # is encoded against that column's feature.
    return {
        name: [encode_nested_example(self[name], item, level=1) for item in cast_to_python_objects(values)]
        for name, values in batch.items()
    }
2198
2199 def decode_example(self, example: dict, token_per_repo_id: Optional[dict[str, Union[str, bool, None]]] = None):
2200 """Decode example with custom feature decoding.

Calls 3

cast_to_python_objects — Function · 0.85
encode_nested_example — Function · 0.85
items — Method · 0.80