MCPcopy
hub / github.com/huggingface/datasets / generate_examples

Function generate_examples

benchmarks/utils.py:22–44  ·  view source on GitHub ↗
(features: dict, num_examples=100, seq_shapes=None)

Source from the content-addressed store, hash-verified

20
21
def generate_examples(features: dict, num_examples=100, seq_shapes=None):
    """Build a list of random dummy examples matching ``features``.

    Args:
        features: Mapping of column name to feature object. Supported kinds
            are ``_ArrayXD`` instances, ``datasets.Value`` and (possibly
            nested) ``datasets.Sequence``.
        num_examples: Number of examples to generate.
        seq_shapes: Mapping of column name to the array shape to generate
            for ``datasets.Sequence`` columns. ``None`` is treated as empty.

    Returns:
        A list of ``(index, example)`` tuples, where ``example`` maps every
        column name to its generated value.

    Raises:
        KeyError: If a ``datasets.Sequence`` column has no entry in
            ``seq_shapes``.
        TypeError: If a feature is of an unsupported kind.
    """
    seq_shapes = seq_shapes or {}
    dummy_data = []
    for i in range(num_examples):
        example = {}
        for name, feature in features.items():
            if isinstance(feature, _ArrayXD):
                data = np.random.rand(*feature.shape).astype(feature.dtype)
            elif isinstance(feature, datasets.Value):
                if feature.dtype == "string":
                    data = "The small grey turtle was surprisingly fast when challenged."
                else:
                    # .item() converts the 1-element array to a plain Python scalar.
                    data = np.random.randint(10, size=1).astype(feature.dtype).item()
            elif isinstance(feature, datasets.Sequence):
                # Unwrap nested sequences to reach the innermost feature,
                # whose dtype is used for the generated array.
                inner = feature
                while isinstance(inner, datasets.Sequence):
                    inner = inner.feature
                shape = seq_shapes[name]
                data = np.random.rand(*shape).astype(inner.dtype)
            else:
                # Without this branch `data` would be unbound, or would
                # silently carry over the previous column's value.
                raise TypeError(
                    f"Unsupported feature type for column {name!r}: "
                    f"{type(feature).__name__}"
                )
            example[name] = data

        dummy_data.append((i, example))

    return dummy_data
45
46
47def generate_example_dataset(dataset_path, features, num_examples=100, seq_shapes=None):

Callers 2

benchmark_array_xdFunction · 0.90
generate_example_datasetFunction · 0.70

Calls 1

itemsMethod · 0.80

Tested by

no test coverage detected