MCPcopy
hub / github.com/huggingface/datasets / benchmark_array_xd

Function benchmark_array_xd

benchmarks/benchmark_array_xd.py:92–138  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

90
91
def benchmark_array_xd():
    """Run the write/read benchmark for three encodings of the same 2-D data.

    Three feature layouts are exercised in order: an ``Array2D`` feature, a
    nested (sequence-of-sequences) feature, and a flattened single sequence.
    For each layout the examples are generated, written to a fresh temporary
    directory with ``write``, and then read back with every function in
    ``read_functions``. The value returned by each call is stored in a dict,
    which is finally dumped as UTF-8 encoded JSON to ``RESULTS_FILE_PATH``.
    """
    times = {}
    read_functions = (
        read_unformated,
        read_formatted_as_numpy,
        read_batch_unformated,
        read_batch_formatted_as_numpy,
        read_col_unformated,
        read_col_formatted_as_numpy,
    )

    def run_scenario(label, feats, **generate_kwargs):
        # Each scenario gets its own scratch directory, removed on exit.
        write_key = "write_" + label
        with tempfile.TemporaryDirectory() as scratch_dir:
            examples = generate_examples(
                features=feats, num_examples=SPEED_TEST_N_EXAMPLES, **generate_kwargs
            )
            times[write_key] = write(feats, examples, scratch_dir)
            for reader in read_functions:
                times[reader.__name__ + " after " + write_key] = reader(feats, scratch_dir)

    run_scenario(
        "array2d",
        datasets.Features({"image": Array2D(SPEED_TEST_SHAPE, dtype="float32")}),
    )

    # The sequence lengths are deliberately left unspecified in the feature
    # types below (no fixed length) to keep the comparison fair; the actual
    # shape is supplied to the generator through ``seq_shapes`` instead.
    run_scenario(
        "nested_sequence",
        datasets.Features({"image": datasets.Sequence(datasets.Sequence(datasets.Value("float32")))}),
        seq_shapes={"image": SPEED_TEST_SHAPE},
    )

    run_scenario(
        "flattened_sequence",
        datasets.Features({"image": datasets.Sequence(datasets.Value("float32"))}),
        seq_shapes={"image": [SPEED_TEST_SHAPE[0] * SPEED_TEST_SHAPE[1]]},
    )

    with open(RESULTS_FILE_PATH, "wb") as results_file:
        serialized = json.dumps(times)
        results_file.write(serialized.encode("utf-8"))
139
140
141if __name__ == "__main__": # useful to run the profiler

Callers 1

Calls 5

Array2DClass · 0.90
generate_examplesFunction · 0.90
writeFunction · 0.70
writeMethod · 0.45
encodeMethod · 0.45

Tested by

no test coverage detected