hub / github.com/dask/dask / describe_numeric_aggregate

Function describe_numeric_aggregate

dask/dataframe/methods.py:191–235 · view source on GitHub ↗

(
    stats,
    name=None,
    is_timedelta_col=False,
    is_datetime_col=False,
    unit="ns",
)

Source from the content-addressed store, hash-verified

189
190
def describe_numeric_aggregate(
    stats,
    name=None,
    is_timedelta_col=False,
    is_datetime_col=False,
    unit="ns",
):
    """Combine pre-computed statistics into a single ``describe`` result.

    Parameters
    ----------
    stats : sequence of length 6
        ``(count, mean, std, min, q, max)``, in that order.  ``q`` holds the
        quantile values; each of its index entries is multiplied by 100 and
        rendered as a ``"<value>%"`` label (e.g. ``0.25`` -> ``"25%"``).
    name : optional
        Assigned to ``result.name`` when the result is series-like.
    is_timedelta_col : bool, default False
        When true, ``mean``, ``std``, ``min``, ``max`` and every entry of
        ``q`` are converted with ``pd.to_timedelta(..., unit=unit)``.
    is_datetime_col : bool, default False
        When true, ``min``, ``max`` and every entry of ``q`` are converted
        with ``pd.to_datetime(..., unit=unit)``; the ``mean`` and ``std``
        rows are left out of the result.
    unit : str, default "ns"
        Unit used for the conversions above.  A falsy value falls back to
        ``"ns"``.

    Returns
    -------
    The concatenation of the leading rows (``count``/``mean``/``std``/``min``,
    or just ``count``/``min`` for datetimes), the relabelled quantiles and
    the ``max`` row.

    Notes
    -----
    The quantile object passed in ``stats`` is not modified: its index is
    relabelled on a copy.
    """
    unit = unit or "ns"
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert len(stats) == 6
    count, mean, std, minimum, quantiles, maximum = stats

    # Container type used to build the one-off "part" rows below.  It is
    # decided before any conversion is applied to the quantiles.
    if is_series_like(count):
        typ = type(count.to_frame())
    else:
        typ = type(quantiles)

    if is_timedelta_col:

        def _as_timedelta(value):
            return pd.to_timedelta(value, unit=unit).as_unit(unit)

        mean = _as_timedelta(mean)
        std = _as_timedelta(std)
        minimum = _as_timedelta(minimum)
        maximum = _as_timedelta(maximum)
        quantiles = quantiles.apply(_as_timedelta)

    if is_datetime_col:
        # mean is not implemented for datetime

        def _as_datetime(value):
            return pd.to_datetime(value, unit=unit).as_unit(unit)

        minimum = _as_datetime(minimum)
        maximum = _as_datetime(maximum)
        quantiles = quantiles.apply(_as_datetime)
        part1 = typ([count, minimum], index=["count", "min"])
    else:
        part1 = typ(
            [count, mean, std, minimum],
            index=["count", "mean", "std", "min"],
        )

    # Relabel on a copy: without a timedelta/datetime conversion,
    # ``quantiles`` is still the very object the caller passed in, and
    # assigning to ``.index`` would silently modify it.
    quantiles = quantiles.copy()
    quantiles.index = [
        f"{fraction * 100:g}%" for fraction in tolist(quantiles.index)
    ]
    if is_series_like(quantiles) and typ != type(quantiles):
        quantiles = quantiles.to_frame()
    part3 = typ([maximum], index=["max"])

    result = concat([part1, quantiles, part3], sort=False)

    if is_series_like(result):
        result.name = name

    return result
236
237
238	def describe_nonnumeric_aggregate(stats, name):

operationMethod · 0.90

is_series_likeFunction · 0.90

tolistFunction · 0.90

concatFunction · 0.90

to_frameMethod · 0.45

applyMethod · 0.45

no test coverage detected

searching dependent graphs…