MCPcopy
hub / github.com/dask/dask / describe_numeric_aggregate

Function describe_numeric_aggregate

dask/dataframe/methods.py:191–235  ·  view source on GitHub ↗
(
    stats,
    name=None,
    is_timedelta_col=False,
    is_datetime_col=False,
    unit="ns",
)

Source from the content-addressed store, hash-verified

189
190
def describe_numeric_aggregate(
    stats,
    name=None,
    is_timedelta_col=False,
    is_datetime_col=False,
    unit="ns",
):
    """Assemble pre-computed summary statistics into one describe-style result.

    Parameters
    ----------
    stats : sequence of length 6
        ``(count, mean, std, min, q, max)``, in that order.  ``q`` holds the
        quantiles; its index entries are multiplied by 100 to build the
        percentage row labels (e.g. ``0.25`` becomes ``"25%"``).
    name : optional
        Assigned to ``result.name`` when the result is series-like.
    is_timedelta_col : bool, default False
        Convert mean, std, min, max and every quantile with
        ``pd.to_timedelta`` in ``unit``.
    is_datetime_col : bool, default False
        Convert min, max and every quantile with ``pd.to_datetime`` in
        ``unit``.  The ``mean`` and ``std`` rows are left out of the result.
    unit : str, default "ns"
        Unit handed to the conversions above; a falsy value falls back to
        ``"ns"``.

    Returns
    -------
    The concatenation of the leading rows (``count``, optionally ``mean`` and
    ``std``, ``min``), the relabelled quantile rows, and the ``max`` row.

    Notes
    -----
    The quantile object passed in ``stats`` is not modified: the percentage
    labels are written to a copy.
    """
    unit = unit or "ns"
    assert len(stats) == 6
    # Local names avoid shadowing the ``min``/``max`` builtins.
    count, mean, std, minimum, q, maximum = stats

    # Container type used to build the single-row pieces: the frame type when
    # the statistics are themselves series-like, otherwise the type of ``q``.
    typ = type(count.to_frame()) if is_series_like(count) else type(q)

    if is_timedelta_col:
        mean = pd.to_timedelta(mean, unit=unit).as_unit(unit)
        std = pd.to_timedelta(std, unit=unit).as_unit(unit)
        minimum = pd.to_timedelta(minimum, unit=unit).as_unit(unit)
        maximum = pd.to_timedelta(maximum, unit=unit).as_unit(unit)
        q = q.apply(lambda x: pd.to_timedelta(x, unit=unit).as_unit(unit))

    if is_datetime_col:
        # mean is not implemented for datetime
        minimum = pd.to_datetime(minimum, unit=unit).as_unit(unit)
        maximum = pd.to_datetime(maximum, unit=unit).as_unit(unit)
        q = q.apply(lambda x: pd.to_datetime(x, unit=unit).as_unit(unit))
        part1 = typ([count, minimum], index=["count", "min"])
    else:
        part1 = typ(
            [count, mean, std, minimum],
            index=["count", "mean", "std", "min"],
        )

    # Relabel a copy: when neither conversion branch ran, ``q`` is still the
    # caller's object, and assigning to its index would leak the string labels
    # back into ``stats`` (and break a second pass over the same input).
    q = q.copy()
    q.index = [f"{level * 100:g}%" for level in tolist(q.index)]
    if is_series_like(q) and type(q) is not typ:
        q = q.to_frame()
    part3 = typ([maximum], index=["max"])

    result = concat([part1, q, part3], sort=False)

    if is_series_like(result):
        result.name = name

    return result
236
237
238def describe_nonnumeric_aggregate(stats, name):

Callers 1

operationMethod · 0.90

Calls 5

is_series_likeFunction · 0.90
tolistFunction · 0.90
concatFunction · 0.90
to_frameMethod · 0.45
applyMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…