Method info

dask/dataframe/dask_expr/_collection.py:3965–4039 · view source on GitHub ↗

Concise summary of a Dask DataFrame

(self, buf=None, verbose=False, memory_usage=False)

Source from the content-addressed store, hash-verified

3963	return out
3964
3965	def info(self, buf=None, verbose=False, memory_usage=False):
3966	"""
3967	Concise summary of a Dask DataFrame
3968	"""
3969	if buf is None:
3970	import sys
3971
3972	buf = sys.stdout
3973	lines = [str(type(self)).replace("._collection", "")]
3974
3975	if len(self.columns) == 0:
3976	lines.append(f"{type(self.index._meta).__name__}: 0 entries")
3977	lines.append(f"Empty {type(self).__name__}")
3978	put_lines(buf, lines)
3979	return
3980
3981	# Group and execute the required computations
3982	computations = {}
3983	if verbose:
3984	computations.update({"index": self.index, "count": self.count()})
3985	if memory_usage:
3986	computations["memory_usage"] = self.memory_usage(deep=True, index=True)
3987
3988	computations = dict(zip(computations.keys(), compute(*computations.values())))
3989
3990	if verbose:
3991	import textwrap
3992
3993	index = computations["index"]
3994	counts = computations["count"]
3995	lines.append(index_summary(index))
3996	lines.append(f"Data columns (total {len(self.columns)} columns):")
3997
3998	from pandas.io.formats.printing import pprint_thing
3999
4000	space = max(len(pprint_thing(k)) for k in self.columns) + 1
4001	column_width = max(space, 7)
4002
4003	header = (
4004	textwrap.dedent("""\
4005	# {{column:<{column_width}}} Non-Null Count Dtype
4006	--- {{underl:<{column_width}}} -------------- -----""")
4007	.format(column_width=column_width)
4008	.format(column="Column", underl="------")
4009	)
4010	column_template = textwrap.dedent(f"""\
4011	{{i:^3}} {{name:<{column_width}}} {{count}} non-null {{dtype}}""")
4012	column_info = [
4013	column_template.format(
4014	i=pprint_thing(i),
4015	name=pprint_thing(name),
4016	count=pprint_thing(count),
4017	dtype=pprint_thing(dtype),
4018	)
4019	for i, (name, count, dtype) in enumerate(
4020	# NOTE: Use `counts.values` for cudf support
4021	zip(self.columns, counts.values, self.dtypes)
4022	)

Callers 4

read_bytes · Function · 0.80

open_head · Function · 0.80

test_info · Function · 0.80

_assert_info · Function · 0.80

Calls 15

memory_usage · Method · 0.95

put_lines · Function · 0.90

compute · Function · 0.90

index_summary · Function · 0.90

memory_repr · Function · 0.90

max · Function · 0.85

split · Method · 0.80

replace · Method · 0.45

count · Method · 0.45

keys · Method · 0.45

values · Method · 0.45

items · Method · 0.45

Tested by 2

test_info · Function · 0.64

_assert_info · Function · 0.64