MCPcopy
hub / github.com/dask/dask / info

Method info

dask/dataframe/dask_expr/_collection.py:3965–4039  ·  view source on GitHub ↗

Concise summary of a Dask DataFrame

(self, buf=None, verbose=False, memory_usage=False)

Source from the content-addressed store, hash-verified

3963 return out
3964
3965 def info(self, buf=None, verbose=False, memory_usage=False):
3966 """
3967 Concise summary of a Dask DataFrame
3968 """
3969 if buf is None:
3970 import sys
3971
3972 buf = sys.stdout
3973 lines = [str(type(self)).replace("._collection", "")]
3974
3975 if len(self.columns) == 0:
3976 lines.append(f"{type(self.index._meta).__name__}: 0 entries")
3977 lines.append(f"Empty {type(self).__name__}")
3978 put_lines(buf, lines)
3979 return
3980
3981 # Group and execute the required computations
3982 computations = {}
3983 if verbose:
3984 computations.update({"index": self.index, "count": self.count()})
3985 if memory_usage:
3986 computations["memory_usage"] = self.memory_usage(deep=True, index=True)
3987
3988 computations = dict(zip(computations.keys(), compute(*computations.values())))
3989
3990 if verbose:
3991 import textwrap
3992
3993 index = computations["index"]
3994 counts = computations["count"]
3995 lines.append(index_summary(index))
3996 lines.append(f"Data columns (total {len(self.columns)} columns):")
3997
3998 from pandas.io.formats.printing import pprint_thing
3999
4000 space = max(len(pprint_thing(k)) for k in self.columns) + 1
4001 column_width = max(space, 7)
4002
4003 header = (
4004 textwrap.dedent("""\
4005 # {{column:<{column_width}}} Non-Null Count Dtype
4006 --- {{underl:<{column_width}}} -------------- -----""")
4007 .format(column_width=column_width)
4008 .format(column="Column", underl="------")
4009 )
4010 column_template = textwrap.dedent(f"""\
4011 {{i:^3}} {{name:<{column_width}}} {{count}} non-null {{dtype}}""")
4012 column_info = [
4013 column_template.format(
4014 i=pprint_thing(i),
4015 name=pprint_thing(name),
4016 count=pprint_thing(count),
4017 dtype=pprint_thing(dtype),
4018 )
4019 for i, (name, count, dtype) in enumerate(
4020 # NOTE: Use `counts.values` for cudf support
4021 zip(self.columns, counts.values, self.dtypes)
4022 )

Callers 4

read_bytes · Function · 0.80
open_head · Function · 0.80
test_info · Function · 0.80
_assert_info · Function · 0.80

Calls 15

memory_usage · Method · 0.95
put_lines · Function · 0.90
compute · Function · 0.90
index_summary · Function · 0.90
memory_repr · Function · 0.90
max · Function · 0.85
split · Method · 0.80
replace · Method · 0.45
count · Method · 0.45
keys · Method · 0.45
values · Method · 0.45
items · Method · 0.45

Tested by 2

test_info · Function · 0.64
_assert_info · Function · 0.64