Concise summary of a Dask DataFrame
info(self, buf=None, verbose=False, memory_usage=False)
| 3963 | return out |
| 3964 | |
| 3965 | def info(self, buf=None, verbose=False, memory_usage=False): |
| 3966 | """ |
| 3967 | Concise summary of a Dask DataFrame |
| 3968 | """ |
| 3969 | if buf is None: |
| 3970 | import sys |
| 3971 | |
| 3972 | buf = sys.stdout |
| 3973 | lines = [str(type(self)).replace("._collection", "")] |
| 3974 | |
| 3975 | if len(self.columns) == 0: |
| 3976 | lines.append(f"{type(self.index._meta).__name__}: 0 entries") |
| 3977 | lines.append(f"Empty {type(self).__name__}") |
| 3978 | put_lines(buf, lines) |
| 3979 | return |
| 3980 | |
| 3981 | # Group and execute the required computations |
| 3982 | computations = {} |
| 3983 | if verbose: |
| 3984 | computations.update({"index": self.index, "count": self.count()}) |
| 3985 | if memory_usage: |
| 3986 | computations["memory_usage"] = self.memory_usage(deep=True, index=True) |
| 3987 | |
| 3988 | computations = dict(zip(computations.keys(), compute(*computations.values()))) |
| 3989 | |
| 3990 | if verbose: |
| 3991 | import textwrap |
| 3992 | |
| 3993 | index = computations["index"] |
| 3994 | counts = computations["count"] |
| 3995 | lines.append(index_summary(index)) |
| 3996 | lines.append(f"Data columns (total {len(self.columns)} columns):") |
| 3997 | |
| 3998 | from pandas.io.formats.printing import pprint_thing |
| 3999 | |
| 4000 | space = max(len(pprint_thing(k)) for k in self.columns) + 1 |
| 4001 | column_width = max(space, 7) |
| 4002 | |
| 4003 | header = ( |
| 4004 | textwrap.dedent("""\ |
| 4005 | # {{column:<{column_width}}} Non-Null Count Dtype |
| 4006 | --- {{underl:<{column_width}}} -------------- -----""") |
| 4007 | .format(column_width=column_width) |
| 4008 | .format(column="Column", underl="------") |
| 4009 | ) |
| 4010 | column_template = textwrap.dedent(f"""\ |
| 4011 | {{i:^3}} {{name:<{column_width}}} {{count}} non-null {{dtype}}""") |
| 4012 | column_info = [ |
| 4013 | column_template.format( |
| 4014 | i=pprint_thing(i), |
| 4015 | name=pprint_thing(name), |
| 4016 | count=pprint_thing(count), |
| 4017 | dtype=pprint_thing(dtype), |
| 4018 | ) |
| 4019 | for i, (name, count, dtype) in enumerate( |
| 4020 | # NOTE: Use `counts.values` for cudf support |
| 4021 | zip(self.columns, counts.values, self.dtypes) |
| 4022 | ) |