Plots a bar chart of data nullities by column. :param df: The DataFrame whose completeness is being plotted. :param log: Whether or not to display a logarithmic plot. Defaults to False (linear). :param filter: The filter to apply to the bar chart. Should be one of "top", "bottom", or None (default).
(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color=(0.25, 0.25, 0.25), inline=True,
filter=None, n=0, p=0, sort=None)
| 305 | |
| 306 | |
| 307 | def bar(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color=(0.25, 0.25, 0.25), inline=True, |
| 308 | filter=None, n=0, p=0, sort=None): |
| 309 | """ |
| 310 | Plots a bar chart of data nullities by column. |
| 311 | |
| 312 | :param df: The DataFrame whose completeness is being plotted. |
| 313 | :param log: Whether or not to display a logarithmic plot. Defaults to False (linear). |
| 314 | :param filter: The filter to apply to the bar chart. Should be one of "top", "bottom", or None (default). See |
| 315 | `nullity_filter()` for more information. |
| 316 | :param n: The cap on the number of columns to include in the filtered DataFrame. See `nullity_filter()` for |
| 317 | more information. |
| 318 | :param p: The cap on the percentage fill of the columns in the filtered DataFrame. See `nullity_filter()` for |
| 319 | more information. |
| 320 | :param sort: The sort to apply to the bar chart. Should be one of "ascending", "descending", or None. See |
| 321 | `nullity_sort()` for more information. |
| 322 | :param figsize: The size of the figure to display. This is a `matplotlib` parameter. Defaults to (24, |
| 323 | 10). |
| 324 | :param fontsize: The figure's font size. This defaults to 16. |
| 325 | :param labels: Whether or not to display the column names. Would need to be turned off on particularly large |
| 326 | displays. Defaults to True. |
| 327 | :param color: The color of the filled columns. Default is a medium dark gray: the RGB multiple `(0.25, 0.25, 0.25)`. |
| 328 | :return: If `inline` is True, the underlying `matplotlib.figure` object. Else, nothing. |
| 329 | """ |
| 330 | # Get counts. |
| 331 | nullity_counts = len(df) - df.isnull().sum() |
| 332 | |
| 333 | # Apply filters and sorts. |
| 334 | df = nullity_filter(df, filter=filter, n=n, p=p) |
| 335 | df = nullity_sort(df, sort=sort) |
| 336 | |
| 337 | # Create the basic plot. |
| 338 | fig = plt.figure(figsize=figsize) |
| 339 | (nullity_counts / len(df)).plot(kind='bar', figsize=figsize, fontsize=fontsize, color=color, log=log) |
| 340 | |
| 341 | # Get current axis. |
| 342 | ax1 = plt.gca() |
| 343 | |
| 344 | # Start appending elements, starting with a modified bottom x axis. |
| 345 | if labels or (labels is None and len(df.columns) <= 50): |
| 346 | pos = ax1.get_xticks() |
| 347 | ax1.set_xticklabels(ax1.get_xticklabels(), rotation=45, ha='right', fontsize=fontsize) |
| 348 | |
| 349 | # Create the numerical ticks. |
| 350 | ax2 = ax1.twinx() |
| 351 | if not log: |
| 352 | # Simple if the plot is ordinary. |
| 353 | ax2.set_yticks(ax1.get_yticks()) |
| 354 | ax2.set_yticklabels([int(n*len(df)) for n in ax1.get_yticks()], fontsize=fontsize) |
| 355 | else: |
| 356 | # For some reason when a logarithmic plot is specified `ax1` always contains two more ticks than actually |
| 357 | # appears in the plot. For example, if we do `msno.histogram(collisions.sample(500), log=True)` the contents |
| 358 | # of the naive `ax1.get_yticks()` is [1.00000000e-03, 1.00000000e-02, 1.00000000e-01, 1.00000000e+00, |
| 359 | # 1.00000000e+01]. The fix is to ignore the first and last entries. |
| 360 | # |
| 361 | # Also note that when a log scale is used, we have to make it match the `ax1` layout ourselves. |
| 362 | ax2.set_yscale('log') |
| 363 | ax2.set_ylim(ax1.get_ylim()) |
| 364 | ax2.set_yticks(ax1.get_yticks()[1:-1]) |
nothing calls this directly
no test coverage detected