Helper method which calculates the nullity of a DataFrame. Factored out of and used within `geoplot`.
(geo_group, x_col, y_col)
| 562 | |
| 563 | |
| 564 | def _calculate_geographic_nullity(geo_group, x_col, y_col): |
| 565 | """ |
| 566 | Helper method which calculates the nullity of a DataFrame. Factored out of and used within `geoplot`. |
| 567 | """ |
| 568 | # Aggregate by point and fetch a list of non-null coordinate pairs, which is returned. |
| 569 | point_groups = geo_group.groupby([x_col, y_col]) |
| 570 | points = [point for point in point_groups.groups.keys() if pd.notnull(point[0]) and pd.notnull(point[1])] |
| 571 | # Calculate nullities by location, then take their average within the overall feature. |
| 572 | counts = np.sum(point_groups.count().values, axis=1) |
| 573 | entries = point_groups.size() |
| 574 | width = len(geo_group.columns) |
| 575 | # Remove empty (NaN, NaN) points. |
| 576 | if len(entries) > 0: # explicit check to avoid a Runtime Warning |
| 577 | geographic_nullity = np.average(1 - counts / width / entries) |
| 578 | return points, geographic_nullity |
| 579 | else: |
| 580 | return points, np.nan |
| 581 | |
| 582 | |
| 583 | def geoplot(df, x=None, y=None, coordinates=None, by=None, geometry=None, cutoff=None, histogram=False, |