| 106 | |
| 107 | |
def compute_heterogeneity(data, k, centroids, cluster_assignment):
    """Return the summed squared Euclidean distance of points to their centroids.

    For every cluster index in ``range(k)``, the rows of ``data`` whose
    entry in ``cluster_assignment`` equals that index are measured against
    ``centroids[index]``; the squared distances are accumulated over all
    clusters.

    Args:
        data: Two-dimensional container indexed as ``data[mask, :]``.
            NOTE(review): presumably a NumPy array or SciPy sparse
            matrix with one row per data point -- confirm against callers.
        k: Number of cluster indices to iterate over.
        centroids: Indexable collection; ``centroids[i]`` is the centroid
            compared against the members of cluster ``i``.
        cluster_assignment: Array-like compared element-wise with each
            cluster index to build the row mask.
            NOTE(review): assumes this is a NumPy array so that ``==``
            yields a boolean mask -- confirm.

    Returns:
        The accumulated sum of squared distances; ``0.0`` when no cluster
        has any member.
    """
    total = 0.0
    for cluster_id in range(k):
        # Rows of `data` that were assigned to this cluster.
        members = data[cluster_assignment == cluster_id, :]

        # An empty cluster contributes nothing, so skip the distance call.
        if members.shape[0] <= 0:
            continue

        # The centroid is wrapped in a list so it is passed as a
        # one-element collection of points, as in the original call.
        # NOTE(review): `pairwise_distances` is presumably
        # sklearn.metrics.pairwise_distances -- confirm the import.
        to_centroid = pairwise_distances(
            members, [centroids[cluster_id]], metric="euclidean"
        )
        total += np.sum(to_centroid**2)

    return total
| 123 | |
| 124 | |
| 125 | def plot_heterogeneity(heterogeneity, k): |