(feature_name, feature_value, labeled_featuresets)
| 275 | |
@staticmethod
def binary_stump(feature_name, feature_value, labeled_featuresets):
    """Build a one-feature stump that tests ``feature_name == feature_value``.

    The training examples are split into those whose featureset maps
    ``feature_name`` to ``feature_value`` (looked up with ``.get``, so a
    missing feature counts as ``None``) and all the others.  The most
    frequent label of each side becomes the prediction for that side.

    :param feature_name: Name of the feature the stump tests.
    :param feature_value: Value that selects the "positive" branch.
    :param labeled_featuresets: Re-iterable collection of
        ``(featureset, label)`` pairs.  It is traversed twice, so a
        one-shot iterator would be exhausted after the first pass.
    :return: ``DecisionTreeClassifier(majority_label, feature_name,
        decisions, default)`` where ``decisions`` maps ``feature_value``
        to a leaf for the positive side (empty if that side had no
        examples) and ``default`` is a leaf for the negative side, or
        the overall majority label if that side had no examples.
    """
    # Most frequent label over the whole training set.
    majority_label = FreqDist(
        label for (featureset, label) in labeled_featuresets
    ).max()

    # Find the best label for each side of the test.
    pos_fdist = FreqDist()
    neg_fdist = FreqDist()
    # The loop variable must not be called ``label``: reusing that name
    # would overwrite the majority label with the label of the last
    # example, corrupting both the fallback default and the stump's label.
    for featureset, observed_label in labeled_featuresets:
        if featureset.get(feature_name) == feature_value:
            pos_fdist[observed_label] += 1
        else:
            neg_fdist[observed_label] += 1

    decisions = {}
    # Fallback when one side is empty.
    # NOTE(review): this is a bare label, whereas the non-empty case below
    # stores a DecisionTreeClassifier -- confirm the constructor and
    # classify() accept both forms for ``default``.
    default = majority_label
    # But hopefully we have observations!
    if pos_fdist.N() > 0:
        decisions = {feature_value: DecisionTreeClassifier(pos_fdist.max())}
    if neg_fdist.N() > 0:
        default = DecisionTreeClassifier(neg_fdist.max())

    return DecisionTreeClassifier(majority_label, feature_name, decisions, default)
| 298 | |
| 299 | @staticmethod |
| 300 | def best_binary_stump( |
no test coverage detected