| 204 | return DecisionTreeClassifier(label, feature_name, decisions) |
| 205 | |
def refine(
    self,
    labeled_featuresets,
    entropy_cutoff,
    depth_cutoff,
    support_cutoff,
    binary=False,
    feature_values=None,
    verbose=False,
):
    """
    Refine this node in place: every branch whose share of the training
    examples still has label entropy above ``entropy_cutoff`` is
    replaced by a subtree built with ``DecisionTreeClassifier.train``.

    Nothing is changed when any of these holds:
    ``len(labeled_featuresets) <= support_cutoff``, this node tests no
    feature (``self._fname is None``), or ``depth_cutoff <= 0``.

    :param labeled_featuresets: sized iterable of ``(featureset, label)``
        pairs; each featureset must provide ``.get(feature_name)``.
    :param entropy_cutoff: a branch is retrained only when the entropy
        of its label distribution is strictly greater than this value.
    :param depth_cutoff: refinement is skipped when this is ``<= 0``;
        otherwise it is forwarded unchanged to ``train``.
    :param support_cutoff: refinement is skipped unless there are more
        than this many examples; also forwarded to ``train``.
    :param binary: forwarded unchanged to ``train``.
    :param feature_values: forwarded unchanged to ``train``.
    :param verbose: forwarded unchanged to ``train``.
    :return: ``None``; ``self._decisions`` and ``self._default`` are
        updated in place.
    """
    # Stopping conditions: too few examples, nothing to split on, or
    # no depth budget left.
    if len(labeled_featuresets) <= support_cutoff:
        return
    if self._fname is None:
        return
    if depth_cutoff <= 0:
        return

    fname = self._fname
    decisions = self._decisions
    # Everything except the example subset is passed through verbatim.
    train_args = (
        entropy_cutoff,
        depth_cutoff,
        support_cutoff,
        binary,
        feature_values,
        verbose,
    )

    def is_impure(examples):
        # True when the label distribution of ``examples`` is still
        # more mixed than the caller tolerates.
        label_freqs = FreqDist(label for (_, label) in examples)
        return entropy(MLEProbDist(label_freqs)) > entropy_cutoff

    # Partition the examples in ONE pass instead of re-scanning the
    # whole training set once per branch.  Membership is tested against
    # ``decisions`` itself, so feature values must be hashable.
    # Relative order inside each subset is preserved.
    by_value = {}
    unmatched = []
    for featureset, label in labeled_featuresets:
        value = featureset.get(fname)
        if value in decisions:
            by_value.setdefault(value, []).append((featureset, label))
        else:
            unmatched.append((featureset, label))

    # Only existing keys are reassigned, so the mapping does not change
    # size while it is being iterated.
    for fval in decisions:
        examples = by_value.get(fval, [])
        if is_impure(examples):
            decisions[fval] = DecisionTreeClassifier.train(examples, *train_args)

    # The default branch receives every example whose feature value has
    # no dedicated entry in ``decisions``.
    if self._default is not None and is_impure(unmatched):
        self._default = DecisionTreeClassifier.train(unmatched, *train_args)
| 257 | |
| 258 | @staticmethod |
| 259 | def best_stump(feature_names, labeled_featuresets, verbose=False): |