:param binary: If true, then treat all feature/value pairs as individual binary features, rather than using a single n-way branch for each feature.
(
labeled_featuresets,
entropy_cutoff=0.05,
depth_cutoff=100,
support_cutoff=10,
binary=False,
feature_values=None,
verbose=False,
)
| 135 | |
@staticmethod
def train(
    labeled_featuresets,
    entropy_cutoff=0.05,
    depth_cutoff=100,
    support_cutoff=10,
    binary=False,
    feature_values=None,
    verbose=False,
):
    """
    Build a decision tree from labeled feature sets.

    A one-level stump is selected first and is then grown in place by
    calling its ``refine`` method; the refined tree is returned.

    :param labeled_featuresets: Iterable of ``(featureset, label)``
        pairs.  Iterating a featureset yields feature names, and in
        binary mode ``featureset.items()`` is used to read the values.
        The iterable is traversed more than once here, so it must be
        re-iterable (a one-shot generator would be exhausted).
    :param entropy_cutoff: Passed through unchanged to ``refine``.
    :param depth_cutoff: Passed to ``refine`` reduced by one
        (presumably because the initial stump already uses one level;
        confirm against ``refine``).
    :param support_cutoff: Passed through unchanged to ``refine``.
    :param binary: If true, then treat all feature/value pairs as
        individual binary features, rather than using a single n-way
        branch for each feature.
    :param feature_values: Optional mapping from feature name to the
        collection of values that feature can take.  When omitted and
        ``binary`` is true, it is derived from ``labeled_featuresets``.
    :param verbose: Passed through to stump selection and ``refine``.
    :return: The refined tree.
    """
    # Gather every feature name that occurs in the training data.
    feature_names = {
        name for fset, _label in labeled_featuresets for name in fset
    }

    # Binary stumps need the value inventory of each feature; compute
    # it only when the caller did not supply one.
    if feature_values is None and binary:
        feature_values = defaultdict(set)
        for fset, _label in labeled_featuresets:
            for name, value in fset.items():
                feature_values[name].add(value)

    # Choose the initial stump in the requested branching mode.
    if binary:
        tree = DecisionTreeClassifier.best_binary_stump(
            feature_names, labeled_featuresets, feature_values, verbose
        )
    else:
        tree = DecisionTreeClassifier.best_stump(
            feature_names, labeled_featuresets, verbose
        )

    # Grow the stump into a full tree.
    tree.refine(
        labeled_featuresets,
        entropy_cutoff,
        depth_cutoff - 1,
        support_cutoff,
        binary,
        feature_values,
        verbose,
    )

    return tree
| 187 | |
| 188 | @staticmethod |
| 189 | def leaf(labeled_featuresets): |
no test coverage detected