MCPcopy
hub / github.com/nltk/nltk / refine

Method refine

nltk/classify/decisiontree.py:206–256  ·  view source on GitHub ↗
(
        self,
        labeled_featuresets,
        entropy_cutoff,
        depth_cutoff,
        support_cutoff,
        binary=False,
        feature_values=None,
        verbose=False,
    )

Source from the content-addressed store, hash-verified

204 return DecisionTreeClassifier(label, feature_name, decisions)
205
def refine(
    self,
    labeled_featuresets,
    entropy_cutoff,
    depth_cutoff,
    support_cutoff,
    binary=False,
    feature_values=None,
    verbose=False,
):
    """
    Replace impure branches of this node with freshly trained subtrees.

    The training pairs are partitioned by the value they hold for this
    node's feature (``self._fname``).  For every value listed in
    ``self._decisions`` -- and for the default branch, when one exists --
    the label distribution of the matching pairs is measured; if its
    entropy exceeds ``entropy_cutoff`` the branch is overwritten with the
    result of ``DecisionTreeClassifier.train`` on just those pairs.

    Nothing happens (and nothing is modified) when any of these hold:
    there are no more than ``support_cutoff`` training pairs, this node
    has no feature name, or ``depth_cutoff`` is zero or negative.

    :param labeled_featuresets: sequence of ``(featureset, label)``
        pairs; each featureset must support ``.get(feature_name)``.
    :param entropy_cutoff: a branch is re-trained only when the entropy
        of its labels is strictly greater than this value.
    :param depth_cutoff: refinement is skipped when this is ``<= 0``.
        It is forwarded unchanged to ``train`` -- presumably ``train``
        decrements it before recursing; confirm against ``train``.
    :param support_cutoff: refinement is skipped unless there are more
        than this many training pairs.
    :param binary: forwarded unchanged to ``train``.
    :param feature_values: forwarded unchanged to ``train``.
    :param verbose: forwarded unchanged to ``train``.
    :return: ``None``; ``self._decisions`` and ``self._default`` are
        updated in place.
    """
    # Guards are evaluated in the same order as three separate checks
    # would be: support first, then "is this a leaf", then depth.
    if (
        len(labeled_featuresets) <= support_cutoff
        or self._fname is None
        or depth_cutoff <= 0
    ):
        return

    fname = self._fname
    branches = self._decisions

    def is_impure(pairs):
        # True when the labels in ``pairs`` are mixed beyond the cutoff.
        distribution = FreqDist(lbl for (_fs, lbl) in pairs)
        return entropy(MLEProbDist(distribution)) > entropy_cutoff

    def grow(pairs):
        # Train a replacement subtree on ``pairs`` with the caller's settings.
        return DecisionTreeClassifier.train(
            pairs,
            entropy_cutoff,
            depth_cutoff,
            support_cutoff,
            binary,
            feature_values,
            verbose,
        )

    # Only existing keys are reassigned, so iterating the dict while
    # updating its values is safe.
    for value in branches:
        matching = [
            (fs, lbl)
            for (fs, lbl) in labeled_featuresets
            if fs.get(fname) == value
        ]
        if is_impure(matching):
            branches[value] = grow(matching)

    if self._default is None:
        return

    # Everything whose feature value has no explicit branch falls through
    # to the default.
    leftovers = [
        (fs, lbl)
        for (fs, lbl) in labeled_featuresets
        if fs.get(fname) not in branches
    ]
    if is_impure(leftovers):
        self._default = grow(leftovers)
257
258 @staticmethod
259 def best_stump(feature_names, labeled_featuresets, verbose=False):

Callers 1

trainMethod · 0.80

Calls 5

FreqDistClass · 0.90
entropyFunction · 0.90
MLEProbDistClass · 0.90
getMethod · 0.45
trainMethod · 0.45

Tested by

no test coverage detected