MCPcopy
hub / github.com/nltk/nltk / from_train

Method from_train

nltk/classify/weka.py:282–311  ·  view source on GitHub ↗

Constructs an ARFF_Formatter instance with class labels and feature types determined from the given data. Handles boolean, numeric and string (note: not nominal) types.

(tokens)

Source from the content-addressed store, hash-verified

280
281 @staticmethod
def from_train(tokens):
    """
    Constructs an ARFF_Formatter instance with class labels and feature
    types determined from the given data. Handles boolean, numeric and
    string (note: not nominal) types.

    :param tokens: An iterable of ``(featureset, label)`` pairs, where
        each featureset provides ``.items()`` yielding
        ``(feature name, feature value)`` pairs.  The iterable is
        traversed exactly once, so a one-shot iterator works too.
    :return: ``ARFF_Formatter(labels, features)``, where ``labels`` is
        the set of attested labels and ``features`` is the sorted list
        of ``(feature name, ARFF type string)`` pairs.
    :raise ValueError: If a feature value has an unsupported type, or
        if the same feature name is seen with two different types.
    """
    labels = set()
    features = {}

    # Collect labels and feature types in a single pass so that
    # generators / iterators are not exhausted before the features
    # have been inspected.
    for tok, label in tokens:
        labels.add(label)

        for fname, fval in tok.items():
            if fval is None:
                continue  # can't tell the type.

            # bool must be tested before int: bool is a subclass of int.
            if isinstance(fval, bool):
                ftype = "{True, False}"
            elif isinstance(fval, (int, float)):
                ftype = "NUMERIC"
            elif isinstance(fval, str):
                ftype = "STRING"
            else:
                # Report the type of the offending value; ``ftype`` is
                # either unbound or left over from a previous feature.
                raise ValueError("Unsupported value type %r" % type(fval))

            if features.get(fname, ftype) != ftype:
                raise ValueError("Inconsistent type for %s" % fname)
            features[fname] = ftype

    features = sorted(features.items())

    return ARFF_Formatter(labels, features)
312
313 def header_section(self):
314 """Returns an ARFF header as a string."""

Callers 1

trainMethod · 0.80

Calls 3

ARFF_FormatterClass · 0.85
itemsMethod · 0.80
getMethod · 0.45

Tested by

no test coverage detected