Constructs an ARFF_Formatter instance with class labels and feature types determined from the given data. Handles boolean, numeric and string (note: not nominal) types.
(tokens)
| 280 | |
@staticmethod
def from_train(tokens):
    """
    Constructs an ARFF_Formatter instance with class labels and feature
    types determined from the given data. Handles boolean, numeric and
    string (note: not nominal) types.

    :param tokens: iterable of ``(featuredict, label)`` pairs, where
        ``featuredict`` maps feature names to values. It is consumed in a
        single pass, so a one-shot iterator/generator is acceptable.
    :return: ``ARFF_Formatter(labels, features)`` where ``labels`` is the
        set of attested labels and ``features`` is a list of
        ``(feature name, ARFF type string)`` pairs sorted by name.
    :raises ValueError: if a feature value has an unsupported type, or if
        the same feature is seen with two different types.
    """
    labels = set()
    features = {}
    for tok, label in tokens:
        # Collect the set of all attested labels.
        labels.add(label)

        # Determine the types of all features.
        for fname, fval in tok.items():
            if fval is None:
                continue  # can't tell the type.

            # bool must be tested before int: bool is a subclass of int.
            if isinstance(fval, bool):
                ftype = "{True, False}"
            elif isinstance(fval, (int, float)):
                ftype = "NUMERIC"
            elif isinstance(fval, str):
                ftype = "STRING"
            else:
                # Report the offending value's type; `ftype` is not
                # assigned on this path.
                raise ValueError("Unsupported value type %r" % type(fval))

            if features.get(fname, ftype) != ftype:
                raise ValueError("Inconsistent type for %s" % fname)
            features[fname] = ftype

    features = sorted(features.items())

    return ARFF_Formatter(labels, features)
| 312 | |
| 313 | def header_section(self): |
| 314 | """Returns an ARFF header as a string.""" |
no test coverage detected