(iterable, options=None)
| 60 | |
| 61 | |
| 62 | def evaluate(iterable, options=None): |
| 63 | if options is None: |
| 64 | options = parse_args([]) # use defaults |
| 65 | |
| 66 | counts = EvalCounts() |
| 67 | num_features = None # number of features per line |
| 68 | in_correct = False # currently processed chunk is correct until now |
| 69 | last_correct = 'O' # previous chunk tag in corpus |
| 70 | last_correct_type = '' # type of previous chunk tag in corpus |
| 71 | last_guessed = 'O' # previously identified chunk tag |
| 72 | last_guessed_type = '' # type of previously identified chunk tag |
| 73 | |
| 74 | for line in iterable: |
| 75 | line = line.rstrip('\r\n') |
| 76 | |
| 77 | if options.delimiter == ANY_SPACE: |
| 78 | features = line.split() |
| 79 | else: |
| 80 | features = line.split(options.delimiter) |
| 81 | |
| 82 | if num_features is None: |
| 83 | num_features = len(features) |
| 84 | elif num_features != len(features) and len(features) != 0: |
| 85 | raise FormatError('unexpected number of features: %d (%d)' % |
| 86 | (len(features), num_features)) |
| 87 | |
| 88 | if len(features) == 0 or features[0] == options.boundary: |
| 89 | features = [options.boundary, 'O', 'O'] |
| 90 | if len(features) < 3: |
| 91 | raise FormatError('unexpected number of features in line %s' % line) |
| 92 | |
| 93 | guessed, guessed_type = parse_tag(features.pop()) |
| 94 | correct, correct_type = parse_tag(features.pop()) |
| 95 | first_item = features.pop(0) |
| 96 | |
| 97 | if first_item == options.boundary: |
| 98 | guessed = 'O' |
| 99 | |
| 100 | end_correct = end_of_chunk(last_correct, correct, |
| 101 | last_correct_type, correct_type) |
| 102 | end_guessed = end_of_chunk(last_guessed, guessed, |
| 103 | last_guessed_type, guessed_type) |
| 104 | start_correct = start_of_chunk(last_correct, correct, |
| 105 | last_correct_type, correct_type) |
| 106 | start_guessed = start_of_chunk(last_guessed, guessed, |
| 107 | last_guessed_type, guessed_type) |
| 108 | |
| 109 | if in_correct: |
| 110 | if (end_correct and end_guessed and |
| 111 | last_guessed_type == last_correct_type): |
| 112 | in_correct = False |
| 113 | counts.correct_chunk += 1 |
| 114 | counts.t_correct_chunk[last_correct_type] += 1 |
| 115 | elif (end_correct != end_guessed or guessed_type != correct_type): |
| 116 | in_correct = False |
| 117 | |
| 118 | if start_correct and start_guessed and guessed_type == correct_type: |
| 119 | in_correct = True |
no test coverage detected