Test whether textcat works fine with empty doc
()
| 111 | |
@pytest.mark.issue(4030)
def test_issue4030():
    """Check that the text categorizer copes with an empty doc.

    Trains a two-label textcat component on three short texts, then runs
    the pipeline on an empty string and expects a score of exactly 0.0
    for every label.
    """
    labels = ["offensive", "inoffensive"]
    # (text, gold label) pairs forming the tiny training set
    samples = [
        ("This is an offensive text", "offensive"),
        ("This is the second offensive text", "offensive"),
        ("inoff", "inoffensive"),
    ]
    nlp = spacy.blank("en")
    # One Example per sample; a label maps to True only when it is the gold label.
    examples = [
        Example.from_dict(
            nlp.make_doc(text),
            {"cats": {name: name == gold for name in labels}},
        )
        for text, gold in samples
    ]
    # Append a text categorizer configured with the spacy.TextCatBOW.v1 architecture.
    architecture = {
        "@architectures": "spacy.TextCatBOW.v1",
        "exclusive_classes": True,
        "ngram_size": 2,
        "no_output_layer": False,
    }
    categorizer = nlp.add_pipe("textcat", config={"model": architecture}, last=True)
    for name in labels:
        categorizer.add_label(name)
    # Three training passes, with only the textcat pipe enabled.
    with nlp.select_pipes(enable="textcat"):
        sgd = nlp.initialize()
        for _ in range(3):
            epoch_losses = {}
            batch_sizes = compounding(4.0, 32.0, 1.001)
            for batch in util.minibatch(examples, size=batch_sizes):
                nlp.update(examples=batch, sgd=sgd, drop=0.1, losses=epoch_losses)
    # Processing an empty doc should yield 0.0 for all categories.
    empty_doc = nlp("")
    for name in labels:
        assert empty_doc.cats[name] == 0.0
| 151 | |
| 152 | |
| 153 | @pytest.mark.parametrize( |
nothing calls this directly
no test coverage detected
searching dependent graphs…