MCPcopy
hub / github.com/explosion/spaCy / test_issue4030

Function test_issue4030

spacy/tests/pipeline/test_textcat.py:113–150  ·  view source on GitHub ↗

Test whether textcat works correctly with an empty doc

()

Source from the content-addressed store, hash-verified

111
@pytest.mark.issue(4030)
def test_issue4030():
    """Check that textcat copes with an empty doc (issue 4030).

    A blank English pipeline gets a "textcat" component configured with the
    "spacy.TextCatBOW.v1" architecture, is updated for three passes over
    three labelled texts, and is then run on the empty string. Every
    category score on the resulting doc is expected to equal 0.0.
    """
    unique_classes = ["offensive", "inoffensive"]
    texts = [
        "This is an offensive text",
        "This is the second offensive text",
        "inoff",
    ]
    gold_labels = ["offensive", "offensive", "inoffensive"]
    nlp = spacy.blank("en")

    # One Example per text; "cats" holds a boolean for every known class,
    # True only for the class that matches the gold label.
    train_data = [
        Example.from_dict(
            nlp.make_doc(text),
            {"cats": {label: label == gold for label in unique_classes}},
        )
        for text, gold in zip(texts, gold_labels)
    ]

    # Add the text categorizer as the last pipeline component.
    bow_config = {
        "@architectures": "spacy.TextCatBOW.v1",
        "exclusive_classes": True,
        "ngram_size": 2,
        "no_output_layer": False,
    }
    textcat = nlp.add_pipe("textcat", config={"model": bow_config}, last=True)
    for label in unique_classes:
        textcat.add_label(label)

    # Train with only the textcat component enabled.
    with nlp.select_pipes(enable="textcat"):
        optimizer = nlp.initialize()
        for _ in range(3):
            # Fresh loss dict and fresh batch-size schedule on every pass.
            losses = {}
            batch_sizes = compounding(4.0, 32.0, 1.001)
            for batch in util.minibatch(train_data, size=batch_sizes):
                nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses)

    # Processing an empty doc should result in 0.0 for all categories.
    doc = nlp("")
    for label in unique_classes:
        assert doc.cats[label] == 0.0
151
152
153@pytest.mark.parametrize(

Callers

nothing calls this directly

Calls 9

appendMethod · 0.80
from_dictMethod · 0.80
make_docMethod · 0.80
add_pipeMethod · 0.80
select_pipesMethod · 0.80
nlpFunction · 0.70
add_labelMethod · 0.45
initializeMethod · 0.45
updateMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…