Function test_issue4030

spacy/tests/pipeline/test_textcat.py:113–150 · view source on GitHub ↗

Test whether textcat works correctly with an empty doc.

()

Source from the content-addressed store, hash-verified

111
@pytest.mark.issue(4030)
def test_issue4030():
    """Regression test for issue 4030: textcat must handle an empty doc.

    Trains a small ``spacy.TextCatBOW.v1`` text categorizer with exclusive
    classes on three toy examples, then runs the pipeline on an empty string
    and checks that every category is scored exactly 0.0.
    """
    labels = ["offensive", "inoffensive"]
    texts = [
        "This is an offensive text",
        "This is the second offensive text",
        "inoff",
    ]
    gold_labels = ["offensive", "offensive", "inoffensive"]
    nlp = spacy.blank("en")

    # One Example per text; "cats" maps each label to True only for the
    # gold label of that text.
    examples = [
        Example.from_dict(
            nlp.make_doc(text),
            {"cats": {label: label == gold for label in labels}},
        )
        for text, gold in zip(texts, gold_labels)
    ]

    # Register the text categorizer at the end of the pipeline and declare
    # its labels before initialization.
    textcat = nlp.add_pipe(
        "textcat",
        config={
            "model": {
                "@architectures": "spacy.TextCatBOW.v1",
                "exclusive_classes": True,
                "ngram_size": 2,
                "no_output_layer": False,
            }
        },
        last=True,
    )
    for label in labels:
        textcat.add_label(label)

    # Initialize and train for three passes with only "textcat" enabled.
    with nlp.select_pipes(enable="textcat"):
        optimizer = nlp.initialize()
        for _ in range(3):
            losses = {}
            batch_sizes = compounding(4.0, 32.0, 1.001)
            for batch in util.minibatch(examples, size=batch_sizes):
                nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses)

    # An empty doc must yield a score of exactly 0.0 for every category.
    empty_doc = nlp("")
    for label in labels:
        assert empty_doc.cats[label] == 0.0
151
152
153	@pytest.mark.parametrize(

nothing calls this directly

append · Method · 0.80

from_dict · Method · 0.80

make_doc · Method · 0.80

add_pipe · Method · 0.80

select_pipes · Method · 0.80

nlp · Function · 0.70

add_label · Method · 0.45

initialize · Method · 0.45

update · Method · 0.45

no test coverage detected

searching dependent graphs…