Function test_ctfidf

tests/test_vectorizers/test_ctfidf.py:23–54 · view source on GitHub ↗

(model, documents, request)

Source from the content-addressed store, hash-verified

21	],
22	)
def test_ctfidf(model, documents, request):
    """Check the c-TF-IDF pipeline on documents merged per topic.

    A deep copy of the topic model fixture named by ``model`` is used so the
    shared fixture is left untouched. The documents of each topic are joined
    into a single string, preprocessed by the model, and used to fit the
    model's vectorizer. The resulting count matrix is then passed through
    ``ClassTfidfTransformer``.

    The assertions cover the vocabulary (size and element type), the sparse
    matrix types, and the shape of both the count and c-TF-IDF matrices.
    """
    fitted_model = copy.deepcopy(request.getfixturevalue(model))
    topic_labels = fitted_model.topics_

    # Build one row per document, then collapse to one joined string per topic.
    doc_frame = pd.DataFrame(
        {
            "Document": documents,
            "ID": range(len(documents)),
            "Topic": topic_labels,
        }
    )
    merged_per_topic = doc_frame.groupby(["Topic"], as_index=False).agg({"Document": " ".join})
    topic_texts = fitted_model._preprocess_text(merged_per_topic.Document.values)
    vectorizer = fitted_model.vectorizer_model.fit(topic_texts)

    # Scikit-Learn deprecation: get_feature_names is deprecated in 1.0 and
    # removed in 1.2, so pick the accessor based on the installed version.
    has_feature_names_out = version.parse(sklearn_version) >= version.parse("1.0.0")
    if has_feature_names_out:
        vocabulary = vectorizer.get_feature_names_out()
    else:
        vocabulary = vectorizer.get_feature_names()

    counts = vectorizer.transform(topic_texts)
    c_tf_idf = ClassTfidfTransformer().fit(counts).transform(counts)

    # Vocabulary checks.
    assert len(vocabulary) > 1000
    assert all(isinstance(token, str) for token in vocabulary)

    # Both matrices must be sparse CSR matrices.
    assert isinstance(counts, csr_matrix)
    assert isinstance(c_tf_idf, csr_matrix)

    # Rows correspond to distinct topics, columns to vocabulary entries.
    expected_shape = (len(set(topic_labels)), len(vocabulary))
    assert counts.shape == expected_shape
    assert c_tf_idf.shape == expected_shape

    assert np.min(counts) == 0
55
56
57	@pytest.mark.parametrize(

nothing calls this directly

ClassTfidfTransformer (Class) · 0.90

_preprocess_text (Method) · 0.80

fit (Method) · 0.45

transform (Method) · 0.45

no test coverage detected