Function test_tfidf

numpy_ml/tests/test_preprocessing.py:79–116 · view source on GitHub ↗

(N=15)

Source from the content-addressed store, hash-verified

77
78
def test_tfidf(N=15):
    """Compare ``TFIDFEncoder`` output against scikit-learn's ``TfidfVectorizer``.

    Runs `N` randomized trials. Each trial builds a small random corpus, fits
    both encoders on it with the same (randomly chosen) ``smooth_idf`` setting,
    and asserts that the two resulting document-term matrices agree.

    Parameters
    ----------
    N : int
        Number of random trials to run. Default is 15. Values <= 0 run no
        trials.

    Raises
    ------
    AssertionError
        If the two matrices differ beyond the default tolerance of
        ``np.testing.assert_almost_equal``.
    """
    # Seed the global NumPy RNG so the sequence of draws below is repeatable.
    np.random.seed(12345)

    for _ in range(N):
        # Build a corpus of 1-9 documents (randint's upper bound is exclusive).
        docs = []
        n_docs = np.random.randint(1, 10)
        for _ in range(n_docs):
            n_lines = np.random.randint(1, 1000)
            paragraphs = [
                random_paragraph(np.random.randint(1, 10)) for _ in range(n_lines)
            ]
            # Each paragraph is space-joined into one line of the document.
            # NOTE(review): assumes random_paragraph returns an iterable of
            # strings -- confirm against its definition.
            docs.append("\n".join(" ".join(paragraph) for paragraph in paragraphs))

        # Randomly toggle IDF smoothing; both encoders get the same value.
        smooth = bool(np.random.randint(2))

        tfidf = TFIDFEncoder(
            lowercase=True,
            min_count=0,
            smooth_idf=smooth,
            max_tokens=None,
            input_type="strings",
            filter_stopwords=False,
        )
        gold = TfidfVectorizer(
            input="content",
            norm=None,
            use_idf=True,
            lowercase=True,
            smooth_idf=smooth,
            sublinear_tf=False,
        )

        tfidf.fit(docs)
        mine = tfidf.transform(ignore_special_chars=True)
        # The reference returns a sparse matrix; densify it for comparison.
        theirs = gold.fit_transform(docs).toarray()

        np.testing.assert_almost_equal(mine, theirs)
        print("PASSED")
117
118
119	def test_dct(N=15):

nothing calls this directly

fit (Method) · 0.95

transform (Method) · 0.95

random_paragraph (Function) · 0.90

TFIDFEncoder (Class) · 0.90

no test coverage detected