MCPcopy
hub / github.com/ddbourgin/numpy-ml / test_tfidf

Function test_tfidf

numpy_ml/tests/test_preprocessing.py:79–116  ·  view source on GitHub ↗
(N=15)

Source from the content-addressed store, hash-verified

77
78
def test_tfidf(N=15):
    """Check `TFIDFEncoder` against a reference `TfidfVectorizer` on random corpora.

    The NumPy global RNG is seeded with 12345, then `N` independent trials are
    run. Each trial builds a corpus of 1-9 documents, each document holding
    1-999 newline-separated lines produced by `random_paragraph`, fits both
    encoders on it, and asserts that the two resulting matrices agree via
    `np.testing.assert_almost_equal`. "PASSED" is printed after every
    successful trial.

    Parameters
    ----------
    N : int
        Number of random trials to run. Default is 15.
    """
    np.random.seed(12345)

    for _ in range(N):
        # The order of the np.random draws below is significant: with the
        # fixed seed it determines exactly which corpora are generated.
        n_docs = np.random.randint(1, 10)

        docs = []
        for _ in range(n_docs):
            n_lines = np.random.randint(1, 1000)

            paragraphs = []
            for _ in range(n_lines):
                # presumably the number of words in the paragraph -- confirm
                # against random_paragraph's signature
                size = np.random.randint(1, 10)
                paragraphs.append(random_paragraph(size))

            # One paragraph per line; a paragraph's items are space-separated.
            docs.append("\n".join(" ".join(tokens) for tokens in paragraphs))

        # Randomly exercise both the smoothed and unsmoothed IDF variants.
        smooth = bool(np.random.randint(2))

        encoder = TFIDFEncoder(
            lowercase=True,
            min_count=0,
            smooth_idf=smooth,
            max_tokens=None,
            input_type="strings",
            filter_stopwords=False,
        )
        reference = TfidfVectorizer(
            input="content",
            norm=None,
            use_idf=True,
            lowercase=True,
            smooth_idf=smooth,
            sublinear_tf=False,
        )

        encoder.fit(docs)
        mine = encoder.transform(ignore_special_chars=True)
        theirs = reference.fit_transform(docs).toarray()

        np.testing.assert_almost_equal(mine, theirs)
        print("PASSED")
117
118
119def test_dct(N=15):

Callers

nothing calls this directly

Calls 4

fit (Method) · 0.95
transform (Method) · 0.95
random_paragraph (Function) · 0.90
TFIDFEncoder (Class) · 0.90

Tested by

no test coverage detected