MCPcopy
hub / github.com/codelucas/newspaper / test_parse_html

Method test_parse_html

tests/unit_tests.py:171–194  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

169
@print_test
def test_parse_html(self):
    """Check the fields extracted from ``self.article`` after parsing.

    Runs ``parse()`` and ``nlp()`` on ``self.article`` (prepared outside
    this method), then compares the body text, top image, authors, title,
    image count, meta language and publish date with the expected values.
    """
    expected_authors = [
        'Chien-Ming Wang',
        'Dana A. Ford',
        'James S.A. Corey',
        'Tom Watkins',
    ]
    expected_title = (
        'After storm, forecasters see smooth sailing for Thanksgiving'
    )
    expected_img_count = 46
    expected_meta_lang = 'en'
    expected_publish_date = '2013-11-27 00:00:00'

    article = self.article
    article.parse()
    article.nlp()

    # The parsed text and the standalone ``fulltext`` helper must both
    # reproduce the stored 'cnn' text resource exactly.
    expected_text = mock_resource_with('cnn', 'txt')
    assert article.text == expected_text
    assert fulltext(article.html) == expected_text

    # NOTE: top_img extraction requires an internet connection
    # unlike the rest of this test file
    expected_top_img = (
        'http://i2.cdn.turner.com/cnn/dam/assets/131129200805-'
        '01-weather-1128-story-top.jpg'
    )
    assert article.top_img == expected_top_img

    # Authors are sorted before comparison, so extraction order is not
    # part of what this test pins down.
    assert sorted(article.authors) == expected_authors
    assert article.title == expected_title
    assert len(article.imgs) == expected_img_count
    assert article.meta_lang == expected_meta_lang
    assert str(article.publish_date) == expected_publish_date
195
196 @print_test
197 def test_meta_type_extraction(self):

Callers 1

runTest (Method) · 0.95

Calls 4

fulltext (Function) · 0.90
mock_resource_with (Function) · 0.85
nlp (Method) · 0.80
parse (Method) · 0.45

Tested by

no test coverage detected