Method test_parse_html

tests/unit_tests.py:171–194 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

169
@print_test
def test_parse_html(self):
    """Parse the article fixture and verify the extracted fields.

    Calls ``parse()`` and ``nlp()`` on ``self.article``, then checks the
    body text, top image, authors, title, image count, meta language and
    publish date against the expected values for the 'cnn' resource.
    """
    self.article.parse()
    self.article.nlp()

    # The expected body text is loaded from the 'cnn' / 'txt' mock resource
    # and must match both the parsed article and the standalone helper.
    expected_text = mock_resource_with('cnn', 'txt')
    assert self.article.text == expected_text
    assert fulltext(self.article.html) == expected_text

    # NOTE: top_img extraction requires an internet connection,
    # unlike the rest of this test file.
    expected_top_img = (
        'http://i2.cdn.turner.com/cnn/dam/assets/131129200805-'
        '01-weather-1128-story-top.jpg'
    )
    assert self.article.top_img == expected_top_img

    # Authors are compared in sorted order, so the expected list is
    # written pre-sorted.
    expected_authors = [
        'Chien-Ming Wang',
        'Dana A. Ford',
        'James S.A. Corey',
        'Tom Watkins',
    ]
    assert sorted(self.article.authors) == expected_authors

    expected_title = (
        'After storm, forecasters see smooth sailing for Thanksgiving'
    )
    assert self.article.title == expected_title

    expected_img_count = 46
    assert len(self.article.imgs) == expected_img_count

    expected_meta_lang = 'en'
    assert self.article.meta_lang == expected_meta_lang

    assert str(self.article.publish_date) == '2013-11-27 00:00:00'
195
196	@print_test
197	def test_meta_type_extraction(self):

runTest (Method) · 0.95

fulltext (Function) · 0.90

mock_resource_with (Function) · 0.85

nlp (Method) · 0.80

parse (Method) · 0.45

no test coverage detected