(self)
| 169 | |
@print_test
def test_parse_html(self):
    """Check the fields extracted from the article under test.

    Runs ``parse()`` and ``nlp()`` on ``self.article`` and compares the
    resulting text, top image, authors, title, image count, meta language
    and publish date with the known-good values listed below.
    """
    # Expected values for the article (presumably the CNN fixture that
    # the test class sets up elsewhere -- verify against setUp).
    expected_authors = [
        'Chien-Ming Wang',
        'Dana A. Ford',
        'James S.A. Corey',
        'Tom Watkins',
    ]
    expected_title = (
        'After storm, forecasters see smooth sailing for Thanksgiving'
    )
    expected_image_count = 46
    expected_meta_lang = 'en'
    expected_publish_date = '2013-11-27 00:00:00'

    self.article.parse()
    self.article.nlp()

    # The stored plain-text resource must match both the parsed article
    # text and the output of the standalone ``fulltext`` helper.
    expected_text = mock_resource_with('cnn', 'txt')
    assert self.article.text == expected_text
    assert fulltext(self.article.html) == expected_text

    # NOTE: top_img extraction requires an internet connection,
    # unlike the rest of this test file.
    expected_top_img = (
        'http://i2.cdn.turner.com/cnn/dam/assets/131129200805-'
        '01-weather-1128-story-top.jpg'
    )
    assert self.article.top_img == expected_top_img

    # Authors are sorted before comparing so extraction order is irrelevant.
    assert sorted(self.article.authors) == expected_authors
    assert self.article.title == expected_title
    assert len(self.article.imgs) == expected_image_count
    assert self.article.meta_lang == expected_meta_lang
    assert str(self.article.publish_date) == expected_publish_date
| 195 | |
| 196 | @print_test |
| 197 | def test_meta_type_extraction(self): |
no test coverage detected