MCPcopy
hub / github.com/codelucas/newspaper / feeds_to_articles

Method feeds_to_articles

newspaper/source.py:223–253  ·  view source on GitHub ↗

Returns articles given the url of a feed

(self)

Source from the content-addressed store, hash-verified

221 self.feeds = [feed for feed in self.feeds if feed.dom is not None]
222
def feeds_to_articles(self):
    """Build ``Article`` objects from the URLs found in this source's feeds.

    For every feed in ``self.feeds`` the URLs are extracted from the feed's
    ``rss`` attribute, wrapped in ``Article`` objects, passed through
    ``self.purge_articles('url', ...)`` and, when
    ``self.config.memoize_articles`` is set, through
    ``utils.memoize_articles``. The per-feed counts (before purging, after
    purging, after memoization) are logged at debug level and additionally
    printed when ``self.config.verbose`` is set.

    Returns:
        A list holding the remaining articles of all feeds, in the order
        the feeds were iterated.
    """
    articles = []
    for feed in self.feeds:
        urls = self.extractor.get_urls(feed.rss, regex=True)
        before_purge = len(urls)

        cur_articles = [
            Article(url=url, source_url=self.url, config=self.config)
            for url in urls
        ]

        cur_articles = self.purge_articles('url', cur_articles)
        after_purge = len(cur_articles)

        if self.config.memoize_articles:
            cur_articles = utils.memoize_articles(self, cur_articles)
        # Counted outside the ``if`` so the value is always defined, even
        # when memoization is disabled.
        after_memo = len(cur_articles)

        articles.extend(cur_articles)

        counts = (before_purge, after_purge, after_memo, feed.url)
        if self.config.verbose:
            print('%d->%d->%d for %s' % counts)
        # Hand the arguments to the logger instead of pre-formatting, so the
        # string is only built when debug logging is actually enabled.
        log.debug('%d->%d->%d for %s', *counts)
    return articles
254
255 def categories_to_articles(self):
256 """Takes the categories, splays them into a big list of urls and churns

Callers 1

_generate_articles · Method · 0.95

Calls 4

purge_articles · Method · 0.95
Article · Class · 0.85
get_urls · Method · 0.80
append · Method · 0.80

Tested by

no test coverage detected