Returns the articles built from the URLs found in each of the source's feeds
(self)
| 221 | self.feeds = [feed for feed in self.feeds if feed.dom is not None] |
| 222 | |
def feeds_to_articles(self):
    """Build articles from the URLs found in every feed of this source.

    For each feed in ``self.feeds`` the URLs are extracted from
    ``feed.rss``, each one is wrapped in an ``Article`` (with this
    source's ``url`` and ``config``), the batch is passed through
    ``self.purge_articles('url', ...)`` and, when
    ``self.config.memoize_articles`` is truthy, through
    ``utils.memoize_articles``.

    A ``before->after_purge->after_memo`` count summary per feed is sent
    to ``log.debug`` and, when ``self.config.verbose`` is truthy, printed.

    Returns:
        A single list holding the remaining articles of all feeds, in
        the order the feeds were processed.
    """
    articles = []
    for feed in self.feeds:
        urls = self.extractor.get_urls(feed.rss, regex=True)
        before_purge = len(urls)

        cur_articles = [
            Article(url=url, source_url=self.url, config=self.config)
            for url in urls
        ]

        cur_articles = self.purge_articles('url', cur_articles)
        after_purge = len(cur_articles)

        if self.config.memoize_articles:
            cur_articles = utils.memoize_articles(self, cur_articles)
        # Counted outside the ``if`` so the value is always bound and
        # always belongs to the current feed, even with memoization off.
        after_memo = len(cur_articles)

        articles.extend(cur_articles)

        # Format the summary once; it feeds both the console and the log.
        summary = '%d->%d->%d for %s' % (
            before_purge, after_purge, after_memo, feed.url)
        if self.config.verbose:
            print(summary)
        log.debug(summary)
    return articles
| 254 | |
| 255 | def categories_to_articles(self): |
| 256 | """Takes the categories, splays them into a big list of urls and churns |
no test coverage detected