Returns an iterator over all article titles (for a given namespace id).
(self, namespace=0, start=None, count=100, cached=True, **kwargs)
| 1647 | articles = all |
| 1648 | |
def list(self, namespace=0, start=None, count=100, cached=True, **kwargs):
    """ Returns an iterator over all article titles (for a given namespace id).
        Titles are fetched lazily in batches through the MediaWiki "allpages" list;
        redirects are filtered out ("apfilterredir" = "nonredirects").
        - namespace: the namespace id to list pages from.
        - start    : the title to start listing from (None = from the beginning).
        - count    : the number of titles to request per batch (at most 500 are requested).
        - cached   : passed on to URL.download().
        Additional keyword arguments are passed on to URL.download();
        "unicode" defaults to True and "throttle" defaults to self.throttle.
        The private "_id" keyword selects another page field to yield instead of "title".
        Pages for which the selected field is missing or empty are skipped.
    """
    kwargs.setdefault("unicode", True)
    kwargs.setdefault("throttle", self.throttle)
    # Fetch article titles (default) or a custom id.
    field = kwargs.pop("_id", "title")
    # Loop until the last response no longer yields a continuation value.
    # See: http://www.mediawiki.org/wiki/API:Allpages
    while start != -1:
        url = URL(self._url, method=GET, query={
            "action": "query",
            "list": "allpages",
            "apnamespace": namespace,
            "apfrom": start or "",
            "aplimit": min(count, 500),
            "apfilterredir": "nonredirects",
            "format": "json"
        })
        data = url.download(cached=cached, **kwargs)
        data = json.loads(data)
        for page in data.get("query", {}).get("allpages", []):
            if page.get(field):
                yield page[field]
        # Older MediaWiki servers report the next batch under "query-continue" => "allpages",
        # newer servers under a top-level "continue" object (see API:Continue).
        # The legacy format is checked first so existing behavior is unchanged.
        legacy = data.get("query-continue", {}).get("allpages", {})
        modern = data.get("continue", {})
        start = legacy.get("apcontinue",
                legacy.get("apfrom",
                modern.get("apcontinue", -1)))
    # The generator simply ends here: raising StopIteration explicitly inside
    # a generator is turned into a RuntimeError by PEP 479 (Python 3.7+).
| 1676 | |
| 1677 | def search(self, query, type=SEARCH, start=1, count=1, sort=RELEVANCY, size=None, cached=True, **kwargs): |
| 1678 | """ Returns a MediaWikiArticle for the given query. |