Returns the ID and title of the non-redirect page corresponding to the provided title, handling titles with incorrect capitalization as well as redirects. Args: page_title: The title of the page to fetch. Returns: (int, str, bool): A tuple containing the page ID, title, and whether or not a redirect was followed.
(self, page_title)
| 29 | self.searches_cursor.arraysize = 1000 |
| 30 | |
def fetch_page(self, page_title):
    """Returns the ID and title of the non-redirect page corresponding to the provided title,
    handling titles with incorrect capitalization as well as redirects.

    The lookup is case-insensitive, so several rows can match. They are resolved in this
    order of preference:

      1. A non-redirect page whose stored title equals the sanitized title exactly.
      2. The first non-redirect page among the remaining matches.
      3. The target of a redirect, preferring the redirect whose stored title equals the
         sanitized title exactly and otherwise using the first row returned.

    Args:
      page_title: The title of the page to fetch.

    Returns:
      (int, str, bool): A tuple containing the page ID, the page title as returned by
        helpers.get_readable_page_title(), and whether or not a redirect was followed.

    Raises:
      ValueError: If no page matches the provided title, or if every match is a redirect
        and the chosen redirect has no target page. Anything raised by
        helpers.get_sanitized_page_title() for an invalid title propagates unchanged.
    """
    sanitized_page_title = helpers.get_sanitized_page_title(page_title)
    not_found_message = (
        'Invalid page title {0} provided. Page title does not exist.'.format(page_title))

    query = 'SELECT * FROM pages WHERE title = ? COLLATE NOCASE;'
    query_bindings = (sanitized_page_title,)
    self.sdow_cursor.execute(query, query_bindings)

    # Because the above query is case-insensitive (due to the COLLATE NOCASE), multiple
    # articles can be matched.
    results = self.sdow_cursor.fetchall()

    if not results:
        raise ValueError(not_found_message)

    # Single pass over the matches. Each row is unpacked as (id, title, is_redirect).
    first_non_redirect = None
    exact_redirect_id = None
    for current_page_id, current_page_title, current_page_is_redirect in results:
        is_exact_match = current_page_title == sanitized_page_title

        if not current_page_is_redirect:
            # An exact, non-redirect match is the best possible answer; stop immediately.
            if is_exact_match:
                return (current_page_id, helpers.get_readable_page_title(current_page_title), False)
            # Otherwise remember the first non-redirect page as the fallback.
            if first_non_redirect is None:
                first_non_redirect = (current_page_id, current_page_title)
        elif is_exact_match and exact_redirect_id is None:
            # Remember the redirect that matches the requested capitalization exactly.
            exact_redirect_id = current_page_id

    if first_non_redirect is not None:
        fallback_page_id, fallback_page_title = first_non_redirect
        return (fallback_page_id, helpers.get_readable_page_title(fallback_page_title), False)

    # All the results are redirects. Follow the one whose title matches exactly when there is
    # one, so a differently-capitalized redirect cannot shadow the title the caller typed;
    # otherwise fall back to the first result.
    redirect_source_id = results[0][0] if exact_redirect_id is None else exact_redirect_id

    query = 'SELECT target_id, title FROM redirects INNER JOIN pages ON pages.id = target_id WHERE source_id = ?;'
    query_bindings = (redirect_source_id,)
    self.sdow_cursor.execute(query, query_bindings)

    result = self.sdow_cursor.fetchone()

    # TODO: This will no longer be required once the April 2018 database dump occurs since this
    # scenario is prevented by the prune_pages_file.py Python script during the database creation.
    if not result:
        raise ValueError(not_found_message)

    return (result[0], helpers.get_readable_page_title(result[1]), True)
| 85 | |
| 86 | def fetch_page_title(self, page_id): |
| 87 | """Returns the page title corresponding to the provided page ID. |
no outgoing calls
no test coverage detected