(identifier_type, page_ids_or_titles, resolve_redirects=False)
| 11 | |
| 12 | |
def query_wikipedia_api(identifier_type, page_ids_or_titles, resolve_redirects=False):
    """Fetch page info from the Wikipedia API for a collection of pages.

    The identifiers are sent in batches, and the 'pages' mapping found under
    'query' in each JSON response is merged into a single dict.

    Args:
        identifier_type: Either 'pageids' or 'titles'; used as the name of
            the query parameter that carries the identifiers.
        page_ids_or_titles: Iterable of page IDs or titles. Every item is
            converted with ``str`` before being joined with '|'.
        resolve_redirects: When true, an empty 'redirects' parameter is
            added to the query.

    Returns:
        A dict merged from the 'pages' mapping of every response (empty when
        no identifiers are given), or ``None`` after printing an error
        message when ``identifier_type`` is not one of the accepted values.

    Raises:
        requests.exceptions.Timeout: If a request exceeds the timeout.
        requests.exceptions.HTTPError: If the server answers with an HTTP
            error status.
    """
    if identifier_type not in ('pageids', 'titles'):
        print('[Error] Page identifier type must be "pageids" or "titles".')
        return None

    # Query at most 50 pages per request (given WikiMedia API limits).
    max_pages_per_request = 50
    # Without a timeout a stalled connection would block the caller forever.
    request_timeout_seconds = 30

    query_params = {
        'action': 'query',
        'format': 'json',
        'prop': 'info|redirects',
        'inprop': 'displaytitle',
    }
    if resolve_redirects:
        query_params['redirects'] = ''

    # IDs must be strings in order for the join below to work.
    identifiers = [str(x) for x in page_ids_or_titles]

    results = {}
    for batch_start in range(0, len(identifiers), max_pages_per_request):
        batch = identifiers[batch_start:batch_start + max_pages_per_request]
        query_params[identifier_type] = '|'.join(batch)

        response = requests.get(
            WIKIPEDIA_API_URL,
            params=query_params,
            timeout=request_timeout_seconds,
        )
        # Fail loudly on HTTP errors instead of silently dropping a batch
        # whose error body happens to lack a 'query' section.
        response.raise_for_status()

        results.update(response.json().get('query', {}).get('pages', {}))

    return results
| 47 | |
| 48 | |
| 49 | ids_to_lookup = [ |
no outgoing calls
no test coverage detected