MCPcopy
hub / github.com/jwngr/sdow / query_wikipedia_api

Function query_wikipedia_api

scripts/lookup_wikipedia_page_info.py:13–46  ·  view source on GitHub ↗
(identifier_type, page_ids_or_titles, resolve_redirects=False)

Source from the content-addressed store, hash-verified

11
12
def query_wikipedia_api(identifier_type, page_ids_or_titles, resolve_redirects=False):
    """Look up page info from the Wikipedia API for the given page IDs or titles.

    Requests are issued in batches because the API limits how many pages can
    be queried at once, and the ``pages`` mappings of all responses are merged
    into a single dictionary.

    Args:
        identifier_type: Either ``'pageids'`` or ``'titles'``; used as the name
            of the query parameter carrying the identifiers.
        page_ids_or_titles: Iterable of page IDs or titles. Each item is
            converted with ``str()`` before being sent.
        resolve_redirects: When true, the ``redirects`` query parameter is
            added to each request.

    Returns:
        A dict merging the ``query.pages`` mapping of every response (empty if
        no identifiers were given), or ``None`` if ``identifier_type`` is not
        one of the accepted values (an error message is printed in that case).

    Raises:
        requests.exceptions.Timeout: If the API does not respond within the
            timeout applied to each request.
    """
    if identifier_type not in ('pageids', 'titles'):
        print('[Error] Page identifier type must be "pageids" or "titles".')
        return None

    # Query at most 50 pages per request (given WikiMedia API limits).
    max_pages_per_request = 50
    # Without a timeout, requests waits forever on a stalled connection.
    request_timeout_seconds = 30

    query_params = {
        'action': 'query',
        'format': 'json',
        'prop': 'info|redirects',
        'inprop': 'displaytitle',
    }

    if resolve_redirects:
        query_params['redirects'] = ''

    # IDs must be strings in order for the join below to work.
    identifiers = [str(x) for x in page_ids_or_titles]

    results = {}
    for batch_start in range(0, len(identifiers), max_pages_per_request):
        batch = identifiers[batch_start:batch_start + max_pages_per_request]
        query_params[identifier_type] = '|'.join(batch)

        req = requests.get(
            WIKIPEDIA_API_URL,
            params=query_params,
            timeout=request_timeout_seconds,
        )

        # A response without 'query' or 'pages' contributes nothing.
        results.update(req.json().get('query', {}).get('pages', {}))

    return results
47
48
49ids_to_lookup = [

Callers 1

Calls

no outgoing calls

Tested by

no test coverage detected