Returns an object for a single author
| 13 | |
| 14 | |
| 15 | class AuthorParser: |
| 16 | """Returns an object for a single author""" |
| 17 | |
def __init__(self, nav):
    """ Keeps the navigator and the list of author section names

    :param nav: object stored unchanged as ``self.nav``; this constructor
        does not call it
    """
    # Section names in a fixed order (presumably the parts of an author
    # profile that can be filled later -- confirm against the code that
    # reads ``_sections``).
    section_names = [
        'basics',
        'indices',
        'counts',
        'coauthors',
        'publications',
        'public_access',
    ]
    self.nav = nav
    self._sections = section_names
| 26 | |
def get_author(self, __data) -> Author:
    """ Builds an author container from a scholar id or a search snippet

    :param __data: either a scholar id given as a ``str``, or a parsed
        markup element for one author search result. The element is
        called as ``__data('a')`` and queried with ``find``/``find_all``
        (presumably a BeautifulSoup tag -- confirm against the caller).
    :returns: the author container. For a ``str`` only the id and the
        source are recorded; for a snippet the picture URL, name,
        interests and, when present in the markup, affiliation, email
        domain and citation count are added as well.
    """
    author: Author = {'container_type': 'Author', 'filled': []}

    # A bare id carries no markup to read, so there is nothing more to add.
    if isinstance(__data, str):
        author['scholar_id'] = __data
        author['source'] = AuthorSource.AUTHOR_PROFILE_PAGE
        return author

    author['source'] = AuthorSource.SEARCH_AUTHOR_SNIPPETS
    first_link = __data('a')[0]
    author['scholar_id'] = re.findall(_CITATIONAUTHRE, first_link['href'])[0]

    photo_path = '/citations?view_op=medium_photo&user={}'.format(author['scholar_id'])
    author['url_picture'] = _HOST.format(photo_path)

    # Each field is located in two steps: resolve the class name for the
    # tag through the helper, then look the tag up by that class.
    name_cls = self._find_tag_class_name(__data, 'h3', 'name')
    name_tag = __data.find('h3', class_=name_cls)
    author['name'] = name_tag.text

    aff_cls = self._find_tag_class_name(__data, 'div', 'aff')
    aff_tag = __data.find('div', class_=aff_cls)
    if aff_tag:
        author['affiliation'] = aff_tag.text

    eml_cls = self._find_tag_class_name(__data, 'div', 'eml')
    eml_tag = __data.find('div', class_=eml_cls)
    if eml_tag:
        author['email_domain'] = re.sub(_EMAILAUTHORRE, r'@', eml_tag.text)

    # Without a resolved class name the interests list is left empty.
    interest_cls = self._find_tag_class_name(__data, 'a', 'one_int')
    author['interests'] = (
        [link.text.strip() for link in __data.find_all('a', class_=interest_cls)]
        if interest_cls
        else []
    )

    cby_cls = self._find_tag_class_name(__data, 'div', 'cby')
    cby_tag = __data.find('div', class_=cby_cls)
    if cby_tag and cby_tag.text != '':
        # Skip the first 9 characters before converting to int
        # (presumably the "Cited by " label -- confirm against live markup).
        author['citedby'] = int(cby_tag.text[9:])

    return author
| 68 | |
| 69 | |
| 70 | def _find_tag_class_name(self, __data, tag, text): |
| 71 | elements = __data.find_all(tag) |
| 72 | for element in elements: |
no outgoing calls
no test coverage detected
searching dependent graphs…