Generator that returns Author objects from the author search page
(self, url: str)
| 246 | return res |
| 247 | |
def search_authors(self, url: str) -> Author:
    """Yield authors from an author search page, following pagination.

    The page at ``url`` is loaded with ``self._get_soup``.  Every element
    matched by ``find_all('div', 'gsc_1usr')`` is handed to
    ``AuthorParser.get_author`` and the result is yielded.  After a page is
    exhausted, the "next" button is looked up by its full class string; the
    generator stops when that button is absent or has a ``disabled``
    attribute, otherwise the next page is fetched and processed the same way.

    :param url: address of the first author search results page
    :returns: a generator over the parsed authors
    """
    page = self._get_soup(url)
    parser = AuthorParser(self)

    # Exact class string carried by the "next page" button.  Can be improved
    next_button_cls = ('gs_btnPR gs_in_ib gs_btn_half '
                       'gs_btn_lsb gs_btn_srt gsc_pgn_pnx')

    while True:
        author_rows = page.find_all('div', 'gsc_1usr')
        self.logger.info("Found %d authors", len(author_rows))
        for author_row in author_rows:
            yield parser.get_author(author_row)

        pager = page.find(class_=next_button_cls)
        if not pager or 'disabled' in pager.attrs:
            self.logger.info("No more author pages")
            break

        self.logger.info("Loading next page of authors")
        # Drop the first 17 characters and the final one of the onclick
        # handler (presumably the ``window.location='...'`` wrapper -- verify
        # against the live markup), then resolve the escape sequences.
        escaped_target = pager['onclick'][17:-1]
        next_url, _consumed = codecs.getdecoder("unicode_escape")(escaped_target)
        page = self._get_soup(next_url)
| 269 | |
| 270 | def search_publication(self, url: str, |
| 271 | filled: bool = False) -> PublicationParser: |
no test coverage detected