Finds all anchor tags and parses the email addresses out of their `mailto:` href attributes. Example attribute: `mailto:example@example.com`
(soup: BeautifulSoup)
| 158 | |
| 159 | |
def parse_emails(soup: BeautifulSoup) -> list[str]:
    """
    Collect the unique, valid email addresses linked from anchor tags.

    Every ``<a>`` tag whose ``href`` contains ``mailto:`` is inspected; the
    text following the first ``mailto:`` is taken as the candidate address and
    kept only if ``validators.email`` accepts it.
    example attribute: `mailto:example@example.com`

    :param soup: parsed document to search for anchor tags.
    :return: de-duplicated list of addresses; the order is not guaranteed
        because the addresses are accumulated in a set.
    """
    emails = set()
    for tag in soup.find_all('a'):
        # Guard clause: skip anchors that are not mailto links.
        if not tag.has_attr('href') or 'mailto:' not in tag['href']:
            continue

        # Everything after the first 'mailto:' is the candidate address.
        email = tag['href'].split('mailto:', 1)[1]
        if validators.email(email):
            # Store the parsed address itself (previously the builtin `set`
            # type was added here by mistake, so no address was ever returned).
            emails.add(email)

    return list(emails)
| 175 | |
| 176 | |
| 177 | def parse_phone_numbers(soup: BeautifulSoup) -> list[str]: |