| 5 | |
| 6 | |
def main(url='http://yahoo.com', output_path="headlines-output.csv",
         min_length=10, timeout=10):
    """Scrape ``<h3>`` headlines from a web page, print them, and save to CSV.

    The page at *url* is fetched and parsed with BeautifulSoup's
    ``html.parser``. The stripped text of every ``<h3>`` element that is at
    least *min_length* characters long is collected, printed to stdout as a
    JSON array, and written to *output_path* as a one-column CSV file with a
    ``headline`` header row.

    Args:
        url: Address of the page to scrape.
        output_path: Path of the CSV file to (over)write.
        min_length: Headlines whose stripped text is shorter than this are
            skipped.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be written.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of scraping the body of a 4xx/5xx error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    headlines = []
    for tag in soup.find_all("h3"):
        text = tag.get_text().strip()
        if len(text) >= min_length:
            headlines.append(text)

    print(json.dumps(headlines))

    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows); an explicit UTF-8 encoding keeps non-ASCII
    # headlines from failing under a non-UTF-8 platform default.
    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        writer.writerow(['headline'])
        writer.writerows([text] for text in headlines)
| 28 | |
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()