()
| 7 | |
| 8 | |
def tweeter_scrapper():
    """Scrape a Twitter profile page and print its cleaned tweet texts.

    Reads a Twitter handle from standard input, downloads
    ``https://twitter.com/<handle>``, extracts the text of every
    ``div.tweet`` element, cleans it, and prints the resulting list.

    Cleaning removes every match of the module-level patterns ``re_text``
    and ``re_text_1`` (defined outside this function), then strips
    no-break spaces (optionally followed by an ellipsis) and zero-width
    non-joiner characters.

    Tweets whose markup lacks the expected ``div.content`` or
    ``div.js-tweet-text-container`` child are skipped instead of aborting
    the whole run with ``AttributeError``.

    Returns:
        None. The cleaned tweets are printed as a single list.
    """
    base_tweeter_url = "https://twitter.com/{}"

    tweeter_id = input()

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(base_tweeter_url.format(tweeter_id), timeout=10)
    soup = BeautifulSoup(response.content, "lxml")

    clear_list_of_tweets = []
    for tweet in soup.find_all("div", {"class": "tweet"}):
        content = tweet.find("div", {"class": "content"})
        if content is None:
            # find() returns None when the child is absent; skip this tweet.
            continue
        text_container = content.find("div", {"class": "js-tweet-text-container"})
        if text_container is None:
            continue

        tweet_text = text_container.text.replace("\n", " ").strip()

        tweet_text = re.sub(re_text, "", tweet_text, flags=re.MULTILINE)
        tweet_text = re.sub(re_text_1, "", tweet_text, flags=re.MULTILINE)
        # Order matters: drop the "no-break space + ellipsis" pair first so the
        # ellipsis is not left behind once the bare no-break space is removed.
        tweet_text = tweet_text.replace("\xa0…", "")
        tweet_text = tweet_text.replace("\xa0", "")
        tweet_text = tweet_text.replace("\u200c", "")
        clear_list_of_tweets.append(tweet_text)

    print(clear_list_of_tweets)
| 36 | |
| 37 | |
| 38 | if __name__ == "__main__": |
no test coverage detected