Get all the information for the given username
(browser, username, daysold, max_pic, logger)
| 101 | |
| 102 | |
| 103 | def extract_information(browser, username, daysold, max_pic, logger): |
| 104 | """Get all the information for the given username""" |
| 105 | web_address_navigator(browser, "https://www.instagram.com/" + username) |
| 106 | |
| 107 | try: |
| 108 | num_of_posts = get_number_of_posts(browser) |
| 109 | num_of_posts = min(num_of_posts, max_pic) |
| 110 | # we don't need to scroll past the maximum number of posts we want |
| 111 | # to extract |
| 112 | |
| 113 | links1 = [] |
| 114 | links2 = [] |
| 115 | links3 = [] |
| 116 | # list links1 contains 30 links from the current view, as that is the |
| 117 | # maximum Instagram is showing at one time |
| 118 | # list links2 contains all the links collected so far without |
| 119 | # duplicates, in mixed order |
| 120 | # list links3 contains all the links collected so far with |
| 121 | # duplicates in preserved order |
| 122 | |
| 123 | except Exception as e: |
| 124 | logger.error( |
| 125 | "Error: Couldn't get user profile. Moving on... \n\t{}".format( |
| 126 | str(e).encode("utf-8") |
| 127 | ) |
| 128 | ) |
| 129 | return [] |
| 130 | |
| 131 | # PROFILE SCROLLING AND HARVESTING LINKS |
| 132 | try: |
| 133 | body_elem = browser.find_element(By.TAG_NAME, "body") |
| 134 | previouslen = -1 |
| 135 | |
| 136 | # every 60 links we will open a picture and check its date, so as not to |
| 137 | # scroll endlessly in huge profiles such as natgeo |
| 138 | opened_overlay = 42 |
| 139 | sleep(0.5) |
| 140 | |
| 141 | # loop that scrolls down the feed, collecting links and saving |
| 142 | # them into links2 |
| 143 | while len(links2) < num_of_posts: |
| 144 | prev_divs = browser.find_elements(By.TAG_NAME, "main") |
| 145 | # harvesting current img links: |
| 146 | links_elems = [div.find_elements(By.TAG_NAME, "a") for div in prev_divs] |
| 147 | links1 = sum( |
| 148 | [ |
| 149 | [link_elem.get_attribute("href") for link_elem in elems] |
| 150 | for elems in links_elems |
| 151 | ], |
| 152 | [], |
| 153 | ) |
| 154 | # saving links for later: |
| 155 | for link in links1: |
| 156 | if "/p/" in link: |
| 157 | links2.append(link) |
| 158 | links3.append(link) |
| 159 | |
| 160 | links2 = list(set(links2)) |
no test coverage detected