Get the information from the current post
(browser, logger)
| 47 | |
| 48 | |
def extract_post_info(browser, logger):
    """Get the information from the current post.

    Navigates to the ``comments/`` page under the browser's current URL,
    keeps clicking the "load more comments" element while it exists and
    still yields new comments, then collects one name per comment element.

    :param browser: WebDriver-like object; must provide ``current_url`` and
        ``find_element`` with ``By`` locators.
    :param logger: logger receiving an info line per commenter found and a
        warning if collecting commenters fails.
    :return: tuple ``(user_commented_list, date_time)`` where
        ``user_commented_list`` holds, for each comment, the fourth
        ``/``-separated segment of the href of its first ``<a>`` element
        (presumably the username for ``https://host/<username>/`` links --
        confirm against the page markup), and ``date_time`` is the
        ``datetime`` attribute of the first ``<time>`` element on the page.

    Lookups of the comment list and of the ``<time>`` element are not
    guarded here, so any exception they raise propagates to the caller.
    """
    web_address_navigator(browser, browser.current_url + "comments/")
    user_commented_list = []
    last_comment_count = 0

    # load all hidden comments
    while check_exists_by_xpath(
        browser, read_xpath(extract_post_info.__name__, "load_more_comments_element")
    ):
        load_more_comments_element = browser.find_element(
            By.XPATH,
            read_xpath(extract_post_info.__name__, "load_more_comments_element"),
        )
        click_element(browser, load_more_comments_element)
        sleep(0.5)

        # get comment list as currently rendered, only to measure progress
        comment_list = browser.find_element(
            By.XPATH, read_xpath(extract_post_info.__name__, "comment_list")
        )
        comments = comment_list.find_elements(
            By.XPATH, read_xpath(extract_post_info.__name__, "comments")
        )

        # check instagram comment load bug: stop when the click revealed
        # fewer than 3 new comments (this also covers "no new comments"),
        # otherwise the loop could spin on a button that never goes away
        if (len(comments) - last_comment_count) < 3:
            break
        last_comment_count = len(comments)

    # get all comment list (re-queried after the expansion loop has finished)
    comment_list = browser.find_element(
        By.XPATH, read_xpath(extract_post_info.__name__, "comment_list")
    )
    comments = comment_list.find_elements(
        By.XPATH, read_xpath(extract_post_info.__name__, "comments")
    )

    # get all commenter list
    # Best effort: the first failure stops the scan, and the commenters
    # gathered up to that point are still returned.
    try:
        for comm in comments:
            user_commented = (
                comm.find_element(By.TAG_NAME, "a").get_attribute("href").split("/")
            )
            logger.info("Found commenter: {}".format(user_commented[3]))
            user_commented_list.append(user_commented[3])

    except Exception as e:
        # The message needs a placeholder, otherwise the error is discarded.
        logger.warning("Cant get comments: {}".format(e))

    date_time = browser.find_element(By.TAG_NAME, "time").get_attribute("datetime")
    return user_commented_list, date_time
| 101 | |
| 102 | |
| 103 | def extract_information(browser, username, daysold, max_pic, logger): |
no test coverage detected