(browser, page, logger, media, element)
| 849 | |
| 850 | |
def get_links(browser, page, logger, media, element):
    """Collect the hrefs of post links whose media type is listed in ``media``.

    Every ``<a>`` whose ``href`` starts with ``/p/`` is looked up, and the
    number of ``div`` children of that anchor decides how it is handled:

    * exactly one child ``div``: the post is handled as ``MEDIA_PHOTO`` and
      kept when ``MEDIA_PHOTO`` is in ``media``;
    * exactly two child ``div``s: the ``aria-label`` of the ``svg`` badge
      inside the ``CzVzU`` div is read and the post is kept when that label
      is in ``media``;
    * any other count: the post is ignored.

    Args:
        browser: Not used in this function; kept so the signature stays
            compatible with existing callers.
        page: Name of the page being scanned; only used in the log message
            emitted when no post anchor is found.
        logger: Logger receiving the info/error messages.
        media: Container of accepted media type values (tested with ``in``).
        element: Selenium search root on which ``find_elements`` /
            ``find_element`` are called.

    Returns:
        list: The ``href`` values of the accepted posts, in the order the
        anchors were found. Empty when nothing matched or when the scan was
        aborted by an error (which is logged, not raised).
    """
    links = []

    try:
        # Get image links in scope from hashtag, location and other pages
        # NOTE(review): XPath expressions starting with "//" are evaluated from
        # the document root, not from `element` -- confirm this is intended.
        link_elems = element.find_elements(By.XPATH, '//a[starts-with(@href, "/p/")]')
        sleep(random.randint(2, 5))

        if not link_elems:
            logger.info("'{}' page does not contain a picture".format(page))
            return links

        for link_elem in link_elems:
            # Reset per iteration so a failure while reading this anchor is
            # never reported with the href of the previous one.
            post_href = None

            try:
                post_href = link_elem.get_attribute("href")
                if not post_href:
                    # No usable href: nothing to classify or to report.
                    continue

                # ".../p/<id>/" -> "<id>" (second to last "/"-separated part).
                post_id = post_href.split("/")[-2]
                anchor_xpath = "//a[@href='/p/" + post_id + "/']"

                post_elem = element.find_elements(
                    By.XPATH, anchor_xpath + "/child::div"
                )
                child_count = len(post_elem)

                if child_count == 1 and MEDIA_PHOTO in media:
                    logger.info("Found media type: {}".format(MEDIA_PHOTO))
                    links.append(post_href)

                elif child_count == 2:
                    logger.info(
                        "Found media type: {} - {} - {}".format(
                            MEDIA_CAROUSEL, MEDIA_VIDEO, MEDIA_IGTV
                        )
                    )
                    # If you see "Cannot detect post media type. Skip
                    # https://www.instagram.com/p/CFvUn0gpaMZ/" consider
                    # updating the @class 'CzVzU'; new format types could
                    # have been added. Media types from constants.py must be
                    # updated here as well, otherwise the links cannot be
                    # categorized.
                    post_category = element.find_element(
                        By.XPATH,
                        anchor_xpath
                        + "/div[contains(@class,'CzVzU')]/child::*/*[name()='svg']",
                    ).get_attribute("aria-label")

                    logger.info("Post category: {}".format(post_category))

                    if post_category in media:
                        links.append(post_href)

            except WebDriverException:
                # When the href itself could not be read there is nothing
                # meaningful to report, so stay silent. Otherwise show the
                # link so it can be reviewed.
                if post_href:
                    logger.info(
                        "Cannot detect post media type. Skip {}".format(post_href)
                    )

    except Exception as e:
        # Best effort: log and fall through to return what was collected so
        # far. KeyboardInterrupt / SystemExit are deliberately not swallowed.
        logger.error("link_elems error \n\t{}".format(str(e).encode("utf-8")))

    return links
no test coverage detected