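This function extracts the directory portion of every crawled URL, records each previously unseen directory in the global `directories` list, and prints it as it is found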
()
| 768 | # IDENTIFY DIRECTORIES |
| 769 | ######################### |
| 770 | def identify_directories(): |
| 771 | |
| 772 | """ |
| 773 | This function prints all the results found while crawling |
| 774 | """ |
| 775 | |
| 776 | global main_domain |
| 777 | global host_name |
| 778 | global crawled |
| 779 | global directories |
| 780 | global debug |
| 781 | global verbose |
| 782 | global url_scheme |
| 783 | |
| 784 | #Variables |
| 785 | domain=url_scheme+'://'+host_name |
| 786 | |
| 787 | #Programa |
| 788 | print('\n\t\t+ Searching for directories...') |
| 789 | |
| 790 | try: |
| 791 | for link_url in crawled: |
| 792 | try: |
| 793 | if debug: |
| 794 | print('\t\t\t\t> Link extracted from "crawled": {0}'.format(link_url)) |
| 795 | # Here we eliminate error or status comments in URLs crawled. |
| 796 | try: |
| 797 | link_url.split('(')[1] |
| 798 | link_url=link_url.split('(')[0] |
| 799 | except: |
| 800 | pass |
| 801 | |
| 802 | # We store in tmp1 the complete path without domain |
| 803 | link_directory_tmp1 = link_url.split(domain)[1] |
| 804 | if debug: |
| 805 | print('\t\t\t\t> Path extracted form link: {0}'.format(link_directory_tmp1)) |
| 806 | |
| 807 | # We separate the path to stay with last directory in it |
| 808 | link_directory_tmp2 = link_directory_tmp1.split('/')[1:-1] |
| 809 | link_directory_tmp2.reverse() |
| 810 | dir_tmp="" |
| 811 | while len(link_directory_tmp2)>0: |
| 812 | dir_tmp=dir_tmp+link_directory_tmp2.pop()+'/' |
| 813 | link_directory = domain+'/'+dir_tmp |
| 814 | if link_directory not in directories: |
| 815 | directories.append(link_directory) |
| 816 | print('\t\t\t- Found: {0}'.format(link_directory)) |
| 817 | except KeyboardInterrupt: |
| 818 | try: |
| 819 | print('\t\t\t\t> Keyboard interrupt while iterating crawled vector. Waiting 1 seconds to continue.') |
| 820 | print('\t\t\t\t> Hit CTRL-C again to skip the rest of the URLs to analyze!') |
| 821 | time.sleep(1.5) |
| 822 | continue |
| 823 | except KeyboardInterrupt: |
| 824 | return -4 |
| 825 | except: |
| 826 | pass |
| 827 |
no test coverage detected