This function fetches the files detected on the crawled domain into the 'Files+domain' folder. For now, all files are stored in the same folder; no distinction is made between directories.
()
| 981 | # FETCH FILES |
| 982 | ############# |
| 983 | def fetch_files(): |
| 984 | |
| 985 | """ |
| 986 | This function fetchs the files detected on the crawled domain in 'Files+domain' folder |
| 987 | For now all files are stored in the same folder. No make any distinctions of directories. |
| 988 | """ |
| 989 | |
| 990 | global main_domain |
| 991 | global link_to_files |
| 992 | #global extensions_for_download |
| 993 | |
| 994 | try: |
| 995 | print('\n\t\t+ Fetching found files:') |
| 996 | try: |
| 997 | try: |
| 998 | output_directory= main_domain.replace('http://','') |
| 999 | output_directory= main_domain.replace('www.','') |
| 1000 | try: |
| 1001 | output_directory = output_directory.split('/')[0] |
| 1002 | except: |
| 1003 | pass |
| 1004 | try: |
| 1005 | output_directory = output_directory.split(':')[0] |
| 1006 | except: |
| 1007 | pass |
| 1008 | except: |
| 1009 | output_directory=main_domain |
| 1010 | pass |
| 1011 | os.mkdir(output_directory) |
| 1012 | output_directory = output_directory+'/Files/' |
| 1013 | os.mkdir(output_directory) |
| 1014 | |
| 1015 | except OSError as error: |
| 1016 | if 'File exists' in error: |
| 1017 | try: |
| 1018 | if 'Files' in output_directory: |
| 1019 | os.mkdir(output_directory) |
| 1020 | else: |
| 1021 | output_directory = output_directory+'/Files/' |
| 1022 | os.mkdir(output_directory) |
| 1023 | except OSError as error: |
| 1024 | if 'File exists' in error: |
| 1025 | print('\t\t> Output directory already exists! Overwriting content!') |
| 1026 | else: |
| 1027 | print('\t\t\t\t> Cannot create output directory! Not downloading files:') |
| 1028 | return -15 |
| 1029 | |
| 1030 | print('\t\t\t- Files stored in: {0}'.format(output_directory)) |
| 1031 | if verbose: |
| 1032 | print('\t\t\t- File extensions included:', end=' ') |
| 1033 | for ext in extensions_for_download: |
| 1034 | if ext.islower(): |
| 1035 | print(ext, end=' ') |
| 1036 | print('\n') |
| 1037 | for i in link_to_files: |
| 1038 | for ext in extensions_for_download: |
| 1039 | if i.endswith(ext): |
| 1040 | print('\t\t\t+ Downloading file {0}'.format(i)) |
no test coverage detected