Given a url or a list of urls, return a tuple ``(filesystem, path_or_paths)``. ``filesystem`` is created from the given url(s), and ``path_or_paths`` is a path or a list of paths extracted from the given url(s). If a list of urls is given, all the urls must have the same scheme and netloc.
(url_or_urls, hdfs_driver='libhdfs3', storage_options=None, filesystem=None)
| 177 | |
| 178 | |
def get_filesystem_and_path_or_paths(url_or_urls, hdfs_driver='libhdfs3', storage_options=None, filesystem=None):
    """
    Given a url or a list of urls, return a tuple ``(filesystem, path_or_paths)``.

    ``filesystem`` is either the ``filesystem`` argument (when a truthy one is passed) or a filesystem
    created by ``FilesystemResolver`` from the first url. ``path_or_paths`` is obtained by applying
    ``get_dataset_path`` to every parsed url.

    :param url_or_urls: A single url, or a non-empty ``list`` of urls. Only ``list`` instances are treated as a
        collection of urls; any other value is handled as one url. All urls in a list must have the same
        scheme and netloc.
    :param hdfs_driver: Forwarded to ``FilesystemResolver`` when a filesystem has to be created.
    :param storage_options: Forwarded to ``FilesystemResolver`` when a filesystem has to be created.
    :param filesystem: An already existing filesystem object. When given, it is returned as is and
        ``FilesystemResolver`` is not invoked.
    :return: A tuple ``(filesystem, path_or_paths)``. ``path_or_paths`` is a list of paths if ``url_or_urls``
        was a list, otherwise a single path.
    :raises ValueError: If ``url_or_urls`` is an empty list, or if the urls in the list do not all share the
        same scheme and netloc.
    """
    is_url_list = isinstance(url_or_urls, list)
    url_list = url_or_urls if is_url_list else [url_or_urls]

    # Fail with a descriptive error instead of an IndexError on ``parsed_url_list[0]`` below.
    if not url_list:
        raise ValueError('The dataset url list must contain at least one url.')

    parsed_url_list = [urlparse(url) for url in url_list]

    # A single filesystem instance serves all the urls, hence they all must point to the same
    # scheme/netloc as the first one.
    first_parsed_url = parsed_url_list[0]
    if any(parsed_url.scheme != first_parsed_url.scheme or parsed_url.netloc != first_parsed_url.netloc
           for parsed_url in parsed_url_list):
        raise ValueError('The dataset url list must contain url with the same scheme and netloc.')

    # The resolver is only constructed when the caller did not supply a filesystem.
    fs = filesystem or FilesystemResolver(
        url_list[0], hdfs_driver=hdfs_driver, storage_options=storage_options).filesystem()
    path_list = [get_dataset_path(parsed_url) for parsed_url in parsed_url_list]

    # Mirror the shape of the input: list in -> list out, single url in -> single path out.
    path_or_paths = path_list if is_url_list else path_list[0]

    return fs, path_or_paths
| 210 | |
| 211 | |
| 212 | def normalize_dir_url(dataset_url): |
searching dependent graphs…