Added by cdxy, Sun May 8, 2016. Use: iterate_path('http://cdxy.me:80/cdsa/cda/aaa.jsp?id=2#') Return: ['http://cdxy.me:80/cdsa/cda/aaa.jsp?id=2#', 'http://cdxy.me:80/', 'http://cdxy.me:80/cdsa', 'http://cdxy.me:80/cdsa/cda', 'http://cdxy.me:80/cdsa/cda/aaa.jsp']
(ori_str)
| 21 | |
| 22 | |
def iterate_path(ori_str):
    """
    Expand a URL into the set of URLs for every level of its path.

    added by cdxy May 8 Sun,2016

    Use:
        iterate_path('http://cdxy.me:80/cdsa/cda/aaa.jsp?id=2#')

    Return (a set, so ordering is not significant):
        {'http://cdxy.me:80/cdsa/cda/aaa.jsp?id=2#',
         'http://cdxy.me:80/',
         'http://cdxy.me:80/cdsa',
         'http://cdxy.me:80/cdsa/cda',
         'http://cdxy.me:80/cdsa/cda/aaa.jsp'}

    The original URL is always included unchanged. When the URL has no
    path segments (empty path, or a path made only of slashes) the set
    contains the original URL and nothing else. Otherwise it also holds
    the value of get_domain(ori_str) -- per the example above, presumably
    the site root; confirm against get_domain -- plus one URL per
    cumulative path prefix, resolved against the original URL.

    :param ori_str: URL string to expand.
    :return: set of URL strings.
    """
    parser = urlparse.urlparse(ori_str)
    # Split first and drop empty pieces: this collapses any run of slashes
    # (leading, trailing, doubled, tripled, ...). A single
    # replace('//', '/') pass leaves empty segments behind for three or
    # more consecutive slashes.
    segments = [seg for seg in parser.path.split('/') if seg]

    urls = set()
    urls.add(ori_str)

    if not segments:
        return urls

    urls.add(get_domain(ori_str))
    prefix = ''
    for seg in segments:
        prefix += '/' + seg
        # prefix is an absolute path, so urljoin keeps only the scheme and
        # netloc of ori_str and discards its query string and fragment.
        urls.add(urlparse.urljoin(ori_str, prefix))
    return urls
| 52 | |
| 53 | |
| 54 | if __name__ == '__main__': |