| 7 | |
| 8 | |
def response(flow):
    """Expand relative ``<a href=...>`` links in an HTML response to absolute URLs.

    The response body is only touched when its ``Content-Type`` header
    contains ``text/html``. Every ``href`` value of an ``<a>`` tag that does
    not start with ``http(s)://``, ``ftp(s)://``, ``//``, ``#``,
    ``javascript:`` or ``mailto:`` is resolved against the request URL with
    ``urljoin`` and written back in place, keeping the original quote
    character.

    Only the matched ``href`` value is rewritten. Other attributes that
    happen to carry the same quoted text (for example ``class="foo"`` next to
    ``href="foo"``, or ``alt=""`` next to ``href=""``) are left untouched.

    Args:
        flow: Object exposing ``request.url``, ``response.headers`` and a
            readable/writable ``response.text``.
            NOTE(review): presumably a mitmproxy ``HTTPFlow`` passed to the
            ``response`` event hook -- confirm against the addon setup.

    Returns:
        None. ``flow.response.text`` is updated in place.
    """
    content_type = flow.response.headers.get("Content-Type", "")
    if "text/html" not in content_type:
        return

    page_url = flow.request.url
    page_text = flow.response.text
    if not page_text:
        # Nothing to rewrite (missing or empty body); avoid feeding None to
        # the regex engine.
        return

    pattern = (
        r"<a\s+(?:[^>]*?\s+)?href=(?P<delimiter>[\"'])"
        r"(?P<link>(?!https?:\/\/|ftps?:\/\/|\/\/|#|javascript:|mailto:).*?)(?P=delimiter)"
    )
    rel_matcher = re.compile(pattern, flags=re.IGNORECASE)

    def _absolutize(match):
        """Return the matched tag prefix with its relative link made absolute."""
        whole = match.group(0)
        offset = match.start()
        link_start, link_end = match.span("link")
        abs_link = urljoin(page_url, match.group("link"))
        # Splice by position so only the href value changes, never an
        # identical string elsewhere in the tag or the document.
        return whole[: link_start - offset] + abs_link + whole[link_end - offset :]

    flow.response.text = rel_matcher.sub(_absolutize, page_text)