Return True only for plain http:// or https:// URLs (no localhost / LAN).
(url: str)
| 130 | |
| 131 | |
| 132 | def _safe_url(url: str) -> bool: |
| 133 | """Return True only for plain http:// or https:// URLs (no localhost / LAN).""" |
| 134 | if not re.match(r"^https?://", url, re.IGNORECASE): |
| 135 | return False |
| 136 | # Block requests to loopback / private addresses to prevent SSRF. |
| 137 | from urllib.parse import urlparse |
| 138 | |
| 139 | host = urlparse(url).hostname or "" |
| 140 | host = host.lower().rstrip(".") |
| 141 | # Reject empty, numeric localhost, and obviously private hostnames. |
| 142 | _PRIVATE = re.compile( |
| 143 | r"^(" |
| 144 | r"localhost" |
| 145 | r"|127\.\d+\.\d+\.\d+" |
| 146 | r"|::1" |
| 147 | r"|0\.0\.0\.0" |
| 148 | r"|10\.\d+\.\d+\.\d+" |
| 149 | r"|172\.(1[6-9]|2\d|3[01])\.\d+\.\d+" |
| 150 | r"|192\.168\.\d+\.\d+" |
| 151 | r"|169\.254\.\d+\.\d+" |
| 152 | r"|fc[0-9a-f]{2}:.*" |
| 153 | r"|fd[0-9a-f]{2}:.*" |
| 154 | r")$" |
| 155 | ) |
| 156 | if _PRIVATE.match(host): |
| 157 | return False |
| 158 | return True |
| 159 | |
| 160 | |
| 161 | def _collect_headers(raw_headers) -> dict: |