Decode a query string in x-www-form-urlencoded format into a sequence of two-element tuples. Unlike urlparse.parse_qsl(..., strict_parsing=True), url_decode enforces correct formatting of the query string by validation. If validation fails, a ValueError is raised. urlparse.parse_qsl will only raise errors if any of the name-value pairs omits the equals sign.
(query)
| 26 | |
| 27 | |
def url_decode(query):
    """Decode an x-www-form-urlencoded query string into name/value pairs.

    Unlike ``urlparse.parse_qsl(..., strict_parsing=True)``, which only
    raises an error when a name-value pair omits the equals sign,
    ``url_decode`` validates the formatting of the whole query string
    before parsing it.

    :param query: The query string to decode. A falsy value (``None`` or
        an empty string) decodes to an empty list.
    :returns: A list of ``(name, value)`` two-element tuples, where each
        element has been passed through ``to_unicode``.
    :raises ValueError: If ``query`` contains characters outside the
        ``urlencoded`` set, or if ``INVALID_HEX_PATTERN`` finds a
        malformed percent-escape in it.
    """
    # Nothing to validate or parse. Returning early also keeps a None query
    # from reaching the regular expression search below, which would raise
    # TypeError rather than the documented ValueError.
    if not query:
        return []

    # Check if query contains invalid characters
    invalid_chars = set(query) - urlencoded
    if invalid_chars:
        error = (
            "Error trying to decode a non urlencoded string. "
            "Found invalid characters: %s "
            "in the string: '%s'. "
            "Please ensure the request/response body is "
            "x-www-form-urlencoded."
        )
        raise ValueError(error % (invalid_chars, query))

    # Check for correctly hex encoded values using a regular expression
    # All encoded values begin with % followed by two hex characters
    # correct = %00, %A0, %0A, %FF
    # invalid = %G0, %5H, %PO
    if INVALID_HEX_PATTERN.search(query):
        raise ValueError("Invalid hex encoding in query string.")

    # NOTE: the query is handed to parse_qsl as-is; no explicit utf-8
    # encoding step is performed here. Each parsed name and value is passed
    # through to_unicode afterwards (defined elsewhere in this module;
    # presumably normalizes to text -- confirm against its definition).

    # We want to allow queries such as "c2" whereas urlparse.parse_qsl
    # with the strict_parsing flag will not.
    params = urlparse.parse_qsl(query, keep_blank_values=True)

    # unicode all the things
    return [(to_unicode(name), to_unicode(value)) for name, value in params]
| 79 | |
| 80 | |
| 81 | def add_params_to_qs(query, params): |
searching dependent graphs…