Decode compressed/charset HTTP response.

>>> getText(decodePage(b"<html>foo&bar</html>", None, "text/html; charset=utf-8"))
'<html>foo&bar</html>'
>>> getText(decodePage(b"	", None, "text/html; charset=utf-8"))
'\\t'
(page, contentEncoding, contentType, percentDecode=True)
| 270 | return retVal |
| 271 | |
| 272 | def decodePage(page, contentEncoding, contentType, percentDecode=True): |
| 273 | """ |
| 274 | Decode compressed/charset HTTP response |
| 275 | |
| 276 | >>> getText(decodePage(b"<html>foo&bar</html>", None, "text/html; charset=utf-8")) |
| 277 | '<html>foo&bar</html>' |
| 278 | >>> getText(decodePage(b"	", None, "text/html; charset=utf-8")) |
| 279 | '\\t' |
| 280 | """ |
| 281 | |
| 282 | if not page or (conf.nullConnection and len(page) < 2): |
| 283 | return getUnicode(page) |
| 284 | |
| 285 | contentEncoding = contentEncoding.lower() if hasattr(contentEncoding, "lower") else "" |
| 286 | contentType = contentType.lower() if hasattr(contentType, "lower") else "" |
| 287 | |
| 288 | if contentEncoding in ("gzip", "x-gzip", "deflate"): |
| 289 | if not kb.pageCompress: |
| 290 | return None |
| 291 | |
| 292 | try: |
| 293 | if contentEncoding == "deflate": |
| 294 | data = io.BytesIO(zlib.decompress(page, -15)) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations |
| 295 | else: |
| 296 | data = gzip.GzipFile("", "rb", 9, io.BytesIO(page)) |
| 297 | size = struct.unpack("<l", page[-4:])[0] # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py |
| 298 | if size > MAX_CONNECTION_TOTAL_SIZE: |
| 299 | raise Exception("size too large") |
| 300 | |
| 301 | page = data.read() |
| 302 | except Exception as ex: |
| 303 | if b"<html" not in page: # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored) |
| 304 | errMsg = "detected invalid data for declared content " |
| 305 | errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex)) |
| 306 | singleTimeLogMessage(errMsg, logging.ERROR) |
| 307 | |
| 308 | warnMsg = "turning off page compression" |
| 309 | singleTimeWarnMessage(warnMsg) |
| 310 | |
| 311 | kb.pageCompress = False |
| 312 | raise SqlmapCompressionException |
| 313 | |
| 314 | if not conf.encoding: |
| 315 | httpCharset, metaCharset = None, None |
| 316 | |
| 317 | # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode |
| 318 | if contentType.find("charset=") != -1: |
| 319 | httpCharset = checkCharEncoding(contentType.split("charset=")[-1]) |
| 320 | |
| 321 | metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page)) |
| 322 | |
| 323 | if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))): |
| 324 | kb.pageEncoding = httpCharset or metaCharset # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence |
| 325 | debugMsg = "declared web page charset '%s'" % kb.pageEncoding |
| 326 | singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg) |
| 327 | else: |
| 328 | kb.pageEncoding = None |
| 329 | else: |
no test coverage detected
searching dependent graphs…