Decode HTML or XML escape character references and entities from a text string
(text)
| 352 | |
| 353 | |
def html_unescape(text):
    """
    Decode HTML or XML escape character references and entities from a text string

    Decimal references (``&#65;``), hexadecimal references (``&#x41;`` or
    ``&#X41;``) and named entities present in ``name2codepoint`` (``&amp;``)
    are replaced by the character they denote. Anything that cannot be
    decoded -- an unknown entity name, a malformed number, or a code point
    outside the range the interpreter can represent -- is left unchanged.

    :param text: string that may contain character references / entities
    :return: ``text`` with every decodable reference replaced
    """
    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` does the same job.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                # the hex marker may be written as either "x" or "X"
                if ref[2:3] in ("x", "X"):
                    return to_char(int(ref[3:-1], 16))
                return to_char(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or code point out of range
                # OverflowError: number too large to convert at all
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[ref[1:-1]])
            except KeyError:
                pass

        return ref  # leave as is

    # raw string so that ``\w`` reaches the regex engine untouched
    return re.sub(r"&#?\w+;", fixup, text)
    #@TODO: Replace in 0.4.10 with:
    #       h = HTMLParser.HTMLParser()
    #       return h.unescape(text)
    # NOTE: on Python 3 the standard-library equivalent is html.unescape(text)
| 383 | |
| 384 | |
| 385 | def isiterable(obj): |
no outgoing calls
no test coverage detected