Function html_unescape

module/plugins/internal/misc.py:354–382 · view source on GitHub ↗

Decode HTML or XML escape character references and entities from a text string

(text)

Source from the content-addressed store, hash-verified

352
353
def html_unescape(text):
    """
    Decode HTML or XML escape character references and entities from a text string

    Decimal (``&#65;``) and hexadecimal (``&#x41;`` / ``&#X41;``) character
    references are replaced by the character with that code point; named
    entities are replaced when their name is a key of ``name2codepoint``.
    Any sequence that cannot be decoded is left in the result unchanged.

    :param text: string that may contain escape sequences
    :return: ``text`` with every decodable sequence replaced
    """
    def fixup(m):
        token = m.group(0)

        if token[:2] == "&#":
            # character reference; the hex marker may be written "x" or "X"
            try:
                if token[2:3] in ("x", "X"):
                    return unichr(int(token[3:-1], 16))
                return unichr(int(token[2:-1]))

            except (ValueError, OverflowError):
                # digits are not a valid number, or the value is rejected by
                # unichr() (out of range / too large) -> keep the raw token
                pass

        else:
            # named entity
            try:
                return unichr(name2codepoint[token[1:-1]])

            except KeyError:
                pass

        return token  # leave as is

    #@TODO: Replace in 0.4.10 with:
    #   h = HTMLParser.HTMLParser()
    #   return h.unescape(text)
    return re.sub(r"&#?\w+;", fixup, text)
383
384
385	def isiterable(obj):

fixurlFunction · 0.70

parse_nameFunction · 0.70

loadMethod · 0.70

uploadMethod · 0.70

handle_captchaMethod · 0.70

handle_web_linksMethod · 0.50

get_file_nameMethod · 0.50

no outgoing calls

no test coverage detected