Method getElementsByTag

newspaper/parsers.py:110–122 · view source on GitHub ↗

(
            cls, node, tag=None, attr=None, value=None, childs=False)

Source from the content-addressed store, hash-verified

108
109	@classmethod
def getElementsByTag(
        cls, node, tag=None, attr=None, value=None, childs=False):
    """Return the elements at or below ``node`` that match the filters.

    The search runs the XPath ``descendant-or-self`` axis from ``node``.

    Args:
        node: Element to search from; it must provide an ``xpath``
            method that accepts ``namespaces`` and keyword XPath
            variables (presumably an lxml element -- confirm against
            callers).
        tag: Element name to select. Any element is selected when this
            is falsy.
        attr: Attribute name to filter on. Only applied when ``value``
            is also truthy.
        value: EXSLT regular expression tested against ``attr`` with
            the ``"i"`` (case-insensitive) flag.
        childs: When truthy, ``node`` itself is dropped from the result
            even if no ``tag`` was given.

    Returns:
        The list produced by ``node.xpath``, with ``node`` removed when
        it matched and either ``tag`` or ``childs`` is truthy.
    """
    NS = "http://exslt.org/regular-expressions"
    selector = 'descendant-or-self::%s' % (tag or '*')
    variables = {}
    if attr and value:
        # Bind the pattern as an XPath variable rather than splicing it
        # into a quoted literal: a value containing '"' would otherwise
        # terminate the literal early and break (or alter) the query.
        selector = '%s[re:test(@%s, $pattern, "i")]' % (selector, attr)
        # '%s' keeps the same stringification the interpolated form had.
        variables['pattern'] = '%s' % (value,)
    elems = node.xpath(selector, namespaces={"re": NS}, **variables)
    # descendant-or-self can match the root itself; callers asking for a
    # specific tag or for children only do not want it in the result.
    if node in elems and (tag or childs):
        elems.remove(node)
    return elems
123
124	@classmethod
125	def appendChild(cls, node, child):

clean_body_classesMethod · 0.80

clean_article_tagsMethod · 0.80

clean_em_tagsMethod · 0.80

remove_scripts_stylesMethod · 0.80

div_to_paraMethod · 0.80

get_authorsMethod · 0.80

get_publishing_dateMethod · 0.80

get_titleMethod · 0.80

get_feed_urlsMethod · 0.80

get_faviconMethod · 0.80

get_meta_langMethod · 0.80

get_meta_img_urlMethod · 0.80

removeMethod · 0.80

no test coverage detected