Method set_include_hyperlinks

xlsx2csv.py:752–796 · view source on GitHub ↗

(self, hyperlinks)

Source from the content-addressed store, hash-verified

750	self.scifloat = scifloat
751
def set_include_hyperlinks(self, hyperlinks):
    """Fill ``self.hyperlinks`` from the sheet's ``<hyperlinks>`` section.

    Only the opening ``<worksheet ...>`` tag (needed for its namespace
    declarations) and the ``<hyperlinks>...</hyperlinks>`` section are
    parsed, not the whole sheet. Each ``<hyperlink>`` that has both a
    ``ref`` attribute and an ``*:id`` attribute naming a known relationship
    maps every cell yielded by ``self._range(ref)`` to that relationship's
    ``'target'``.

    The method returns without touching ``self.hyperlinks`` when
    *hyperlinks* is falsy, when no relationships are available, or when the
    worksheet tag or a complete hyperlinks section cannot be located.

    Side effect: if ``self.filedata`` is empty, it is filled by reading
    ``self.filehandle``.

    :param hyperlinks: truthy to enable hyperlink extraction.
    :raises xml.parsers.expat.ExpatError: propagated from minidom when the
        extracted fragment is not well-formed XML.
    """
    if not hyperlinks or not self.relationships or not self.relationships.relationships:
        return

    # We must read the file first to get the hyperlinks, but we don't want
    # to parse the whole file.
    if not self.filedata:
        self.filedata = self.filehandle.read()
    raw = self.filedata
    if isinstance(raw, str):
        data = raw
    else:
        # Decode rather than call str() on the buffer: on Python 3,
        # str(bytes) returns the repr ("b'...'"), which turns newlines into
        # a literal backslash-n and may backslash-escape quotes, breaking
        # the XML parse below.
        data = raw.decode("utf-8", "replace")

    # Find the worksheet opening tag; its namespace declarations are needed
    # so that prefixed attributes such as r:id can be parsed.
    ws_start = data.find("<worksheet")
    if ws_start < 0:
        return
    ws_end = data.find(">", ws_start)
    if ws_end < 0:
        # Truncated opening tag: nothing usable.
        return
    worksheet_tag = data[ws_start:ws_end + 1]

    # Find the hyperlinks section.
    open_tag = "<hyperlinks>"
    close_tag = "</hyperlinks>"
    links_start = data.find(open_tag, ws_end)
    if links_start < 0:
        # hyperlinks not found
        return
    links_end = data.find(close_tag, links_start)
    if links_end < 0:
        # Unterminated section: slicing with -1 would yield garbage.
        return
    fragment = data[links_start:links_end + len(close_tag)]

    # Parse a minimal document: <worksheet ...><hyperlinks>...</hyperlinks></worksheet>
    root = minidom.parseString(worksheet_tag + fragment + "</worksheet>").documentElement
    if root.namespaceURI:
        hyperlink_nodes = root.getElementsByTagNameNS(root.namespaceURI, "hyperlink")
    else:
        hyperlink_nodes = root.getElementsByTagName("hyperlink")

    relations = self.relationships.relationships
    for hlink in hyperlink_nodes:
        ref = rId = None
        # Use the public ``attributes`` map: the private ``_attrs`` is None
        # for an element that carries no attributes at all.
        for name, value in hlink.attributes.items():
            if name == "ref":
                ref = str(value)
            if name.endswith(":id"):
                rId = str(value)
        if not ref or not rId:
            continue
        rel = relations.get(rId)
        if not rel:
            continue
        target = rel.get('target')
        for cell in self._range(ref):
            self.hyperlinks[cell] = target
797
798	def to_csv(self, writer):
799	self.writer = writer

_convertMethod · 0.95

_rangeMethod · 0.95

no test coverage detected