MCPcopy
hub / github.com/dilshod/xlsx2csv / set_include_hyperlinks

Method set_include_hyperlinks

xlsx2csv.py:752–796  ·  view source on GitHub ↗
(self, hyperlinks)

Source from the content-addressed store, hash-verified

750 self.scifloat = scifloat
751
def set_include_hyperlinks(self, hyperlinks):
    """Populate ``self.hyperlinks`` from the worksheet's ``<hyperlinks>`` section.

    Does nothing when *hyperlinks* is falsy or when no relationships are
    loaded. Otherwise the raw sheet data is read (once, cached in
    ``self.filedata``), the ``<hyperlinks>...</hyperlinks>`` fragment is cut
    out with plain string searches, and only that fragment is parsed with
    minidom, so the whole sheet is never loaded into a DOM.

    For every ``<hyperlink>`` element carrying both a ``ref`` attribute and
    an ``*:id`` attribute whose id is present in
    ``self.relationships.relationships``, each cell yielded by
    ``self._range(ref)`` is mapped to that relationship's ``'target'``.

    :param hyperlinks: truthy to enable hyperlink collection.
    :return: None; results are stored in ``self.hyperlinks``.
    """
    if not hyperlinks or not self.relationships or not self.relationships.relationships:
        return

    # We must read the file first to get hyperlinks, but we don't want to
    # parse the whole file.
    if not self.filedata:
        self.filedata = self.filehandle.read()
    raw = self.filedata
    # Decode bytes properly. str(bytes) on Python 3 yields the "b'...'" repr,
    # which turns newlines into a literal backslash-n (breaking tags that
    # span several lines) and mangles non-ASCII characters.
    if isinstance(raw, str):
        data = raw
    else:
        data = raw.decode("utf-8", "replace")

    # Find the opening worksheet tag: its namespace declarations are needed
    # so that prefixed attributes (e.g. r:id) in the fragment can be parsed.
    ws_start = data.find("<worksheet")
    if ws_start < 0:
        return
    ws_end = data.find(">", ws_start)
    if ws_end < 0:
        # truncated opening tag, nothing usable
        return
    worksheet_tag = data[ws_start:ws_end + 1]

    # Find the hyperlinks part.
    open_tag = "<hyperlinks>"
    close_tag = "</hyperlinks>"
    hl_start = data.find(open_tag, ws_end)
    if hl_start < 0:
        # hyperlinks not found
        return
    hl_end = data.find(close_tag, hl_start)
    if hl_end < 0:
        # unterminated hyperlinks section
        return
    fragment = data[hl_start:hl_end + len(close_tag)]

    # Parse a minimal document: <worksheet ...> + hyperlinks + </worksheet>.
    doc = minidom.parseString(worksheet_tag + fragment + "</worksheet>").firstChild
    if doc.namespaceURI:
        hyperlink_nodes = doc.getElementsByTagNameNS(doc.namespaceURI, "hyperlink")
    else:
        hyperlink_nodes = doc.getElementsByTagName("hyperlink")

    relationships = self.relationships.relationships
    for hlink in hyperlink_nodes:
        ref = r_id = None
        # Use the public attribute map; the private _attrs can be None for
        # an element without attributes.
        for name, value in hlink.attributes.items():
            if name == "ref":
                ref = str(value)
            if name.endswith(":id"):
                # the relationship-id attribute, whatever its prefix is
                r_id = str(value)
        if not ref or not r_id:
            continue
        rel = relationships.get(r_id)
        if not rel:
            continue
        target = rel.get('target')
        for cell in self._range(ref):
            self.hyperlinks[cell] = target
797
798 def to_csv(self, writer):
799 self.writer = writer

Callers 1

_convertMethod · 0.95

Calls 1

_rangeMethod · 0.95

Tested by

no test coverage detected