A tree which handles HTML nodes. Designed to work with a Python HTML parser, meaning that the current_node will be the most recently opened tag. When a tag is closed, the current_node moves up to the parent node.
| 52 | |
| 53 | |
class HTMLTree:
    """
    A tree which handles HTML nodes. Designed to work with a Python HTML
    parser, meaning that the current_node will be the most recently opened
    tag. When a tag is closed, the current_node moves up to the parent node.

    Tags for which the document's style has no ``start_<tag>`` /
    ``end_<tag>`` handler are not added to the tree; their names are
    collected in ``unhandled_tags`` instead.
    """

    def __init__(self, doc):
        """
        :param doc: Document the tree is written to. Its ``style`` attribute
            is inspected for ``start_<tag>`` / ``end_<tag>`` handlers, and
            ``doc`` itself is passed to the root node's ``write`` method.
        """
        self.doc = doc
        # Root of the tree; every opened tag and data node hangs below it.
        self.head = StemNode()
        self.current_node = self.head
        self.unhandled_tags = []

    def add_tag(self, tag, attrs=None, is_start=True):
        """
        Record an opening or closing tag.

        :param tag: Tag name, e.g. ``'li'``.
        :param attrs: Attributes of the tag, passed unchanged to the node
            created for an opening tag. Ignored for closing tags.
        :param is_start: ``True`` for an opening tag, ``False`` for a
            closing tag.
        """
        if not self._doc_has_handler(tag, is_start):
            self.unhandled_tags.append(tag)
            return

        if not is_start:
            # A closing tag that arrives while nothing is open must not move
            # current_node above the root: that would leave it pointing at
            # the root's parent (presumably None), so the next add_tag /
            # add_data call would fail. Stay on the root instead.
            if self.current_node is not self.head:
                self.current_node = self.current_node.parent
            return

        # List items get their own node type; every other tag is generic.
        if tag == 'li':
            node = LineItemNode(attrs)
        else:
            node = TagNode(tag, attrs)
        self.current_node.add_child(node)
        # The freshly opened tag becomes the insertion point for children.
        self.current_node = node

    def _doc_has_handler(self, tag, is_start):
        """
        Return whether ``self.doc.style`` has an attribute named
        ``start_<tag>`` (opening tag) or ``end_<tag>`` (closing tag).
        """
        handler_name = ('start_%s' if is_start else 'end_%s') % tag
        return hasattr(self.doc.style, handler_name)

    def add_data(self, data):
        """Append ``data`` as a ``DataNode`` child of the current node."""
        self.current_node.add_child(DataNode(data))

    def write(self):
        """Write the tree by calling the root node's ``write`` with the doc."""
        self.head.write(self.doc)
| 95 | |
| 96 | |
| 97 | class Node: |