| 35 | |
| 36 | |
| 37 | class HTML2Text(html.parser.HTMLParser): |
| 38 | def __init__( |
| 39 | self, |
| 40 | out: Optional[OutCallback] = None, |
| 41 | baseurl: str = "", |
| 42 | bodywidth: int = config.BODY_WIDTH, |
| 43 | ) -> None: |
| 44 | """ |
| 45 | Input parameters: |
| 46 | out: possible custom replacement for self.outtextf (which |
| 47 | appends lines of text). |
| 48 | baseurl: base URL of the document we process |
| 49 | """ |
| 50 | super().__init__(convert_charrefs=False) |
| 51 | |
| 52 | # Config options |
| 53 | self.split_next_td = False |
| 54 | self.td_count = 0 |
| 55 | self.table_start = False |
| 56 | self.unicode_snob = config.UNICODE_SNOB # covered in cli |
| 57 | |
| 58 | self.escape_snob = config.ESCAPE_SNOB # covered in cli |
| 59 | self.escape_backslash = config.ESCAPE_BACKSLASH # covered in cli |
| 60 | self.escape_dot = config.ESCAPE_DOT # covered in cli |
| 61 | self.escape_plus = config.ESCAPE_PLUS # covered in cli |
| 62 | self.escape_dash = config.ESCAPE_DASH # covered in cli |
| 63 | |
| 64 | self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH |
| 65 | self.body_width = bodywidth # covered in cli |
| 66 | self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli |
| 67 | self.inline_links = config.INLINE_LINKS # covered in cli |
| 68 | self.protect_links = config.PROTECT_LINKS # covered in cli |
| 69 | self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli |
| 70 | self.ignore_links = config.IGNORE_ANCHORS # covered in cli |
| 71 | self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS # covered in cli |
| 72 | self.ignore_images = config.IGNORE_IMAGES # covered in cli |
| 73 | self.images_as_html = config.IMAGES_AS_HTML # covered in cli |
| 74 | self.images_to_alt = config.IMAGES_TO_ALT # covered in cli |
| 75 | self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli |
| 76 | self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli |
| 77 | self.bypass_tables = config.BYPASS_TABLES # covered in cli |
| 78 | self.ignore_tables = config.IGNORE_TABLES # covered in cli |
| 79 | self.google_doc = False # covered in cli |
| 80 | self.ul_item_mark = "*" # covered in cli |
| 81 | self.emphasis_mark = "_" # covered in cli |
| 82 | self.strong_mark = "**" |
| 83 | self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli |
| 84 | self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli |
| 85 | self.hide_strikethrough = False # covered in cli |
| 86 | self.mark_code = config.MARK_CODE |
| 87 | self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli |
| 88 | self.wrap_links = config.WRAP_LINKS # covered in cli |
| 89 | self.wrap_tables = config.WRAP_TABLES |
| 90 | self.pad_tables = config.PAD_TABLES # covered in cli |
| 91 | self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli |
| 92 | self.tag_callback = None |
| 93 | self.open_quote = config.OPEN_QUOTE # covered in cli |
| 94 | self.close_quote = config.CLOSE_QUOTE # covered in cli |