Very rudimentary auto HTML to Text mail body converter. Caveats: - This method doesn't check for correctness of the HTML document. - Links will be converted to "[text](url)" format. - List items (<li>) are prefixed with "- ". - Indentation is stripped (both tabs and spaces). - Trailing spaces are preserved.
(htmlDocument string)
| 30 | // - Trailing spaces are preserved. |
| 31 | // - Multiple consecutive newlines are collapsed into one unless multiple <br> tags are used. |
| 32 | func html2Text(htmlDocument string) (string, error) { |
| 33 | doc, err := html.Parse(strings.NewReader(htmlDocument)) |
| 34 | if err != nil { |
| 35 | return "", err |
| 36 | } |
| 37 | |
| 38 | var builder strings.Builder |
| 39 | var canAddNewLine bool |
| 40 | |
| 41 | // see https://pkg.go.dev/golang.org/x/net/html#Parse |
| 42 | var f func(*html.Node, *strings.Builder) |
| 43 | f = func(n *html.Node, activeBuilder *strings.Builder) { |
| 44 | isLink := n.Type == html.ElementNode && n.Data == "a" |
| 45 | |
| 46 | if isLink { |
| 47 | var linkBuilder strings.Builder |
| 48 | activeBuilder = &linkBuilder |
| 49 | } else if activeBuilder == nil { |
| 50 | activeBuilder = &builder |
| 51 | } |
| 52 | |
| 53 | switch n.Type { |
| 54 | case html.TextNode: |
| 55 | txt := whitespaceRegex.ReplaceAllString(n.Data, " ") |
| 56 | |
| 57 | // the prev node has new line so it is safe to trim the indentation |
| 58 | if !canAddNewLine { |
| 59 | txt = strings.TrimLeft(txt, " ") |
| 60 | } |
| 61 | |
| 62 | if txt != "" { |
| 63 | activeBuilder.WriteString(txt) |
| 64 | canAddNewLine = true |
| 65 | } |
| 66 | case html.ElementNode: |
| 67 | if n.Data == "br" { |
| 68 | // always write new lines when <br> tag is used |
| 69 | activeBuilder.WriteString("\r\n") |
| 70 | canAddNewLine = false |
| 71 | } else if canAddNewLine && !list.ExistInSlice(n.Data, inlineTags) { |
| 72 | activeBuilder.WriteString("\r\n") |
| 73 | canAddNewLine = false |
| 74 | } |
| 75 | |
| 76 | // prefix list items with dash |
| 77 | if n.Data == "li" { |
| 78 | activeBuilder.WriteString("- ") |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 83 | if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, tagsToSkip) { |
| 84 | f(c, activeBuilder) |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | // format links as [label](href) |
| 89 | if isLink { |
searching dependent graphs…