MCPcopy
hub / github.com/pocketbase/pocketbase / html2Text

Function html2Text

tools/mailer/html2text.go:32–118  ·  view source on GitHub ↗

Very rudimentary auto HTML to Text mail body converter. Caveats: - This method doesn't check for correctness of the HTML document. - Links will be converted to "[text](url)" format. - List items (<li>) are prefixed with "- ". - Indentation is stripped (both tabs and spaces). - Trailing spaces are preserved.

(htmlDocument string)

Source from the content-addressed store, hash-verified

30// - Trailing spaces are preserved.
31// - Multiple consecutive newlines are collapsed as one unless multiple <br> tags are used.
32func html2Text(htmlDocument string) (string, error) {
33 doc, err := html.Parse(strings.NewReader(htmlDocument))
34 if err != nil {
35 return "", err
36 }
37
38 var builder strings.Builder
39 var canAddNewLine bool
40
41 // see https://pkg.go.dev/golang.org/x/net/html#Parse
42 var f func(*html.Node, *strings.Builder)
43 f = func(n *html.Node, activeBuilder *strings.Builder) {
44 isLink := n.Type == html.ElementNode && n.Data == "a"
45
46 if isLink {
47 var linkBuilder strings.Builder
48 activeBuilder = &linkBuilder
49 } else if activeBuilder == nil {
50 activeBuilder = &builder
51 }
52
53 switch n.Type {
54 case html.TextNode:
55 txt := whitespaceRegex.ReplaceAllString(n.Data, " ")
56
57 // the prev node has new line so it is safe to trim the indentation
58 if !canAddNewLine {
59 txt = strings.TrimLeft(txt, " ")
60 }
61
62 if txt != "" {
63 activeBuilder.WriteString(txt)
64 canAddNewLine = true
65 }
66 case html.ElementNode:
67 if n.Data == "br" {
68 // always write new lines when <br> tag is used
69 activeBuilder.WriteString("\r\n")
70 canAddNewLine = false
71 } else if canAddNewLine && !list.ExistInSlice(n.Data, inlineTags) {
72 activeBuilder.WriteString("\r\n")
73 canAddNewLine = false
74 }
75
76 // prefix list items with dash
77 if n.Data == "li" {
78 activeBuilder.WriteString("- ")
79 }
80 }
81
82 for c := n.FirstChild; c != nil; c = c.NextSibling {
83 if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, tagsToSkip) {
84 f(c, activeBuilder)
85 }
86 }
87
88 // format links as [label](href)
89 if isLink {

Callers 2

sendMethod · 0.85
TestHTML2TextFunction · 0.85

Calls 6

ExistInSliceFunction · 0.92
ParseMethod · 0.80
NewReaderMethod · 0.80
fFunction · 0.50
StringMethod · 0.45
ResetMethod · 0.45

Tested by 1

TestHTML2TextFunction · 0.68

Used in the wild real call sites across dependent graphs

searching dependent graphs…