package html2text import ( "bytes" "regexp" "strconv" "strings" ) // Line break constants // Deprecated: Please use HTML2TextWithOptions(text, WithUnixLineBreak()) const ( WIN_LBR = "\r\n" UNIX_LBR = "\n" ) var legacyLBR = WIN_LBR var badTagnamesRE = regexp.MustCompile(`^(head|script|style|a)($|\s+)`) var linkTagRE = regexp.MustCompile(`^(?i:a)(?:$|\s).*(?i:href)\s*=\s*('([^']*?)'|"([^"]*?)"|([^\s"'` + "`" + `=<>]+))`) var badLinkHrefRE = regexp.MustCompile(`javascript:`) var headersRE = regexp.MustCompile(`^(\/)?h[1-6]`) var numericEntityRE = regexp.MustCompile(`(?i)^#(x?[a-f0-9]+)$`) type options struct { lbr string linksInnerText bool listPrefix string } func newOptions() *options { // apply defaults return &options{ lbr: WIN_LBR, } } // Option is a functional option type Option func(*options) // WithUnixLineBreaks instructs the converter to use unix line breaks ("\n" instead of "\r\n" default) func WithUnixLineBreaks() Option { return func(o *options) { o.lbr = UNIX_LBR } } // WithLinksInnerText instructs the converter to retain link tag inner text and append href URLs in angle brackets after the text // Example: click news func WithLinksInnerText() Option { return func(o *options) { o.linksInnerText = true } } // WithListSupportPrefix formats