after a new line created by previous
canPrintNewline := false outBuf := bytes.NewBufferString("") for i, r := range html { if inLen > 0 && i == inLen-1 { // prevent new line at the end of the document canPrintNewline = false } switch { // skip new lines and spaces adding a single space if not there yet case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines r == ' ', r >= 0x2008 && r <= 0x200B: // spaces if shouldOutput && badTagStackDepth == 0 && !inEnt { //outBuf.WriteString(fmt.Sprintf("{DBG r:%c, inEnt:%t, tag:%s}", r, inEnt, html[tagStart:i])) writeSpace(outBuf) } continue case r == ';' && inEnt: // end of html entity inEnt = false continue case r == '&' && shouldOutput: // possible html entity entName := "" isEnt := false // parse the entity name - max 10 chars chars := 0 for _, er := range html[i+1:] { if er == ';' { isEnt = true break } else { entName += string(er) } chars++ if chars == 10 { break } } if isEnt { if ent, isEnt := parseHTMLEntity(entName); isEnt { outBuf.WriteString(ent) inEnt = true continue } } case r == '<': // start of a tag tagStart = i + 1 shouldOutput = false continue case r == '>': // end of a tag shouldOutput = true tag := html[tagStart:i] tagNameLowercase := strings.ToLower(tag) if tagNameLowercase == "/ul" || tagNameLowercase == "/ol" { outBuf.WriteString(opts.lbr) } else if tagNameLowercase == "li" || tagNameLowercase == "li/" { if opts.listPrefix != "" { outBuf.WriteString(opts.lbr + opts.listPrefix) } else { outBuf.WriteString(opts.lbr) } } else if headersRE.MatchString(tagNameLowercase) { if canPrintNewline { outBuf.WriteString(opts.lbr + opts.lbr) } canPrintNewline = false } else if tagNameLowercase == "br" || tagNameLowercase == "br/" { // new line outBuf.WriteString(opts.lbr) } else if tagNameLowercase == "p" || tagNameLowercase == "/p" { if canPrintNewline { outBuf.WriteString(opts.lbr + opts.lbr) } canPrintNewline = false } else if opts.linksInnerText && tagNameLowercase == "/a" { // end of link // links can be empty can happen if the link matches the badLinkHrefRE if len(hrefs) > 0 { outBuf.WriteString(" <") outBuf.WriteString(HTMLEntitiesToText(hrefs[0])) outBuf.WriteString(">") hrefs = hrefs[1:] } } else if opts.linksInnerText && linkTagRE.MatchString(tagNameLowercase) { // parse link href // add special handling for a tags m := linkTagRE.FindStringSubmatch(tag) if len(m) == 5 { link := m[2] if len(link) == 0 { link = m[3] if len(link) == 0 { link = m[4] } } if opts.linksInnerText && !badLinkHrefRE.MatchString(link) { hrefs = append(hrefs, link) } } } else if badTagnamesRE.MatchString(tagNameLowercase) { // unwanted block badTagStackDepth++ // if link inner text preservation is not enabled // and the current tag is a link tag, parse its href and output that if !opts.linksInnerText { // parse link href m := linkTagRE.FindStringSubmatch(tag) if len(m) == 5 { link := m[2] if len(link) == 0 { link = m[3] if len(link) == 0 { link = m[4] } } if !badLinkHrefRE.MatchString(link) { outBuf.WriteString(HTMLEntitiesToText(link)) } } } } else if len(tagNameLowercase) > 0 && tagNameLowercase[0] == '/' && badTagnamesRE.MatchString(tagNameLowercase[1:]) { // end of unwanted block badTagStackDepth-- } continue } // switch end if shouldOutput && badTagStackDepth == 0 && !inEnt { canPrintNewline = true outBuf.WriteRune(r) } } return outBuf.String() }