// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package html

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strings"
)

// writer is the set of write operations the rendering code relies on:
// arbitrary byte slices (io.Writer), single bytes (io.ByteWriter) and whole
// strings (WriteString). Render uses its destination directly when it already
// satisfies this interface and otherwise wraps it in a *bufio.Writer.
type writer interface {
	io.Writer
	io.ByteWriter
	WriteString(string) (int, error)
}

// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error { if x, ok := w.(writer); ok { return render(x, n) } buf := bufio.NewWriter(w) if err := render(buf, n); err != nil { return err } return buf.Flush() } // plaintextAbort is returned from render1 when a <plaintext> element // has been rendered. No more end tags should be rendered after that. var plaintextAbort = errors.New("html: internal error (plaintext abort)") func render(w writer, n *Node) error { err := render1(w, n) if err == plaintextAbort { err = nil } return err } func render1(w writer, n *Node) error { // Render non-element nodes; these are the easy cases. switch n.Type { case ErrorNode: return errors.New("html: cannot render an ErrorNode node") case TextNode: return escape(w, n.Data) case DocumentNode: for c := n.FirstChild; c != nil; c = c.NextSibling { if err := render1(w, c); err != nil { return err } } return nil case ElementNode: // No-op. case CommentNode: if _, err := w.WriteString("<!--"); err != nil { return err } if err := escapeComment(w, n.Data); err != nil { return err } if _, err := w.WriteString("-->"); err != nil { return err } return nil case DoctypeNode: if _, err := w.WriteString("<!DOCTYPE "); err != nil { return err } if err := escape(w, n.Data); err != nil { return err } if n.Attr != nil { var p, s string for _, a := range n.Attr { switch a.Key { case "public": p = a.Val case "system": s = a.Val } } if p != "" { if _, err := w.WriteString(" PUBLIC "); err != nil { return err } if err := writeQuoted(w, p); err != nil { return err } if s != "" { if err := w.WriteByte(' '); err != nil { return err } if err := writeQuoted(w, s); err != nil { return err } } } else if s != "" { if _, err := w.WriteString(" SYSTEM "); err != nil { return err } if err := writeQuoted(w, s); err != nil { return err } } } return w.WriteByte('>') case RawNode: _, err := w.WriteString(n.Data) return err default: return errors.New("html: unknown node type") } // Render the <xxx> opening tag. 
if err := w.WriteByte('<'); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } for _, a := range n.Attr { if err := w.WriteByte(' '); err != nil { return err } if a.Namespace != "" { if _, err := w.WriteString(a.Namespace); err != nil { return err } if err := w.WriteByte(':'); err != nil { return err } } if _, err := w.WriteString(a.Key); err != nil { return err } if _, err := w.WriteString(`="`); err != nil { return err } if err := escape(w, a.Val); err != nil { return err } if err := w.WriteByte('"'); err != nil { return err } } if voidElements[n.Data] { if n.FirstChild != nil { return fmt.Errorf("html: void element <%s> has child nodes", n.Data) } _, err := w.WriteString("/>") return err } if err := w.WriteByte('>'); err != nil { return err } // Add initial newline where there is danger of a newline beging ignored. if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { switch n.Data { case "pre", "listing", "textarea": if err := w.WriteByte('\n'); err != nil { return err } } } // Render any child nodes if childTextNodesAreLiteral(n) { for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == TextNode { if _, err := w.WriteString(c.Data); err != nil { return err } } else { if err := render1(w, c); err != nil { return err } } } if n.Data == "plaintext" { // Don't render anything else. <plaintext> must be the // last element in the file, with no closing tag. return plaintextAbort } } else { for c := n.FirstChild; c != nil; c = c.NextSibling { if err := render1(w, c); err != nil { return err } } } // Render the </xxx> closing tag. 
if _, err := w.WriteString("</"); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } return w.WriteByte('>') } func childTextNodesAreLiteral(n *Node) bool { // Per WHATWG HTML 13.3, if the parent of the current node is a style, // script, xmp, iframe, noembed, noframes, or plaintext element, and the // current node is a text node, append the value of the node's data // literally. The specification is not explicit about it, but we only // enforce this if we are in the HTML namespace (i.e. when the namespace is // ""). // NOTE: we also always include noscript elements, although the // specification states that they should only be rendered as such if // scripting is enabled for the node (which is not something we track). if n.Namespace != "" { return false } switch n.Data { case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": return true default: return false } } // writeQuoted writes s to w surrounded by quotes. Normally it will use double // quotes, but if s contains a double quote, it will use single quotes. // It is used for writing the identifiers in a doctype declaration. // In valid HTML, they can't contain both types of quotes. func writeQuoted(w writer, s string) error { var q byte = '"' if strings.Contains(s, `"`) { q = '\'' } if err := w.WriteByte(q); err != nil { return err } if _, err := w.WriteString(s); err != nil { return err } if err := w.WriteByte(q); err != nil { return err } return nil } // Section 12.1.2, "Elements", gives this list of void elements. Void elements // are those that can't have any contents. var voidElements = map[string]bool{ "area": true, "base": true, "br": true, "col": true, "embed": true, "hr": true, "img": true, "input": true, "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. "link": true, "meta": true, "param": true, "source": true, "track": true, "wbr": true, }