mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-01-15 19:10:14 +00:00
353 lines
8.8 KiB
Go
353 lines
8.8 KiB
Go
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package httpguts
|
||
|
|
||
|
import (
|
||
|
"net"
|
||
|
"strings"
|
||
|
"unicode/utf8"
|
||
|
|
||
|
"golang.org/x/net/idna"
|
||
|
)
|
||
|
|
||
|
// isTokenTable is indexed by ASCII code point; an entry is true when
// that character may appear in an HTTP token (the "tchar" rule of
// RFC 7230). The array has 127 entries, so DEL (0x7f) and every
// non-ASCII value fall outside it.
var isTokenTable = [127]bool{
	// Punctuation allowed by tchar.
	'!': true, '#': true, '$': true, '%': true, '&': true,
	'\'': true, '*': true, '+': true, '-': true, '.': true,
	'^': true, '_': true, '`': true, '|': true, '~': true,

	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, upper case
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// ALPHA, lower case
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}
|
||
|
|
||
|
func IsTokenRune(r rune) bool {
|
||
|
i := int(r)
|
||
|
return i < len(isTokenTable) && isTokenTable[i]
|
||
|
}
|
||
|
|
||
|
func isNotToken(r rune) bool {
|
||
|
return !IsTokenRune(r)
|
||
|
}
|
||
|
|
||
|
// HeaderValuesContainsToken reports whether any string in values
|
||
|
// contains the provided token, ASCII case-insensitively.
|
||
|
func HeaderValuesContainsToken(values []string, token string) bool {
|
||
|
for _, v := range values {
|
||
|
if headerValueContainsToken(v, token) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// isOWS reports whether b is an optional-whitespace (OWS) byte as
// defined by RFC 7230 section 3.2.3: a space or a horizontal tab.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
|
||
|
|
||
|
// trimOWS returns x with all optional whitespace removes from the
|
||
|
// beginning and end.
|
||
|
func trimOWS(x string) string {
|
||
|
// TODO: consider using strings.Trim(x, " \t") instead,
|
||
|
// if and when it's fast enough. See issue 10292.
|
||
|
// But this ASCII-only code will probably always beat UTF-8
|
||
|
// aware code.
|
||
|
for len(x) > 0 && isOWS(x[0]) {
|
||
|
x = x[1:]
|
||
|
}
|
||
|
for len(x) > 0 && isOWS(x[len(x)-1]) {
|
||
|
x = x[:len(x)-1]
|
||
|
}
|
||
|
return x
|
||
|
}
|
||
|
|
||
|
// headerValueContainsToken reports whether v (assumed to be a
|
||
|
// 0#element, in the ABNF extension described in RFC 7230 section 7)
|
||
|
// contains token amongst its comma-separated tokens, ASCII
|
||
|
// case-insensitively.
|
||
|
func headerValueContainsToken(v string, token string) bool {
|
||
|
for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
|
||
|
if tokenEqual(trimOWS(v[:comma]), token) {
|
||
|
return true
|
||
|
}
|
||
|
v = v[comma+1:]
|
||
|
}
|
||
|
return tokenEqual(trimOWS(v), token)
|
||
|
}
|
||
|
|
||
|
// lowerASCII maps the ASCII upper-case letters 'A'..'Z' to their
// lower-case counterparts and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	// Distance between an upper-case letter and its lower-case form.
	const caseOffset = 'a' - 'A'
	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseOffset
}
|
||
|
|
||
|
// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
|
||
|
func tokenEqual(t1, t2 string) bool {
|
||
|
if len(t1) != len(t2) {
|
||
|
return false
|
||
|
}
|
||
|
for i, b := range t1 {
|
||
|
if b >= utf8.RuneSelf {
|
||
|
// No UTF-8 or non-ASCII allowed in tokens.
|
||
|
return false
|
||
|
}
|
||
|
if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// isLWS reports whether b is a linear-white-space byte (SP or HT),
// according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS            = [CRLF] 1*( SP | HT )
func isLWS(b byte) bool {
	const (
		sp = ' '
		ht = '\t'
	)
	return b == sp || b == ht
}
|
||
|
|
||
|
// isCTL reports whether b is a control byte, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL            = <any US-ASCII control character
//	                 (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	switch {
	case b <= 0x1f: // octets 0 - 31
		return true
	case b == 0x7f: // DEL
		return true
	default:
		return false
	}
}
|
||
|
|
||
|
// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
|
||
|
// HTTP/2 imposes the additional restriction that uppercase ASCII
|
||
|
// letters are not allowed.
|
||
|
//
|
||
|
// RFC 7230 says:
|
||
|
//
|
||
|
// header-field = field-name ":" OWS field-value OWS
|
||
|
// field-name = token
|
||
|
// token = 1*tchar
|
||
|
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
|
||
|
// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
|
||
|
func ValidHeaderFieldName(v string) bool {
|
||
|
if len(v) == 0 {
|
||
|
return false
|
||
|
}
|
||
|
for _, r := range v {
|
||
|
if !IsTokenRune(r) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// ValidHostHeader reports whether h is a valid host header.
|
||
|
func ValidHostHeader(h string) bool {
|
||
|
// The latest spec is actually this:
|
||
|
//
|
||
|
// http://tools.ietf.org/html/rfc7230#section-5.4
|
||
|
// Host = uri-host [ ":" port ]
|
||
|
//
|
||
|
// Where uri-host is:
|
||
|
// http://tools.ietf.org/html/rfc3986#section-3.2.2
|
||
|
//
|
||
|
// But we're going to be much more lenient for now and just
|
||
|
// search for any byte that's not a valid byte in any of those
|
||
|
// expressions.
|
||
|
for i := 0; i < len(h); i++ {
|
||
|
if !validHostByte[h[i]] {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// validHostByte is indexed by byte value; an entry is true when that
// byte is accepted by ValidHostHeader. See the comment inside
// ValidHostHeader for why the set is this lenient.
var validHostByte = [256]bool{
	// ALPHA
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// unreserved punctuation
	'-': true,
	'.': true,
	'_': true,
	'~': true,

	// sub-delims
	'!':  true,
	'$':  true,
	'&':  true,
	'\'': true,
	'(':  true,
	')':  true,
	'*':  true,
	'+':  true,
	',':  true,
	';':  true,
	'=':  true,

	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true, // opens an IP-literal (RFC 3986 section 3.2.2)
	']': true, // closes an IP-literal (RFC 3986 section 3.2.2)
}
|
||
|
|
||
|
// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
|
||
|
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
|
||
|
//
|
||
|
// message-header = field-name ":" [ field-value ]
|
||
|
// field-value = *( field-content | LWS )
|
||
|
// field-content = <the OCTETs making up the field-value
|
||
|
// and consisting of either *TEXT or combinations
|
||
|
// of token, separators, and quoted-string>
|
||
|
//
|
||
|
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
|
||
|
//
|
||
|
// TEXT = <any OCTET except CTLs,
|
||
|
// but including LWS>
|
||
|
// LWS = [CRLF] 1*( SP | HT )
|
||
|
// CTL = <any US-ASCII control character
|
||
|
// (octets 0 - 31) and DEL (127)>
|
||
|
//
|
||
|
// RFC 7230 says:
|
||
|
//
|
||
|
// field-value = *( field-content / obs-fold )
|
||
|
// obj-fold = N/A to http2, and deprecated
|
||
|
// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
|
||
|
// field-vchar = VCHAR / obs-text
|
||
|
// obs-text = %x80-FF
|
||
|
// VCHAR = "any visible [USASCII] character"
|
||
|
//
|
||
|
// http2 further says: "Similarly, HTTP/2 allows header field values
|
||
|
// that are not valid. While most of the values that can be encoded
|
||
|
// will not alter header field parsing, carriage return (CR, ASCII
|
||
|
// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
|
||
|
// 0x0) might be exploited by an attacker if they are translated
|
||
|
// verbatim. Any request or response that contains a character not
|
||
|
// permitted in a header field value MUST be treated as malformed
|
||
|
// (Section 8.1.2.6). Valid characters are defined by the
|
||
|
// field-content ABNF rule in Section 3.2 of [RFC7230]."
|
||
|
//
|
||
|
// This function does not (yet?) properly handle the rejection of
|
||
|
// strings that begin or end with SP or HTAB.
|
||
|
func ValidHeaderFieldValue(v string) bool {
|
||
|
for i := 0; i < len(v); i++ {
|
||
|
b := v[i]
|
||
|
if isCTL(b) && !isLWS(b) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// isASCII reports whether s consists solely of ASCII bytes
// (every byte is below utf8.RuneSelf). The empty string is ASCII.
func isASCII(s string) bool {
	// A byte >= 0x80 decodes either as part of a multi-byte rune or as
	// utf8.RuneError; both are >= utf8.RuneSelf.
	for _, r := range s {
		if r >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|
||
|
|
||
|
// PunycodeHostPort returns the IDNA Punycode version
|
||
|
// of the provided "host" or "host:port" string.
|
||
|
func PunycodeHostPort(v string) (string, error) {
|
||
|
if isASCII(v) {
|
||
|
return v, nil
|
||
|
}
|
||
|
|
||
|
host, port, err := net.SplitHostPort(v)
|
||
|
if err != nil {
|
||
|
// The input 'v' argument was just a "host" argument,
|
||
|
// without a port. This error should not be returned
|
||
|
// to the caller.
|
||
|
host = v
|
||
|
port = ""
|
||
|
}
|
||
|
host, err = idna.ToASCII(host)
|
||
|
if err != nil {
|
||
|
// Non-UTF-8? Not representable in Punycode, in any
|
||
|
// case.
|
||
|
return "", err
|
||
|
}
|
||
|
if port == "" {
|
||
|
return host, nil
|
||
|
}
|
||
|
return net.JoinHostPort(host, port), nil
|
||
|
}
|