From 1cdc163276f3437645c0b2c01602e53fa2292c46 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 26 May 2022 13:38:41 +0200 Subject: [PATCH] [performance] Don't retry/backoff invalid http requests that will never succeed (#609) * add httpguts (ew) * add ValidateRequest err wrapping logic * don't retry on unrecoverable errors * i am very clever --- go.mod | 2 +- go.sum | 2 + internal/httpclient/client.go | 8 + internal/httpclient/request.go | 63 ++++ internal/transport/transport.go | 11 +- vendor/golang.org/x/net/http/httpguts/guts.go | 50 +++ .../golang.org/x/net/http/httpguts/httplex.go | 352 ++++++++++++++++++ vendor/modules.txt | 3 +- 8 files changed, 487 insertions(+), 4 deletions(-) create mode 100644 internal/httpclient/request.go create mode 100644 vendor/golang.org/x/net/http/httpguts/guts.go create mode 100644 vendor/golang.org/x/net/http/httpguts/httplex.go diff --git a/go.mod b/go.mod index 4e5a288dc..13c743ae2 100644 --- a/go.mod +++ b/go.mod @@ -44,7 +44,7 @@ require ( github.com/uptrace/bun/dialect/sqlitedialect v1.1.3 github.com/wagslane/go-password-validator v0.3.0 golang.org/x/crypto v0.0.0-20220427172511-eb4f295cb31f - golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 + golang.org/x/net v0.0.0-20220524220425-1d687d428aca golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 golang.org/x/text v0.3.7 gopkg.in/mcuadros/go-syslog.v2 v2.3.0 diff --git a/go.sum b/go.sum index 4e339bea0..4e1d80880 100644 --- a/go.sum +++ b/go.sum @@ -665,6 +665,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 h1:HVyaeDAYux4pnY+D/SiwmLOR36ewZ4iGQIIrtnuCjFA= golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220524220425-1d687d428aca h1:xTaFYiPROfpPhqrfTIDXj0ri1SpfueYT951s4bAuDO8= +golang.org/x/net v0.0.0-20220524220425-1d687d428aca/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/internal/httpclient/client.go b/internal/httpclient/client.go index 1a1f5e53b..55b98bbb6 100644 --- a/internal/httpclient/client.go +++ b/internal/httpclient/client.go @@ -28,6 +28,9 @@ "time" ) +// ErrInvalidRequest is returned if a given HTTP request is invalid and cannot be performed. +var ErrInvalidRequest = errors.New("invalid http request") + // ErrReservedAddr is returned if a dialed address resolves to an IP within a blocked or reserved net. var ErrReservedAddr = errors.New("dial within blocked / reserved IP range") @@ -164,6 +167,11 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) { defer func() { <-c.queue }() } + // Firstly, ensure this is a valid request + if err := ValidateRequest(req); err != nil { + return nil, err + } + // Perform the HTTP request rsp, err := c.client.Do(req) if err != nil { diff --git a/internal/httpclient/request.go b/internal/httpclient/request.go new file mode 100644 index 000000000..c58d0b565 --- /dev/null +++ b/internal/httpclient/request.go @@ -0,0 +1,63 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package httpclient + +import ( + "fmt" + "net/http" + "strings" + + "golang.org/x/net/http/httpguts" +) + +// ValidateRequest performs the same request validation logic found in the default +// net/http.Transport{}.roundTrip() function, but pulls it out into this separate +// function allowing validation errors to be wrapped under a single error type. +func ValidateRequest(r *http.Request) error { + switch { + case r.URL == nil: + return fmt.Errorf("%w: nil url", ErrInvalidRequest) + case r.Header == nil: + return fmt.Errorf("%w: nil header", ErrInvalidRequest) + case r.URL.Host == "": + return fmt.Errorf("%w: empty url host", ErrInvalidRequest) + case r.URL.Scheme != "http" && r.URL.Scheme != "https": + return fmt.Errorf("%w: unsupported protocol %q", ErrInvalidRequest, r.URL.Scheme) + case strings.IndexFunc(r.Method, func(r rune) bool { return !httpguts.IsTokenRune(r) }) != -1: + return fmt.Errorf("%w: invalid method %q", ErrInvalidRequest, r.Method) + } + + for key, values := range r.Header { + // Check field key name is valid + if !httpguts.ValidHeaderFieldName(key) { + return fmt.Errorf("%w: invalid header field name %q", ErrInvalidRequest, key) + } + + // Check each field value is valid + for i := 0; i < len(values); i++ { + if !httpguts.ValidHeaderFieldValue(values[i]) { + return fmt.Errorf("%w: invalid header field value %q", ErrInvalidRequest, values[i]) + } + } + } + + // ps. kim wrote this + + return nil +} diff --git a/internal/transport/transport.go b/internal/transport/transport.go index c52686c43..22dfbeb9a 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -35,6 +35,7 @@ "github.com/sirupsen/logrus" "github.com/superseriousbusiness/activity/pub" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/httpclient" ) // Transport wraps the pub.Transport interface with some additional functionality for fetching remote media. @@ -123,8 +124,14 @@ func (t *transport) do(r *http.Request, signer func(*http.Request) error, retryO // Generate error from status code for logging err = errors.New(`http response "` + rsp.Status + `"`) - } else if errorsv2.Is(err, context.DeadlineExceeded, context.Canceled) { - // Return early if context has cancelled + } else if errorsv2.Is(err, + context.DeadlineExceeded, + context.Canceled, + httpclient.ErrInvalidRequest, + httpclient.ErrBodyTooLarge, + httpclient.ErrReservedAddr, + ) { + // Return on non-retryable errors return nil, err } else if strings.Contains(err.Error(), "stopped after 10 redirects") { // Don't bother if net/http returned after too many redirects diff --git a/vendor/golang.org/x/net/http/httpguts/guts.go b/vendor/golang.org/x/net/http/httpguts/guts.go new file mode 100644 index 000000000..e6cd0ced3 --- /dev/null +++ b/vendor/golang.org/x/net/http/httpguts/guts.go @@ -0,0 +1,50 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package httpguts provides functions implementing various details +// of the HTTP specification. +// +// This package is shared by the standard library (which vendors it) +// and x/net/http2. It comes with no API stability promise. +package httpguts + +import ( + "net/textproto" + "strings" +) + +// ValidTrailerHeader reports whether name is a valid header field name to appear +// in trailers. +// See RFC 7230, Section 4.1.2 +func ValidTrailerHeader(name string) bool { + name = textproto.CanonicalMIMEHeaderKey(name) + if strings.HasPrefix(name, "If-") || badTrailer[name] { + return false + } + return true +} + +var badTrailer = map[string]bool{ + "Authorization": true, + "Cache-Control": true, + "Connection": true, + "Content-Encoding": true, + "Content-Length": true, + "Content-Range": true, + "Content-Type": true, + "Expect": true, + "Host": true, + "Keep-Alive": true, + "Max-Forwards": true, + "Pragma": true, + "Proxy-Authenticate": true, + "Proxy-Authorization": true, + "Proxy-Connection": true, + "Range": true, + "Realm": true, + "Te": true, + "Trailer": true, + "Transfer-Encoding": true, + "Www-Authenticate": true, +} diff --git a/vendor/golang.org/x/net/http/httpguts/httplex.go b/vendor/golang.org/x/net/http/httpguts/httplex.go new file mode 100644 index 000000000..6e071e852 --- /dev/null +++ b/vendor/golang.org/x/net/http/httpguts/httplex.go @@ -0,0 +1,352 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package httpguts + +import ( + "net" + "strings" + "unicode/utf8" + + "golang.org/x/net/idna" +) + +var isTokenTable = [127]bool{ + '!': true, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '*': true, + '+': true, + '-': true, + '.': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'W': true, + 'V': true, + 'X': true, + 'Y': true, + 'Z': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '|': true, + '~': true, +} + +func IsTokenRune(r rune) bool { + i := int(r) + return i < len(isTokenTable) && isTokenTable[i] +} + +func isNotToken(r rune) bool { + return !IsTokenRune(r) +} + +// HeaderValuesContainsToken reports whether any string in values +// contains the provided token, ASCII case-insensitively. +func HeaderValuesContainsToken(values []string, token string) bool { + for _, v := range values { + if headerValueContainsToken(v, token) { + return true + } + } + return false +} + +// isOWS reports whether b is an optional whitespace byte, as defined +// by RFC 7230 section 3.2.3. +func isOWS(b byte) bool { return b == ' ' || b == '\t' } + +// trimOWS returns x with all optional whitespace removes from the +// beginning and end. +func trimOWS(x string) string { + // TODO: consider using strings.Trim(x, " \t") instead, + // if and when it's fast enough. See issue 10292. + // But this ASCII-only code will probably always beat UTF-8 + // aware code. + for len(x) > 0 && isOWS(x[0]) { + x = x[1:] + } + for len(x) > 0 && isOWS(x[len(x)-1]) { + x = x[:len(x)-1] + } + return x +} + +// headerValueContainsToken reports whether v (assumed to be a +// 0#element, in the ABNF extension described in RFC 7230 section 7) +// contains token amongst its comma-separated tokens, ASCII +// case-insensitively. +func headerValueContainsToken(v string, token string) bool { + for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') { + if tokenEqual(trimOWS(v[:comma]), token) { + return true + } + v = v[comma+1:] + } + return tokenEqual(trimOWS(v), token) +} + +// lowerASCII returns the ASCII lowercase version of b. +func lowerASCII(b byte) byte { + if 'A' <= b && b <= 'Z' { + return b + ('a' - 'A') + } + return b +} + +// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. +func tokenEqual(t1, t2 string) bool { + if len(t1) != len(t2) { + return false + } + for i, b := range t1 { + if b >= utf8.RuneSelf { + // No UTF-8 or non-ASCII allowed in tokens. + return false + } + if lowerASCII(byte(b)) != lowerASCII(t2[i]) { + return false + } + } + return true +} + +// isLWS reports whether b is linear white space, according +// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 +// +// LWS = [CRLF] 1*( SP | HT ) +func isLWS(b byte) bool { return b == ' ' || b == '\t' } + +// isCTL reports whether b is a control byte, according +// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 +// +// CTL = +func isCTL(b byte) bool { + const del = 0x7f // a CTL + return b < ' ' || b == del +} + +// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. +// HTTP/2 imposes the additional restriction that uppercase ASCII +// letters are not allowed. +// +// RFC 7230 says: +// +// header-field = field-name ":" OWS field-value OWS +// field-name = token +// token = 1*tchar +// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / +// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA +func ValidHeaderFieldName(v string) bool { + if len(v) == 0 { + return false + } + for _, r := range v { + if !IsTokenRune(r) { + return false + } + } + return true +} + +// ValidHostHeader reports whether h is a valid host header. +func ValidHostHeader(h string) bool { + // The latest spec is actually this: + // + // http://tools.ietf.org/html/rfc7230#section-5.4 + // Host = uri-host [ ":" port ] + // + // Where uri-host is: + // http://tools.ietf.org/html/rfc3986#section-3.2.2 + // + // But we're going to be much more lenient for now and just + // search for any byte that's not a valid byte in any of those + // expressions. + for i := 0; i < len(h); i++ { + if !validHostByte[h[i]] { + return false + } + } + return true +} + +// See the validHostHeader comment. +var validHostByte = [256]bool{ + '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, + '8': true, '9': true, + + 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true, + 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true, + 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, + 'y': true, 'z': true, + + 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true, + 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true, + 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, + 'Y': true, 'Z': true, + + '!': true, // sub-delims + '$': true, // sub-delims + '%': true, // pct-encoded (and used in IPv6 zones) + '&': true, // sub-delims + '(': true, // sub-delims + ')': true, // sub-delims + '*': true, // sub-delims + '+': true, // sub-delims + ',': true, // sub-delims + '-': true, // unreserved + '.': true, // unreserved + ':': true, // IPv6address + Host expression's optional port + ';': true, // sub-delims + '=': true, // sub-delims + '[': true, + '\'': true, // sub-delims + ']': true, + '_': true, // unreserved + '~': true, // unreserved +} + +// ValidHeaderFieldValue reports whether v is a valid "field-value" according to +// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : +// +// message-header = field-name ":" [ field-value ] +// field-value = *( field-content | LWS ) +// field-content = +// +// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : +// +// TEXT = +// LWS = [CRLF] 1*( SP | HT ) +// CTL = +// +// RFC 7230 says: +// +// field-value = *( field-content / obs-fold ) +// obj-fold = N/A to http2, and deprecated +// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +// field-vchar = VCHAR / obs-text +// obs-text = %x80-FF +// VCHAR = "any visible [USASCII] character" +// +// http2 further says: "Similarly, HTTP/2 allows header field values +// that are not valid. While most of the values that can be encoded +// will not alter header field parsing, carriage return (CR, ASCII +// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII +// 0x0) might be exploited by an attacker if they are translated +// verbatim. Any request or response that contains a character not +// permitted in a header field value MUST be treated as malformed +// (Section 8.1.2.6). Valid characters are defined by the +// field-content ABNF rule in Section 3.2 of [RFC7230]." +// +// This function does not (yet?) properly handle the rejection of +// strings that begin or end with SP or HTAB. +func ValidHeaderFieldValue(v string) bool { + for i := 0; i < len(v); i++ { + b := v[i] + if isCTL(b) && !isLWS(b) { + return false + } + } + return true +} + +func isASCII(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] >= utf8.RuneSelf { + return false + } + } + return true +} + +// PunycodeHostPort returns the IDNA Punycode version +// of the provided "host" or "host:port" string. +func PunycodeHostPort(v string) (string, error) { + if isASCII(v) { + return v, nil + } + + host, port, err := net.SplitHostPort(v) + if err != nil { + // The input 'v' argument was just a "host" argument, + // without a port. This error should not be returned + // to the caller. + host = v + port = "" + } + host, err = idna.ToASCII(host) + if err != nil { + // Non-UTF-8? Not representable in Punycode, in any + // case. + return "", err + } + if port == "" { + return host, nil + } + return net.JoinHostPort(host, port), nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index a143bfa87..b8b794be8 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -566,12 +566,13 @@ golang.org/x/crypto/ssh/internal/bcrypt_pbkdf # golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 ## explicit; go 1.17 golang.org/x/mod/semver -# golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 +# golang.org/x/net v0.0.0-20220524220425-1d687d428aca ## explicit; go 1.17 golang.org/x/net/context golang.org/x/net/context/ctxhttp golang.org/x/net/html golang.org/x/net/html/atom +golang.org/x/net/http/httpguts golang.org/x/net/idna # golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 ## explicit; go 1.11