gotosocial/vendor/github.com/buger/jsonparser/escape.go

package jsonparser

import (
	"bytes"
	"unicode/utf8"
)

// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7

const (
	// supplementalPlanesOffset is the first code point outside the Basic
	// Multilingual Plane; surrogate pairs encode values relative to it.
	supplementalPlanesOffset = 0x10000
	// highSurrogateOffset is the smallest UTF-16 high (leading) surrogate.
	highSurrogateOffset = 0xD800
	// lowSurrogateOffset is the smallest UTF-16 low (trailing) surrogate.
	lowSurrogateOffset = 0xDC00

	// basicMultilingualPlaneReservedOffset is the last value of the surrogate
	// range that starts at highSurrogateOffset.
	basicMultilingualPlaneReservedOffset = 0xDFFF
	// basicMultilingualPlaneOffset is the last code point of the Basic
	// Multilingual Plane.
	basicMultilingualPlaneOffset = 0xFFFF
)

// combineUTF16Surrogates merges a UTF-16 surrogate pair into a single code
// point: the high surrogate supplies the upper ten bits and the low surrogate
// the lower ten bits of the offset from supplementalPlanesOffset.
// The arguments are not validated; callers must pass a genuine pair.
func combineUTF16Surrogates(high, low rune) rune {
	upperBits := (high - highSurrogateOffset) << 10
	lowerBits := low - lowSurrogateOffset
	return supplementalPlanesOffset + upperBits + lowerBits
}

// badHex is the sentinel h2I returns for a byte that is not a hex digit.
const badHex = -1

// h2I converts one ASCII hexadecimal digit (0-9, A-F or a-f) to its numeric
// value in the range 0..15. Any other byte yields badHex.
func h2I(c byte) int {
	if '0' <= c && c <= '9' {
		return int(c - '0')
	}

	// Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves lower-case
	// letters untouched; no other byte lands in 'a'..'f' after the fold.
	folded := c | 0x20
	if 'a' <= folded && folded <= 'f' {
		return int(folded-'a') + 10
	}

	return badHex
}

// decodeSingleUnicodeEscape reads one six-byte \uXXXX escape from the start of
// 'in' and returns the 16-bit value spelled by its four hex digits.
// The first two bytes are skipped without being inspected, so the caller is
// responsible for having verified the \u prefix.
// On short input or a non-hex digit it returns (utf8.RuneError, false).
// Surrogate pairs are not handled here; decodeUnicodeEscape deals with those.
func decodeSingleUnicodeEscape(in []byte) (rune, bool) {
	const escapeLen = 6 // two prefix bytes followed by four hex digits
	if len(in) < escapeLen {
		return utf8.RuneError, false
	}

	// Accumulate the digits most-significant first, four bits at a time.
	var code rune
	for _, digit := range in[2:escapeLen] {
		nibble := h2I(digit)
		if nibble == badHex {
			return utf8.RuneError, false
		}
		code = code<<4 | rune(nibble)
	}

	return code, true
}

// isUTF16EncodedRune reports whether r lies between highSurrogateOffset and
// basicMultilingualPlaneReservedOffset inclusive, i.e. in the block of the
// Basic Multilingual Plane reserved for UTF-16 surrogates (high and low alike).
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func isUTF16EncodedRune(r rune) bool {
	if r < highSurrogateOffset {
		return false
	}
	return r <= basicMultilingualPlaneReservedOffset
}

// decodeUnicodeEscape decodes the Unicode escape at the start of 'in', which
// is either a single \uXXXX escape or a UTF-16 surrogate pair written as two
// consecutive escapes (\uXXXX\uXXXX).
//
// It returns the decoded code point and the number of input bytes consumed
// (6 for a single escape, 12 for a surrogate pair). On any malformed input it
// returns (utf8.RuneError, -1). The \u prefix of the first escape is assumed
// to have been verified by the caller.
//
// A surrogate pair is accepted only when the first escape is a high surrogate
// (U+D800..U+DBFF) and is immediately followed by a \u escape holding a low
// surrogate (U+DC00..U+DFFF). A lone low surrogate, a high surrogate followed
// by anything else, or a second escape missing its \u prefix is rejected.
func decodeUnicodeEscape(in []byte) (rune, int) {
	r, ok := decodeSingleUnicodeEscape(in)
	if !ok {
		// Invalid Unicode escape
		return utf8.RuneError, -1
	}

	if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {
		// Ordinary code point in the Basic Multilingual Plane
		return r, 6
	}

	// Only a high surrogate may open a pair; a low surrogate in first
	// position would otherwise combine into a value beyond U+10FFFF.
	if r < highSurrogateOffset || r >= lowSurrogateOffset {
		return utf8.RuneError, -1
	}

	// The successful decode above guarantees len(in) >= 6, so in[6:] is safe.
	// decodeSingleUnicodeEscape does not look at the prefix bytes, so the
	// second escape's \u has to be verified here.
	rest := in[6:]
	if len(rest) < 2 || rest[0] != '\\' || rest[1] != 'u' {
		// High surrogate without a following Unicode escape
		return utf8.RuneError, -1
	}

	r2, ok := decodeSingleUnicodeEscape(rest)
	if !ok {
		// High surrogate without the mandatory valid escape for the low surrogate
		return utf8.RuneError, -1
	}
	if r2 < lowSurrogateOffset || r2 > basicMultilingualPlaneReservedOffset {
		// Second code unit is not a low surrogate
		return utf8.RuneError, -1
	}

	// Valid UTF-16 surrogate pair
	return combineUTF16Surrogates(r, r2), 12
}

// backslashCharEscapeTable maps the byte X of a two-character escape '\X' to
// the byte that replaces the whole escape. It is a sparse array indexed by X;
// only the entries listed below are meaningful, every other slot is zero.
var backslashCharEscapeTable = [...]byte{
	// Escapes that stand for the escaped character itself.
	'/':  '/',
	'\\': '\\',
	'"':  '"',

	// Escapes that stand for a control character.
	't': '\t',
	'r': '\r',
	'n': '\n',
	'f': '\f',
	'b': '\b',
}

// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns
// how many characters were consumed from 'in' and emitted into 'out'.
// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error.
func unescapeToUTF8(in, out []byte) (inLen int, outLen int) {
	if len(in) < 2 || in[0] != '\\' {
		// Invalid escape due to insufficient characters for any escape or no initial backslash
		return -1, -1
	}

	// https://tools.ietf.org/html/rfc7159#section-7
	switch e := in[1]; e {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		// Valid basic 2-character escapes (use lookup table)
		out[0] = backslashCharEscapeTable[e]
		return 2, 1
	case 'u':
		// Unicode escape
		if r, inLen := decodeUnicodeEscape(in); inLen == -1 {
			// Invalid Unicode escape
			return -1, -1
		} else {
			// Valid Unicode escape; re-encode as UTF8
			outLen := utf8.EncodeRune(out, r)
			return inLen, outLen
		}
	}

	return -1, -1
}

// Unescape decodes the backslash escape sequences in 'in' and returns the
// result as a slice.
//
//   - If 'in' contains no backslash, 'in' itself is returned.
//   - Otherwise, if cap(out) >= len(in), 'out' is used as the destination and
//     the result is a prefix of it, so no allocation takes place.
//   - Otherwise a new slice of len(in) bytes is allocated for the result.
//
// If an invalid escape sequence is encountered, it returns
// (nil, MalformedStringEscapeError).
func Unescape(in, out []byte) ([]byte, error) {
	readPos := bytes.IndexByte(in, '\\')
	if readPos == -1 {
		return in, nil
	}

	// Size the destination to len(in), reusing the caller's buffer when it is
	// large enough.
	if cap(out) >= len(in) {
		out = out[:len(in)]
	} else {
		out = make([]byte, len(in))
	}

	// Everything before the first backslash is copied through verbatim.
	writePos := copy(out, in[:readPos])

	// Invariant at the top of the loop: in[readPos] is a backslash.
	for readPos < len(in) {
		consumed, emitted := unescapeToUTF8(in[readPos:], out[writePos:])
		if consumed == -1 {
			return nil, MalformedStringEscapeError
		}
		readPos += consumed
		writePos += emitted

		// Copy the literal run that extends to the next backslash, or to the
		// end of the input when no further backslash exists.
		literal := in[readPos:]
		if next := bytes.IndexByte(literal, '\\'); next != -1 {
			literal = literal[:next]
		}
		writePos += copy(out[writePos:], literal)
		readPos += len(literal)
	}

	// Only the bytes actually written are part of the result.
	return out[:writePos], nil
}
bumps uptrace/bun dependencies to v1.2.6 (#3569) 2024-11-25 15:42:37 +00:00			`package jsonparser`

			`import (`
			`"bytes"`
			`"unicode/utf8"`
			`)`

			`// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7`

			`const supplementalPlanesOffset = 0x10000`
			`const highSurrogateOffset = 0xD800`
			`const lowSurrogateOffset = 0xDC00`

			`const basicMultilingualPlaneReservedOffset = 0xDFFF`
			`const basicMultilingualPlaneOffset = 0xFFFF`

			`func combineUTF16Surrogates(high, low rune) rune {`
			`return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset)`
			`}`

			`const badHex = -1`

			`func h2I(c byte) int {`
			`switch {`
			`case c >= '0' && c <= '9':`
			`return int(c - '0')`
			`case c >= 'A' && c <= 'F':`
			`return int(c - 'A' + 10)`
			`case c >= 'a' && c <= 'f':`
			`return int(c - 'a' + 10)`
			`}`
			`return badHex`
			`}`

			`// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and`
			`// is not checked.`
			`// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together.`
			`// This function only handles one; decodeUnicodeEscape handles this more complex case.`
			`func decodeSingleUnicodeEscape(in []byte) (rune, bool) {`
			`// We need at least 6 characters total`
			`if len(in) < 6 {`
			`return utf8.RuneError, false`
			`}`

			`// Convert hex to decimal`
			`h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5])`
			`if h1 == badHex \|\| h2 == badHex \|\| h3 == badHex \|\| h4 == badHex {`
			`return utf8.RuneError, false`
			`}`

			`// Compose the hex digits`
			`return rune(h1<<12 + h2<<8 + h3<<4 + h4), true`
			`}`

			`// isUTF16EncodedRune checks if a rune is in the range for non-BMP characters,`
			`// which is used to describe UTF16 chars.`
			`// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane`
			`func isUTF16EncodedRune(r rune) bool {`
			`return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset`
			`}`

			`func decodeUnicodeEscape(in []byte) (rune, int) {`
			`if r, ok := decodeSingleUnicodeEscape(in); !ok {`
			`// Invalid Unicode escape`
			`return utf8.RuneError, -1`
			`} else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {`
			`// Valid Unicode escape in Basic Multilingual Plane`
			`return r, 6`
			`} else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain`
			`// UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate"`
			`return utf8.RuneError, -1`
			`} else if r2 < lowSurrogateOffset {`
			`// Invalid UTF16 "low surrogate"`
			`return utf8.RuneError, -1`
			`} else {`
			`// Valid UTF16 surrogate pair`
			`return combineUTF16Surrogates(r, r2), 12`
			`}`
			`}`

			`// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X]`
			`var backslashCharEscapeTable = [...]byte{`
			`'"': '"',`
			`'\\': '\\',`
			`'/': '/',`
			`'b': '\b',`
			`'f': '\f',`
			`'n': '\n',`
			`'r': '\r',`
			`'t': '\t',`
			`}`

			`// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns`
			`// how many characters were consumed from 'in' and emitted into 'out'.`
			`// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error.`
			`func unescapeToUTF8(in, out []byte) (inLen int, outLen int) {`
			`if len(in) < 2 \|\| in[0] != '\\' {`
			`// Invalid escape due to insufficient characters for any escape or no initial backslash`
			`return -1, -1`
			`}`

			`// https://tools.ietf.org/html/rfc7159#section-7`
			`switch e := in[1]; e {`
			`case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':`
			`// Valid basic 2-character escapes (use lookup table)`
			`out[0] = backslashCharEscapeTable[e]`
			`return 2, 1`
			`case 'u':`
			`// Unicode escape`
			`if r, inLen := decodeUnicodeEscape(in); inLen == -1 {`
			`// Invalid Unicode escape`
			`return -1, -1`
			`} else {`
			`// Valid Unicode escape; re-encode as UTF8`
			`outLen := utf8.EncodeRune(out, r)`
			`return inLen, outLen`
			`}`
			`}`

			`return -1, -1`
			`}`

			`// unescape unescapes the string contained in 'in' and returns it as a slice.`
			`// If 'in' contains no escaped characters:`
			`// Returns 'in'.`
			`// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)):`
			`// 'out' is used to build the unescaped string and is returned with no extra allocation`
			`// Else:`
			`// A new slice is allocated and returned.`
			`func Unescape(in, out []byte) ([]byte, error) {`
			`firstBackslash := bytes.IndexByte(in, '\\')`
			`if firstBackslash == -1 {`
			`return in, nil`
			`}`

			`// Get a buffer of sufficient size (allocate if needed)`
			`if cap(out) < len(in) {`
			`out = make([]byte, len(in))`
			`} else {`
			`out = out[0:len(in)]`
			`}`

			`// Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice)`
			`copy(out, in[:firstBackslash])`
			`in = in[firstBackslash:]`
			`buf := out[firstBackslash:]`

			`for len(in) > 0 {`
			`// Unescape the next escaped character`
			`inLen, bufLen := unescapeToUTF8(in, buf)`
			`if inLen == -1 {`
			`return nil, MalformedStringEscapeError`
			`}`

			`in = in[inLen:]`
			`buf = buf[bufLen:]`

			`// Copy everything up until the next backslash`
			`nextBackslash := bytes.IndexByte(in, '\\')`
			`if nextBackslash == -1 {`
			`copy(buf, in)`
			`buf = buf[len(in):]`
			`break`
			`} else {`
			`copy(buf, in[:nextBackslash])`
			`buf = buf[nextBackslash:]`
			`in = in[nextBackslash:]`
			`}`
			`}`

			`// Trim the out buffer to the amount that was actually emitted`
			`return out[:len(out)-len(buf)], nil`
			`}`