gotosocial/vendor/github.com/bytedance/sonic/internal/decoder/optdec/native.go

package optdec

import (
    "fmt"
    "reflect"
    "sync"
    "unsafe"

    "github.com/bytedance/sonic/internal/native"
    "github.com/bytedance/sonic/internal/native/types"
    "github.com/bytedance/sonic/internal/rt"
    "github.com/bytedance/sonic/utf8"
)
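
// ErrorCode is the status code returned by the native parser; it doubles as
// an error value via the Error method below.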
type ErrorCode int

const (
    SONIC_OK                      = 0
    SONIC_CONTROL_CHAR            = 1
    SONIC_INVALID_ESCAPED         = 2
    SONIC_INVALID_NUM             = 3
    SONIC_FLOAT_INF               = 4
    SONIC_EOF                     = 5
    SONIC_INVALID_CHAR            = 6
    SONIC_EXPECT_KEY              = 7
    SONIC_EXPECT_COLON            = 8
    SONIC_EXPECT_OBJ_COMMA_OR_END = 9
    SONIC_EXPECT_ARR_COMMA_OR_END = 10
    SONIC_VISIT_FAILED            = 11
    SONIC_INVALID_ESCAPED_UTF     = 12
    SONIC_INVALID_LITERAL         = 13
    SONIC_STACK_OVERFLOW          = 14
)
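
// ParsingErrors maps each ErrorCode to its human-readable message.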
var ParsingErrors = []string{
    SONIC_OK:                      "ok",
    SONIC_CONTROL_CHAR:            "control chars in string",
    SONIC_INVALID_ESCAPED:         "invalid escaped chars in string",
    SONIC_INVALID_NUM:             "invalid number",
    SONIC_FLOAT_INF:               "float infinity",
    SONIC_EOF:                     "eof",
    SONIC_INVALID_CHAR:            "invalid chars",
    SONIC_EXPECT_KEY:              "expect a json key",
    SONIC_EXPECT_COLON:            "expect a `:`",
    SONIC_EXPECT_OBJ_COMMA_OR_END: "expect a `,` or `}`",
    SONIC_EXPECT_ARR_COMMA_OR_END: "expect a `,` or `]`",
    SONIC_VISIT_FAILED:            "failed in json visitor",
    SONIC_INVALID_ESCAPED_UTF:     "invalid escaped unicodes",
    SONIC_INVALID_LITERAL:         "invalid literal(true/false/null)",
    SONIC_STACK_OVERFLOW:          "json is exceeded max depth 4096, cause stack overflow",
}

func (code ErrorCode) Error() string {
    return ParsingErrors[code]
}
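
// node is a single element of the flat parse tree written by the native parser.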
type node struct {
    typ uint64
    val uint64
}

// should be consistent with native/parser.c
type _nospaceBlock struct {
    _ [8]byte
    _ [8]byte
}

// should be consistent with native/parser.c
type nodeBuf struct {
    ncur   uintptr
    parent int64
    depth  uint64
    nstart uintptr
    nend   uintptr
    stat   jsonStat
}
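
// init points the node cursor at the start of the given node buffer.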
func (self *nodeBuf) init(nodes []node) {
    self.ncur = uintptr(unsafe.Pointer(&nodes[0]))
    self.nstart = self.ncur
    self.nend = self.ncur + uintptr(cap(nodes))*unsafe.Sizeof(node{})
    self.parent = -1
}
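
// Parser carries the JSON cursor, the padded input copy, and the node buffer
// shared with the native parser.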
// layout should be consistent with native/parser.c
type Parser struct {
    Json    string
    padded  []byte
    nodes   []node
    dbuf    []byte
    backup  []node
    options uint64

    // JSON cursor
    start uintptr
    cur   uintptr
    end   uintptr
    _nbk  _nospaceBlock

    // node buffer cursor
    nbuf    nodeBuf
    Utf8Inv bool
    isEface bool
}

// jsonStat is only needed when parsing non-empty objects/arrays.
type jsonStat struct {
    object      uint32
    array       uint32
    str         uint32
    number      uint32
    array_elems uint32
    object_keys uint32
    max_depth   uint32
}

var (
    defaultJsonPaddedCap uintptr = 1 << 20                           // 1 MB
    defaultNodesCap      uintptr = (1 << 20) / unsafe.Sizeof(node{}) // 1 MB
)
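
// parsePool reuses Parser instances, together with their padded and node
// buffers, across calls.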
var parsePool sync.Pool = sync.Pool{
    New: func() interface{} {
        return &Parser{
            options: 0,
            padded:  make([]byte, 0, defaultJsonPaddedCap),
            nodes:   make([]node, defaultNodesCap, defaultNodesCap),
            dbuf:    make([]byte, types.MaxDigitNums, types.MaxDigitNums),
        }
    },
}
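
// padding is appended after the input JSON so the native parser can read a
// fixed number of bytes past the logical end of the input without going out
// of bounds.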
var padding string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
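
// newParser fetches a Parser from the pool and binds it to data[pos:],
// copying the input into a padded buffer (or a UTF-8-corrected copy when
// validation is requested and fails) before handing it to the native parser.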
func newParser(data string, pos int, opt uint64) *Parser {
    p := parsePool.Get().(*Parser)

    /* validate json if needed */
    if (opt&(1<<_F_validate_string)) != 0 && !utf8.ValidateString(data) {
        dbuf := utf8.CorrectWith(nil, rt.Str2Mem(data[pos:]), "\ufffd")
        dbuf = append(dbuf, padding...)
        p.Json = rt.Mem2Str(dbuf[:len(dbuf)-len(padding)])
        p.Utf8Inv = true
        p.start = uintptr((*rt.GoString)(unsafe.Pointer(&p.Json)).Ptr)
    } else {
        p.Json = data
        // TODO: prevent too large JSON
        p.padded = append(p.padded, data[pos:]...)
        p.padded = append(p.padded, padding...)
        p.start = uintptr((*rt.GoSlice)(unsafe.Pointer(&p.padded)).Ptr)
    }

    p.cur = p.start
    p.end = p.cur + uintptr(len(p.Json))
    p.options = opt
    p.nbuf.init(p.nodes)
    return p
}

func (p *Parser) Pos() int {
    return int(p.cur - p.start)
}

func (p *Parser) JsonBytes() []byte {
    if p.Utf8Inv {
        return rt.Str2Mem(p.Json)
    } else {
        return p.padded
    }
}
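
// nodeType is the runtime type descriptor of node, used when allocating the
// enlarged node buffer in parse.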
var nodeType = rt.UnpackType(reflect.TypeOf(node{}))
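
// calMaxNodeCap returns an upper bound on the number of nodes any valid JSON
// of jsonSize bytes can produce.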
//go:inline
func calMaxNodeCap(jsonSize int) int {
    return jsonSize/2 + 2
}
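
// parse runs the native parser in two passes: a fast pass over the pooled
// node buffer, and, if that buffer is exhausted (SONIC_VISIT_FAILED), a second
// pass after switching to a buffer large enough for any valid JSON of this size.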
func (p *Parser) parse() ErrorCode {
    // when decoding into concrete types (not interface{}), always parse
    // numbers directly instead of honoring the use-number option
    old := p.options
    if !p.isEface {
        p.options &^= 1 << _F_use_number
    }

    // fast path with the limited, pooled node buffer
    err := ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
    if err != SONIC_VISIT_FAILED {
        p.options = old
        return err
    }

    // sanity-check that the node cursor stopped exactly at the end of the buffer
    offset := p.nbuf.ncur - p.nbuf.nstart
    curLen := offset / unsafe.Sizeof(node{})
    if curLen != uintptr(len(p.nodes)) {
        panic(fmt.Sprintf("current len: %d, real len: %d cap: %d", curLen, len(p.nodes), cap(p.nodes)))
    }

    // the node buffer was too small; allocate one that is always large enough
    // for any valid JSON of this size and continue parsing
    maxCap := calMaxNodeCap(len(p.Json))
    slice := rt.GoSlice{
        Ptr: rt.Mallocgc(uintptr(maxCap)*nodeType.Size, nodeType, false),
        Len: maxCap,
        Cap: maxCap,
    }
    rt.Memmove(unsafe.Pointer(slice.Ptr), unsafe.Pointer(&p.nodes[0]), offset)
    p.backup = p.nodes
    p.nodes = *(*[]node)(unsafe.Pointer(&slice))

    // update node cursor
    p.nbuf.nstart = uintptr(unsafe.Pointer(&p.nodes[0]))
    p.nbuf.nend = p.nbuf.nstart + uintptr(cap(p.nodes))*unsafe.Sizeof(node{})
    p.nbuf.ncur = p.nbuf.nstart + offset

    // resume parsing with the enlarged node buffer
    err = ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
    p.options = old
    return err
}
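
// reset clears all cursors and buffers so the Parser can go back to the pool.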
func (p *Parser) reset() {
    p.options = 0
    p.padded = p.padded[:0]
    // the enlarged nodes buffer is too large to keep; restore the smaller
    // pooled buffer from backup
    if p.backup != nil {
        p.nodes = p.backup
        p.backup = nil
    }
    p.start = 0
    p.cur = 0
    p.end = 0
    p.Json = ""
    p.nbuf = nodeBuf{}
    p._nbk = _nospaceBlock{}
    p.Utf8Inv = false
    p.isEface = false
}

func (p *Parser) free() {
    p.reset()
    parsePool.Put(p)
}
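
// fixError converts a native ErrorCode into a positioned syntax error,
// returning nil for SONIC_OK.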
//go:noinline
func (p *Parser) fixError(code ErrorCode) error {
    if code == SONIC_OK {
        return nil
    }

    if p.Pos() == 0 {
        code = SONIC_EOF
    }

    pos := p.Pos() - 1
    return error_syntax(pos, p.Json, ParsingErrors[code])
}
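
// Parse parses data with the given options and reports the parser's status
// code as an error.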
func Parse(data string, opt uint64) error {
    p := newParser(data, 0, opt)
    err := p.parse()
    p.free()
    return err
}