// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pkgbits

import (
	"encoding/binary"
	"errors"
	"fmt"
	"go/constant"
	"go/token"
	"io"
	"math/big"
	"os"
	"runtime"
	"strings"
)

// A PkgDecoder provides methods for decoding a package's Unified IR
// export data.
type PkgDecoder struct {
	// version is the file format version.
	version uint32

	// sync indicates whether the file uses sync markers.
	sync bool

	// pkgPath is the package path for the package to be decoded.
	//
	// TODO(mdempsky): Remove; unneeded since CL 391014.
	pkgPath string

	// elemData is the full data payload of the encoded package.
	// Elements are densely and contiguously packed together.
	//
	// The last 8 bytes of elemData are the package fingerprint.
	elemData string

	// elemEnds stores the byte-offset end positions of element
	// bitstreams within elemData.
	//
	// For example, element I's bitstream data starts at elemEnds[I-1]
	// (or 0, if I==0) and ends at elemEnds[I].
	//
	// Note: elemEnds is indexed by absolute indices, not
	// section-relative indices.
	elemEnds []uint32

	// elemEndsEnds stores the index-offset end positions of relocation
	// sections within elemEnds.
	//
	// For example, section K's end positions start at elemEndsEnds[K-1]
	// (or 0, if K==0) and end at elemEndsEnds[K].
	elemEndsEnds [numRelocs]uint32

	// scratchRelocEnt holds the relocation slice most recently handed
	// back through RetireDecoder, so that TempDecoderRaw can reuse its
	// backing storage instead of allocating. It is nil while no retired
	// slice is available.
	scratchRelocEnt []RelocEnt
}

// PkgPath returns the package path recorded in pr, i.e. the pkgPath
// argument that was passed to NewPkgDecoder.
//
// TODO(mdempsky): Remove; unneeded since CL 391014.
func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }

// SyncMarkers reports whether pr uses sync markers.
func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }

// NewPkgDecoder returns a PkgDecoder initialized to read the Unified
// IR export data from input. pkgPath is the package path for the
// compilation unit that produced the export data.
//
// NewPkgDecoder panics if the format version found in input is
// neither 0 nor 1.
//
// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
func NewPkgDecoder(pkgPath, input string) PkgDecoder { pr := PkgDecoder{ pkgPath: pkgPath, } // TODO(mdempsky): Implement direct indexing of input string to // avoid copying the position information. r := strings.NewReader(input) assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) switch pr.version { default: panic(fmt.Errorf("unsupported version: %v", pr.version)) case 0: // no flags case 1: var flags uint32 assert(binary.Read(r, binary.LittleEndian, &flags) == nil) pr.sync = flags&flagSyncMarkers != 0 } assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) pos, err := r.Seek(0, io.SeekCurrent) assert(err == nil) pr.elemData = input[pos:] assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) return pr } // NumElems returns the number of elements in section k. func (pr *PkgDecoder) NumElems(k RelocKind) int { count := int(pr.elemEndsEnds[k]) if k > 0 { count -= int(pr.elemEndsEnds[k-1]) } return count } // TotalElems returns the total number of elements across all sections. func (pr *PkgDecoder) TotalElems() int { return len(pr.elemEnds) } // Fingerprint returns the package fingerprint. func (pr *PkgDecoder) Fingerprint() [8]byte { var fp [8]byte copy(fp[:], pr.elemData[len(pr.elemData)-8:]) return fp } // AbsIdx returns the absolute index for the given (section, index) // pair. func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { absIdx := int(idx) if k > 0 { absIdx += int(pr.elemEndsEnds[k-1]) } if absIdx >= int(pr.elemEndsEnds[k]) { errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) } return absIdx } // DataIdx returns the raw element bitstream for the given (section, // index) pair. 
func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { absIdx := pr.AbsIdx(k, idx) var start uint32 if absIdx > 0 { start = pr.elemEnds[absIdx-1] } end := pr.elemEnds[absIdx] return pr.elemData[start:end] } // StringIdx returns the string value for the given string index. func (pr *PkgDecoder) StringIdx(idx Index) string { return pr.DataIdx(RelocString, idx) } // NewDecoder returns a Decoder for the given (section, index) pair, // and decodes the given SyncMarker from the element bitstream. func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { r := pr.NewDecoderRaw(k, idx) r.Sync(marker) return r } // TempDecoder returns a Decoder for the given (section, index) pair, // and decodes the given SyncMarker from the element bitstream. // If possible the Decoder should be RetireDecoder'd when it is no longer // needed, this will avoid heap allocations. func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { r := pr.TempDecoderRaw(k, idx) r.Sync(marker) return r } func (pr *PkgDecoder) RetireDecoder(d *Decoder) { pr.scratchRelocEnt = d.Relocs d.Relocs = nil } // NewDecoderRaw returns a Decoder for the given (section, index) pair. // // Most callers should use NewDecoder instead. func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { r := Decoder{ common: pr, k: k, Idx: idx, } // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. 
r.Data = *strings.NewReader(pr.DataIdx(k, idx)) r.Sync(SyncRelocs) r.Relocs = make([]RelocEnt, r.Len()) for i := range r.Relocs { r.Sync(SyncReloc) r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} } return r } func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { r := Decoder{ common: pr, k: k, Idx: idx, } r.Data.Reset(pr.DataIdx(k, idx)) r.Sync(SyncRelocs) l := r.Len() if cap(pr.scratchRelocEnt) >= l { r.Relocs = pr.scratchRelocEnt[:l] pr.scratchRelocEnt = nil } else { r.Relocs = make([]RelocEnt, l) } for i := range r.Relocs { r.Sync(SyncReloc) r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} } return r } // A Decoder provides methods for decoding an individual element's // bitstream data. type Decoder struct { common *PkgDecoder Relocs []RelocEnt Data strings.Reader k RelocKind Idx Index } func (r *Decoder) checkErr(err error) { if err != nil { errorf("unexpected decoding error: %w", err) } } func (r *Decoder) rawUvarint() uint64 { x, err := readUvarint(&r.Data) r.checkErr(err) return x } // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. // This avoids the interface conversion and thus has better escape properties, // which flows up the stack. func readUvarint(r *strings.Reader) (uint64, error) { var x uint64 var s uint for i := 0; i < binary.MaxVarintLen64; i++ { b, err := r.ReadByte() if err != nil { if i > 0 && err == io.EOF { err = io.ErrUnexpectedEOF } return x, err } if b < 0x80 { if i == binary.MaxVarintLen64-1 && b > 1 { return x, overflow } return x | uint64(b)<<s, nil } x |= uint64(b&0x7f) << s s += 7 } return x, overflow } var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") func (r *Decoder) rawVarint() int64 { ux := r.rawUvarint() // Zig-zag decode. 
x := int64(ux >> 1) if ux&1 != 0 { x = ^x } return x } func (r *Decoder) rawReloc(k RelocKind, idx int) Index { e := r.Relocs[idx] assert(e.Kind == k) return e.Idx } // Sync decodes a sync marker from the element bitstream and asserts // that it matches the expected marker. // // If r.common.sync is false, then Sync is a no-op. func (r *Decoder) Sync(mWant SyncMarker) { if !r.common.sync { return } pos, _ := r.Data.Seek(0, io.SeekCurrent) mHave := SyncMarker(r.rawUvarint()) writerPCs := make([]int, r.rawUvarint()) for i := range writerPCs { writerPCs[i] = int(r.rawUvarint()) } if mHave == mWant { return } // There's some tension here between printing: // // (1) full file paths that tools can recognize (e.g., so emacs // hyperlinks the "file:line" text for easy navigation), or // // (2) short file paths that are easier for humans to read (e.g., by // omitting redundant or irrelevant details, so it's easier to // focus on the useful bits that remain). // // The current formatting favors the former, as it seems more // helpful in practice. But perhaps the formatting could be improved // to better address both concerns. For example, use relative file // paths if they would be shorter, or rewrite file paths to contain // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how // to reliably expand that again. fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) fmt.Printf("\nfound %v, written at:\n", mHave) if len(writerPCs) == 0 { fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) } for _, pc := range writerPCs { fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) } fmt.Printf("\nexpected %v, reading at:\n", mWant) var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? n := runtime.Callers(2, readerPCs[:]) for _, pc := range fmtFrames(readerPCs[:n]...) 
{ fmt.Printf("\t%s\n", pc) } // We already printed a stack trace for the reader, so now we can // simply exit. Printing a second one with panic or base.Fatalf // would just be noise. os.Exit(1) } // Bool decodes and returns a bool value from the element bitstream. func (r *Decoder) Bool() bool { r.Sync(SyncBool) x, err := r.Data.ReadByte() r.checkErr(err) assert(x < 2) return x != 0 } // Int64 decodes and returns an int64 value from the element bitstream. func (r *Decoder) Int64() int64 { r.Sync(SyncInt64) return r.rawVarint() } // Uint64 decodes and returns a uint64 value from the element bitstream. func (r *Decoder) Uint64() uint64 { r.Sync(SyncUint64) return r.rawUvarint() } // Len decodes and returns a non-negative int value from the element bitstream. func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } // Int decodes and returns an int value from the element bitstream. func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } // Uint decodes and returns a uint value from the element bitstream. func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } // Code decodes a Code value from the element bitstream and returns // its ordinal value. It's the caller's responsibility to convert the // result to an appropriate Code type. // // TODO(mdempsky): Ideally this method would have signature "Code[T // Code] T" instead, but we don't allow generic methods and the // compiler can't depend on generics yet anyway. func (r *Decoder) Code(mark SyncMarker) int { r.Sync(mark) return r.Len() } // Reloc decodes a relocation of expected section k from the element // bitstream and returns an index to the referenced element. func (r *Decoder) Reloc(k RelocKind) Index { r.Sync(SyncUseReloc) return r.rawReloc(k, r.Len()) } // String decodes and returns a string value from the element // bitstream. 
func (r *Decoder) String() string { r.Sync(SyncString) return r.common.StringIdx(r.Reloc(RelocString)) } // Strings decodes and returns a variable-length slice of strings from // the element bitstream. func (r *Decoder) Strings() []string { res := make([]string, r.Len()) for i := range res { res[i] = r.String() } return res } // Value decodes and returns a constant.Value from the element // bitstream. func (r *Decoder) Value() constant.Value { r.Sync(SyncValue) isComplex := r.Bool() val := r.scalar() if isComplex { val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) } return val } func (r *Decoder) scalar() constant.Value { switch tag := CodeVal(r.Code(SyncVal)); tag { default: panic(fmt.Errorf("unexpected scalar tag: %v", tag)) case ValBool: return constant.MakeBool(r.Bool()) case ValString: return constant.MakeString(r.String()) case ValInt64: return constant.MakeInt64(r.Int64()) case ValBigInt: return constant.Make(r.bigInt()) case ValBigRat: num := r.bigInt() denom := r.bigInt() return constant.Make(new(big.Rat).SetFrac(num, denom)) case ValBigFloat: return constant.Make(r.bigFloat()) } } func (r *Decoder) bigInt() *big.Int { v := new(big.Int).SetBytes([]byte(r.String())) if r.Bool() { v.Neg(v) } return v } func (r *Decoder) bigFloat() *big.Float { v := new(big.Float).SetPrec(512) assert(v.UnmarshalText([]byte(r.String())) == nil) return v } // @@@ Helpers // TODO(mdempsky): These should probably be removed. I think they're a // smell that the export data format is not yet quite right. // PeekPkgPath returns the package path for the specified package // index. func (pr *PkgDecoder) PeekPkgPath(idx Index) string { var path string { r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) path = r.String() pr.RetireDecoder(&r) } if path == "" { path = pr.pkgPath } return path } // PeekObj returns the package path, object name, and CodeObj for the // specified object index. 
func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { var ridx Index var name string var rcode int { r := pr.TempDecoder(RelocName, idx, SyncObject1) r.Sync(SyncSym) r.Sync(SyncPkg) ridx = r.Reloc(RelocPkg) name = r.String() rcode = r.Code(SyncCodeObj) pr.RetireDecoder(&r) } path := pr.PeekPkgPath(ridx) assert(name != "") tag := CodeObj(rcode) return path, name, tag }