[chore]: Bump github.com/KimMachineGun/automemlimit from 0.6.1 to 0.7.0 (#3726)

Bumps [github.com/KimMachineGun/automemlimit](https://github.com/KimMachineGun/automemlimit) from 0.6.1 to 0.7.0.
- [Release notes](https://github.com/KimMachineGun/automemlimit/releases)
- [Commits](https://github.com/KimMachineGun/automemlimit/compare/v0.6.1...v0.7.0)

---
updated-dependencies:
- dependency-name: github.com/KimMachineGun/automemlimit
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
dependabot[bot] 2025-02-03 10:12:35 +00:00 committed by GitHub
parent a6d852d1c5
commit c086d4048c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
234 changed files with 529 additions and 43700 deletions

9
go.mod
View file

@ -43,7 +43,7 @@ require (
codeberg.org/gruf/go-structr v0.8.11
codeberg.org/superseriousbusiness/exif-terminator v0.9.1
github.com/DmitriyVTitov/size v1.5.0
github.com/KimMachineGun/automemlimit v0.6.1
github.com/KimMachineGun/automemlimit v0.7.0
github.com/SherClockHolmes/webpush-go v1.4.0
github.com/buckket/go-blurhash v1.1.0
github.com/coreos/go-oidc/v3 v3.12.0
@ -118,12 +118,8 @@ require (
github.com/bytedance/sonic/loader v0.2.2 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/containerd/cgroups/v3 v3.0.1 // indirect
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/dsoprea/go-exif/v3 v3.0.0-20210625224831-a6301f85c82b // indirect
github.com/dsoprea/go-iptc v0.0.0-20200609062250-162ae6b44feb // indirect
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd // indirect
@ -156,7 +152,6 @@ require (
github.com/go-playground/validator/v10 v10.24.0 // indirect
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
github.com/goccy/go-json v0.10.4 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
@ -194,7 +189,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/ncruces/julianday v1.0.0 // indirect
github.com/opencontainers/runtime-spec v1.0.2 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
@ -210,7 +204,6 @@ require (
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect

19
go.sum generated
View file

@ -79,8 +79,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g=
github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0=
github.com/KimMachineGun/automemlimit v0.6.1 h1:ILa9j1onAAMadBsyyUJv5cack8Y1WT26yLj/V+ulKp8=
github.com/KimMachineGun/automemlimit v0.6.1/go.mod h1:T7xYht7B8r6AG/AqFcUdc7fzd2bIdBKmepfP2S1svPY=
github.com/KimMachineGun/automemlimit v0.7.0 h1:7G06p/dMSf7G8E6oq+f2uOPuVncFyIlDI/pBWK49u88=
github.com/KimMachineGun/automemlimit v0.7.0/go.mod h1:QZxpHaGOQoYvFhv/r4u3U0JTC2ZcOwbSr11UZF46UBM=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
@ -116,8 +116,6 @@ github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
@ -125,20 +123,14 @@ github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQ
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 h1:ox2F0PSMlrAAiAdknSRMDrAr8mfxPCfSZolH+/qQnyQ=
github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08/go.mod h1:pCxVEbcm3AMg7ejXyorUXi6HQCzOIBf7zEDVPtw0/U4=
github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE=
github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw=
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dsoprea/go-exif/v2 v2.0.0-20200321225314-640175a69fe4/go.mod h1:Lm2lMM2zx8p4a34ZemkaUV95AnMl4ZvLbCUbwOvLC2E=
github.com/dsoprea/go-exif/v3 v3.0.0-20200717053412-08f1b6708903/go.mod h1:0nsO1ce0mh5czxGeLo4+OCZ/C6Eo6ZlMWsz7rH/Gxv8=
github.com/dsoprea/go-exif/v3 v3.0.0-20210428042052-dca55bf8ca15/go.mod h1:cg5SNYKHMmzxsr9X6ZeLh/nfBRHHp5PngtEPcujONtk=
@ -246,8 +238,6 @@ github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b h1:khEcpUM4yFcxg4
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b/go.mod h1:aUCEOzzezBEjDBbFBoSiya/gduyIiWYRP6CnSFIV8AM=
github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM=
github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
@ -448,8 +438,6 @@ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108
github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0=
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
@ -496,8 +484,6 @@ github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNX
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
@ -817,7 +803,6 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View file

@ -1,7 +1,16 @@
package memlimit
import (
"bufio"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
"slices"
"strconv"
"strings"
)
var (
@ -10,3 +19,394 @@
// ErrCgroupsNotSupported is returned when the system does not support cgroups.
ErrCgroupsNotSupported = errors.New("cgroups is not supported on this system")
)
// fromCgroup retrieves the memory limit from the cgroup.
// The versionDetector function is used to detect the cgroup version from the mountinfo.
func fromCgroup(versionDetector func(mis []mountInfo) (bool, bool)) (uint64, error) {
mf, err := os.Open("/proc/self/mountinfo")
if err != nil {
return 0, fmt.Errorf("failed to open /proc/self/mountinfo: %w", err)
}
defer mf.Close()
mis, err := parseMountInfo(mf)
if err != nil {
return 0, fmt.Errorf("failed to parse mountinfo: %w", err)
}
v1, v2 := versionDetector(mis)
if !(v1 || v2) {
return 0, ErrNoCgroup
}
cf, err := os.Open("/proc/self/cgroup")
if err != nil {
return 0, fmt.Errorf("failed to open /proc/self/cgroup: %w", err)
}
defer cf.Close()
chs, err := parseCgroupFile(cf)
if err != nil {
return 0, fmt.Errorf("failed to parse cgroup file: %w", err)
}
if v2 {
limit, err := getMemoryLimitV2(chs, mis)
if err == nil {
return limit, nil
} else if !v1 {
return 0, err
}
}
return getMemoryLimitV1(chs, mis)
}
// detectCgroupVersion detects the cgroup version from the mountinfo.
func detectCgroupVersion(mis []mountInfo) (bool, bool) {
var v1, v2 bool
for _, mi := range mis {
switch mi.FilesystemType {
case "cgroup":
v1 = true
case "cgroup2":
v2 = true
}
}
return v1, v2
}
// getMemoryLimitV2 retrieves the memory limit from the cgroup v2 controller.
func getMemoryLimitV2(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
// find the cgroup v2 path for the memory controller.
// in cgroup v2, the paths are unified and the controller list is empty.
idx := slices.IndexFunc(chs, func(ch cgroupHierarchy) bool {
return ch.HierarchyID == "0" && ch.ControllerList == ""
})
if idx == -1 {
return 0, errors.New("cgroup v2 path not found")
}
relPath := chs[idx].CgroupPath
// find the mountpoint for the cgroup v2 controller.
idx = slices.IndexFunc(mis, func(mi mountInfo) bool {
return mi.FilesystemType == "cgroup2"
})
if idx == -1 {
return 0, errors.New("cgroup v2 mountpoint not found")
}
root, mountPoint := mis[idx].Root, mis[idx].MountPoint
// resolve the actual cgroup path
cgroupPath, err := resolveCgroupPath(mountPoint, root, relPath)
if err != nil {
return 0, err
}
// retrieve the memory limit from the memory.max file
return readMemoryLimitV2FromPath(filepath.Join(cgroupPath, "memory.max"))
}
// readMemoryLimitV2FromPath reads the memory limit for cgroup v2 from the given path.
// this function expects the path to be memory.max file.
func readMemoryLimitV2FromPath(path string) (uint64, error) {
b, err := os.ReadFile(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return 0, ErrNoLimit
}
return 0, fmt.Errorf("failed to read memory.max: %w", err)
}
slimit := strings.TrimSpace(string(b))
if slimit == "max" {
return 0, ErrNoLimit
}
limit, err := strconv.ParseUint(slimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse memory.max value: %w", err)
}
return limit, nil
}
// getMemoryLimitV1 retrieves the memory limit from the cgroup v1 controller.
func getMemoryLimitV1(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
// find the cgroup v1 path for the memory controller.
idx := slices.IndexFunc(chs, func(ch cgroupHierarchy) bool {
return slices.Contains(strings.Split(ch.ControllerList, ","), "memory")
})
if idx == -1 {
return 0, errors.New("cgroup v1 path for memory controller not found")
}
relPath := chs[idx].CgroupPath
// find the mountpoint for the cgroup v1 controller.
idx = slices.IndexFunc(mis, func(mi mountInfo) bool {
return mi.FilesystemType == "cgroup" && slices.Contains(strings.Split(mi.SuperOptions, ","), "memory")
})
if idx == -1 {
return 0, errors.New("cgroup v1 mountpoint for memory controller not found")
}
root, mountPoint := mis[idx].Root, mis[idx].MountPoint
// resolve the actual cgroup path
cgroupPath, err := resolveCgroupPath(mountPoint, root, relPath)
if err != nil {
return 0, err
}
// retrieve the memory limit from the memory.stats and memory.limit_in_bytes files.
return readMemoryLimitV1FromPath(cgroupPath)
}
// getCgroupV1NoLimit returns the maximum value that is used to represent no limit in cgroup v1.
// the max memory limit is max int64, but it should be multiple of the page size.
func getCgroupV1NoLimit() uint64 {
ps := uint64(os.Getpagesize())
return math.MaxInt64 / ps * ps
}
// readMemoryLimitV1FromPath reads the memory limit for cgroup v1 from the given path.
// this function expects the path to be the cgroup directory.
func readMemoryLimitV1FromPath(cgroupPath string) (uint64, error) {
// read hierarchical_memory_limit and memory.limit_in_bytes files.
// but if hierarchical_memory_limit is not available, then use the max value as a fallback.
hml, err := readHierarchicalMemoryLimit(filepath.Join(cgroupPath, "memory.stats"))
if err != nil && !errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("failed to read hierarchical_memory_limit: %w", err)
} else if hml == 0 {
hml = math.MaxUint64
}
// read memory.limit_in_bytes file.
b, err := os.ReadFile(filepath.Join(cgroupPath, "memory.limit_in_bytes"))
if err != nil && !errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("failed to read memory.limit_in_bytes: %w", err)
}
lib, err := strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse memory.limit_in_bytes value: %w", err)
} else if lib == 0 {
hml = math.MaxUint64
}
// use the minimum value between hierarchical_memory_limit and memory.limit_in_bytes.
// if the limit is the maximum value, then it is considered as no limit.
limit := min(hml, lib)
if limit >= getCgroupV1NoLimit() {
return 0, ErrNoLimit
}
return limit, nil
}
// readHierarchicalMemoryLimit extracts hierarchical_memory_limit from memory.stats.
// this function expects the path to be memory.stats file.
func readHierarchicalMemoryLimit(path string) (uint64, error) {
file, err := os.Open(path)
if err != nil {
return 0, err
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
fields := strings.Split(line, " ")
if len(fields) < 2 {
return 0, fmt.Errorf("failed to parse memory.stats %q: not enough fields", line)
}
if fields[0] == "hierarchical_memory_limit" {
if len(fields) > 2 {
return 0, fmt.Errorf("failed to parse memory.stats %q: too many fields for hierarchical_memory_limit", line)
}
return strconv.ParseUint(fields[1], 10, 64)
}
}
if err := scanner.Err(); err != nil {
return 0, err
}
return 0, nil
}
// https://www.man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html
// 731 771 0:59 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw
//
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
//
// (1) mount ID: a unique ID for the mount (may be reused after umount(2)).
// (2) parent ID: the ID of the parent mount (or of self for the root of this mount namespace's mount tree).
// (3) major:minor: the value of st_dev for files on this filesystem (see stat(2)).
// (4) root: the pathname of the directory in the filesystem which forms the root of this mount.
// (5) mount point: the pathname of the mount point relative to the process's root directory.
// (6) mount options: per-mount options (see mount(2)).
// (7) optional fields: zero or more fields of the form "tag[:value]"; see below.
// (8) separator: the end of the optional fields is marked by a single hyphen.
// (9) filesystem type: the filesystem type in the form "type[.subtype]".
// (10) mount source: filesystem-specific information or "none".
// (11) super options: per-superblock options (see mount(2)).
type mountInfo struct {
Root string
MountPoint string
FilesystemType string
SuperOptions string
}
// parseMountInfoLine parses a line from the mountinfo file.
func parseMountInfoLine(line string) (mountInfo, error) {
if line == "" {
return mountInfo{}, errors.New("empty line")
}
fieldss := strings.SplitN(line, " - ", 2)
if len(fieldss) != 2 {
return mountInfo{}, fmt.Errorf("invalid separator")
}
fields1 := strings.Split(fieldss[0], " ")
if len(fields1) < 6 {
return mountInfo{}, fmt.Errorf("not enough fields before separator: %v", fields1)
} else if len(fields1) > 7 {
return mountInfo{}, fmt.Errorf("too many fields before separator: %v", fields1)
} else if len(fields1) == 6 {
fields1 = append(fields1, "")
}
fields2 := strings.Split(fieldss[1], " ")
if len(fields2) < 3 {
return mountInfo{}, fmt.Errorf("not enough fields after separator: %v", fields2)
} else if len(fields2) > 3 {
return mountInfo{}, fmt.Errorf("too many fields after separator: %v", fields2)
}
return mountInfo{
Root: fields1[3],
MountPoint: fields1[4],
FilesystemType: fields2[0],
SuperOptions: fields2[2],
}, nil
}
// parseMountInfo parses the mountinfo file.
func parseMountInfo(r io.Reader) ([]mountInfo, error) {
var (
s = bufio.NewScanner(r)
mis []mountInfo
)
for s.Scan() {
line := s.Text()
mi, err := parseMountInfoLine(line)
if err != nil {
return nil, fmt.Errorf("failed to parse mountinfo file %q: %w", line, err)
}
mis = append(mis, mi)
}
if err := s.Err(); err != nil {
return nil, err
}
return mis, nil
}
// https://www.man7.org/linux/man-pages/man7/cgroups.7.html
//
// 5:cpuacct,cpu,cpuset:/daemons
// (1) (2) (3)
//
// (1) hierarchy ID:
//
// cgroups version 1 hierarchies, this field
// contains a unique hierarchy ID number that can be
// matched to a hierarchy ID in /proc/cgroups. For the
// cgroups version 2 hierarchy, this field contains the
// value 0.
//
// (2) controller list:
//
// For cgroups version 1 hierarchies, this field
// contains a comma-separated list of the controllers
// bound to the hierarchy. For the cgroups version 2
// hierarchy, this field is empty.
//
// (3) cgroup path:
//
// This field contains the pathname of the control group
// in the hierarchy to which the process belongs. This
// pathname is relative to the mount point of the
// hierarchy.
type cgroupHierarchy struct {
HierarchyID string
ControllerList string
CgroupPath string
}
// parseCgroupHierarchyLine parses a line from the cgroup file.
func parseCgroupHierarchyLine(line string) (cgroupHierarchy, error) {
if line == "" {
return cgroupHierarchy{}, errors.New("empty line")
}
fields := strings.Split(line, ":")
if len(fields) < 3 {
return cgroupHierarchy{}, fmt.Errorf("not enough fields: %v", fields)
} else if len(fields) > 3 {
return cgroupHierarchy{}, fmt.Errorf("too many fields: %v", fields)
}
return cgroupHierarchy{
HierarchyID: fields[0],
ControllerList: fields[1],
CgroupPath: fields[2],
}, nil
}
// parseCgroupFile parses the cgroup file.
func parseCgroupFile(r io.Reader) ([]cgroupHierarchy, error) {
var (
s = bufio.NewScanner(r)
chs []cgroupHierarchy
)
for s.Scan() {
line := s.Text()
ch, err := parseCgroupHierarchyLine(line)
if err != nil {
return nil, fmt.Errorf("failed to parse cgroup file %q: %w", line, err)
}
chs = append(chs, ch)
}
if err := s.Err(); err != nil {
return nil, err
}
return chs, nil
}
// resolveCgroupPath resolves the actual cgroup path from the mountpoint, root, and cgroupRelPath.
func resolveCgroupPath(mountpoint, root, cgroupRelPath string) (string, error) {
rel, err := filepath.Rel(root, cgroupRelPath)
if err != nil {
return "", err
}
// if the relative path is ".", then the cgroupRelPath is the root itself.
if rel == "." {
return mountpoint, nil
}
// if the relative path starts with "..", then it is outside the root.
if strings.HasPrefix(rel, "..") {
return "", fmt.Errorf("invalid cgroup path: %s is not under root %s", cgroupRelPath, root)
}
return filepath.Join(mountpoint, rel), nil
}

View file

@ -3,96 +3,30 @@
package memlimit
import (
"math"
"os"
"path/filepath"
"github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/containerd/cgroups/v3/cgroup2"
)
const (
cgroupMountPoint = "/sys/fs/cgroup"
)
// FromCgroup returns the memory limit based on the cgroups version on this system.
// FromCgroup retrieves the memory limit from the cgroup.
func FromCgroup() (uint64, error) {
switch cgroups.Mode() {
case cgroups.Legacy:
return FromCgroupV1()
case cgroups.Hybrid:
return FromCgroupHybrid()
case cgroups.Unified:
return FromCgroupV2()
}
return 0, ErrNoCgroup
return fromCgroup(detectCgroupVersion)
}
// FromCgroupV1 returns the memory limit from the cgroup v1.
// FromCgroupV1 retrieves the memory limit from the cgroup v1 controller.
// After v1.0.0, this function could be removed and FromCgroup should be used instead.
func FromCgroupV1() (uint64, error) {
cg, err := cgroup1.Load(cgroup1.RootPath, cgroup1.WithHiearchy(
cgroup1.SingleSubsystem(cgroup1.Default, cgroup1.Memory),
))
if err != nil {
return 0, err
}
metrics, err := cg.Stat(cgroup1.IgnoreNotExist)
if err != nil {
return 0, err
}
if limit := metrics.GetMemory().GetHierarchicalMemoryLimit(); limit != 0 && limit != getCgroupV1NoLimit() {
return limit, nil
}
return 0, ErrNoLimit
return fromCgroup(func(_ []mountInfo) (bool, bool) {
return true, false
})
}
func getCgroupV1NoLimit() uint64 {
ps := uint64(os.Getpagesize())
return math.MaxInt64 / ps * ps
}
// FromCgroupHybrid returns the memory limit from the cgroup v1 or v2.
// It checks the cgroup v2 first, and if it fails, it falls back to cgroup v1.
// FromCgroupHybrid retrieves the memory limit from the cgroup v2 and v1 controller sequentially,
// basically, it is equivalent to FromCgroup.
// After v1.0.0, this function could be removed and FromCgroup should be used instead.
func FromCgroupHybrid() (uint64, error) {
limit, err := fromCgroupV2(filepath.Join(cgroupMountPoint, "unified"))
if err == nil {
return limit, nil
} else if err != ErrNoLimit {
return 0, err
}
return FromCgroupV1()
return FromCgroup()
}
// FromCgroupV2 returns the memory limit from the cgroup v2.
// FromCgroupV2 retrieves the memory limit from the cgroup v2 controller.
// After v1.0.0, this function could be removed and FromCgroup should be used instead.
func FromCgroupV2() (uint64, error) {
return fromCgroupV2(cgroupMountPoint)
}
func fromCgroupV2(mountPoint string) (uint64, error) {
path, err := cgroup2.NestedGroupPath("")
if err != nil {
return 0, err
}
m, err := cgroup2.Load(path, cgroup2.WithMountpoint(mountPoint))
if err != nil {
return 0, err
}
stats, err := m.Stat()
if err != nil {
return 0, err
}
if limit := stats.GetMemory().GetUsageLimit(); limit != 0 && limit != math.MaxUint64 {
return limit, nil
}
return 0, ErrNoLimit
return fromCgroup(func(_ []mountInfo) (bool, bool) {
return false, true
})
}

View file

@ -8,6 +8,7 @@
"os"
"runtime/debug"
"strconv"
"time"
)
const (
@ -19,15 +20,14 @@
defaultAUTOMEMLIMIT = 0.9
)
var (
// ErrNoLimit is returned when the memory limit is not set.
ErrNoLimit = errors.New("memory is not limited")
)
// ErrNoLimit is returned when the memory limit is not set.
var ErrNoLimit = errors.New("memory is not limited")
type config struct {
logger *slog.Logger
ratio float64
provider Provider
refresh time.Duration
}
// Option is a function that configures the behavior of SetGoMemLimitWithOptions.
@ -61,6 +61,19 @@ func WithLogger(logger *slog.Logger) Option {
}
}
// WithRefreshInterval configures the refresh interval for automemlimit.
// If a refresh interval is greater than 0, automemlimit periodically fetches
// the memory limit from the provider and reapplies it if it has changed.
// If the provider returns an error, it logs the error and continues.
// ErrNoLimit is treated as math.MaxInt64.
//
// Default: 0 (no refresh)
func WithRefreshInterval(refresh time.Duration) Option {
return func(cfg *config) {
cfg.refresh = refresh
}
}
// WithEnv configures whether to use environment variables.
//
// Default: false
@ -80,7 +93,7 @@ func memlimitLogger(logger *slog.Logger) *slog.Logger {
// SetGoMemLimitWithOpts sets GOMEMLIMIT with options and environment variables.
//
// You can configure how much memory of the cgroup's memory limit to set as GOMEMLIMIT
// through AUTOMEMLIMIT envrironment variable in the half-open range (0.0,1.0].
// through AUTOMEMLIMIT environment variable in the half-open range (0.0,1.0].
//
// If AUTOMEMLIMIT is not set, it defaults to 0.9. (10% is the headroom for memory sources the Go runtime is unaware of.)
// If GOMEMLIMIT is already set or AUTOMEMLIMIT=off, this function does nothing.
@ -128,20 +141,9 @@ func SetGoMemLimitWithOpts(opts ...Option) (_ int64, _err error) {
cfg.provider = ApplyFallback(cfg.provider, FromSystem)
}
// capture the current GOMEMLIMIT for rollback in case of panic
// rollback to previous memory limit on panic
snapshot := debug.SetMemoryLimit(-1)
defer func() {
panicErr := recover()
if panicErr != nil {
if _err != nil {
cfg.logger.Error("failed to set GOMEMLIMIT", slog.Any("error", _err))
}
_err = fmt.Errorf("panic during setting the Go's memory limit, rolling back to previous limit %d: %v",
snapshot, panicErr,
)
debug.SetMemoryLimit(snapshot)
}
}()
defer rollbackOnPanic(cfg.logger, snapshot, &_err)
// check if GOMEMLIMIT is already set
if val, ok := os.LookupEnv(envGOMEMLIMIT); ok {
@ -156,26 +158,89 @@ func SetGoMemLimitWithOpts(opts ...Option) (_ int64, _err error) {
cfg.logger.Info("AUTOMEMLIMIT is set to off, skipping")
return 0, nil
}
_ratio, err := strconv.ParseFloat(val, 64)
ratio, err = strconv.ParseFloat(val, 64)
if err != nil {
return 0, fmt.Errorf("cannot parse AUTOMEMLIMIT: %s", val)
}
ratio = _ratio
}
// set GOMEMLIMIT
limit, err := setGoMemLimit(ApplyRatio(cfg.provider, ratio))
// apply ratio to the provider
provider := capProvider(ApplyRatio(cfg.provider, ratio))
// set the memory limit and start refresh
limit, err := updateGoMemLimit(uint64(snapshot), provider, cfg.logger)
go refresh(provider, cfg.logger, cfg.refresh)
if err != nil {
if errors.Is(err, ErrNoLimit) {
cfg.logger.Info("memory is not limited, skipping")
// TODO: consider returning the snapshot
return 0, nil
}
return 0, fmt.Errorf("failed to set GOMEMLIMIT: %w", err)
}
cfg.logger.Info("GOMEMLIMIT is updated", slog.Int64(envGOMEMLIMIT, limit))
return int64(limit), nil
}
return limit, nil
// updateGoMemLimit updates the Go's memory limit, if it has changed.
func updateGoMemLimit(currLimit uint64, provider Provider, logger *slog.Logger) (uint64, error) {
newLimit, err := provider()
if err != nil {
return 0, err
}
if newLimit == currLimit {
logger.Debug("GOMEMLIMIT is not changed, skipping", slog.Uint64(envGOMEMLIMIT, newLimit))
return newLimit, nil
}
debug.SetMemoryLimit(int64(newLimit))
logger.Info("GOMEMLIMIT is updated", slog.Uint64(envGOMEMLIMIT, newLimit), slog.Uint64("previous", currLimit))
return newLimit, nil
}
// refresh periodically fetches the memory limit from the provider and reapplies it if it has changed.
// See more details in the documentation of WithRefreshInterval.
func refresh(provider Provider, logger *slog.Logger, refresh time.Duration) {
if refresh == 0 {
return
}
provider = noErrNoLimitProvider(provider)
t := time.NewTicker(refresh)
for range t.C {
err := func() (_err error) {
snapshot := debug.SetMemoryLimit(-1)
defer rollbackOnPanic(logger, snapshot, &_err)
_, err := updateGoMemLimit(uint64(snapshot), provider, logger)
if err != nil {
return err
}
return nil
}()
if err != nil {
logger.Error("failed to refresh GOMEMLIMIT", slog.Any("error", err))
}
}
}
// rollbackOnPanic rollbacks to the snapshot on panic.
// Since it uses recover, it should be called in a deferred function.
func rollbackOnPanic(logger *slog.Logger, snapshot int64, err *error) {
panicErr := recover()
if panicErr != nil {
if *err != nil {
logger.Error("failed to set GOMEMLIMIT", slog.Any("error", *err))
}
*err = fmt.Errorf("panic during setting the Go's memory limit, rolling back to previous limit %d: %v",
snapshot, panicErr,
)
debug.SetMemoryLimit(snapshot)
}
}
// SetGoMemLimitWithEnv sets GOMEMLIMIT with the value from the environment variables.
@ -195,19 +260,24 @@ func SetGoMemLimitWithProvider(provider Provider, ratio float64) (int64, error)
return SetGoMemLimitWithOpts(WithProvider(provider), WithRatio(ratio))
}
func setGoMemLimit(provider Provider) (int64, error) {
limit, err := provider()
if err != nil {
return 0, err
func noErrNoLimitProvider(provider Provider) Provider {
return func() (uint64, error) {
limit, err := provider()
if errors.Is(err, ErrNoLimit) {
return math.MaxInt64, nil
}
return limit, err
}
capped := cappedU64ToI64(limit)
debug.SetMemoryLimit(capped)
return capped, nil
}
func cappedU64ToI64(limit uint64) int64 {
if limit > math.MaxInt64 {
return math.MaxInt64
func capProvider(provider Provider) Provider {
return func() (uint64, error) {
limit, err := provider()
if err != nil {
return 0, err
} else if limit > math.MaxInt64 {
return math.MaxInt64, nil
}
return limit, nil
}
return int64(limit)
}

View file

@ -16,6 +16,9 @@ func Limit(limit uint64) func() (uint64, error) {
// ApplyRationA is a helper Provider function that applies the given ratio to the given provider.
func ApplyRatio(provider Provider, ratio float64) Provider {
if ratio == 1 {
return provider
}
return func() (uint64, error) {
if ratio <= 0 || ratio > 1 {
return 0, fmt.Errorf("invalid ratio: %f, ratio should be in the range (0.0,1.0]", ratio)

View file

@ -1,17 +0,0 @@
---
Language: Cpp
BasedOnStyle: LLVM
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: true
AlignEscapedNewlines: DontAlign
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: false
BreakBeforeBraces: Attach
IndentWidth: 4
KeepEmptyLinesAtTheStartOfBlocks: false
TabWidth: 4
UseTab: ForContinuationAndIndentation
ColumnLimit: 1000
...

View file

@ -1,14 +0,0 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
*.o
!*_bpf*.o
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out

View file

@ -1,28 +0,0 @@
---
issues:
exclude-rules:
# syscall param structs will have unused fields in Go code.
- path: syscall.*.go
linters:
- structcheck
linters:
disable-all: true
enable:
- deadcode
- errcheck
- goimports
- gosimple
- govet
- ineffassign
- misspell
- staticcheck
- structcheck
- typecheck
- unused
- varcheck
# Could be enabled later:
# - gocyclo
# - maligned
# - gosec

View file

@ -1,86 +0,0 @@
Architecture of the library
===
ELF -> Specifications -> Objects -> Links
ELF
---
BPF is usually produced by using Clang to compile a subset of C. Clang outputs
an ELF file which contains program byte code (aka BPF), but also metadata for
maps used by the program. The metadata follows the conventions set by libbpf
shipped with the kernel. Certain ELF sections have special meaning
and contain structures defined by libbpf. Newer versions of clang emit
additional metadata in BPF Type Format (aka BTF).
The library aims to be compatible with libbpf so that moving from a C toolchain
to a Go one creates little friction. To that end, the [ELF reader](elf_reader.go)
is tested against the Linux selftests and avoids introducing custom behaviour
if possible.
The output of the ELF reader is a `CollectionSpec` which encodes
all of the information contained in the ELF in a form that is easy to work with
in Go.
### BTF
The BPF Type Format describes more than just the types used by a BPF program. It
includes debug aids like which source line corresponds to which instructions and
what global variables are used.
[BTF parsing](internal/btf/) lives in a separate internal package since exposing
it would mean an additional maintenance burden, and because the API still
has sharp corners. The most important concept is the `btf.Type` interface, which
also describes things that aren't really types like `.rodata` or `.bss` sections.
`btf.Type`s can form cyclical graphs, which can easily lead to infinite loops if
one is not careful. Hopefully a safe pattern to work with `btf.Type` emerges as
we write more code that deals with it.
Specifications
---
`CollectionSpec`, `ProgramSpec` and `MapSpec` are blueprints for in-kernel
objects and contain everything necessary to execute the relevant `bpf(2)`
syscalls. Since the ELF reader outputs a `CollectionSpec` it's possible to
modify clang-compiled BPF code, for example to rewrite constants. At the same
time the [asm](asm/) package provides an assembler that can be used to generate
`ProgramSpec` on the fly.
Creating a spec should never require any privileges or be restricted in any way,
for example by only allowing programs in native endianness. This ensures that
the library stays flexible.
Objects
---
`Program` and `Map` are the result of loading specs into the kernel. Sometimes
loading a spec will fail because the kernel is too old, or a feature is not
enabled. There are multiple ways the library deals with that:
* Fallback: older kernels don't allow naming programs and maps. The library
automatically detects support for names, and omits them during load if
necessary. This works since name is primarily a debug aid.
* Sentinel error: sometimes it's possible to detect that a feature isn't available.
In that case the library will return an error wrapping `ErrNotSupported`.
This is also useful to skip tests that can't run on the current kernel.
Once program and map objects are loaded they expose the kernel's low-level API,
e.g. `NextKey`. Often this API is awkward to use in Go, so there are safer
wrappers on top of the low-level API, like `MapIterator`. The low-level API is
useful when our higher-level API doesn't support a particular use case.
Links
---
BPF can be attached to many different points in the kernel and newer BPF hooks
tend to use bpf_link to do so. Older hooks unfortunately use a combination of
syscalls, netlink messages, etc. Adding support for a new link type should not
pull in large dependencies like netlink, so XDP programs or tracepoints are
out of scope.
Each bpf_link_type has one corresponding Go type, e.g. `link.tracing` corresponds
to BPF_LINK_TRACING. In general, these types should be unexported as long as they
don't export methods outside of the Link interface. Each Go type may have multiple
exported constructors. For example `AttachTracing` and `AttachLSM` create a
tracing link, but are distinct functions since they may require different arguments.

View file

@ -1,46 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/

View file

@ -1,40 +0,0 @@
# How to contribute
Development is on [GitHub](https://github.com/cilium/ebpf) and contributions in
the form of pull requests and issues reporting bugs or suggesting new features
are welcome. Please take a look at [the architecture](ARCHITECTURE.md) to get
a better understanding for the high-level goals.
New features must be accompanied by tests. Before starting work on any large
feature, please [join](https://ebpf.io/slack) the
[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack to
discuss the design first.
When submitting pull requests, consider writing details about what problem you
are solving and why the proposed approach solves that problem in commit messages
and/or pull request description to help future library users and maintainers to
reason about the proposed changes.
## Running the tests
Many of the tests require privileges to set resource limits and load eBPF code.
The easiest way to obtain these is to run the tests with `sudo`.
To test the current package with your local kernel you can simply run:
```
go test -exec sudo ./...
```
To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script.
It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed.
Examples:
```bash
# Run all tests on a 5.4 kernel
./run-tests.sh 5.4
# Run a subset of tests:
./run-tests.sh 5.4 go test ./link
```

View file

@ -1,23 +0,0 @@
MIT License
Copyright (c) 2017 Nathan Sweet
Copyright (c) 2018, 2019 Cloudflare
Copyright (c) 2019 Authors of Cilium
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,8 +0,0 @@
# Maintainers
* [Lorenz Bauer]
* [Timo Beckers] (Isovalent)
[Lorenz Bauer]: https://github.com/lmb
[Timo Beckers]: https://github.com/ti-mo

View file

@ -1,110 +0,0 @@
# The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached.
# Pin the default clang to a stable version.
CLANG ?= clang-14
STRIP ?= llvm-strip-14
OBJCOPY ?= llvm-objcopy-14
CFLAGS := -O2 -g -Wall -Werror $(CFLAGS)
CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/
# Obtain an absolute path to the directory of the Makefile.
# Assume the Makefile is in the root of the repository.
REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UIDGID := $(shell stat -c '%u:%g' ${REPODIR})
# Prefer podman if installed, otherwise use docker.
# Note: Setting the var at runtime will always override.
CONTAINER_ENGINE ?= $(if $(shell command -v podman), podman, docker)
CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=none, --user "${UIDGID}")
IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE)
VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)
# clang <8 doesn't tag relocs properly (STT_NOTYPE)
# clang 9 is the first version emitting BTF
TARGETS := \
testdata/loader-clang-7 \
testdata/loader-clang-9 \
testdata/loader-$(CLANG) \
testdata/btf_map_init \
testdata/invalid_map \
testdata/raw_tracepoint \
testdata/invalid_map_static \
testdata/invalid_btf_map_init \
testdata/strings \
testdata/freplace \
testdata/iproute2_map_compat \
testdata/map_spin_lock \
testdata/subprog_reloc \
testdata/fwd_decl \
btf/testdata/relocs \
btf/testdata/relocs_read \
btf/testdata/relocs_read_tgt
.PHONY: all clean container-all container-shell generate
.DEFAULT_TARGET = container-all
# Build all ELF binaries using a containerized LLVM toolchain.
container-all:
${CONTAINER_ENGINE} run --rm ${CONTAINER_RUN_ARGS} \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
--env HOME="/tmp" \
"${IMAGE}:${VERSION}" \
$(MAKE) all
# (debug) Drop the user into a shell inside the container as root.
container-shell:
${CONTAINER_ENGINE} run --rm -ti \
-v "${REPODIR}":/ebpf -w /ebpf \
"${IMAGE}:${VERSION}"
clean:
-$(RM) testdata/*.elf
-$(RM) btf/testdata/*.elf
format:
find . -type f -name "*.c" | xargs clang-format -i
all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) generate
ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
# $BPF_CLANG is used in go:generate invocations.
generate: export BPF_CLANG := $(CLANG)
generate: export BPF_CFLAGS := $(CFLAGS)
generate:
go generate ./cmd/bpf2go/test
go generate ./internal/sys
cd examples/ && go generate ./...
testdata/loader-%-el.elf: testdata/loader.c
$* $(CFLAGS) -target bpfel -c $< -o $@
$(STRIP) -g $@
testdata/loader-%-eb.elf: testdata/loader.c
$* $(CFLAGS) -target bpfeb -c $< -o $@
$(STRIP) -g $@
%-el.elf: %.c
$(CLANG) $(CFLAGS) -target bpfel -c $< -o $@
$(STRIP) -g $@
%-eb.elf : %.c
$(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@
$(STRIP) -g $@
.PHONY: generate-btf
generate-btf: KERNEL_VERSION?=5.18
generate-btf:
$(eval TMP := $(shell mktemp -d))
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage"
./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
$(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz"
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz"
tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \
$(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null
$(RM) -r "$(TMP)"

View file

@ -1,77 +0,0 @@
# eBPF
[![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf)
![HoneyGopher](.github/images/cilium-ebpf.png)
eBPF is a pure Go library that provides utilities for loading, compiling, and
debugging eBPF programs. It has minimal external dependencies and is intended to
be used in long running processes.
The library is maintained by [Cloudflare](https://www.cloudflare.com) and
[Cilium](https://www.cilium.io).
See [ebpf.io](https://ebpf.io) for other projects from the eBPF ecosystem.
## Getting Started
A small collection of Go and eBPF programs that serve as examples for building
your own tools can be found under [examples/](examples/).
Contributions are highly encouraged, as they highlight certain use cases of
eBPF and the library, and help shape the future of the project.
## Getting Help
Please
[join](https://ebpf.io/slack) the
[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack if you
have questions regarding the library.
## Packages
This library includes the following packages:
* [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic
assembler, allowing you to write eBPF assembly instructions directly
within your Go code. (You don't need to use this if you prefer to write your eBPF program in C.)
* [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows
compiling and embedding eBPF programs written in C within Go code. As well as
compiling the C code, it auto-generates Go code for loading and manipulating
the eBPF program and map objects.
* [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF
to various hooks
* [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a
`PERF_EVENT_ARRAY`
* [ringbuf](https://pkg.go.dev/github.com/cilium/ebpf/ringbuf) allows reading from a
`BPF_MAP_TYPE_RINGBUF` map
* [features](https://pkg.go.dev/github.com/cilium/ebpf/features) implements the equivalent
of `bpftool feature probe` for discovering BPF-related kernel features using native Go.
* [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift
the `RLIMIT_MEMLOCK` constraint on kernels before 5.11.
## Requirements
* A version of Go that is [supported by
upstream](https://golang.org/doc/devel/release.html#policy)
* Linux >= 4.9. CI is run against kernel.org LTS releases. 4.4 should work but is
not tested against.
## Regenerating Testdata
Run `make` in the root of this repository to rebuild testdata in all
subpackages. This requires Docker, as it relies on a standardized build
environment to keep the build output stable.
It is possible to regenerate data using Podman by overriding the `CONTAINER_*`
variables: `CONTAINER_ENGINE=podman CONTAINER_RUN_ARGS= make`.
The toolchain image build files are kept in [testdata/docker/](testdata/docker/).
## License
MIT
### eBPF Gopher
The eBPF honeygopher is based on the Go gopher designed by Renee French.

View file

@ -1,149 +0,0 @@
package asm
//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
// Source of ALU / ALU64 / Branch operations
//
// msb lsb
// +----+-+---+
// |op |S|cls|
// +----+-+---+
type Source uint8
const sourceMask OpCode = 0x08
// Source bitmask
const (
// InvalidSource is returned by getters when invoked
// on non ALU / branch OpCodes.
InvalidSource Source = 0xff
// ImmSource src is from constant
ImmSource Source = 0x00
// RegSource src is from register
RegSource Source = 0x08
)
// The Endianness of a byte swap instruction.
type Endianness uint8
const endianMask = sourceMask
// Endian flags
const (
InvalidEndian Endianness = 0xff
// Convert to little endian
LE Endianness = 0x00
// Convert to big endian
BE Endianness = 0x08
)
// ALUOp are ALU / ALU64 operations
//
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
type ALUOp uint8
const aluMask OpCode = 0xf0
const (
// InvalidALUOp is returned by getters when invoked
// on non ALU OpCodes
InvalidALUOp ALUOp = 0xff
// Add - addition
Add ALUOp = 0x00
// Sub - subtraction
Sub ALUOp = 0x10
// Mul - multiplication
Mul ALUOp = 0x20
// Div - division
Div ALUOp = 0x30
// Or - bitwise or
Or ALUOp = 0x40
// And - bitwise and
And ALUOp = 0x50
// LSh - bitwise shift left
LSh ALUOp = 0x60
// RSh - bitwise shift right
RSh ALUOp = 0x70
// Neg - sign/unsign signing bit
Neg ALUOp = 0x80
// Mod - modulo
Mod ALUOp = 0x90
// Xor - bitwise xor
Xor ALUOp = 0xa0
// Mov - move value from one place to another
Mov ALUOp = 0xb0
// ArSh - arithmatic shift
ArSh ALUOp = 0xc0
// Swap - endian conversions
Swap ALUOp = 0xd0
)
// HostTo converts from host to another endianness.
func HostTo(endian Endianness, dst Register, size Size) Instruction {
var imm int64
switch size {
case Half:
imm = 16
case Word:
imm = 32
case DWord:
imm = 64
default:
return Instruction{OpCode: InvalidOpCode}
}
return Instruction{
OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)),
Dst: dst,
Constant: imm,
}
}
// Op returns the OpCode for an ALU operation with a given source.
func (op ALUOp) Op(source Source) OpCode {
return OpCode(ALU64Class).SetALUOp(op).SetSource(source)
}
// Reg emits `dst (op) src`.
func (op ALUOp) Reg(dst, src Register) Instruction {
return Instruction{
OpCode: op.Op(RegSource),
Dst: dst,
Src: src,
}
}
// Imm emits `dst (op) value`.
func (op ALUOp) Imm(dst Register, value int32) Instruction {
return Instruction{
OpCode: op.Op(ImmSource),
Dst: dst,
Constant: int64(value),
}
}
// Op32 returns the OpCode for a 32-bit ALU operation with a given source.
func (op ALUOp) Op32(source Source) OpCode {
return OpCode(ALUClass).SetALUOp(op).SetSource(source)
}
// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst.
func (op ALUOp) Reg32(dst, src Register) Instruction {
return Instruction{
OpCode: op.Op32(RegSource),
Dst: dst,
Src: src,
}
}
// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst.
func (op ALUOp) Imm32(dst Register, value int32) Instruction {
return Instruction{
OpCode: op.Op32(ImmSource),
Dst: dst,
Constant: int64(value),
}
}

View file

@ -1,107 +0,0 @@
// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT.
package asm
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidSource-255]
_ = x[ImmSource-0]
_ = x[RegSource-8]
}
const (
_Source_name_0 = "ImmSource"
_Source_name_1 = "RegSource"
_Source_name_2 = "InvalidSource"
)
func (i Source) String() string {
switch {
case i == 0:
return _Source_name_0
case i == 8:
return _Source_name_1
case i == 255:
return _Source_name_2
default:
return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
}
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidEndian-255]
_ = x[LE-0]
_ = x[BE-8]
}
const (
_Endianness_name_0 = "LE"
_Endianness_name_1 = "BE"
_Endianness_name_2 = "InvalidEndian"
)
func (i Endianness) String() string {
switch {
case i == 0:
return _Endianness_name_0
case i == 8:
return _Endianness_name_1
case i == 255:
return _Endianness_name_2
default:
return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
}
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidALUOp-255]
_ = x[Add-0]
_ = x[Sub-16]
_ = x[Mul-32]
_ = x[Div-48]
_ = x[Or-64]
_ = x[And-80]
_ = x[LSh-96]
_ = x[RSh-112]
_ = x[Neg-128]
_ = x[Mod-144]
_ = x[Xor-160]
_ = x[Mov-176]
_ = x[ArSh-192]
_ = x[Swap-208]
}
const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
var _ALUOp_map = map[ALUOp]string{
0: _ALUOp_name[0:3],
16: _ALUOp_name[3:6],
32: _ALUOp_name[6:9],
48: _ALUOp_name[9:12],
64: _ALUOp_name[12:14],
80: _ALUOp_name[14:17],
96: _ALUOp_name[17:20],
112: _ALUOp_name[20:23],
128: _ALUOp_name[23:26],
144: _ALUOp_name[26:29],
160: _ALUOp_name[29:32],
176: _ALUOp_name[32:35],
192: _ALUOp_name[35:39],
208: _ALUOp_name[39:43],
255: _ALUOp_name[43:55],
}
func (i ALUOp) String() string {
if str, ok := _ALUOp_map[i]; ok {
return str
}
return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
}

View file

@ -1,2 +0,0 @@
// Package asm is an assembler for eBPF bytecode.
package asm

View file

@ -1,242 +0,0 @@
package asm
//go:generate stringer -output func_string.go -type=BuiltinFunc
// BuiltinFunc is a built-in eBPF function.
type BuiltinFunc int32
func (_ BuiltinFunc) Max() BuiltinFunc {
return maxBuiltinFunc - 1
}
// eBPF built-in functions
//
// You can regenerate this list using the following gawk script:
//
// /FN\(.+\),/ {
// match($1, /\((.+)\)/, r)
// split(r[1], p, "_")
// printf "Fn"
// for (i in p) {
// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
// }
// print ""
// }
//
// The script expects include/uapi/linux/bpf.h as it's input.
const (
FnUnspec BuiltinFunc = iota
FnMapLookupElem
FnMapUpdateElem
FnMapDeleteElem
FnProbeRead
FnKtimeGetNs
FnTracePrintk
FnGetPrandomU32
FnGetSmpProcessorId
FnSkbStoreBytes
FnL3CsumReplace
FnL4CsumReplace
FnTailCall
FnCloneRedirect
FnGetCurrentPidTgid
FnGetCurrentUidGid
FnGetCurrentComm
FnGetCgroupClassid
FnSkbVlanPush
FnSkbVlanPop
FnSkbGetTunnelKey
FnSkbSetTunnelKey
FnPerfEventRead
FnRedirect
FnGetRouteRealm
FnPerfEventOutput
FnSkbLoadBytes
FnGetStackid
FnCsumDiff
FnSkbGetTunnelOpt
FnSkbSetTunnelOpt
FnSkbChangeProto
FnSkbChangeType
FnSkbUnderCgroup
FnGetHashRecalc
FnGetCurrentTask
FnProbeWriteUser
FnCurrentTaskUnderCgroup
FnSkbChangeTail
FnSkbPullData
FnCsumUpdate
FnSetHashInvalid
FnGetNumaNodeId
FnSkbChangeHead
FnXdpAdjustHead
FnProbeReadStr
FnGetSocketCookie
FnGetSocketUid
FnSetHash
FnSetsockopt
FnSkbAdjustRoom
FnRedirectMap
FnSkRedirectMap
FnSockMapUpdate
FnXdpAdjustMeta
FnPerfEventReadValue
FnPerfProgReadValue
FnGetsockopt
FnOverrideReturn
FnSockOpsCbFlagsSet
FnMsgRedirectMap
FnMsgApplyBytes
FnMsgCorkBytes
FnMsgPullData
FnBind
FnXdpAdjustTail
FnSkbGetXfrmState
FnGetStack
FnSkbLoadBytesRelative
FnFibLookup
FnSockHashUpdate
FnMsgRedirectHash
FnSkRedirectHash
FnLwtPushEncap
FnLwtSeg6StoreBytes
FnLwtSeg6AdjustSrh
FnLwtSeg6Action
FnRcRepeat
FnRcKeydown
FnSkbCgroupId
FnGetCurrentCgroupId
FnGetLocalStorage
FnSkSelectReuseport
FnSkbAncestorCgroupId
FnSkLookupTcp
FnSkLookupUdp
FnSkRelease
FnMapPushElem
FnMapPopElem
FnMapPeekElem
FnMsgPushData
FnMsgPopData
FnRcPointerRel
FnSpinLock
FnSpinUnlock
FnSkFullsock
FnTcpSock
FnSkbEcnSetCe
FnGetListenerSock
FnSkcLookupTcp
FnTcpCheckSyncookie
FnSysctlGetName
FnSysctlGetCurrentValue
FnSysctlGetNewValue
FnSysctlSetNewValue
FnStrtol
FnStrtoul
FnSkStorageGet
FnSkStorageDelete
FnSendSignal
FnTcpGenSyncookie
FnSkbOutput
FnProbeReadUser
FnProbeReadKernel
FnProbeReadUserStr
FnProbeReadKernelStr
FnTcpSendAck
FnSendSignalThread
FnJiffies64
FnReadBranchRecords
FnGetNsCurrentPidTgid
FnXdpOutput
FnGetNetnsCookie
FnGetCurrentAncestorCgroupId
FnSkAssign
FnKtimeGetBootNs
FnSeqPrintf
FnSeqWrite
FnSkCgroupId
FnSkAncestorCgroupId
FnRingbufOutput
FnRingbufReserve
FnRingbufSubmit
FnRingbufDiscard
FnRingbufQuery
FnCsumLevel
FnSkcToTcp6Sock
FnSkcToTcpSock
FnSkcToTcpTimewaitSock
FnSkcToTcpRequestSock
FnSkcToUdp6Sock
FnGetTaskStack
FnLoadHdrOpt
FnStoreHdrOpt
FnReserveHdrOpt
FnInodeStorageGet
FnInodeStorageDelete
FnDPath
FnCopyFromUser
FnSnprintfBtf
FnSeqPrintfBtf
FnSkbCgroupClassid
FnRedirectNeigh
FnPerCpuPtr
FnThisCpuPtr
FnRedirectPeer
FnTaskStorageGet
FnTaskStorageDelete
FnGetCurrentTaskBtf
FnBprmOptsSet
FnKtimeGetCoarseNs
FnImaInodeHash
FnSockFromFile
FnCheckMtu
FnForEachMapElem
FnSnprintf
FnSysBpf
FnBtfFindByNameKind
FnSysClose
FnTimerInit
FnTimerSetCallback
FnTimerStart
FnTimerCancel
FnGetFuncIp
FnGetAttachCookie
FnTaskPtRegs
FnGetBranchSnapshot
FnTraceVprintk
FnSkcToUnixSock
FnKallsymsLookupName
FnFindVma
FnLoop
FnStrncmp
FnGetFuncArg
FnGetFuncRet
FnGetFuncArgCnt
FnGetRetval
FnSetRetval
FnXdpGetBuffLen
FnXdpLoadBytes
FnXdpStoreBytes
FnCopyFromUserTask
FnSkbSetTstamp
FnImaFileHash
FnKptrXchg
FnMapLookupPercpuElem
FnSkcToMptcpSock
FnDynptrFromMem
FnRingbufReserveDynptr
FnRingbufSubmitDynptr
FnRingbufDiscardDynptr
FnDynptrRead
FnDynptrWrite
FnDynptrData
maxBuiltinFunc
)
// Call emits a function call.
func (fn BuiltinFunc) Call() Instruction {
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(Call),
Constant: int64(fn),
}
}

View file

@ -1,227 +0,0 @@
// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT.
package asm
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[FnUnspec-0]
_ = x[FnMapLookupElem-1]
_ = x[FnMapUpdateElem-2]
_ = x[FnMapDeleteElem-3]
_ = x[FnProbeRead-4]
_ = x[FnKtimeGetNs-5]
_ = x[FnTracePrintk-6]
_ = x[FnGetPrandomU32-7]
_ = x[FnGetSmpProcessorId-8]
_ = x[FnSkbStoreBytes-9]
_ = x[FnL3CsumReplace-10]
_ = x[FnL4CsumReplace-11]
_ = x[FnTailCall-12]
_ = x[FnCloneRedirect-13]
_ = x[FnGetCurrentPidTgid-14]
_ = x[FnGetCurrentUidGid-15]
_ = x[FnGetCurrentComm-16]
_ = x[FnGetCgroupClassid-17]
_ = x[FnSkbVlanPush-18]
_ = x[FnSkbVlanPop-19]
_ = x[FnSkbGetTunnelKey-20]
_ = x[FnSkbSetTunnelKey-21]
_ = x[FnPerfEventRead-22]
_ = x[FnRedirect-23]
_ = x[FnGetRouteRealm-24]
_ = x[FnPerfEventOutput-25]
_ = x[FnSkbLoadBytes-26]
_ = x[FnGetStackid-27]
_ = x[FnCsumDiff-28]
_ = x[FnSkbGetTunnelOpt-29]
_ = x[FnSkbSetTunnelOpt-30]
_ = x[FnSkbChangeProto-31]
_ = x[FnSkbChangeType-32]
_ = x[FnSkbUnderCgroup-33]
_ = x[FnGetHashRecalc-34]
_ = x[FnGetCurrentTask-35]
_ = x[FnProbeWriteUser-36]
_ = x[FnCurrentTaskUnderCgroup-37]
_ = x[FnSkbChangeTail-38]
_ = x[FnSkbPullData-39]
_ = x[FnCsumUpdate-40]
_ = x[FnSetHashInvalid-41]
_ = x[FnGetNumaNodeId-42]
_ = x[FnSkbChangeHead-43]
_ = x[FnXdpAdjustHead-44]
_ = x[FnProbeReadStr-45]
_ = x[FnGetSocketCookie-46]
_ = x[FnGetSocketUid-47]
_ = x[FnSetHash-48]
_ = x[FnSetsockopt-49]
_ = x[FnSkbAdjustRoom-50]
_ = x[FnRedirectMap-51]
_ = x[FnSkRedirectMap-52]
_ = x[FnSockMapUpdate-53]
_ = x[FnXdpAdjustMeta-54]
_ = x[FnPerfEventReadValue-55]
_ = x[FnPerfProgReadValue-56]
_ = x[FnGetsockopt-57]
_ = x[FnOverrideReturn-58]
_ = x[FnSockOpsCbFlagsSet-59]
_ = x[FnMsgRedirectMap-60]
_ = x[FnMsgApplyBytes-61]
_ = x[FnMsgCorkBytes-62]
_ = x[FnMsgPullData-63]
_ = x[FnBind-64]
_ = x[FnXdpAdjustTail-65]
_ = x[FnSkbGetXfrmState-66]
_ = x[FnGetStack-67]
_ = x[FnSkbLoadBytesRelative-68]
_ = x[FnFibLookup-69]
_ = x[FnSockHashUpdate-70]
_ = x[FnMsgRedirectHash-71]
_ = x[FnSkRedirectHash-72]
_ = x[FnLwtPushEncap-73]
_ = x[FnLwtSeg6StoreBytes-74]
_ = x[FnLwtSeg6AdjustSrh-75]
_ = x[FnLwtSeg6Action-76]
_ = x[FnRcRepeat-77]
_ = x[FnRcKeydown-78]
_ = x[FnSkbCgroupId-79]
_ = x[FnGetCurrentCgroupId-80]
_ = x[FnGetLocalStorage-81]
_ = x[FnSkSelectReuseport-82]
_ = x[FnSkbAncestorCgroupId-83]
_ = x[FnSkLookupTcp-84]
_ = x[FnSkLookupUdp-85]
_ = x[FnSkRelease-86]
_ = x[FnMapPushElem-87]
_ = x[FnMapPopElem-88]
_ = x[FnMapPeekElem-89]
_ = x[FnMsgPushData-90]
_ = x[FnMsgPopData-91]
_ = x[FnRcPointerRel-92]
_ = x[FnSpinLock-93]
_ = x[FnSpinUnlock-94]
_ = x[FnSkFullsock-95]
_ = x[FnTcpSock-96]
_ = x[FnSkbEcnSetCe-97]
_ = x[FnGetListenerSock-98]
_ = x[FnSkcLookupTcp-99]
_ = x[FnTcpCheckSyncookie-100]
_ = x[FnSysctlGetName-101]
_ = x[FnSysctlGetCurrentValue-102]
_ = x[FnSysctlGetNewValue-103]
_ = x[FnSysctlSetNewValue-104]
_ = x[FnStrtol-105]
_ = x[FnStrtoul-106]
_ = x[FnSkStorageGet-107]
_ = x[FnSkStorageDelete-108]
_ = x[FnSendSignal-109]
_ = x[FnTcpGenSyncookie-110]
_ = x[FnSkbOutput-111]
_ = x[FnProbeReadUser-112]
_ = x[FnProbeReadKernel-113]
_ = x[FnProbeReadUserStr-114]
_ = x[FnProbeReadKernelStr-115]
_ = x[FnTcpSendAck-116]
_ = x[FnSendSignalThread-117]
_ = x[FnJiffies64-118]
_ = x[FnReadBranchRecords-119]
_ = x[FnGetNsCurrentPidTgid-120]
_ = x[FnXdpOutput-121]
_ = x[FnGetNetnsCookie-122]
_ = x[FnGetCurrentAncestorCgroupId-123]
_ = x[FnSkAssign-124]
_ = x[FnKtimeGetBootNs-125]
_ = x[FnSeqPrintf-126]
_ = x[FnSeqWrite-127]
_ = x[FnSkCgroupId-128]
_ = x[FnSkAncestorCgroupId-129]
_ = x[FnRingbufOutput-130]
_ = x[FnRingbufReserve-131]
_ = x[FnRingbufSubmit-132]
_ = x[FnRingbufDiscard-133]
_ = x[FnRingbufQuery-134]
_ = x[FnCsumLevel-135]
_ = x[FnSkcToTcp6Sock-136]
_ = x[FnSkcToTcpSock-137]
_ = x[FnSkcToTcpTimewaitSock-138]
_ = x[FnSkcToTcpRequestSock-139]
_ = x[FnSkcToUdp6Sock-140]
_ = x[FnGetTaskStack-141]
_ = x[FnLoadHdrOpt-142]
_ = x[FnStoreHdrOpt-143]
_ = x[FnReserveHdrOpt-144]
_ = x[FnInodeStorageGet-145]
_ = x[FnInodeStorageDelete-146]
_ = x[FnDPath-147]
_ = x[FnCopyFromUser-148]
_ = x[FnSnprintfBtf-149]
_ = x[FnSeqPrintfBtf-150]
_ = x[FnSkbCgroupClassid-151]
_ = x[FnRedirectNeigh-152]
_ = x[FnPerCpuPtr-153]
_ = x[FnThisCpuPtr-154]
_ = x[FnRedirectPeer-155]
_ = x[FnTaskStorageGet-156]
_ = x[FnTaskStorageDelete-157]
_ = x[FnGetCurrentTaskBtf-158]
_ = x[FnBprmOptsSet-159]
_ = x[FnKtimeGetCoarseNs-160]
_ = x[FnImaInodeHash-161]
_ = x[FnSockFromFile-162]
_ = x[FnCheckMtu-163]
_ = x[FnForEachMapElem-164]
_ = x[FnSnprintf-165]
_ = x[FnSysBpf-166]
_ = x[FnBtfFindByNameKind-167]
_ = x[FnSysClose-168]
_ = x[FnTimerInit-169]
_ = x[FnTimerSetCallback-170]
_ = x[FnTimerStart-171]
_ = x[FnTimerCancel-172]
_ = x[FnGetFuncIp-173]
_ = x[FnGetAttachCookie-174]
_ = x[FnTaskPtRegs-175]
_ = x[FnGetBranchSnapshot-176]
_ = x[FnTraceVprintk-177]
_ = x[FnSkcToUnixSock-178]
_ = x[FnKallsymsLookupName-179]
_ = x[FnFindVma-180]
_ = x[FnLoop-181]
_ = x[FnStrncmp-182]
_ = x[FnGetFuncArg-183]
_ = x[FnGetFuncRet-184]
_ = x[FnGetFuncArgCnt-185]
_ = x[FnGetRetval-186]
_ = x[FnSetRetval-187]
_ = x[FnXdpGetBuffLen-188]
_ = x[FnXdpLoadBytes-189]
_ = x[FnXdpStoreBytes-190]
_ = x[FnCopyFromUserTask-191]
_ = x[FnSkbSetTstamp-192]
_ = x[FnImaFileHash-193]
_ = x[FnKptrXchg-194]
_ = x[FnMapLookupPercpuElem-195]
_ = x[FnSkcToMptcpSock-196]
_ = x[FnDynptrFromMem-197]
_ = x[FnRingbufReserveDynptr-198]
_ = x[FnRingbufSubmitDynptr-199]
_ = x[FnRingbufDiscardDynptr-200]
_ = x[FnDynptrRead-201]
_ = x[FnDynptrWrite-202]
_ = x[FnDynptrData-203]
_ = x[maxBuiltinFunc-204]
}
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDatamaxBuiltinFunc"
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3011}
func (i BuiltinFunc) String() string {
if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]]
}

View file

@ -1,859 +0,0 @@
package asm
import (
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
"io"
"math"
"sort"
"strings"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// InstructionSize is the size of a BPF instruction in bytes
const InstructionSize = 8
// RawInstructionOffset is an offset in units of raw BPF instructions.
type RawInstructionOffset uint64
var ErrUnreferencedSymbol = errors.New("unreferenced symbol")
var ErrUnsatisfiedMapReference = errors.New("unsatisfied map reference")
var ErrUnsatisfiedProgramReference = errors.New("unsatisfied program reference")
// Bytes returns the offset of an instruction in bytes.
func (rio RawInstructionOffset) Bytes() uint64 {
return uint64(rio) * InstructionSize
}
// Instruction is a single eBPF instruction.
type Instruction struct {
OpCode OpCode
Dst Register
Src Register
Offset int16
Constant int64
// Metadata contains optional metadata about this instruction.
Metadata Metadata
}
// Unmarshal decodes a BPF instruction.
func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
data := make([]byte, InstructionSize)
if _, err := io.ReadFull(r, data); err != nil {
return 0, err
}
ins.OpCode = OpCode(data[0])
regs := data[1]
switch bo {
case binary.LittleEndian:
ins.Dst, ins.Src = Register(regs&0xF), Register(regs>>4)
case binary.BigEndian:
ins.Dst, ins.Src = Register(regs>>4), Register(regs&0xf)
}
ins.Offset = int16(bo.Uint16(data[2:4]))
// Convert to int32 before widening to int64
// to ensure the signed bit is carried over.
ins.Constant = int64(int32(bo.Uint32(data[4:8])))
if !ins.OpCode.IsDWordLoad() {
return InstructionSize, nil
}
// Pull another instruction from the stream to retrieve the second
// half of the 64-bit immediate value.
if _, err := io.ReadFull(r, data); err != nil {
// No Wrap, to avoid io.EOF clash
return 0, errors.New("64bit immediate is missing second half")
}
// Require that all fields other than the value are zero.
if bo.Uint32(data[0:4]) != 0 {
return 0, errors.New("64bit immediate has non-zero fields")
}
cons1 := uint32(ins.Constant)
cons2 := int32(bo.Uint32(data[4:8]))
ins.Constant = int64(cons2)<<32 | int64(cons1)
return 2 * InstructionSize, nil
}
// Marshal encodes a BPF instruction.
func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
if ins.OpCode == InvalidOpCode {
return 0, errors.New("invalid opcode")
}
isDWordLoad := ins.OpCode.IsDWordLoad()
cons := int32(ins.Constant)
if isDWordLoad {
// Encode least significant 32bit first for 64bit operations.
cons = int32(uint32(ins.Constant))
}
regs, err := newBPFRegisters(ins.Dst, ins.Src, bo)
if err != nil {
return 0, fmt.Errorf("can't marshal registers: %s", err)
}
data := make([]byte, InstructionSize)
data[0] = byte(ins.OpCode)
data[1] = byte(regs)
bo.PutUint16(data[2:4], uint16(ins.Offset))
bo.PutUint32(data[4:8], uint32(cons))
if _, err := w.Write(data); err != nil {
return 0, err
}
if !isDWordLoad {
return InstructionSize, nil
}
// The first half of the second part of a double-wide instruction
// must be zero. The second half carries the value.
bo.PutUint32(data[0:4], 0)
bo.PutUint32(data[4:8], uint32(ins.Constant>>32))
if _, err := w.Write(data); err != nil {
return 0, err
}
return 2 * InstructionSize, nil
}
// AssociateMap associates a Map with this Instruction.
//
// Implicitly clears the Instruction's Reference field.
//
// Returns an error if the Instruction is not a map load.
func (ins *Instruction) AssociateMap(m FDer) error {
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.Metadata.Set(referenceMeta{}, nil)
ins.Metadata.Set(mapMeta{}, m)
return nil
}
// RewriteMapPtr changes an instruction to use a new map fd.
//
// Returns an error if the instruction doesn't load a map.
//
// Deprecated: use AssociateMap instead. If you cannot provide a Map,
// wrap an fd in a type implementing FDer.
func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.encodeMapFD(fd)
return nil
}
func (ins *Instruction) encodeMapFD(fd int) {
// Preserve the offset value for direct map loads.
offset := uint64(ins.Constant) & (math.MaxUint32 << 32)
rawFd := uint64(uint32(fd))
ins.Constant = int64(offset | rawFd)
}
// MapPtr returns the map fd for this instruction.
//
// The result is undefined if the instruction is not a load from a map,
// see IsLoadFromMap.
//
// Deprecated: use Map() instead.
func (ins *Instruction) MapPtr() int {
// If there is a map associated with the instruction, return its FD.
if fd := ins.Metadata.Get(mapMeta{}); fd != nil {
return fd.(FDer).FD()
}
// Fall back to the fd stored in the Constant field
return ins.mapFd()
}
// mapFd returns the map file descriptor stored in the 32 least significant
// bits of ins' Constant field.
func (ins *Instruction) mapFd() int {
return int(int32(ins.Constant))
}
// RewriteMapOffset changes the offset of a direct load from a map.
//
// Returns an error if the instruction is not a direct load.
func (ins *Instruction) RewriteMapOffset(offset uint32) error {
if !ins.OpCode.IsDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
if ins.Src != PseudoMapValue {
return errors.New("not a direct load from a map")
}
fd := uint64(ins.Constant) & math.MaxUint32
ins.Constant = int64(uint64(offset)<<32 | fd)
return nil
}
func (ins *Instruction) mapOffset() uint32 {
return uint32(uint64(ins.Constant) >> 32)
}
// IsLoadFromMap returns true if the instruction loads from a map.
//
// This covers both loading the map pointer and direct map value loads.
func (ins *Instruction) IsLoadFromMap() bool {
return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue)
}
// IsFunctionCall returns true if the instruction calls another BPF function.
//
// This is not the same thing as a BPF helper call.
func (ins *Instruction) IsFunctionCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall
}
// IsLoadOfFunctionPointer returns true if the instruction loads a function pointer.
func (ins *Instruction) IsLoadOfFunctionPointer() bool {
return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc
}
// IsFunctionReference returns true if the instruction references another BPF
// function, either by invoking a Call jump operation or by loading a function
// pointer.
func (ins *Instruction) IsFunctionReference() bool {
return ins.IsFunctionCall() || ins.IsLoadOfFunctionPointer()
}
// IsBuiltinCall returns true if the instruction is a built-in call, i.e. BPF helper call.
func (ins *Instruction) IsBuiltinCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == R0 && ins.Dst == R0
}
// IsConstantLoad returns true if the instruction loads a constant of the
// given size.
func (ins *Instruction) IsConstantLoad(size Size) bool {
return ins.OpCode == LoadImmOp(size) && ins.Src == R0 && ins.Offset == 0
}
// Format implements fmt.Formatter.
func (ins Instruction) Format(f fmt.State, c rune) {
if c != 'v' {
fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
return
}
op := ins.OpCode
if op == InvalidOpCode {
fmt.Fprint(f, "INVALID")
return
}
// Omit trailing space for Exit
if op.JumpOp() == Exit {
fmt.Fprint(f, op)
return
}
if ins.IsLoadFromMap() {
fd := ins.mapFd()
m := ins.Map()
switch ins.Src {
case PseudoMapFD:
if m != nil {
fmt.Fprintf(f, "LoadMapPtr dst: %s map: %s", ins.Dst, m)
} else {
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
}
case PseudoMapValue:
if m != nil {
fmt.Fprintf(f, "LoadMapValue dst: %s, map: %s off: %d", ins.Dst, m, ins.mapOffset())
} else {
fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset())
}
}
goto ref
}
fmt.Fprintf(f, "%v ", op)
switch cls := op.Class(); {
case cls.isLoadOrStore():
switch op.Mode() {
case ImmMode:
fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
case AbsMode:
fmt.Fprintf(f, "imm: %d", ins.Constant)
case IndMode:
fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
case MemMode:
fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
case XAddMode:
fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
}
case cls.IsALU():
fmt.Fprintf(f, "dst: %s ", ins.Dst)
if op.ALUOp() == Swap || op.Source() == ImmSource {
fmt.Fprintf(f, "imm: %d", ins.Constant)
} else {
fmt.Fprintf(f, "src: %s", ins.Src)
}
case cls.IsJump():
switch jop := op.JumpOp(); jop {
case Call:
if ins.Src == PseudoCall {
// bpf-to-bpf call
fmt.Fprint(f, ins.Constant)
} else {
fmt.Fprint(f, BuiltinFunc(ins.Constant))
}
default:
fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
if op.Source() == ImmSource {
fmt.Fprintf(f, "imm: %d", ins.Constant)
} else {
fmt.Fprintf(f, "src: %s", ins.Src)
}
}
}
ref:
if ins.Reference() != "" {
fmt.Fprintf(f, " <%s>", ins.Reference())
}
}
func (ins Instruction) equal(other Instruction) bool {
return ins.OpCode == other.OpCode &&
ins.Dst == other.Dst &&
ins.Src == other.Src &&
ins.Offset == other.Offset &&
ins.Constant == other.Constant
}
// Size returns the amount of bytes ins would occupy in binary form.
func (ins Instruction) Size() uint64 {
return uint64(InstructionSize * ins.OpCode.rawInstructions())
}
type symbolMeta struct{}
// WithSymbol marks the Instruction as a Symbol, which other Instructions
// can point to using corresponding calls to WithReference.
func (ins Instruction) WithSymbol(name string) Instruction {
ins.Metadata.Set(symbolMeta{}, name)
return ins
}
// Sym creates a symbol.
//
// Deprecated: use WithSymbol instead.
func (ins Instruction) Sym(name string) Instruction {
return ins.WithSymbol(name)
}
// Symbol returns the value ins has been marked with using WithSymbol,
// otherwise returns an empty string. A symbol is often an Instruction
// at the start of a function body.
func (ins Instruction) Symbol() string {
sym, _ := ins.Metadata.Get(symbolMeta{}).(string)
return sym
}
type referenceMeta struct{}
// WithReference makes ins reference another Symbol or map by name.
func (ins Instruction) WithReference(ref string) Instruction {
ins.Metadata.Set(referenceMeta{}, ref)
return ins
}
// Reference returns the Symbol or map name referenced by ins, if any.
func (ins Instruction) Reference() string {
ref, _ := ins.Metadata.Get(referenceMeta{}).(string)
return ref
}
type mapMeta struct{}
// Map returns the Map referenced by ins, if any.
// An Instruction will contain a Map if e.g. it references an existing,
// pinned map that was opened during ELF loading.
func (ins Instruction) Map() FDer {
fd, _ := ins.Metadata.Get(mapMeta{}).(FDer)
return fd
}
type sourceMeta struct{}
// WithSource adds source information about the Instruction.
func (ins Instruction) WithSource(src fmt.Stringer) Instruction {
ins.Metadata.Set(sourceMeta{}, src)
return ins
}
// Source returns source information about the Instruction. The field is
// present when the compiler emits BTF line info about the Instruction and
// usually contains the line of source code responsible for it.
func (ins Instruction) Source() fmt.Stringer {
str, _ := ins.Metadata.Get(sourceMeta{}).(fmt.Stringer)
return str
}
// A Comment can be passed to Instruction.WithSource to add a comment
// to an instruction.
type Comment string
func (s Comment) String() string {
return string(s)
}
// FDer represents a resource tied to an underlying file descriptor.
// Used as a stand-in for e.g. ebpf.Map since that type cannot be
// imported here and FD() is the only method we rely on.
type FDer interface {
FD() int
}
// Instructions is an eBPF program.
type Instructions []Instruction
// Unmarshal unmarshals an Instructions from a binary instruction stream.
// All instructions in insns are replaced by instructions decoded from r.
func (insns *Instructions) Unmarshal(r io.Reader, bo binary.ByteOrder) error {
if len(*insns) > 0 {
*insns = nil
}
var offset uint64
for {
var ins Instruction
n, err := ins.Unmarshal(r, bo)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("offset %d: %w", offset, err)
}
*insns = append(*insns, ins)
offset += n
}
return nil
}
// Name returns the name of the function insns belongs to, if any.
func (insns Instructions) Name() string {
if len(insns) == 0 {
return ""
}
return insns[0].Symbol()
}
func (insns Instructions) String() string {
return fmt.Sprint(insns)
}
// Size returns the amount of bytes insns would occupy in binary form.
func (insns Instructions) Size() uint64 {
var sum uint64
for _, ins := range insns {
sum += ins.Size()
}
return sum
}
// AssociateMap updates all Instructions that Reference the given symbol
// to point to an existing Map m instead.
//
// Returns ErrUnreferencedSymbol error if no references to symbol are found
// in insns. If symbol is anything else than the symbol name of map (e.g.
// a bpf2bpf subprogram), an error is returned.
func (insns Instructions) AssociateMap(symbol string, m FDer) error {
if symbol == "" {
return errors.New("empty symbol")
}
var found bool
for i := range insns {
ins := &insns[i]
if ins.Reference() != symbol {
continue
}
if err := ins.AssociateMap(m); err != nil {
return err
}
found = true
}
if !found {
return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol)
}
return nil
}
// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
//
// Returns ErrUnreferencedSymbol if the symbol isn't used.
//
// Deprecated: use AssociateMap instead.
func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
if symbol == "" {
return errors.New("empty symbol")
}
var found bool
for i := range insns {
ins := &insns[i]
if ins.Reference() != symbol {
continue
}
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.encodeMapFD(fd)
found = true
}
if !found {
return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol)
}
return nil
}
// SymbolOffsets returns the set of symbols and their offset in
// the instructions.
func (insns Instructions) SymbolOffsets() (map[string]int, error) {
offsets := make(map[string]int)
for i, ins := range insns {
if ins.Symbol() == "" {
continue
}
if _, ok := offsets[ins.Symbol()]; ok {
return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol())
}
offsets[ins.Symbol()] = i
}
return offsets, nil
}
// FunctionReferences returns a set of symbol names these Instructions make
// bpf-to-bpf calls to.
func (insns Instructions) FunctionReferences() []string {
calls := make(map[string]struct{})
for _, ins := range insns {
if ins.Constant != -1 {
// BPF-to-BPF calls have -1 constants.
continue
}
if ins.Reference() == "" {
continue
}
if !ins.IsFunctionReference() {
continue
}
calls[ins.Reference()] = struct{}{}
}
result := make([]string, 0, len(calls))
for call := range calls {
result = append(result, call)
}
sort.Strings(result)
return result
}
// ReferenceOffsets returns the set of references and their offset in
// the instructions.
func (insns Instructions) ReferenceOffsets() map[string][]int {
offsets := make(map[string][]int)
for i, ins := range insns {
if ins.Reference() == "" {
continue
}
offsets[ins.Reference()] = append(offsets[ins.Reference()], i)
}
return offsets
}
// Format implements fmt.Formatter.
//
// You can control indentation of symbols by
// specifying a width. Setting a precision controls the indentation of
// instructions.
// The default character is a tab, which can be overridden by specifying
// the ' ' space flag.
func (insns Instructions) Format(f fmt.State, c rune) {
if c != 's' && c != 'v' {
fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
return
}
// Precision is better in this case, because it allows
// specifying 0 padding easily.
padding, ok := f.Precision()
if !ok {
padding = 1
}
indent := strings.Repeat("\t", padding)
if f.Flag(' ') {
indent = strings.Repeat(" ", padding)
}
symPadding, ok := f.Width()
if !ok {
symPadding = padding - 1
}
if symPadding < 0 {
symPadding = 0
}
symIndent := strings.Repeat("\t", symPadding)
if f.Flag(' ') {
symIndent = strings.Repeat(" ", symPadding)
}
// Guess how many digits we need at most, by assuming that all instructions
// are double wide.
highestOffset := len(insns) * 2
offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset))))
iter := insns.Iterate()
for iter.Next() {
if iter.Ins.Symbol() != "" {
fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol())
}
if src := iter.Ins.Source(); src != nil {
line := strings.TrimSpace(src.String())
if line != "" {
fmt.Fprintf(f, "%s%*s; %s\n", indent, offsetWidth, " ", line)
}
}
fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, iter.Offset, iter.Ins)
}
}
// Marshal encodes a BPF program into the kernel format.
//
// insns may be modified if there are unresolved jumps or bpf2bpf calls.
//
// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction
// without a matching Symbol Instruction within insns.
func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
if err := insns.encodeFunctionReferences(); err != nil {
return err
}
if err := insns.encodeMapPointers(); err != nil {
return err
}
for i, ins := range insns {
if _, err := ins.Marshal(w, bo); err != nil {
return fmt.Errorf("instruction %d: %w", i, err)
}
}
return nil
}
// Tag calculates the kernel tag for a series of instructions.
//
// It mirrors bpf_prog_calc_tag in the kernel and so can be compared
// to ProgramInfo.Tag to figure out whether a loaded program matches
// certain instructions.
func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) {
h := sha1.New()
for i, ins := range insns {
if ins.IsLoadFromMap() {
ins.Constant = 0
}
_, err := ins.Marshal(h, bo)
if err != nil {
return "", fmt.Errorf("instruction %d: %w", i, err)
}
}
return hex.EncodeToString(h.Sum(nil)[:unix.BPF_TAG_SIZE]), nil
}
// encodeFunctionReferences populates the Offset (or Constant, depending on
// the instruction type) field of instructions with a Reference field to point
// to the offset of the corresponding instruction with a matching Symbol field.
//
// Only Reference Instructions that are either jumps or BPF function references
// (calls or function pointer loads) are populated.
//
// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction
// without at least one corresponding Symbol Instruction within insns.
func (insns Instructions) encodeFunctionReferences() error {
// Index the offsets of instructions tagged as a symbol.
symbolOffsets := make(map[string]RawInstructionOffset)
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
if ins.Symbol() == "" {
continue
}
if _, ok := symbolOffsets[ins.Symbol()]; ok {
return fmt.Errorf("duplicate symbol %s", ins.Symbol())
}
symbolOffsets[ins.Symbol()] = iter.Offset
}
// Find all instructions tagged as references to other symbols.
// Depending on the instruction type, populate their constant or offset
// fields to point to the symbol they refer to within the insn stream.
iter = insns.Iterate()
for iter.Next() {
i := iter.Index
offset := iter.Offset
ins := iter.Ins
if ins.Reference() == "" {
continue
}
switch {
case ins.IsFunctionReference() && ins.Constant == -1:
symOffset, ok := symbolOffsets[ins.Reference()]
if !ok {
return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference)
}
ins.Constant = int64(symOffset - offset - 1)
case ins.OpCode.Class().IsJump() && ins.Offset == -1:
symOffset, ok := symbolOffsets[ins.Reference()]
if !ok {
return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference)
}
ins.Offset = int16(symOffset - offset - 1)
}
}
return nil
}
// encodeMapPointers finds all Map Instructions and encodes their FDs
// into their Constant fields.
func (insns Instructions) encodeMapPointers() error {
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
if !ins.IsLoadFromMap() {
continue
}
m := ins.Map()
if m == nil {
continue
}
fd := m.FD()
if fd < 0 {
return fmt.Errorf("map %s: %w", m, sys.ErrClosedFd)
}
ins.encodeMapFD(m.FD())
}
return nil
}
// Iterate allows iterating a BPF program while keeping track of
// various offsets.
//
// Modifying the instruction slice will lead to undefined behaviour.
func (insns Instructions) Iterate() *InstructionIterator {
return &InstructionIterator{insns: insns}
}
// InstructionIterator iterates over a BPF program.
type InstructionIterator struct {
insns Instructions
// The instruction in question.
Ins *Instruction
// The index of the instruction in the original instruction slice.
Index int
// The offset of the instruction in raw BPF instructions. This accounts
// for double-wide instructions.
Offset RawInstructionOffset
}
// Next returns true as long as there are any instructions remaining.
func (iter *InstructionIterator) Next() bool {
if len(iter.insns) == 0 {
return false
}
if iter.Ins != nil {
iter.Index++
iter.Offset += RawInstructionOffset(iter.Ins.OpCode.rawInstructions())
}
iter.Ins = &iter.insns[0]
iter.insns = iter.insns[1:]
return true
}
type bpfRegisters uint8
func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) {
switch bo {
case binary.LittleEndian:
return bpfRegisters((src << 4) | (dst & 0xF)), nil
case binary.BigEndian:
return bpfRegisters((dst << 4) | (src & 0xF)), nil
default:
return 0, fmt.Errorf("unrecognized ByteOrder %T", bo)
}
}
// IsUnreferencedSymbol returns true if err was caused by
// an unreferenced symbol.
//
// Deprecated: use errors.Is(err, asm.ErrUnreferencedSymbol).
func IsUnreferencedSymbol(err error) bool {
return errors.Is(err, ErrUnreferencedSymbol)
}

View file

@ -1,127 +0,0 @@
package asm
//go:generate stringer -output jump_string.go -type=JumpOp
// JumpOp affect control flow.
//
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
type JumpOp uint8
const jumpMask OpCode = aluMask
const (
// InvalidJumpOp is returned by getters when invoked
// on non branch OpCodes
InvalidJumpOp JumpOp = 0xff
// Ja jumps by offset unconditionally
Ja JumpOp = 0x00
// JEq jumps by offset if r == imm
JEq JumpOp = 0x10
// JGT jumps by offset if r > imm
JGT JumpOp = 0x20
// JGE jumps by offset if r >= imm
JGE JumpOp = 0x30
// JSet jumps by offset if r & imm
JSet JumpOp = 0x40
// JNE jumps by offset if r != imm
JNE JumpOp = 0x50
// JSGT jumps by offset if signed r > signed imm
JSGT JumpOp = 0x60
// JSGE jumps by offset if signed r >= signed imm
JSGE JumpOp = 0x70
// Call builtin or user defined function from imm
Call JumpOp = 0x80
// Exit ends execution, with value in r0
Exit JumpOp = 0x90
// JLT jumps by offset if r < imm
JLT JumpOp = 0xa0
// JLE jumps by offset if r <= imm
JLE JumpOp = 0xb0
// JSLT jumps by offset if signed r < signed imm
JSLT JumpOp = 0xc0
// JSLE jumps by offset if signed r <= signed imm
JSLE JumpOp = 0xd0
)
// Return emits an exit instruction.
//
// Requires a return value in R0.
func Return() Instruction {
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(Exit),
}
}
// Op returns the OpCode for a given jump source.
func (op JumpOp) Op(source Source) OpCode {
return OpCode(JumpClass).SetJumpOp(op).SetSource(source)
}
// Imm compares 64 bit dst to 64 bit value (sign extended), and adjusts PC by offset if the condition is fulfilled.
func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
return Instruction{
OpCode: op.opCode(JumpClass, ImmSource),
Dst: dst,
Offset: -1,
Constant: int64(value),
}.WithReference(label)
}
// Imm32 compares 32 bit dst to 32 bit value, and adjusts PC by offset if the condition is fulfilled.
// Requires kernel 5.1.
func (op JumpOp) Imm32(dst Register, value int32, label string) Instruction {
return Instruction{
OpCode: op.opCode(Jump32Class, ImmSource),
Dst: dst,
Offset: -1,
Constant: int64(value),
}.WithReference(label)
}
// Reg compares 64 bit dst to 64 bit src, and adjusts PC by offset if the condition is fulfilled.
func (op JumpOp) Reg(dst, src Register, label string) Instruction {
return Instruction{
OpCode: op.opCode(JumpClass, RegSource),
Dst: dst,
Src: src,
Offset: -1,
}.WithReference(label)
}
// Reg32 compares 32 bit dst to 32 bit src, and adjusts PC by offset if the condition is fulfilled.
// Requires kernel 5.1.
func (op JumpOp) Reg32(dst, src Register, label string) Instruction {
return Instruction{
OpCode: op.opCode(Jump32Class, RegSource),
Dst: dst,
Src: src,
Offset: -1,
}.WithReference(label)
}
func (op JumpOp) opCode(class Class, source Source) OpCode {
if op == Exit || op == Call || op == Ja {
return InvalidOpCode
}
return OpCode(class).SetJumpOp(op).SetSource(source)
}
// Label adjusts PC to the address of the label.
func (op JumpOp) Label(label string) Instruction {
if op == Call {
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(Call),
Src: PseudoCall,
Constant: -1,
}.WithReference(label)
}
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(op),
Offset: -1,
}.WithReference(label)
}

View file

@ -1,53 +0,0 @@
// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.
package asm
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidJumpOp-255]
_ = x[Ja-0]
_ = x[JEq-16]
_ = x[JGT-32]
_ = x[JGE-48]
_ = x[JSet-64]
_ = x[JNE-80]
_ = x[JSGT-96]
_ = x[JSGE-112]
_ = x[Call-128]
_ = x[Exit-144]
_ = x[JLT-160]
_ = x[JLE-176]
_ = x[JSLT-192]
_ = x[JSLE-208]
}
const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"
var _JumpOp_map = map[JumpOp]string{
0: _JumpOp_name[0:2],
16: _JumpOp_name[2:5],
32: _JumpOp_name[5:8],
48: _JumpOp_name[8:11],
64: _JumpOp_name[11:15],
80: _JumpOp_name[15:18],
96: _JumpOp_name[18:22],
112: _JumpOp_name[22:26],
128: _JumpOp_name[26:30],
144: _JumpOp_name[30:34],
160: _JumpOp_name[34:37],
176: _JumpOp_name[37:40],
192: _JumpOp_name[40:44],
208: _JumpOp_name[44:48],
255: _JumpOp_name[48:61],
}
func (i JumpOp) String() string {
if str, ok := _JumpOp_map[i]; ok {
return str
}
return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
}

View file

@ -1,204 +0,0 @@
package asm
//go:generate stringer -output load_store_string.go -type=Mode,Size
// Mode for load and store operations
//
// msb lsb
// +---+--+---+
// |MDE|sz|cls|
// +---+--+---+
type Mode uint8
const modeMask OpCode = 0xe0
const (
// InvalidMode is returned by getters when invoked
// on non load / store OpCodes
InvalidMode Mode = 0xff
// ImmMode - immediate value
ImmMode Mode = 0x00
// AbsMode - immediate value + offset
AbsMode Mode = 0x20
// IndMode - indirect (imm+src)
IndMode Mode = 0x40
// MemMode - load from memory
MemMode Mode = 0x60
// XAddMode - add atomically across processors.
XAddMode Mode = 0xc0
)
// Size of load and store operations
//
// msb lsb
// +---+--+---+
// |mde|SZ|cls|
// +---+--+---+
type Size uint8
const sizeMask OpCode = 0x18
const (
// InvalidSize is returned by getters when invoked
// on non load / store OpCodes
InvalidSize Size = 0xff
// DWord - double word; 64 bits
DWord Size = 0x18
// Word - word; 32 bits
Word Size = 0x00
// Half - half-word; 16 bits
Half Size = 0x08
// Byte - byte; 8 bits
Byte Size = 0x10
)
// Sizeof returns the size in bytes.
func (s Size) Sizeof() int {
switch s {
case DWord:
return 8
case Word:
return 4
case Half:
return 2
case Byte:
return 1
default:
return -1
}
}
// LoadMemOp returns the OpCode to load a value of given size from memory.
func LoadMemOp(size Size) OpCode {
return OpCode(LdXClass).SetMode(MemMode).SetSize(size)
}
// LoadMem emits `dst = *(size *)(src + offset)`.
func LoadMem(dst, src Register, offset int16, size Size) Instruction {
return Instruction{
OpCode: LoadMemOp(size),
Dst: dst,
Src: src,
Offset: offset,
}
}
// LoadImmOp returns the OpCode to load an immediate of given size.
//
// As of kernel 4.20, only DWord size is accepted.
func LoadImmOp(size Size) OpCode {
return OpCode(LdClass).SetMode(ImmMode).SetSize(size)
}
// LoadImm emits `dst = (size)value`.
//
// As of kernel 4.20, only DWord size is accepted.
func LoadImm(dst Register, value int64, size Size) Instruction {
return Instruction{
OpCode: LoadImmOp(size),
Dst: dst,
Constant: value,
}
}
// LoadMapPtr stores a pointer to a map in dst.
func LoadMapPtr(dst Register, fd int) Instruction {
if fd < 0 {
return Instruction{OpCode: InvalidOpCode}
}
return Instruction{
OpCode: LoadImmOp(DWord),
Dst: dst,
Src: PseudoMapFD,
Constant: int64(uint32(fd)),
}
}
// LoadMapValue stores a pointer to the value at a certain offset of a map.
func LoadMapValue(dst Register, fd int, offset uint32) Instruction {
if fd < 0 {
return Instruction{OpCode: InvalidOpCode}
}
fdAndOffset := (uint64(offset) << 32) | uint64(uint32(fd))
return Instruction{
OpCode: LoadImmOp(DWord),
Dst: dst,
Src: PseudoMapValue,
Constant: int64(fdAndOffset),
}
}
// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
func LoadIndOp(size Size) OpCode {
return OpCode(LdClass).SetMode(IndMode).SetSize(size)
}
// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
func LoadInd(dst, src Register, offset int32, size Size) Instruction {
return Instruction{
OpCode: LoadIndOp(size),
Dst: dst,
Src: src,
Constant: int64(offset),
}
}
// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff.
func LoadAbsOp(size Size) OpCode {
return OpCode(LdClass).SetMode(AbsMode).SetSize(size)
}
// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
func LoadAbs(offset int32, size Size) Instruction {
return Instruction{
OpCode: LoadAbsOp(size),
Dst: R0,
Constant: int64(offset),
}
}
// StoreMemOp returns the OpCode for storing a register of given size in memory.
func StoreMemOp(size Size) OpCode {
return OpCode(StXClass).SetMode(MemMode).SetSize(size)
}
// StoreMem emits `*(size *)(dst + offset) = src`
func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
return Instruction{
OpCode: StoreMemOp(size),
Dst: dst,
Src: src,
Offset: offset,
}
}
// StoreImmOp returns the OpCode for storing an immediate of given size in memory.
func StoreImmOp(size Size) OpCode {
return OpCode(StClass).SetMode(MemMode).SetSize(size)
}
// StoreImm emits `*(size *)(dst + offset) = value`.
func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
return Instruction{
OpCode: StoreImmOp(size),
Dst: dst,
Offset: offset,
Constant: value,
}
}
// StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
func StoreXAddOp(size Size) OpCode {
return OpCode(StXClass).SetMode(XAddMode).SetSize(size)
}
// StoreXAdd atomically adds src to *dst.
func StoreXAdd(dst, src Register, size Size) Instruction {
return Instruction{
OpCode: StoreXAddOp(size),
Dst: dst,
Src: src,
}
}

View file

@ -1,80 +0,0 @@
// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.
package asm
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidMode-255]
_ = x[ImmMode-0]
_ = x[AbsMode-32]
_ = x[IndMode-64]
_ = x[MemMode-96]
_ = x[XAddMode-192]
}
const (
_Mode_name_0 = "ImmMode"
_Mode_name_1 = "AbsMode"
_Mode_name_2 = "IndMode"
_Mode_name_3 = "MemMode"
_Mode_name_4 = "XAddMode"
_Mode_name_5 = "InvalidMode"
)
func (i Mode) String() string {
switch {
case i == 0:
return _Mode_name_0
case i == 32:
return _Mode_name_1
case i == 64:
return _Mode_name_2
case i == 96:
return _Mode_name_3
case i == 192:
return _Mode_name_4
case i == 255:
return _Mode_name_5
default:
return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
}
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidSize-255]
_ = x[DWord-24]
_ = x[Word-0]
_ = x[Half-8]
_ = x[Byte-16]
}
const (
_Size_name_0 = "Word"
_Size_name_1 = "Half"
_Size_name_2 = "Byte"
_Size_name_3 = "DWord"
_Size_name_4 = "InvalidSize"
)
func (i Size) String() string {
switch {
case i == 0:
return _Size_name_0
case i == 8:
return _Size_name_1
case i == 16:
return _Size_name_2
case i == 24:
return _Size_name_3
case i == 255:
return _Size_name_4
default:
return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
}
}

View file

@ -1,80 +0,0 @@
package asm
// Metadata contains metadata about an instruction.
type Metadata struct {
head *metaElement
}
type metaElement struct {
next *metaElement
key, value interface{}
}
// Find the element containing key.
//
// Returns nil if there is no such element.
func (m *Metadata) find(key interface{}) *metaElement {
for e := m.head; e != nil; e = e.next {
if e.key == key {
return e
}
}
return nil
}
// Remove an element from the linked list.
//
// Copies as many elements of the list as necessary to remove r, but doesn't
// perform a full copy.
func (m *Metadata) remove(r *metaElement) {
current := &m.head
for e := m.head; e != nil; e = e.next {
if e == r {
// We've found the element we want to remove.
*current = e.next
// No need to copy the tail.
return
}
// There is another element in front of the one we want to remove.
// We have to copy it to be able to change metaElement.next.
cpy := &metaElement{key: e.key, value: e.value}
*current = cpy
current = &cpy.next
}
}
// Set a key to a value.
//
// If value is nil, the key is removed. Avoids modifying old metadata by
// copying if necessary.
func (m *Metadata) Set(key, value interface{}) {
if e := m.find(key); e != nil {
if e.value == value {
// Key is present and the value is the same. Nothing to do.
return
}
// Key is present with a different value. Create a copy of the list
// which doesn't have the element in it.
m.remove(e)
}
// m.head is now a linked list that doesn't contain key.
if value == nil {
return
}
m.head = &metaElement{key: key, value: value, next: m.head}
}
// Get the value of a key.
//
// Returns nil if no value with the given key is present.
func (m *Metadata) Get(key interface{}) interface{} {
if e := m.find(key); e != nil {
return e.value
}
return nil
}

View file

@ -1,271 +0,0 @@
package asm
import (
"fmt"
"strings"
)
//go:generate stringer -output opcode_string.go -type=Class
// Class of operations
//
// msb lsb
// +---+--+---+
// | ?? |CLS|
// +---+--+---+
type Class uint8
const classMask OpCode = 0x07
const (
// LdClass loads immediate values into registers.
// Also used for non-standard load operations from cBPF.
LdClass Class = 0x00
// LdXClass loads memory into registers.
LdXClass Class = 0x01
// StClass stores immediate values to memory.
StClass Class = 0x02
// StXClass stores registers to memory.
StXClass Class = 0x03
// ALUClass describes arithmetic operators.
ALUClass Class = 0x04
// JumpClass describes jump operators.
JumpClass Class = 0x05
// Jump32Class describes jump operators with 32-bit comparisons.
// Requires kernel 5.1.
Jump32Class Class = 0x06
// ALU64Class describes arithmetic operators in 64-bit mode.
ALU64Class Class = 0x07
)
// IsLoad checks if this is either LdClass or LdXClass.
func (cls Class) IsLoad() bool {
return cls == LdClass || cls == LdXClass
}
// IsStore checks if this is either StClass or StXClass.
func (cls Class) IsStore() bool {
return cls == StClass || cls == StXClass
}
func (cls Class) isLoadOrStore() bool {
return cls.IsLoad() || cls.IsStore()
}
// IsALU checks if this is either ALUClass or ALU64Class.
func (cls Class) IsALU() bool {
return cls == ALUClass || cls == ALU64Class
}
// IsJump checks if this is either JumpClass or Jump32Class.
func (cls Class) IsJump() bool {
return cls == JumpClass || cls == Jump32Class
}
func (cls Class) isJumpOrALU() bool {
return cls.IsJump() || cls.IsALU()
}
// OpCode is a packed eBPF opcode.
//
// Its encoding is defined by a Class value:
//
// msb lsb
// +----+-+---+
// | ???? |CLS|
// +----+-+---+
type OpCode uint8
// InvalidOpCode is returned by setters on OpCode
const InvalidOpCode OpCode = 0xff
// rawInstructions returns the number of BPF instructions required
// to encode this opcode.
func (op OpCode) rawInstructions() int {
if op.IsDWordLoad() {
return 2
}
return 1
}
func (op OpCode) IsDWordLoad() bool {
return op == LoadImmOp(DWord)
}
// Class returns the class of operation.
func (op OpCode) Class() Class {
return Class(op & classMask)
}
// Mode returns the mode for load and store operations.
func (op OpCode) Mode() Mode {
if !op.Class().isLoadOrStore() {
return InvalidMode
}
return Mode(op & modeMask)
}
// Size returns the size for load and store operations.
func (op OpCode) Size() Size {
if !op.Class().isLoadOrStore() {
return InvalidSize
}
return Size(op & sizeMask)
}
// Source returns the source for branch and ALU operations.
func (op OpCode) Source() Source {
if !op.Class().isJumpOrALU() || op.ALUOp() == Swap {
return InvalidSource
}
return Source(op & sourceMask)
}
// ALUOp returns the ALUOp.
func (op OpCode) ALUOp() ALUOp {
if !op.Class().IsALU() {
return InvalidALUOp
}
return ALUOp(op & aluMask)
}
// Endianness returns the Endianness for a byte swap instruction.
func (op OpCode) Endianness() Endianness {
if op.ALUOp() != Swap {
return InvalidEndian
}
return Endianness(op & endianMask)
}
// JumpOp returns the JumpOp.
// Returns InvalidJumpOp if it doesn't encode a jump.
func (op OpCode) JumpOp() JumpOp {
if !op.Class().IsJump() {
return InvalidJumpOp
}
jumpOp := JumpOp(op & jumpMask)
// Some JumpOps are only supported by JumpClass, not Jump32Class.
if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call || jumpOp == Ja) {
return InvalidJumpOp
}
return jumpOp
}
// SetMode sets the mode on load and store operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetMode(mode Mode) OpCode {
if !op.Class().isLoadOrStore() || !valid(OpCode(mode), modeMask) {
return InvalidOpCode
}
return (op & ^modeMask) | OpCode(mode)
}
// SetSize sets the size on load and store operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetSize(size Size) OpCode {
if !op.Class().isLoadOrStore() || !valid(OpCode(size), sizeMask) {
return InvalidOpCode
}
return (op & ^sizeMask) | OpCode(size)
}
// SetSource sets the source on jump and ALU operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetSource(source Source) OpCode {
if !op.Class().isJumpOrALU() || !valid(OpCode(source), sourceMask) {
return InvalidOpCode
}
return (op & ^sourceMask) | OpCode(source)
}
// SetALUOp sets the ALUOp on ALU operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetALUOp(alu ALUOp) OpCode {
if !op.Class().IsALU() || !valid(OpCode(alu), aluMask) {
return InvalidOpCode
}
return (op & ^aluMask) | OpCode(alu)
}
// SetJumpOp sets the JumpOp on jump operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
if !op.Class().IsJump() || !valid(OpCode(jump), jumpMask) {
return InvalidOpCode
}
newOp := (op & ^jumpMask) | OpCode(jump)
// Check newOp is legal.
if newOp.JumpOp() == InvalidJumpOp {
return InvalidOpCode
}
return newOp
}
func (op OpCode) String() string {
var f strings.Builder
switch class := op.Class(); {
case class.isLoadOrStore():
f.WriteString(strings.TrimSuffix(class.String(), "Class"))
mode := op.Mode()
f.WriteString(strings.TrimSuffix(mode.String(), "Mode"))
switch op.Size() {
case DWord:
f.WriteString("DW")
case Word:
f.WriteString("W")
case Half:
f.WriteString("H")
case Byte:
f.WriteString("B")
}
case class.IsALU():
f.WriteString(op.ALUOp().String())
if op.ALUOp() == Swap {
// Width for Endian is controlled by Constant
f.WriteString(op.Endianness().String())
} else {
if class == ALUClass {
f.WriteString("32")
}
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}
case class.IsJump():
f.WriteString(op.JumpOp().String())
if class == Jump32Class {
f.WriteString("32")
}
if jop := op.JumpOp(); jop != Exit && jop != Call {
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}
default:
fmt.Fprintf(&f, "OpCode(%#x)", uint8(op))
}
return f.String()
}
// valid returns true if all bits in value are covered by mask.
func valid(value, mask OpCode) bool {
return value & ^mask == 0
}

View file

@ -1,30 +0,0 @@
// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.
package asm
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[LdClass-0]
_ = x[LdXClass-1]
_ = x[StClass-2]
_ = x[StXClass-3]
_ = x[ALUClass-4]
_ = x[JumpClass-5]
_ = x[Jump32Class-6]
_ = x[ALU64Class-7]
}
const _Class_name = "LdClassLdXClassStClassStXClassALUClassJumpClassJump32ClassALU64Class"
var _Class_index = [...]uint8{0, 7, 15, 22, 30, 38, 47, 58, 68}
func (i Class) String() string {
if i >= Class(len(_Class_index)-1) {
return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Class_name[_Class_index[i]:_Class_index[i+1]]
}

View file

@ -1,50 +0,0 @@
package asm
import (
"fmt"
)
// Register is the source or destination of most operations.
type Register uint8
// R0 contains return values.
const R0 Register = 0
// Registers for function arguments.
const (
R1 Register = R0 + 1 + iota
R2
R3
R4
R5
)
// Callee saved registers preserved by function calls.
const (
R6 Register = R5 + 1 + iota
R7
R8
R9
)
// Read-only frame pointer to access stack.
const (
R10 Register = R9 + 1
RFP = R10
)
// Pseudo registers used by 64bit loads and jumps
const (
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
PseudoCall = R1 // BPF_PSEUDO_CALL
PseudoFunc = R4 // BPF_PSEUDO_FUNC
)
func (r Register) String() string {
v := uint8(r)
if v == 10 {
return "rfp"
}
return fmt.Sprintf("r%d", v)
}

View file

@ -1,65 +0,0 @@
// Code generated by "stringer -type AttachType -trimprefix Attach"; DO NOT EDIT.
package ebpf
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[AttachNone-0]
_ = x[AttachCGroupInetIngress-0]
_ = x[AttachCGroupInetEgress-1]
_ = x[AttachCGroupInetSockCreate-2]
_ = x[AttachCGroupSockOps-3]
_ = x[AttachSkSKBStreamParser-4]
_ = x[AttachSkSKBStreamVerdict-5]
_ = x[AttachCGroupDevice-6]
_ = x[AttachSkMsgVerdict-7]
_ = x[AttachCGroupInet4Bind-8]
_ = x[AttachCGroupInet6Bind-9]
_ = x[AttachCGroupInet4Connect-10]
_ = x[AttachCGroupInet6Connect-11]
_ = x[AttachCGroupInet4PostBind-12]
_ = x[AttachCGroupInet6PostBind-13]
_ = x[AttachCGroupUDP4Sendmsg-14]
_ = x[AttachCGroupUDP6Sendmsg-15]
_ = x[AttachLircMode2-16]
_ = x[AttachFlowDissector-17]
_ = x[AttachCGroupSysctl-18]
_ = x[AttachCGroupUDP4Recvmsg-19]
_ = x[AttachCGroupUDP6Recvmsg-20]
_ = x[AttachCGroupGetsockopt-21]
_ = x[AttachCGroupSetsockopt-22]
_ = x[AttachTraceRawTp-23]
_ = x[AttachTraceFEntry-24]
_ = x[AttachTraceFExit-25]
_ = x[AttachModifyReturn-26]
_ = x[AttachLSMMac-27]
_ = x[AttachTraceIter-28]
_ = x[AttachCgroupInet4GetPeername-29]
_ = x[AttachCgroupInet6GetPeername-30]
_ = x[AttachCgroupInet4GetSockname-31]
_ = x[AttachCgroupInet6GetSockname-32]
_ = x[AttachXDPDevMap-33]
_ = x[AttachCgroupInetSockRelease-34]
_ = x[AttachXDPCPUMap-35]
_ = x[AttachSkLookup-36]
_ = x[AttachXDP-37]
_ = x[AttachSkSKBVerdict-38]
_ = x[AttachSkReuseportSelect-39]
_ = x[AttachSkReuseportSelectOrMigrate-40]
_ = x[AttachPerfEvent-41]
}
const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEvent"
var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610}
func (i AttachType) String() string {
if i >= AttachType(len(_AttachType_index)-1) {
return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _AttachType_name[_AttachType_index[i]:_AttachType_index[i+1]]
}

View file

@ -1,897 +0,0 @@
package btf
import (
"bufio"
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"reflect"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
const btfMagic = 0xeB9F
// Errors returned by BTF functions.
var (
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
)
// ID represents the unique ID of a BTF object.
type ID = sys.BTFID
// Spec represents decoded BTF.
type Spec struct {
// Data from .BTF.
rawTypes []rawType
strings *stringTable
// All types contained by the spec. For the base type, the position of
// a type in the slice is its ID.
types types
// Type IDs indexed by type.
typeIDs map[Type]TypeID
// Types indexed by essential name.
// Includes all struct flavors and types with the same name.
namedTypes map[essentialName][]Type
byteOrder binary.ByteOrder
}
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// typeStart returns the offset from the beginning of the .BTF section
// to the start of its type entries.
func (h *btfHeader) typeStart() int64 {
return int64(h.HdrLen + h.TypeOff)
}
// stringStart returns the offset from the beginning of the .BTF section
// to the start of its string table.
func (h *btfHeader) stringStart() int64 {
return int64(h.HdrLen + h.StringOff)
}
// LoadSpec opens file and calls LoadSpecFromReader on it.
func LoadSpec(file string) (*Spec, error) {
fh, err := os.Open(file)
if err != nil {
return nil, err
}
defer fh.Close()
return LoadSpecFromReader(fh)
}
// LoadSpecFromReader reads from an ELF or a raw BTF blob.
//
// Returns ErrNotFound if reading from an ELF which contains no BTF. ExtInfos
// may be nil.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
if bo := guessRawBTFByteOrder(rd); bo != nil {
// Try to parse a naked BTF blob. This will return an error if
// we encounter a Datasec, since we can't fix it up.
spec, err := loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil, nil)
return spec, err
}
return nil, err
}
return loadSpecFromELF(file)
}
// LoadSpecAndExtInfosFromReader reads from an ELF.
//
// ExtInfos may be nil if the ELF doesn't contain section metadta.
// Returns ErrNotFound if the ELF contains no BTF.
func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
return nil, nil, err
}
spec, err := loadSpecFromELF(file)
if err != nil {
return nil, nil, err
}
extInfos, err := loadExtInfosFromELF(file, spec.types, spec.strings)
if err != nil && !errors.Is(err, ErrNotFound) {
return nil, nil, err
}
return spec, extInfos, nil
}
// variableOffsets extracts all symbols offsets from an ELF and indexes them by
// section and variable name.
//
// References to variables in BTF data sections carry unsigned 32-bit offsets.
// Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well
// beyond this range. Since these symbols cannot be described by BTF info,
// ignore them here.
func variableOffsets(file *internal.SafeELFFile) (map[variable]uint32, error) {
symbols, err := file.Symbols()
if err != nil {
return nil, fmt.Errorf("can't read symbols: %v", err)
}
variableOffsets := make(map[variable]uint32)
for _, symbol := range symbols {
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
// Ignore things like SHN_ABS
continue
}
if symbol.Value > math.MaxUint32 {
// VarSecinfo offset is u32, cannot reference symbols in higher regions.
continue
}
if int(symbol.Section) >= len(file.Sections) {
return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section)
}
secName := file.Sections[symbol.Section].Name
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
}
return variableOffsets, nil
}
func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) {
var (
btfSection *elf.Section
sectionSizes = make(map[string]uint32)
)
for _, sec := range file.Sections {
switch sec.Name {
case ".BTF":
btfSection = sec
default:
if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS {
break
}
if sec.Size > math.MaxUint32 {
return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name)
}
sectionSizes[sec.Name] = uint32(sec.Size)
}
}
if btfSection == nil {
return nil, fmt.Errorf("btf: %w", ErrNotFound)
}
vars, err := variableOffsets(file)
if err != nil {
return nil, err
}
if btfSection.ReaderAt == nil {
return nil, fmt.Errorf("compressed BTF is not supported")
}
rawTypes, rawStrings, err := parseBTF(btfSection.ReaderAt, file.ByteOrder, nil)
if err != nil {
return nil, err
}
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, vars)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, file.ByteOrder, nil)
}
func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder,
baseTypes types, baseStrings *stringTable) (*Spec, error) {
rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, bo, baseTypes)
}
func inflateSpec(rawTypes []rawType, rawStrings *stringTable, bo binary.ByteOrder,
baseTypes types) (*Spec, error) {
types, err := inflateRawTypes(rawTypes, baseTypes, rawStrings)
if err != nil {
return nil, err
}
typeIDs, typesByName := indexTypes(types, TypeID(len(baseTypes)))
return &Spec{
rawTypes: rawTypes,
namedTypes: typesByName,
typeIDs: typeIDs,
types: types,
strings: rawStrings,
byteOrder: bo,
}, nil
}
func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essentialName][]Type) {
namedTypes := 0
for _, typ := range types {
if typ.TypeName() != "" {
// Do a pre-pass to figure out how big types by name has to be.
// Most types have unique names, so it's OK to ignore essentialName
// here.
namedTypes++
}
}
typeIDs := make(map[Type]TypeID, len(types))
typesByName := make(map[essentialName][]Type, namedTypes)
for i, typ := range types {
if name := newEssentialName(typ.TypeName()); name != "" {
typesByName[name] = append(typesByName[name], typ)
}
typeIDs[typ] = TypeID(i) + typeIDOffset
}
return typeIDs, typesByName
}
// LoadKernelSpec returns the current kernel's BTF information.
//
// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system
// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled.
func LoadKernelSpec() (*Spec, error) {
fh, err := os.Open("/sys/kernel/btf/vmlinux")
if err == nil {
defer fh.Close()
return loadRawSpec(fh, internal.NativeEndian, nil, nil)
}
file, err := findVMLinux()
if err != nil {
return nil, err
}
defer file.Close()
return loadSpecFromELF(file)
}
// findVMLinux scans multiple well-known paths for vmlinux kernel images.
func findVMLinux() (*internal.SafeELFFile, error) {
release, err := internal.KernelRelease()
if err != nil {
return nil, err
}
// use same list of locations as libbpf
// https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122
locations := []string{
"/boot/vmlinux-%s",
"/lib/modules/%s/vmlinux-%[1]s",
"/lib/modules/%s/build/vmlinux",
"/usr/lib/modules/%s/kernel/vmlinux",
"/usr/lib/debug/boot/vmlinux-%s",
"/usr/lib/debug/boot/vmlinux-%s.debug",
"/usr/lib/debug/lib/modules/%s/vmlinux",
}
for _, loc := range locations {
file, err := internal.OpenSafeELFFile(fmt.Sprintf(loc, release))
if errors.Is(err, os.ErrNotExist) {
continue
}
return file, err
}
return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported)
}
// parseBTFHeader parses the header of the .BTF section.
func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) {
var header btfHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, errors.New("header length shorter than btfHeader size")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil {
return nil, fmt.Errorf("header padding: %v", err)
}
return &header, nil
}
func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {
buf := new(bufio.Reader)
for _, bo := range []binary.ByteOrder{
binary.LittleEndian,
binary.BigEndian,
} {
buf.Reset(io.NewSectionReader(r, 0, math.MaxInt64))
if _, err := parseBTFHeader(buf, bo); err == nil {
return bo
}
}
return nil
}
// parseBTF reads a .BTF section into memory and parses it into a list of
// raw types and a string table.
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) {
buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64)
header, err := parseBTFHeader(buf, bo)
if err != nil {
return nil, nil, fmt.Errorf("parsing .BTF header: %v", err)
}
rawStrings, err := readStringTable(io.NewSectionReader(btf, header.stringStart(), int64(header.StringLen)),
baseStrings)
if err != nil {
return nil, nil, fmt.Errorf("can't read type names: %w", err)
}
buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen)))
rawTypes, err := readTypes(buf, bo, header.TypeLen)
if err != nil {
return nil, nil, fmt.Errorf("can't read types: %w", err)
}
return rawTypes, rawStrings, nil
}
type variable struct {
section string
name string
}
func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
continue
}
name, err := rawStrings.Lookup(rawType.NameOff)
if err != nil {
return err
}
if name == ".kconfig" || name == ".ksyms" {
return fmt.Errorf("reference to %s: %w", name, ErrNotSupported)
}
if rawTypes[i].SizeType != 0 {
continue
}
size, ok := sectionSizes[name]
if !ok {
return fmt.Errorf("data section %s: missing size", name)
}
rawTypes[i].SizeType = size
secinfos := rawType.data.([]btfVarSecinfo)
for j, secInfo := range secinfos {
id := int(secInfo.Type - 1)
if id >= len(rawTypes) {
return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
}
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
if err != nil {
return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
}
offset, ok := variableOffsets[variable{name, varName}]
if !ok {
return fmt.Errorf("data section %s: missing offset for variable %s", name, varName)
}
secinfos[j].Offset = offset
}
}
return nil
}
// Copy creates a copy of Spec.
func (s *Spec) Copy() *Spec {
types := copyTypes(s.types, nil)
typeIDOffset := TypeID(0)
if len(s.types) != 0 {
typeIDOffset = s.typeIDs[s.types[0]]
}
typeIDs, typesByName := indexTypes(types, typeIDOffset)
// NB: Other parts of spec are not copied since they are immutable.
return &Spec{
s.rawTypes,
s.strings,
types,
typeIDs,
typesByName,
s.byteOrder,
}
}
type marshalOpts struct {
ByteOrder binary.ByteOrder
StripFuncLinkage bool
}
func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
var (
buf bytes.Buffer
header = new(btfHeader)
headerLen = binary.Size(header)
)
// Reserve space for the header. We have to write it last since
// we don't know the size of the type section yet.
_, _ = buf.Write(make([]byte, headerLen))
// Write type section, just after the header.
for _, raw := range s.rawTypes {
switch {
case opts.StripFuncLinkage && raw.Kind() == kindFunc:
raw.SetLinkage(StaticFunc)
}
if err := raw.Marshal(&buf, opts.ByteOrder); err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
}
typeLen := uint32(buf.Len() - headerLen)
// Write string section after type section.
stringsLen := s.strings.Length()
buf.Grow(stringsLen)
if err := s.strings.Marshal(&buf); err != nil {
return nil, err
}
// Fill out the header, and write it out.
header = &btfHeader{
Magic: btfMagic,
Version: 1,
Flags: 0,
HdrLen: uint32(headerLen),
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(stringsLen),
}
raw := buf.Bytes()
err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header)
if err != nil {
return nil, fmt.Errorf("can't write header: %v", err)
}
return raw, nil
}
type sliceWriter []byte
func (sw sliceWriter) Write(p []byte) (int, error) {
if len(p) != len(sw) {
return 0, errors.New("size doesn't match")
}
return copy(sw, p), nil
}
// TypeByID returns the BTF Type with the given type ID.
//
// Returns an error wrapping ErrNotFound if a Type with the given ID
// does not exist in the Spec.
func (s *Spec) TypeByID(id TypeID) (Type, error) {
return s.types.ByID(id)
}
// TypeID returns the ID for a given Type.
//
// Returns an error wrapping ErrNoFound if the type isn't part of the Spec.
func (s *Spec) TypeID(typ Type) (TypeID, error) {
if _, ok := typ.(*Void); ok {
// Equality is weird for void, since it is a zero sized type.
return 0, nil
}
id, ok := s.typeIDs[typ]
if !ok {
return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound)
}
return id, nil
}
// AnyTypesByName returns a list of BTF Types with the given name.
//
// If the BTF blob describes multiple compilation units like vmlinux, multiple
// Types with the same name and kind can exist, but might not describe the same
// data structure.
//
// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec.
func (s *Spec) AnyTypesByName(name string) ([]Type, error) {
types := s.namedTypes[newEssentialName(name)]
if len(types) == 0 {
return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound)
}
// Return a copy to prevent changes to namedTypes.
result := make([]Type, 0, len(types))
for _, t := range types {
// Match against the full name, not just the essential one
// in case the type being looked up is a struct flavor.
if t.TypeName() == name {
result = append(result, t)
}
}
return result, nil
}
// AnyTypeByName returns a Type with the given name.
//
// Returns an error if multiple types of that name exist.
func (s *Spec) AnyTypeByName(name string) (Type, error) {
types, err := s.AnyTypesByName(name)
if err != nil {
return nil, err
}
if len(types) > 1 {
return nil, fmt.Errorf("found multiple types: %v", types)
}
return types[0], nil
}
// TypeByName searches for a Type with a specific name. Since multiple
// Types with the same name can exist, the parameter typ is taken to
// narrow down the search in case of a clash.
//
// typ must be a non-nil pointer to an implementation of a Type.
// On success, the address of the found Type will be copied to typ.
//
// Returns an error wrapping ErrNotFound if no matching
// Type exists in the Spec. If multiple candidates are found,
// an error is returned.
func (s *Spec) TypeByName(name string, typ interface{}) error {
typValue := reflect.ValueOf(typ)
if typValue.Kind() != reflect.Ptr {
return fmt.Errorf("%T is not a pointer", typ)
}
typPtr := typValue.Elem()
if !typPtr.CanSet() {
return fmt.Errorf("%T cannot be set", typ)
}
wanted := typPtr.Type()
if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) {
return fmt.Errorf("%T does not satisfy Type interface", typ)
}
types, err := s.AnyTypesByName(name)
if err != nil {
return err
}
var candidate Type
for _, typ := range types {
if reflect.TypeOf(typ) != wanted {
continue
}
if candidate != nil {
return fmt.Errorf("type %s: multiple candidates for %T", name, typ)
}
candidate = typ
}
if candidate == nil {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
}
typPtr.Set(reflect.ValueOf(candidate))
return nil
}
// LoadSplitSpecFromReader loads split BTF from a reader.
//
// Types from base are used to resolve references in the split BTF.
// The returned Spec only contains types from the split BTF, not from the base.
func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) {
return loadRawSpec(r, internal.NativeEndian, base.types, base.strings)
}
// TypesIterator iterates over types of a given spec.
type TypesIterator struct {
spec *Spec
index int
// The last visited type in the spec.
Type Type
}
// Iterate returns the types iterator.
func (s *Spec) Iterate() *TypesIterator {
return &TypesIterator{spec: s, index: 0}
}
// Next returns true as long as there are any remaining types.
func (iter *TypesIterator) Next() bool {
if len(iter.spec.types) <= iter.index {
return false
}
iter.Type = iter.spec.types[iter.index]
iter.index++
return true
}
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
fd *sys.FD
// Size of the raw BTF in bytes.
size uint32
}
// NewHandle loads BTF into the kernel.
//
// Returns ErrNotSupported if BTF is not supported.
func NewHandle(spec *Spec) (*Handle, error) {
if err := haveBTF(); err != nil {
return nil, err
}
if spec.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
}
btf, err := spec.marshal(marshalOpts{
ByteOrder: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
})
if err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
if uint64(len(btf)) > math.MaxUint32 {
return nil, errors.New("BTF exceeds the maximum size")
}
attr := &sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
}
fd, err := sys.BtfLoad(attr)
if err != nil {
logBuf := make([]byte, 64*1024)
attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
attr.BtfLogSize = uint32(len(logBuf))
attr.BtfLogLevel = 1
// NB: The syscall will never return ENOSPC as of 5.18-rc4.
_, _ = sys.BtfLoad(attr)
return nil, internal.ErrorWithLog(err, logBuf)
}
return &Handle{fd, attr.BtfSize}, nil
}
// NewHandleFromID returns the BTF handle for a given id.
//
// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible.
//
// Returns ErrNotExist, if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, fmt.Errorf("get FD for ID %d: %w", id, err)
}
info, err := newHandleInfoFromFD(fd)
if err != nil {
_ = fd.Close()
return nil, err
}
return &Handle{fd, info.size}, nil
}
// Spec parses the kernel BTF into Go types.
//
// base is used to decode split BTF and may be nil.
func (h *Handle) Spec(base *Spec) (*Spec, error) {
var btfInfo sys.BtfInfo
btfBuffer := make([]byte, h.size)
btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer)
if err := sys.ObjInfo(h.fd, &btfInfo); err != nil {
return nil, err
}
var baseTypes types
var baseStrings *stringTable
if base != nil {
baseTypes = base.types
baseStrings = base.strings
}
return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, baseTypes, baseStrings)
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
if h == nil {
return nil
}
return h.fd.Close()
}
// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
return h.fd.Int()
}
// Info returns metadata about the handle.
func (h *Handle) Info() (*HandleInfo, error) {
return newHandleInfoFromFD(h.fd)
}
func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte {
const minHeaderLength = 24
typesLen := uint32(binary.Size(types))
header := btfHeader{
Magic: btfMagic,
Version: 1,
HdrLen: minHeaderLength,
TypeOff: 0,
TypeLen: typesLen,
StringOff: typesLen,
StringLen: uint32(len(strings)),
}
buf := new(bytes.Buffer)
_ = binary.Write(buf, bo, &header)
_ = binary.Write(buf, bo, types)
buf.Write(strings)
return buf.Bytes()
}
var haveBTF = internal.FeatureTest("BTF", "5.1", func() error {
var (
types struct {
Integer btfType
Var btfType
btfVar struct{ Linkage uint32 }
}
strings = []byte{0, 'a', 0}
)
// We use a BTF_KIND_VAR here, to make sure that
// the kernel understands BTF at least as well as we
// do. BTF_KIND_VAR was introduced ~5.1.
types.Integer.SetKind(kindPointer)
types.Var.NameOff = 1
types.Var.SetKind(kindVar)
types.Var.SizeType = 1
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: loading the program
// might still succeed without BTF.
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})
var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error {
if err := haveBTF(); err != nil {
return err
}
var (
types struct {
FuncProto btfType
Func btfType
}
strings = []byte{0, 'a', 0}
)
types.FuncProto.SetKind(kindFuncProto)
types.Func.SetKind(kindFunc)
types.Func.SizeType = 1 // aka FuncProto
types.Func.NameOff = 1
types.Func.SetLinkage(GlobalFunc)
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})

View file

@ -1,343 +0,0 @@
package btf
import (
"encoding/binary"
"fmt"
"io"
)
//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage
// btfKind describes a Type.
type btfKind uint8
// Equivalents of the BTF_KIND_* constants.
const (
kindUnknown btfKind = iota
kindInt
kindPointer
kindArray
kindStruct
kindUnion
kindEnum
kindForward
kindTypedef
kindVolatile
kindConst
kindRestrict
// Added ~4.20
kindFunc
kindFuncProto
// Added ~5.1
kindVar
kindDatasec
// Added ~5.13
kindFloat
)
// FuncLinkage describes BTF function linkage metadata.
type FuncLinkage int
// Equivalent of enum btf_func_linkage.
const (
StaticFunc FuncLinkage = iota // static
GlobalFunc // global
ExternFunc // extern
)
// VarLinkage describes BTF variable linkage metadata.
type VarLinkage int
const (
StaticVar VarLinkage = iota // static
GlobalVar // global
ExternVar // extern
)
const (
btfTypeKindShift = 24
btfTypeKindLen = 5
btfTypeVlenShift = 0
btfTypeVlenMask = 16
btfTypeKindFlagShift = 31
btfTypeKindFlagMask = 1
)
// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst.
type btfType struct {
NameOff uint32
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members), linkage
* bits 16-23: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
Info uint32
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
SizeType uint32
}
func (k btfKind) String() string {
switch k {
case kindUnknown:
return "Unknown"
case kindInt:
return "Integer"
case kindPointer:
return "Pointer"
case kindArray:
return "Array"
case kindStruct:
return "Struct"
case kindUnion:
return "Union"
case kindEnum:
return "Enumeration"
case kindForward:
return "Forward"
case kindTypedef:
return "Typedef"
case kindVolatile:
return "Volatile"
case kindConst:
return "Const"
case kindRestrict:
return "Restrict"
case kindFunc:
return "Function"
case kindFuncProto:
return "Function Proto"
case kindVar:
return "Variable"
case kindDatasec:
return "Section"
case kindFloat:
return "Float"
default:
return fmt.Sprintf("Unknown (%d)", k)
}
}
func mask(len uint32) uint32 {
return (1 << len) - 1
}
func readBits(value, len, shift uint32) uint32 {
return (value >> shift) & mask(len)
}
func writeBits(value, len, shift, new uint32) uint32 {
value &^= mask(len) << shift
value |= (new & mask(len)) << shift
return value
}
func (bt *btfType) info(len, shift uint32) uint32 {
return readBits(bt.Info, len, shift)
}
func (bt *btfType) setInfo(value, len, shift uint32) {
bt.Info = writeBits(bt.Info, len, shift, value)
}
func (bt *btfType) Kind() btfKind {
return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift))
}
func (bt *btfType) SetKind(kind btfKind) {
bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift)
}
func (bt *btfType) Vlen() int {
return int(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}
func (bt *btfType) SetVlen(vlen int) {
bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift)
}
func (bt *btfType) KindFlag() bool {
return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1
}
func (bt *btfType) Linkage() FuncLinkage {
return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}
func (bt *btfType) SetLinkage(linkage FuncLinkage) {
bt.setInfo(uint32(linkage), btfTypeVlenMask, btfTypeVlenShift)
}
func (bt *btfType) Type() TypeID {
// TODO: Panic here if wrong kind?
return TypeID(bt.SizeType)
}
func (bt *btfType) Size() uint32 {
// TODO: Panic here if wrong kind?
return bt.SizeType
}
func (bt *btfType) SetSize(size uint32) {
bt.SizeType = size
}
type rawType struct {
btfType
data interface{}
}
func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
if err := binary.Write(w, bo, &rt.btfType); err != nil {
return err
}
if rt.data == nil {
return nil
}
return binary.Write(w, bo, rt.data)
}
// btfInt encodes additional data for integers.
//
// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b
// ? = undefined
// e = encoding
// o = offset (bitfields?)
// b = bits (bitfields)
type btfInt struct {
Raw uint32
}
const (
btfIntEncodingLen = 4
btfIntEncodingShift = 24
btfIntOffsetLen = 8
btfIntOffsetShift = 16
btfIntBitsLen = 8
btfIntBitsShift = 0
)
func (bi btfInt) Encoding() IntEncoding {
return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift))
}
func (bi *btfInt) SetEncoding(e IntEncoding) {
bi.Raw = writeBits(uint32(bi.Raw), btfIntEncodingLen, btfIntEncodingShift, uint32(e))
}
func (bi btfInt) Offset() Bits {
return Bits(readBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift))
}
func (bi *btfInt) SetOffset(offset uint32) {
bi.Raw = writeBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift, offset)
}
func (bi btfInt) Bits() Bits {
return Bits(readBits(bi.Raw, btfIntBitsLen, btfIntBitsShift))
}
func (bi *btfInt) SetBits(bits byte) {
bi.Raw = writeBits(bi.Raw, btfIntBitsLen, btfIntBitsShift, uint32(bits))
}
type btfArray struct {
Type TypeID
IndexType TypeID
Nelems uint32
}
type btfMember struct {
NameOff uint32
Type TypeID
Offset uint32
}
type btfVarSecinfo struct {
Type TypeID
Offset uint32
Size uint32
}
type btfVariable struct {
Linkage uint32
}
type btfEnum struct {
NameOff uint32
Val int32
}
type btfParam struct {
NameOff uint32
Type TypeID
}
func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) {
var header btfType
// because of the interleaving between types and struct members it is difficult to
// precompute the numbers of raw types this will parse
// this "guess" is a good first estimation
sizeOfbtfType := uintptr(binary.Size(btfType{}))
tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
types := make([]rawType, 0, tyMaxCount)
for id := TypeID(1); ; id++ {
if err := binary.Read(r, bo, &header); err == io.EOF {
return types, nil
} else if err != nil {
return nil, fmt.Errorf("can't read type info for id %v: %v", id, err)
}
var data interface{}
switch header.Kind() {
case kindInt:
data = new(btfInt)
case kindPointer:
case kindArray:
data = new(btfArray)
case kindStruct:
fallthrough
case kindUnion:
data = make([]btfMember, header.Vlen())
case kindEnum:
data = make([]btfEnum, header.Vlen())
case kindForward:
case kindTypedef:
case kindVolatile:
case kindConst:
case kindRestrict:
case kindFunc:
case kindFuncProto:
data = make([]btfParam, header.Vlen())
case kindVar:
data = new(btfVariable)
case kindDatasec:
data = make([]btfVarSecinfo, header.Vlen())
case kindFloat:
default:
return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind())
}
if data == nil {
types = append(types, rawType{header, nil})
continue
}
if err := binary.Read(r, bo, data); err != nil {
return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err)
}
types = append(types, rawType{header, data})
}
}

View file

@ -1,44 +0,0 @@
// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage"; DO NOT EDIT.
package btf
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[StaticFunc-0]
_ = x[GlobalFunc-1]
_ = x[ExternFunc-2]
}
const _FuncLinkage_name = "staticglobalextern"
var _FuncLinkage_index = [...]uint8{0, 6, 12, 18}
func (i FuncLinkage) String() string {
if i < 0 || i >= FuncLinkage(len(_FuncLinkage_index)-1) {
return "FuncLinkage(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _FuncLinkage_name[_FuncLinkage_index[i]:_FuncLinkage_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[StaticVar-0]
_ = x[GlobalVar-1]
_ = x[ExternVar-2]
}
const _VarLinkage_name = "staticglobalextern"
var _VarLinkage_index = [...]uint8{0, 6, 12, 18}
func (i VarLinkage) String() string {
if i < 0 || i >= VarLinkage(len(_VarLinkage_index)-1) {
return "VarLinkage(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _VarLinkage_name[_VarLinkage_index[i]:_VarLinkage_index[i+1]]
}

View file

@ -1,972 +0,0 @@
package btf
import (
"encoding/binary"
"errors"
"fmt"
"math"
"reflect"
"strconv"
"strings"
"github.com/cilium/ebpf/asm"
)
// Code in this file is derived from libbpf, which is available under a BSD
// 2-Clause license.
// COREFixup is the result of computing a CO-RE relocation for a target.
type COREFixup struct {
kind coreKind
local uint32
target uint32
// True if there is no valid fixup. The instruction is replaced with an
// invalid dummy.
poison bool
// True if the validation of the local value should be skipped. Used by
// some kinds of bitfield relocations.
skipLocalValidation bool
}
func (f *COREFixup) equal(other COREFixup) bool {
return f.local == other.local && f.target == other.target
}
func (f *COREFixup) String() string {
if f.poison {
return fmt.Sprintf("%s=poison", f.kind)
}
return fmt.Sprintf("%s=%d->%d", f.kind, f.local, f.target)
}
func (f *COREFixup) Apply(ins *asm.Instruction) error {
if f.poison {
const badRelo = 0xbad2310
*ins = asm.BuiltinFunc(badRelo).Call()
return nil
}
switch class := ins.OpCode.Class(); class {
case asm.LdXClass, asm.StClass, asm.StXClass:
if want := int16(f.local); !f.skipLocalValidation && want != ins.Offset {
return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, f.local)
}
if f.target > math.MaxInt16 {
return fmt.Errorf("offset %d exceeds MaxInt16", f.target)
}
ins.Offset = int16(f.target)
case asm.LdClass:
if !ins.IsConstantLoad(asm.DWord) {
return fmt.Errorf("not a dword-sized immediate load")
}
if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v)", ins.Constant, want, f)
}
ins.Constant = int64(f.target)
case asm.ALUClass:
if ins.OpCode.ALUOp() == asm.Swap {
return fmt.Errorf("relocation against swap")
}
fallthrough
case asm.ALU64Class:
if src := ins.OpCode.Source(); src != asm.ImmSource {
return fmt.Errorf("invalid source %s", src)
}
if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v, kind: %v, ins: %v)", ins.Constant, want, f, f.kind, ins)
}
if f.target > math.MaxInt32 {
return fmt.Errorf("immediate %d exceeds MaxInt32", f.target)
}
ins.Constant = int64(f.target)
default:
return fmt.Errorf("invalid class %s", class)
}
return nil
}
func (f COREFixup) isNonExistant() bool {
return f.kind.checksForExistence() && f.target == 0
}
// coreKind is the type of CO-RE relocation as specified in BPF source code.
type coreKind uint32
const (
reloFieldByteOffset coreKind = iota /* field byte offset */
reloFieldByteSize /* field size in bytes */
reloFieldExists /* field existence in target kernel */
reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */
reloFieldLShiftU64 /* bitfield-specific left bitshift */
reloFieldRShiftU64 /* bitfield-specific right bitshift */
reloTypeIDLocal /* type ID in local BPF object */
reloTypeIDTarget /* type ID in target kernel */
reloTypeExists /* type existence in target kernel */
reloTypeSize /* type size in bytes */
reloEnumvalExists /* enum value existence in target kernel */
reloEnumvalValue /* enum value integer value */
)
func (k coreKind) checksForExistence() bool {
return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists
}
func (k coreKind) String() string {
switch k {
case reloFieldByteOffset:
return "byte_off"
case reloFieldByteSize:
return "byte_sz"
case reloFieldExists:
return "field_exists"
case reloFieldSigned:
return "signed"
case reloFieldLShiftU64:
return "lshift_u64"
case reloFieldRShiftU64:
return "rshift_u64"
case reloTypeIDLocal:
return "local_type_id"
case reloTypeIDTarget:
return "target_type_id"
case reloTypeExists:
return "type_exists"
case reloTypeSize:
return "type_size"
case reloEnumvalExists:
return "enumval_exists"
case reloEnumvalValue:
return "enumval_value"
default:
return "unknown"
}
}
// CORERelocate calculates the difference in types between local and target.
//
// Returns a list of fixups which can be applied to instructions to make them
// match the target type(s).
//
// Fixups are returned in the order of relos, e.g. fixup[i] is the solution
// for relos[i].
func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) {
if local.byteOrder != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder)
}
type reloGroup struct {
relos []*CORERelocation
// Position of each relocation in relos.
indices []int
}
// Split relocations into per Type lists.
relosByType := make(map[Type]*reloGroup)
result := make([]COREFixup, len(relos))
for i, relo := range relos {
if relo.kind == reloTypeIDLocal {
// Filtering out reloTypeIDLocal here makes our lives a lot easier
// down the line, since it doesn't have a target at all.
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
id, err := local.TypeID(relo.typ)
if err != nil {
return nil, fmt.Errorf("%s: %w", relo.kind, err)
}
result[i] = COREFixup{
kind: relo.kind,
local: uint32(id),
target: uint32(id),
}
continue
}
group, ok := relosByType[relo.typ]
if !ok {
group = &reloGroup{}
relosByType[relo.typ] = group
}
group.relos = append(group.relos, relo)
group.indices = append(group.indices, i)
}
for localType, group := range relosByType {
localTypeName := localType.TypeName()
if localTypeName == "" {
return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported)
}
targets := target.namedTypes[newEssentialName(localTypeName)]
fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", localType, err)
}
for j, index := range group.indices {
result[index] = fixups[j]
}
}
return result, nil
}
var errAmbiguousRelocation = errors.New("ambiguous relocation")
var errImpossibleRelocation = errors.New("impossible relocation")
// coreCalculateFixups calculates the fixups for the given relocations using
// the "best" target.
//
// The best target is determined by scoring: the less poisoning we have to do
// the better the target is.
func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) {
localID, err := localSpec.TypeID(local)
if err != nil {
return nil, fmt.Errorf("local type ID: %w", err)
}
local = Copy(local, UnderlyingType)
bestScore := len(relos)
var bestFixups []COREFixup
for i := range targets {
targetID, err := targetSpec.TypeID(targets[i])
if err != nil {
return nil, fmt.Errorf("target type ID: %w", err)
}
target := Copy(targets[i], UnderlyingType)
score := 0 // lower is better
fixups := make([]COREFixup, 0, len(relos))
for _, relo := range relos {
fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo)
if err != nil {
return nil, fmt.Errorf("target %s: %w", target, err)
}
if fixup.poison || fixup.isNonExistant() {
score++
}
fixups = append(fixups, fixup)
}
if score > bestScore {
// We have a better target already, ignore this one.
continue
}
if score < bestScore {
// This is the best target yet, use it.
bestScore = score
bestFixups = fixups
continue
}
// Some other target has the same score as the current one. Make sure
// the fixups agree with each other.
for i, fixup := range bestFixups {
if !fixup.equal(fixups[i]) {
return nil, fmt.Errorf("%s: multiple types match: %w", fixup.kind, errAmbiguousRelocation)
}
}
}
if bestFixups == nil {
// Nothing at all matched, probably because there are no suitable
// targets at all.
//
// Poison everything except checksForExistence.
bestFixups = make([]COREFixup, len(relos))
for i, relo := range relos {
if relo.kind.checksForExistence() {
bestFixups[i] = COREFixup{kind: relo.kind, local: 1, target: 0}
} else {
bestFixups[i] = COREFixup{kind: relo.kind, poison: true}
}
}
}
return bestFixups, nil
}
// coreCalculateFixup calculates the fixup for a single local type, target type
// and relocation.
func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target}, nil
}
fixupWithoutValidation := func(local, target uint32) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil
}
poison := func() (COREFixup, error) {
if relo.kind.checksForExistence() {
return fixup(1, 0)
}
return COREFixup{kind: relo.kind, poison: true}, nil
}
zero := COREFixup{}
switch relo.kind {
case reloTypeIDTarget, reloTypeSize, reloTypeExists:
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
err := coreAreTypesCompatible(local, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
switch relo.kind {
case reloTypeExists:
return fixup(1, 1)
case reloTypeIDTarget:
return fixup(uint32(localID), uint32(targetID))
case reloTypeSize:
localSize, err := Sizeof(local)
if err != nil {
return zero, err
}
targetSize, err := Sizeof(target)
if err != nil {
return zero, err
}
return fixup(uint32(localSize), uint32(targetSize))
}
case reloEnumvalValue, reloEnumvalExists:
localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
switch relo.kind {
case reloEnumvalExists:
return fixup(1, 1)
case reloEnumvalValue:
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
}
case reloFieldSigned:
switch local.(type) {
case *Enum:
return fixup(1, 1)
case *Int:
return fixup(
uint32(local.(*Int).Encoding&Signed),
uint32(target.(*Int).Encoding&Signed),
)
default:
return fixupWithoutValidation(0, 0)
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64:
if _, ok := target.(*Fwd); ok {
// We can't relocate fields using a forward declaration, so
// skip it. If a non-forward declaration is present in the BTF
// we'll find it in one of the other iterations.
return poison()
}
localField, targetField, err := coreFindField(local, relo.accessor, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("target %s: %w", target, err)
}
maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) {
f.skipLocalValidation = localField.bitfieldSize > 0
return f, err
}
switch relo.kind {
case reloFieldExists:
return fixup(1, 1)
case reloFieldByteOffset:
return maybeSkipValidation(fixup(localField.offset, targetField.offset))
case reloFieldByteSize:
localSize, err := Sizeof(localField.Type)
if err != nil {
return zero, err
}
targetSize, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
return maybeSkipValidation(fixup(uint32(localSize), uint32(targetSize)))
case reloFieldLShiftU64:
var target uint32
if byteOrder == binary.LittleEndian {
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
}
target = uint32(64 - targetField.bitfieldOffset - targetSize)
} else {
loadWidth, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
target = uint32(64 - Bits(loadWidth*8) + targetField.bitfieldOffset)
}
return fixupWithoutValidation(0, target)
case reloFieldRShiftU64:
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
}
return fixupWithoutValidation(0, uint32(64-targetSize))
}
}
return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported)
}
/* coreAccessor contains a path through a struct. It contains at least one index.
*
* The interpretation depends on the kind of the relocation. The following is
* taken from struct bpf_core_relo in libbpf_internal.h:
*
* - for field-based relocations, string encodes an accessed field using
* a sequence of field and array indices, separated by colon (:). It's
* conceptually very close to LLVM's getelementptr ([0]) instruction's
* arguments for identifying offset to a field.
* - for type-based relocations, strings is expected to be just "0";
* - for enum value-based relocations, string contains an index of enum
* value within its enum type;
*
* Example to provide a better feel.
*
* struct sample {
* int a;
* struct {
* int b[10];
* };
* };
*
* struct sample s = ...;
* int x = &s->a; // encoded as "0:0" (a is field #0)
* int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
* // b is field #0 inside anon struct, accessing elem #5)
* int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
*/
type coreAccessor []int
func parseCOREAccessor(accessor string) (coreAccessor, error) {
if accessor == "" {
return nil, fmt.Errorf("empty accessor")
}
parts := strings.Split(accessor, ":")
result := make(coreAccessor, 0, len(parts))
for _, part := range parts {
// 31 bits to avoid overflowing int on 32 bit platforms.
index, err := strconv.ParseUint(part, 10, 31)
if err != nil {
return nil, fmt.Errorf("accessor index %q: %s", part, err)
}
result = append(result, int(index))
}
return result, nil
}
func (ca coreAccessor) String() string {
strs := make([]string, 0, len(ca))
for _, i := range ca {
strs = append(strs, strconv.Itoa(i))
}
return strings.Join(strs, ":")
}
func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
e, ok := t.(*Enum)
if !ok {
return nil, fmt.Errorf("not an enum: %s", t)
}
if len(ca) > 1 {
return nil, fmt.Errorf("invalid accessor %s for enum", ca)
}
i := ca[0]
if i >= len(e.Values) {
return nil, fmt.Errorf("invalid index %d for %s", i, e)
}
return &e.Values[i], nil
}
// coreField represents the position of a "child" of a composite type from the
// start of that type.
//
// /- start of composite
// | offset * 8 | bitfieldOffset | bitfieldSize | ... |
// \- start of field end of field -/
type coreField struct {
Type Type
// The position of the field from the start of the composite type in bytes.
offset uint32
// The offset of the bitfield in bits from the start of the field.
bitfieldOffset Bits
// The size of the bitfield in bits.
//
// Zero if the field is not a bitfield.
bitfieldSize Bits
}
func (cf *coreField) adjustOffsetToNthElement(n int) error {
size, err := Sizeof(cf.Type)
if err != nil {
return err
}
cf.offset += uint32(n) * uint32(size)
return nil
}
func (cf *coreField) adjustOffsetBits(offset Bits) error {
align, err := alignof(cf.Type)
if err != nil {
return err
}
// We can compute the load offset by:
// 1) converting the bit offset to bytes with a flooring division.
// 2) dividing and multiplying that offset by the alignment, yielding the
// load size aligned offset.
offsetBytes := uint32(offset/8) / uint32(align) * uint32(align)
// The number of bits remaining is the bit offset less the number of bits
// we can "skip" with the aligned offset.
cf.bitfieldOffset = offset - Bits(offsetBytes*8)
// We know that cf.offset is aligned at to at least align since we get it
// from the compiler via BTF. Adding an aligned offsetBytes preserves the
// alignment.
cf.offset += offsetBytes
return nil
}
func (cf *coreField) sizeBits() (Bits, error) {
if cf.bitfieldSize > 0 {
return cf.bitfieldSize, nil
}
// Someone is trying to access a non-bitfield via a bit shift relocation.
// This happens when a field changes from a bitfield to a regular field
// between kernel versions. Synthesise the size to make the shifts work.
size, err := Sizeof(cf.Type)
if err != nil {
return 0, nil
}
return Bits(size * 8), nil
}
// coreFindField descends into the local type using the accessor and tries to
// find an equivalent field in target at each step.
//
// Returns the field and the offset of the field from the start of
// target in bits.
func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, coreField, error) {
local := coreField{Type: localT}
target := coreField{Type: targetT}
// The first index is used to offset a pointer of the base type like
// when accessing an array.
if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil {
return coreField{}, coreField{}, err
}
if err := target.adjustOffsetToNthElement(localAcc[0]); err != nil {
return coreField{}, coreField{}, err
}
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
var localMaybeFlex, targetMaybeFlex bool
for i, acc := range localAcc[1:] {
switch localType := local.Type.(type) {
case composite:
// For composite types acc is used to find the field in the local type,
// and then we try to find a field in target with the same name.
localMembers := localType.members()
if acc >= len(localMembers) {
return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, localType)
}
localMember := localMembers[acc]
if localMember.Name == "" {
_, ok := localMember.Type.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported)
}
// This is an anonymous struct or union, ignore it.
local = coreField{
Type: localMember.Type,
offset: local.offset + localMember.Offset.Bytes(),
}
localMaybeFlex = false
continue
}
targetType, ok := target.Type.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation)
}
targetMember, last, err := coreFindMember(targetType, localMember.Name)
if err != nil {
return coreField{}, coreField{}, err
}
local = coreField{
Type: localMember.Type,
offset: local.offset,
bitfieldSize: localMember.BitfieldSize,
}
localMaybeFlex = acc == len(localMembers)-1
target = coreField{
Type: targetMember.Type,
offset: target.offset,
bitfieldSize: targetMember.BitfieldSize,
}
targetMaybeFlex = last
if local.bitfieldSize == 0 && target.bitfieldSize == 0 {
local.offset += localMember.Offset.Bytes()
target.offset += targetMember.Offset.Bytes()
break
}
// Either of the members is a bitfield. Make sure we're at the
// end of the accessor.
if next := i + 1; next < len(localAcc[1:]) {
return coreField{}, coreField{}, fmt.Errorf("can't descend into bitfield")
}
if err := local.adjustOffsetBits(localMember.Offset); err != nil {
return coreField{}, coreField{}, err
}
if err := target.adjustOffsetBits(targetMember.Offset); err != nil {
return coreField{}, coreField{}, err
}
case *Array:
// For arrays, acc is the index in the target.
targetType, ok := target.Type.(*Array)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation)
}
if localType.Nelems == 0 && !localMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array")
}
if targetType.Nelems == 0 && !targetMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array")
}
if localType.Nelems > 0 && acc >= int(localType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc)
}
if targetType.Nelems > 0 && acc >= int(targetType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation)
}
local = coreField{
Type: localType.Type,
offset: local.offset,
}
localMaybeFlex = false
if err := local.adjustOffsetToNthElement(acc); err != nil {
return coreField{}, coreField{}, err
}
target = coreField{
Type: targetType.Type,
offset: target.offset,
}
targetMaybeFlex = false
if err := target.adjustOffsetToNthElement(acc); err != nil {
return coreField{}, coreField{}, err
}
default:
return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported)
}
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, err
}
}
return local, target, nil
}
// coreFindMember finds a member in a composite type while handling anonymous
// structs and unions.
func coreFindMember(typ composite, name string) (Member, bool, error) {
if name == "" {
return Member{}, false, errors.New("can't search for anonymous member")
}
type offsetTarget struct {
composite
offset Bits
}
targets := []offsetTarget{{typ, 0}}
visited := make(map[composite]bool)
for i := 0; i < len(targets); i++ {
target := targets[i]
// Only visit targets once to prevent infinite recursion.
if visited[target] {
continue
}
if len(visited) >= maxTypeDepth {
// This check is different than libbpf, which restricts the entire
// path to BPF_CORE_SPEC_MAX_LEN items.
return Member{}, false, fmt.Errorf("type is nested too deep")
}
visited[target] = true
members := target.members()
for j, member := range members {
if member.Name == name {
// NB: This is safe because member is a copy.
member.Offset += target.offset
return member, j == len(members)-1, nil
}
// The names don't match, but this member could be an anonymous struct
// or union.
if member.Name != "" {
continue
}
comp, ok := member.Type.(composite)
if !ok {
return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type)
}
targets = append(targets, offsetTarget{comp, target.offset + member.Offset})
}
}
return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation)
}
// coreFindEnumValue follows localAcc to find the equivalent enum value in target.
func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) {
localValue, err := localAcc.enumValue(local)
if err != nil {
return nil, nil, err
}
targetEnum, ok := target.(*Enum)
if !ok {
return nil, nil, errImpossibleRelocation
}
localName := newEssentialName(localValue.Name)
for i, targetValue := range targetEnum.Values {
if newEssentialName(targetValue.Name) != localName {
continue
}
return localValue, &targetEnum.Values[i], nil
}
return nil, nil, errImpossibleRelocation
}
/* The comment below is from bpf_core_types_are_compat in libbpf.c:
*
* Check local and target types for compatibility. This check is used for
* type-based CO-RE relocations and follow slightly different rules than
* field-based relocations. This function assumes that root types were already
* checked for name match. Beyond that initial root-level name check, names
* are completely ignored. Compatibility rules are as follows:
* - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
* kind should match for local and target types (i.e., STRUCT is not
* compatible with UNION);
* - for ENUMs, the size is ignored;
* - for INT, size and signedness are ignored;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
* - CONST/VOLATILE/RESTRICT modifiers are ignored;
* - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
* - FUNC_PROTOs are compatible if they have compatible signature: same
* number of input args and compatible return and argument types.
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if types are not compatible.
*/
func coreAreTypesCompatible(localType Type, targetType Type) error {
var (
localTs, targetTs typeDeque
l, t = &localType, &targetType
depth = 0
)
for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() {
if depth >= maxTypeDepth {
return errors.New("types are nested too deep")
}
localType = *l
targetType = *t
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
}
switch lv := (localType).(type) {
case *Void, *Struct, *Union, *Enum, *Fwd, *Int:
// Nothing to do here
case *Pointer, *Array:
depth++
localType.walk(&localTs)
targetType.walk(&targetTs)
case *FuncProto:
tv := targetType.(*FuncProto)
if len(lv.Params) != len(tv.Params) {
return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation)
}
depth++
localType.walk(&localTs)
targetType.walk(&targetTs)
default:
return fmt.Errorf("unsupported type %T", localType)
}
}
if l != nil {
return fmt.Errorf("dangling local type %T", *l)
}
if t != nil {
return fmt.Errorf("dangling target type %T", *t)
}
return nil
}
/* coreAreMembersCompatible checks two types for field-based relocation compatibility.
*
* The comment below is from bpf_core_fields_are_compat in libbpf.c:
*
* Check two types for compatibility for the purpose of field access
* relocation. const/volatile/restrict and typedefs are skipped to ensure we
* are relocating semantically compatible entities:
* - any two STRUCTs/UNIONs are compatible and can be mixed;
* - any two FWDs are compatible, if their names match (modulo flavor suffix);
* - any two PTRs are always compatible;
* - for ENUMs, names should be the same (ignoring flavor suffix) or at
* least one of enums should be anonymous;
* - for ENUMs, check sizes, names are ignored;
* - for INT, size and signedness are ignored;
* - any two FLOATs are always compatible;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
* [ NB: coreAreMembersCompatible doesn't recurse, this check is done
* by coreFindField. ]
* - everything else shouldn't be ever a target of relocation.
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if the members are not compatible.
*/
func coreAreMembersCompatible(localType Type, targetType Type) error {
doNamesMatch := func(a, b string) error {
if a == "" || b == "" {
// allow anonymous and named type to match
return nil
}
if newEssentialName(a) == newEssentialName(b) {
return nil
}
return fmt.Errorf("names don't match: %w", errImpossibleRelocation)
}
_, lok := localType.(composite)
_, tok := targetType.(composite)
if lok && tok {
return nil
}
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
}
switch lv := localType.(type) {
case *Array, *Pointer, *Float, *Int:
return nil
case *Enum:
tv := targetType.(*Enum)
return doNamesMatch(lv.Name, tv.Name)
case *Fwd:
tv := targetType.(*Fwd)
return doNamesMatch(lv.Name, tv.Name)
default:
return fmt.Errorf("type %s: %w", localType, ErrNotSupported)
}
}

View file

@ -1,5 +0,0 @@
// Package btf handles data encoded according to the BPF Type Format.
//
// The canonical documentation lives in the Linux kernel repository and is
// available at https://www.kernel.org/doc/html/latest/bpf/btf.html
package btf

View file

@ -1,721 +0,0 @@
package btf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
)
// ExtInfos contains ELF section metadata.
type ExtInfos struct {
// The slices are sorted by offset in ascending order.
funcInfos map[string][]funcInfo
lineInfos map[string][]lineInfo
relocationInfos map[string][]coreRelocationInfo
}
// loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF.
//
// Returns an error wrapping ErrNotFound if no ext infos are present.
func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTable) (*ExtInfos, error) {
section := file.Section(".BTF.ext")
if section == nil {
return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound)
}
if section.ReaderAt == nil {
return nil, fmt.Errorf("compressed ext_info is not supported")
}
return loadExtInfos(section.ReaderAt, file.ByteOrder, ts, strings)
}
// loadExtInfos parses bare ext infos.
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringTable) (*ExtInfos, error) {
// Open unbuffered section reader. binary.Read() calls io.ReadFull on
// the header structs, resulting in one syscall per header.
headerRd := io.NewSectionReader(r, 0, math.MaxInt64)
extHeader, err := parseBTFExtHeader(headerRd, bo)
if err != nil {
return nil, fmt.Errorf("parsing BTF extension header: %w", err)
}
coreHeader, err := parseBTFExtCOREHeader(headerRd, bo, extHeader)
if err != nil {
return nil, fmt.Errorf("parsing BTF CO-RE header: %w", err)
}
buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen))
btfFuncInfos, err := parseFuncInfos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF function info: %w", err)
}
funcInfos := make(map[string][]funcInfo, len(btfFuncInfos))
for section, bfis := range btfFuncInfos {
funcInfos[section], err = newFuncInfos(bfis, ts)
if err != nil {
return nil, fmt.Errorf("section %s: func infos: %w", section, err)
}
}
buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen))
btfLineInfos, err := parseLineInfos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF line info: %w", err)
}
lineInfos := make(map[string][]lineInfo, len(btfLineInfos))
for section, blis := range btfLineInfos {
lineInfos[section], err = newLineInfos(blis, strings)
if err != nil {
return nil, fmt.Errorf("section %s: line infos: %w", section, err)
}
}
if coreHeader == nil || coreHeader.COREReloLen == 0 {
return &ExtInfos{funcInfos, lineInfos, nil}, nil
}
var btfCORERelos map[string][]bpfCORERelo
buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen))
btfCORERelos, err = parseCORERelos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err)
}
coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos))
for section, brs := range btfCORERelos {
coreRelos[section], err = newRelocationInfos(brs, ts, strings)
if err != nil {
return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err)
}
}
return &ExtInfos{funcInfos, lineInfos, coreRelos}, nil
}
type funcInfoMeta struct{}
type coreRelocationMeta struct{}
// Assign per-section metadata from BTF to a section's instructions.
func (ei *ExtInfos) Assign(insns asm.Instructions, section string) {
funcInfos := ei.funcInfos[section]
lineInfos := ei.lineInfos[section]
reloInfos := ei.relocationInfos[section]
iter := insns.Iterate()
for iter.Next() {
if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(funcInfoMeta{}, funcInfos[0].fn)
funcInfos = funcInfos[1:]
}
if len(lineInfos) > 0 && lineInfos[0].offset == iter.Offset {
*iter.Ins = iter.Ins.WithSource(lineInfos[0].line)
lineInfos = lineInfos[1:]
}
if len(reloInfos) > 0 && reloInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos[0].relo)
reloInfos = reloInfos[1:]
}
}
}
// MarshalExtInfos encodes function and line info embedded in insns into kernel
// wire format.
func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error)) (funcInfos, lineInfos []byte, _ error) {
iter := insns.Iterate()
var fiBuf, liBuf bytes.Buffer
for iter.Next() {
if fn := FuncMetadata(iter.Ins); fn != nil {
fi := &funcInfo{
fn: fn,
offset: iter.Offset,
}
if err := fi.marshal(&fiBuf, typeID); err != nil {
return nil, nil, fmt.Errorf("write func info: %w", err)
}
}
if line, ok := iter.Ins.Source().(*Line); ok {
li := &lineInfo{
line: line,
offset: iter.Offset,
}
if err := li.marshal(&liBuf); err != nil {
return nil, nil, fmt.Errorf("write line info: %w", err)
}
}
}
return fiBuf.Bytes(), liBuf.Bytes(), nil
}
// btfExtHeader is found at the start of the .BTF.ext section.
type btfExtHeader struct {
Magic uint16
Version uint8
Flags uint8
// HdrLen is larger than the size of struct btfExtHeader when it is
// immediately followed by a btfExtCOREHeader.
HdrLen uint32
FuncInfoOff uint32
FuncInfoLen uint32
LineInfoOff uint32
LineInfoLen uint32
}
// parseBTFExtHeader parses the header of the .BTF.ext section.
func parseBTFExtHeader(r io.Reader, bo binary.ByteOrder) (*btfExtHeader, error) {
var header btfExtHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
if int64(header.HdrLen) < int64(binary.Size(&header)) {
return nil, fmt.Errorf("header length shorter than btfExtHeader size")
}
return &header, nil
}
// funcInfoStart returns the offset from the beginning of the .BTF.ext section
// to the start of its func_info entries.
func (h *btfExtHeader) funcInfoStart() int64 {
return int64(h.HdrLen + h.FuncInfoOff)
}
// lineInfoStart returns the offset from the beginning of the .BTF.ext section
// to the start of its line_info entries.
func (h *btfExtHeader) lineInfoStart() int64 {
return int64(h.HdrLen + h.LineInfoOff)
}
// coreReloStart returns the offset from the beginning of the .BTF.ext section
// to the start of its CO-RE relocation entries.
func (h *btfExtHeader) coreReloStart(ch *btfExtCOREHeader) int64 {
return int64(h.HdrLen + ch.COREReloOff)
}
// btfExtCOREHeader is found right after the btfExtHeader when its HdrLen
// field is larger than its size.
type btfExtCOREHeader struct {
COREReloOff uint32
COREReloLen uint32
}
// parseBTFExtCOREHeader parses the tail of the .BTF.ext header. If additional
// header bytes are present, extHeader.HdrLen will be larger than the struct,
// indicating the presence of a CO-RE extension header.
func parseBTFExtCOREHeader(r io.Reader, bo binary.ByteOrder, extHeader *btfExtHeader) (*btfExtCOREHeader, error) {
extHdrSize := int64(binary.Size(&extHeader))
remainder := int64(extHeader.HdrLen) - extHdrSize
if remainder == 0 {
return nil, nil
}
var coreHeader btfExtCOREHeader
if err := binary.Read(r, bo, &coreHeader); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
return &coreHeader, nil
}
type btfExtInfoSec struct {
SecNameOff uint32
NumInfo uint32
}
// parseExtInfoSec parses a btf_ext_info_sec header within .BTF.ext,
// appearing within func_info and line_info sub-sections.
// These headers appear once for each program section in the ELF and are
// followed by one or more func/line_info records for the section.
func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings *stringTable) (string, *btfExtInfoSec, error) {
var infoHeader btfExtInfoSec
if err := binary.Read(r, bo, &infoHeader); err != nil {
return "", nil, fmt.Errorf("read ext info header: %w", err)
}
secName, err := strings.Lookup(infoHeader.SecNameOff)
if err != nil {
return "", nil, fmt.Errorf("get section name: %w", err)
}
if secName == "" {
return "", nil, fmt.Errorf("extinfo header refers to empty section name")
}
if infoHeader.NumInfo == 0 {
return "", nil, fmt.Errorf("section %s has zero records", secName)
}
return secName, &infoHeader, nil
}
// parseExtInfoRecordSize parses the uint32 at the beginning of a func_infos
// or line_infos segment that describes the length of all extInfoRecords in
// that segment.
func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) {
const maxRecordSize = 256
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return 0, fmt.Errorf("can't read record size: %v", err)
}
if recordSize < 4 {
// Need at least InsnOff worth of bytes per record.
return 0, errors.New("record size too short")
}
if recordSize > maxRecordSize {
return 0, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize)
}
return recordSize, nil
}
// The size of a FuncInfo in BTF wire format.
var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{}))
type funcInfo struct {
fn *Func
offset asm.RawInstructionOffset
}
type bpfFuncInfo struct {
// Instruction offset of the function within an ELF section.
InsnOff uint32
TypeID TypeID
}
func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) {
typ, err := ts.ByID(fi.TypeID)
if err != nil {
return nil, err
}
fn, ok := typ.(*Func)
if !ok {
return nil, fmt.Errorf("type ID %d is a %T, but expected a Func", fi.TypeID, typ)
}
// C doesn't have anonymous functions, but check just in case.
if fn.Name == "" {
return nil, fmt.Errorf("func with type ID %d doesn't have a name", fi.TypeID)
}
return &funcInfo{
fn,
asm.RawInstructionOffset(fi.InsnOff),
}, nil
}
func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) {
fis := make([]funcInfo, 0, len(bfis))
for _, bfi := range bfis {
fi, err := newFuncInfo(bfi, ts)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err)
}
fis = append(fis, *fi)
}
sort.Slice(fis, func(i, j int) bool {
return fis[i].offset <= fis[j].offset
})
return fis, nil
}
// marshal into the BTF wire format.
func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) error {
id, err := typeID(fi.fn)
if err != nil {
return err
}
bfi := bpfFuncInfo{
InsnOff: uint32(fi.offset),
TypeID: id,
}
return binary.Write(w, internal.NativeEndian, &bfi)
}
// parseLineInfos parses a func_info sub-section within .BTF.ext ito a map of
// func infos indexed by section name.
func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
result := make(map[string][]bpfFuncInfo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseFuncInfoRecords parses a stream of func_infos into a funcInfos.
// These records appear after a btf_ext_info_sec header in the func_info
// sub-section of .BTF.ext.
func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfFuncInfo, error) {
var out []bpfFuncInfo
var fi bpfFuncInfo
if exp, got := FuncInfoSize, recordSize; exp != got {
// BTF blob's record size is longer than we know how to parse.
return nil, fmt.Errorf("expected FuncInfo record size %d, but BTF blob contains %d", exp, got)
}
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &fi); err != nil {
return nil, fmt.Errorf("can't read function info: %v", err)
}
if fi.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
fi.InsnOff /= asm.InstructionSize
out = append(out, fi)
}
return out, nil
}
var LineInfoSize = uint32(binary.Size(bpfLineInfo{}))
// Line represents the location and contents of a single line of source
// code a BPF ELF was compiled from.
type Line struct {
fileName string
line string
lineNumber uint32
lineColumn uint32
// TODO: We should get rid of the fields below, but for that we need to be
// able to write BTF.
fileNameOff uint32
lineOff uint32
}
func (li *Line) FileName() string {
return li.fileName
}
func (li *Line) Line() string {
return li.line
}
func (li *Line) LineNumber() uint32 {
return li.lineNumber
}
func (li *Line) LineColumn() uint32 {
return li.lineColumn
}
func (li *Line) String() string {
return li.line
}
type lineInfo struct {
line *Line
offset asm.RawInstructionOffset
}
// Constants for the format of bpfLineInfo.LineCol.
const (
bpfLineShift = 10
bpfLineMax = (1 << (32 - bpfLineShift)) - 1
bpfColumnMax = (1 << bpfLineShift) - 1
)
type bpfLineInfo struct {
// Instruction offset of the line within the whole instruction stream, in instructions.
InsnOff uint32
FileNameOff uint32
LineOff uint32
LineCol uint32
}
func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) {
line, err := strings.Lookup(li.LineOff)
if err != nil {
return nil, fmt.Errorf("lookup of line: %w", err)
}
fileName, err := strings.Lookup(li.FileNameOff)
if err != nil {
return nil, fmt.Errorf("lookup of filename: %w", err)
}
lineNumber := li.LineCol >> bpfLineShift
lineColumn := li.LineCol & bpfColumnMax
return &lineInfo{
&Line{
fileName,
line,
lineNumber,
lineColumn,
li.FileNameOff,
li.LineOff,
},
asm.RawInstructionOffset(li.InsnOff),
}, nil
}
func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error) {
lis := make([]lineInfo, 0, len(blis))
for _, bli := range blis {
li, err := newLineInfo(bli, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bli.InsnOff, err)
}
lis = append(lis, *li)
}
sort.Slice(lis, func(i, j int) bool {
return lis[i].offset <= lis[j].offset
})
return lis, nil
}
// marshal writes the binary representation of the LineInfo to w.
func (li *lineInfo) marshal(w io.Writer) error {
line := li.line
if line.lineNumber > bpfLineMax {
return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax)
}
if line.lineColumn > bpfColumnMax {
return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax)
}
bli := bpfLineInfo{
uint32(li.offset),
line.fileNameOff,
line.lineOff,
(line.lineNumber << bpfLineShift) | line.lineColumn,
}
return binary.Write(w, internal.NativeEndian, &bli)
}
// parseLineInfos parses a line_info sub-section within .BTF.ext ito a map of
// line infos indexed by section name.
func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfLineInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
result := make(map[string][]bpfLineInfo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseLineInfoRecords parses a stream of line_infos into a lineInfos.
// These records appear after a btf_ext_info_sec header in the line_info
// sub-section of .BTF.ext.
func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfLineInfo, error) {
var out []bpfLineInfo
var li bpfLineInfo
if exp, got := uint32(binary.Size(li)), recordSize; exp != got {
// BTF blob's record size is longer than we know how to parse.
return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got)
}
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &li); err != nil {
return nil, fmt.Errorf("can't read line info: %v", err)
}
if li.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
li.InsnOff /= asm.InstructionSize
out = append(out, li)
}
return out, nil
}
// bpfCORERelo matches the kernel's struct bpf_core_relo.
type bpfCORERelo struct {
InsnOff uint32
TypeID TypeID
AccessStrOff uint32
Kind coreKind
}
type CORERelocation struct {
typ Type
accessor coreAccessor
kind coreKind
}
func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation {
relo, _ := ins.Metadata.Get(coreRelocationMeta{}).(*CORERelocation)
return relo
}
type coreRelocationInfo struct {
relo *CORERelocation
offset asm.RawInstructionOffset
}
func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreRelocationInfo, error) {
typ, err := ts.ByID(relo.TypeID)
if err != nil {
return nil, err
}
accessorStr, err := strings.Lookup(relo.AccessStrOff)
if err != nil {
return nil, err
}
accessor, err := parseCOREAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
return &coreRelocationInfo{
&CORERelocation{
typ,
accessor,
relo.Kind,
},
asm.RawInstructionOffset(relo.InsnOff),
}, nil
}
func newRelocationInfos(brs []bpfCORERelo, ts types, strings *stringTable) ([]coreRelocationInfo, error) {
rs := make([]coreRelocationInfo, 0, len(brs))
for _, br := range brs {
relo, err := newRelocationInfo(br, ts, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err)
}
rs = append(rs, *relo)
}
sort.Slice(rs, func(i, j int) bool {
return rs[i].offset < rs[j].offset
})
return rs, nil
}
var extInfoReloSize = binary.Size(bpfCORERelo{})
// parseCORERelos parses a core_relos sub-section within .BTF.ext ito a map of
// CO-RE relocations indexed by section name.
func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfCORERelo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
if recordSize != uint32(extInfoReloSize) {
return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize)
}
result := make(map[string][]bpfCORERelo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseCOREReloRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseCOREReloRecords parses a stream of CO-RE relocation entries into a
// coreRelos. These records appear after a btf_ext_info_sec header in the
// core_relos sub-section of .BTF.ext.
func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfCORERelo, error) {
var out []bpfCORERelo
var relo bpfCORERelo
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &relo); err != nil {
return nil, fmt.Errorf("can't read CO-RE relocation: %v", err)
}
if relo.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", relo.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
relo.InsnOff /= asm.InstructionSize
out = append(out, relo)
}
return out, nil
}

View file

@ -1,319 +0,0 @@
package btf
import (
"errors"
"fmt"
"strings"
)
var errNestedTooDeep = errors.New("nested too deep")
// GoFormatter converts a Type to Go syntax.
//
// A zero GoFormatter is valid to use.
type GoFormatter struct {
w strings.Builder
// Types present in this map are referred to using the given name if they
// are encountered when outputting another type.
Names map[Type]string
// Identifier is called for each field of struct-like types. By default the
// field name is used as is.
Identifier func(string) string
// EnumIdentifier is called for each element of an enum. By default the
// name of the enum type is concatenated with Identifier(element).
EnumIdentifier func(name, element string) string
}
// TypeDeclaration generates a Go type declaration for a BTF type.
func (gf *GoFormatter) TypeDeclaration(name string, typ Type) (string, error) {
gf.w.Reset()
if err := gf.writeTypeDecl(name, typ); err != nil {
return "", err
}
return gf.w.String(), nil
}
func (gf *GoFormatter) identifier(s string) string {
if gf.Identifier != nil {
return gf.Identifier(s)
}
return s
}
func (gf *GoFormatter) enumIdentifier(name, element string) string {
if gf.EnumIdentifier != nil {
return gf.EnumIdentifier(name, element)
}
return name + gf.identifier(element)
}
// writeTypeDecl outputs a declaration of the given type.
//
// It encodes https://golang.org/ref/spec#Type_declarations:
//
// type foo struct { bar uint32; }
// type bar int32
func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error {
if name == "" {
return fmt.Errorf("need a name for type %s", typ)
}
switch v := skipQualifiers(typ).(type) {
case *Enum:
fmt.Fprintf(&gf.w, "type %s ", name)
switch v.Size {
case 1:
gf.w.WriteString("int8")
case 2:
gf.w.WriteString("int16")
case 4:
gf.w.WriteString("int32")
case 8:
gf.w.WriteString("int64")
default:
return fmt.Errorf("%s: invalid enum size %d", typ, v.Size)
}
if len(v.Values) == 0 {
return nil
}
gf.w.WriteString("; const ( ")
for _, ev := range v.Values {
id := gf.enumIdentifier(name, ev.Name)
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value)
}
gf.w.WriteString(")")
return nil
default:
fmt.Fprintf(&gf.w, "type %s ", name)
return gf.writeTypeLit(v, 0)
}
}
// writeType outputs the name of a named type or a literal describing the type.
//
// It encodes https://golang.org/ref/spec#Types.
//
// foo (if foo is a named type)
// uint32
func (gf *GoFormatter) writeType(typ Type, depth int) error {
typ = skipQualifiers(typ)
name := gf.Names[typ]
if name != "" {
gf.w.WriteString(name)
return nil
}
return gf.writeTypeLit(typ, depth)
}
// writeTypeLit outputs a literal describing the type.
//
// The function ignores named types.
//
// It encodes https://golang.org/ref/spec#TypeLit.
//
// struct { bar uint32; }
// uint32
func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
depth++
if depth > maxTypeDepth {
return errNestedTooDeep
}
var err error
switch v := skipQualifiers(typ).(type) {
case *Int:
gf.writeIntLit(v)
case *Enum:
gf.w.WriteString("int32")
case *Typedef:
err = gf.writeType(v.Type, depth)
case *Array:
fmt.Fprintf(&gf.w, "[%d]", v.Nelems)
err = gf.writeType(v.Type, depth)
case *Struct:
err = gf.writeStructLit(v.Size, v.Members, depth)
case *Union:
// Always choose the first member to represent the union in Go.
err = gf.writeStructLit(v.Size, v.Members[:1], depth)
case *Datasec:
err = gf.writeDatasecLit(v, depth)
default:
return fmt.Errorf("type %T: %w", v, ErrNotSupported)
}
if err != nil {
return fmt.Errorf("%s: %w", typ, err)
}
return nil
}
func (gf *GoFormatter) writeIntLit(i *Int) {
// NB: Encoding.IsChar is ignored.
if i.Encoding.IsBool() && i.Size == 1 {
gf.w.WriteString("bool")
return
}
bits := i.Size * 8
if i.Encoding.IsSigned() {
fmt.Fprintf(&gf.w, "int%d", bits)
} else {
fmt.Fprintf(&gf.w, "uint%d", bits)
}
}
func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error {
gf.w.WriteString("struct { ")
prevOffset := uint32(0)
skippedBitfield := false
for i, m := range members {
if m.BitfieldSize > 0 {
skippedBitfield = true
continue
}
offset := m.Offset.Bytes()
if n := offset - prevOffset; skippedBitfield && n > 0 {
fmt.Fprintf(&gf.w, "_ [%d]byte /* unsupported bitfield */; ", n)
} else {
gf.writePadding(n)
}
size, err := Sizeof(m.Type)
if err != nil {
return fmt.Errorf("field %d: %w", i, err)
}
prevOffset = offset + uint32(size)
if err := gf.writeStructField(m, depth); err != nil {
return fmt.Errorf("field %d: %w", i, err)
}
}
gf.writePadding(size - prevOffset)
gf.w.WriteString("}")
return nil
}
func (gf *GoFormatter) writeStructField(m Member, depth int) error {
if m.BitfieldSize > 0 {
return fmt.Errorf("bitfields are not supported")
}
if m.Offset%8 != 0 {
return fmt.Errorf("unsupported offset %d", m.Offset)
}
if m.Name == "" {
// Special case a nested anonymous union like
// struct foo { union { int bar; int baz }; }
// by replacing the whole union with its first member.
union, ok := m.Type.(*Union)
if !ok {
return fmt.Errorf("anonymous fields are not supported")
}
if len(union.Members) == 0 {
return errors.New("empty anonymous union")
}
depth++
if depth > maxTypeDepth {
return errNestedTooDeep
}
m := union.Members[0]
size, err := Sizeof(m.Type)
if err != nil {
return err
}
if err := gf.writeStructField(m, depth); err != nil {
return err
}
gf.writePadding(union.Size - uint32(size))
return nil
}
fmt.Fprintf(&gf.w, "%s ", gf.identifier(m.Name))
if err := gf.writeType(m.Type, depth); err != nil {
return err
}
gf.w.WriteString("; ")
return nil
}
func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error {
gf.w.WriteString("struct { ")
prevOffset := uint32(0)
for i, vsi := range ds.Vars {
v := vsi.Type.(*Var)
if v.Linkage != GlobalVar {
// Ignore static, extern, etc. for now.
continue
}
if v.Name == "" {
return fmt.Errorf("variable %d: empty name", i)
}
gf.writePadding(vsi.Offset - prevOffset)
prevOffset = vsi.Offset + vsi.Size
fmt.Fprintf(&gf.w, "%s ", gf.identifier(v.Name))
if err := gf.writeType(v.Type, depth); err != nil {
return fmt.Errorf("variable %d: %w", i, err)
}
gf.w.WriteString("; ")
}
gf.writePadding(ds.Size - prevOffset)
gf.w.WriteString("}")
return nil
}
func (gf *GoFormatter) writePadding(bytes uint32) {
if bytes > 0 {
fmt.Fprintf(&gf.w, "_ [%d]byte; ", bytes)
}
}
func skipQualifiers(typ Type) Type {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
switch v := (result).(type) {
case qualifier:
result = v.qualify()
default:
return result
}
}
return &cycle{typ}
}

View file

@ -1,121 +0,0 @@
package btf
import (
"errors"
"fmt"
"os"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// HandleInfo describes a Handle.
type HandleInfo struct {
// ID of this handle in the kernel. The ID is only valid as long as the
// associated handle is kept alive.
ID ID
// Name is an identifying name for the BTF, currently only used by the
// kernel.
Name string
// IsKernel is true if the BTF originated with the kernel and not
// userspace.
IsKernel bool
// Size of the raw BTF in bytes.
size uint32
}
func newHandleInfoFromFD(fd *sys.FD) (*HandleInfo, error) {
// We invoke the syscall once with a empty BTF and name buffers to get size
// information to allocate buffers. Then we invoke it a second time with
// buffers to receive the data.
var btfInfo sys.BtfInfo
if err := sys.ObjInfo(fd, &btfInfo); err != nil {
return nil, fmt.Errorf("get BTF info for fd %s: %w", fd, err)
}
if btfInfo.NameLen > 0 {
// NameLen doesn't account for the terminating NUL.
btfInfo.NameLen++
}
// Don't pull raw BTF by default, since it may be quite large.
btfSize := btfInfo.BtfSize
btfInfo.BtfSize = 0
nameBuffer := make([]byte, btfInfo.NameLen)
btfInfo.Name, btfInfo.NameLen = sys.NewSlicePointerLen(nameBuffer)
if err := sys.ObjInfo(fd, &btfInfo); err != nil {
return nil, err
}
return &HandleInfo{
ID: ID(btfInfo.Id),
Name: unix.ByteSliceToString(nameBuffer),
IsKernel: btfInfo.KernelBtf != 0,
size: btfSize,
}, nil
}
// IsModule returns true if the BTF is for the kernel itself.
func (i *HandleInfo) IsVmlinux() bool {
return i.IsKernel && i.Name == "vmlinux"
}
// IsModule returns true if the BTF is for a kernel module.
func (i *HandleInfo) IsModule() bool {
return i.IsKernel && i.Name != "vmlinux"
}
// HandleIterator allows enumerating BTF blobs loaded into the kernel.
type HandleIterator struct {
// The ID of the last retrieved handle. Only valid after a call to Next.
ID ID
err error
}
// Next retrieves a handle for the next BTF blob.
//
// [Handle.Close] is called if *handle is non-nil to avoid leaking fds.
//
// Returns true if another BTF blob was found. Call [HandleIterator.Err] after
// the function returns false.
func (it *HandleIterator) Next(handle **Handle) bool {
if *handle != nil {
(*handle).Close()
*handle = nil
}
id := it.ID
for {
attr := &sys.BtfGetNextIdAttr{Id: id}
err := sys.BtfGetNextId(attr)
if errors.Is(err, os.ErrNotExist) {
// There are no more BTF objects.
return false
} else if err != nil {
it.err = fmt.Errorf("get next BTF ID: %w", err)
return false
}
id = attr.NextId
*handle, err = NewHandleFromID(id)
if errors.Is(err, os.ErrNotExist) {
// Try again with the next ID.
continue
} else if err != nil {
it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err)
return false
}
it.ID = id
return true
}
}
// Err returns an error if iteration failed for some reason.
func (it *HandleIterator) Err() error {
return it.err
}

View file

@ -1,128 +0,0 @@
package btf
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
)
type stringTable struct {
base *stringTable
offsets []uint32
strings []string
}
// sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc.
type sizedReader interface {
io.Reader
Size() int64
}
func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) {
// When parsing split BTF's string table, the first entry offset is derived
// from the last entry offset of the base BTF.
firstStringOffset := uint32(0)
if base != nil {
idx := len(base.offsets) - 1
firstStringOffset = base.offsets[idx] + uint32(len(base.strings[idx])) + 1
}
// Derived from vmlinux BTF.
const averageStringLength = 16
n := int(r.Size() / averageStringLength)
offsets := make([]uint32, 0, n)
strings := make([]string, 0, n)
offset := firstStringOffset
scanner := bufio.NewScanner(r)
scanner.Split(splitNull)
for scanner.Scan() {
str := scanner.Text()
offsets = append(offsets, offset)
strings = append(strings, str)
offset += uint32(len(str)) + 1
}
if err := scanner.Err(); err != nil {
return nil, err
}
if len(strings) == 0 {
return nil, errors.New("string table is empty")
}
if firstStringOffset == 0 && strings[0] != "" {
return nil, errors.New("first item in string table is non-empty")
}
return &stringTable{base, offsets, strings}, nil
}
func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
i := bytes.IndexByte(data, 0)
if i == -1 {
if atEOF && len(data) > 0 {
return 0, nil, errors.New("string table isn't null terminated")
}
return 0, nil, nil
}
return i + 1, data[:i], nil
}
func (st *stringTable) Lookup(offset uint32) (string, error) {
if st.base != nil && offset <= st.base.offsets[len(st.base.offsets)-1] {
return st.base.lookup(offset)
}
return st.lookup(offset)
}
func (st *stringTable) lookup(offset uint32) (string, error) {
i := search(st.offsets, offset)
if i == len(st.offsets) || st.offsets[i] != offset {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}
return st.strings[i], nil
}
func (st *stringTable) Length() int {
last := len(st.offsets) - 1
return int(st.offsets[last]) + len(st.strings[last]) + 1
}
func (st *stringTable) Marshal(w io.Writer) error {
for _, str := range st.strings {
_, err := io.WriteString(w, str)
if err != nil {
return err
}
_, err = w.Write([]byte{0})
if err != nil {
return err
}
}
return nil
}
// search is a copy of sort.Search specialised for uint32.
//
// Licensed under https://go.dev/LICENSE
func search(ints []uint32, needle uint32) int {
// Define f(-1) == false and f(n) == true.
// Invariant: f(i-1) == false, f(j) == true.
i, j := 0, len(ints)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
if !(ints[h] >= needle) {
i = h + 1 // preserves f(i-1) == false
} else {
j = h // preserves f(j) == true
}
}
// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
return i
}

File diff suppressed because it is too large Load diff

View file

@ -1,772 +0,0 @@
package ebpf
import (
"encoding/binary"
"errors"
"fmt"
"reflect"
"strings"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
)
// CollectionOptions control loading a collection into the kernel.
//
// Maps and Programs are passed to NewMapWithOptions and NewProgramsWithOptions.
type CollectionOptions struct {
Maps MapOptions
Programs ProgramOptions
// MapReplacements takes a set of Maps that will be used instead of
// creating new ones when loading the CollectionSpec.
//
// For each given Map, there must be a corresponding MapSpec in
// CollectionSpec.Maps, and its type, key/value size, max entries and flags
// must match the values of the MapSpec.
//
// The given Maps are Clone()d before being used in the Collection, so the
// caller can Close() them freely when they are no longer needed.
MapReplacements map[string]*Map
}
// CollectionSpec describes a collection.
type CollectionSpec struct {
Maps map[string]*MapSpec
Programs map[string]*ProgramSpec
// Types holds type information about Maps and Programs.
// Modifications to Types are currently undefined behaviour.
Types *btf.Spec
// ByteOrder specifies whether the ELF was compiled for
// big-endian or little-endian architectures.
ByteOrder binary.ByteOrder
}
// Copy returns a recursive copy of the spec.
func (cs *CollectionSpec) Copy() *CollectionSpec {
if cs == nil {
return nil
}
cpy := CollectionSpec{
Maps: make(map[string]*MapSpec, len(cs.Maps)),
Programs: make(map[string]*ProgramSpec, len(cs.Programs)),
ByteOrder: cs.ByteOrder,
Types: cs.Types,
}
for name, spec := range cs.Maps {
cpy.Maps[name] = spec.Copy()
}
for name, spec := range cs.Programs {
cpy.Programs[name] = spec.Copy()
}
return &cpy
}
// RewriteMaps replaces all references to specific maps.
//
// Use this function to use pre-existing maps instead of creating new ones
// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps.
//
// Returns an error if a named map isn't used in at least one program.
//
// Deprecated: Pass CollectionOptions.MapReplacements when loading the Collection
// instead.
func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
for symbol, m := range maps {
// have we seen a program that uses this symbol / map
seen := false
for progName, progSpec := range cs.Programs {
err := progSpec.Instructions.AssociateMap(symbol, m)
switch {
case err == nil:
seen = true
case errors.Is(err, asm.ErrUnreferencedSymbol):
// Not all programs need to use the map
default:
return fmt.Errorf("program %s: %w", progName, err)
}
}
if !seen {
return fmt.Errorf("map %s not referenced by any programs", symbol)
}
// Prevent NewCollection from creating rewritten maps
delete(cs.Maps, symbol)
}
return nil
}
// RewriteConstants replaces the value of multiple constants.
//
// The constant must be defined like so in the C program:
//
// volatile const type foobar;
// volatile const type foobar = default;
//
// Replacement values must be of the same length as the C sizeof(type).
// If necessary, they are marshalled according to the same rules as
// map values.
//
// From Linux 5.5 the verifier will use constants to eliminate dead code.
//
// Returns an error if a constant doesn't exist.
func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error {
replaced := make(map[string]bool)
for name, spec := range cs.Maps {
if !strings.HasPrefix(name, ".rodata") {
continue
}
b, ds, err := spec.dataSection()
if errors.Is(err, errMapNoBTFValue) {
// Data sections without a BTF Datasec are valid, but don't support
// constant replacements.
continue
}
if err != nil {
return fmt.Errorf("map %s: %w", name, err)
}
// MapSpec.Copy() performs a shallow copy. Fully copy the byte slice
// to avoid any changes affecting other copies of the MapSpec.
cpy := make([]byte, len(b))
copy(cpy, b)
for _, v := range ds.Vars {
vname := v.Type.TypeName()
replacement, ok := consts[vname]
if !ok {
continue
}
if replaced[vname] {
return fmt.Errorf("section %s: duplicate variable %s", name, vname)
}
if int(v.Offset+v.Size) > len(cpy) {
return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname)
}
b, err := marshalBytes(replacement, int(v.Size))
if err != nil {
return fmt.Errorf("marshaling constant replacement %s: %w", vname, err)
}
copy(cpy[v.Offset:v.Offset+v.Size], b)
replaced[vname] = true
}
spec.Contents[0] = MapKV{Key: uint32(0), Value: cpy}
}
var missing []string
for c := range consts {
if !replaced[c] {
missing = append(missing, c)
}
}
if len(missing) != 0 {
return fmt.Errorf("spec is missing one or more constants: %s", strings.Join(missing, ","))
}
return nil
}
// Assign the contents of a CollectionSpec to a struct.
//
// This function is a shortcut to manually checking the presence
// of maps and programs in a CollectionSpec. Consider using bpf2go
// if this sounds useful.
//
// 'to' must be a pointer to a struct. A field of the
// struct is updated with values from Programs or Maps if it
// has an `ebpf` tag and its type is *ProgramSpec or *MapSpec.
// The tag's value specifies the name of the program or map as
// found in the CollectionSpec.
//
// struct {
// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"`
// Bar *ebpf.MapSpec `ebpf:"bar_map"`
// Ignored int
// }
//
// Returns an error if any of the eBPF objects can't be found, or
// if the same MapSpec or ProgramSpec is assigned multiple times.
func (cs *CollectionSpec) Assign(to interface{}) error {
// Assign() only supports assigning ProgramSpecs and MapSpecs,
// so doesn't load any resources into the kernel.
getValue := func(typ reflect.Type, name string) (interface{}, error) {
switch typ {
case reflect.TypeOf((*ProgramSpec)(nil)):
if p := cs.Programs[name]; p != nil {
return p, nil
}
return nil, fmt.Errorf("missing program %q", name)
case reflect.TypeOf((*MapSpec)(nil)):
if m := cs.Maps[name]; m != nil {
return m, nil
}
return nil, fmt.Errorf("missing map %q", name)
default:
return nil, fmt.Errorf("unsupported type %s", typ)
}
}
return assignValues(to, getValue)
}
// LoadAndAssign loads Maps and Programs into the kernel and assigns them
// to a struct.
//
// Omitting Map/Program.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
//
// This function is a shortcut to manually checking the presence
// of maps and programs in a CollectionSpec. Consider using bpf2go
// if this sounds useful.
//
// 'to' must be a pointer to a struct. A field of the struct is updated with
// a Program or Map if it has an `ebpf` tag and its type is *Program or *Map.
// The tag's value specifies the name of the program or map as found in the
// CollectionSpec. Before updating the struct, the requested objects and their
// dependent resources are loaded into the kernel and populated with values if
// specified.
//
// struct {
// Foo *ebpf.Program `ebpf:"xdp_foo"`
// Bar *ebpf.Map `ebpf:"bar_map"`
// Ignored int
// }
//
// opts may be nil.
//
// Returns an error if any of the fields can't be found, or
// if the same Map or Program is assigned multiple times.
func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) error {
loader, err := newCollectionLoader(cs, opts)
if err != nil {
return err
}
defer loader.close()
// Support assigning Programs and Maps, lazy-loading the required objects.
assignedMaps := make(map[string]bool)
assignedProgs := make(map[string]bool)
getValue := func(typ reflect.Type, name string) (interface{}, error) {
switch typ {
case reflect.TypeOf((*Program)(nil)):
assignedProgs[name] = true
return loader.loadProgram(name)
case reflect.TypeOf((*Map)(nil)):
assignedMaps[name] = true
return loader.loadMap(name)
default:
return nil, fmt.Errorf("unsupported type %s", typ)
}
}
// Load the Maps and Programs requested by the annotated struct.
if err := assignValues(to, getValue); err != nil {
return err
}
// Populate the requested maps. Has a chance of lazy-loading other dependent maps.
if err := loader.populateMaps(); err != nil {
return err
}
// Evaluate the loader's objects after all (lazy)loading has taken place.
for n, m := range loader.maps {
switch m.typ {
case ProgramArray:
// Require all lazy-loaded ProgramArrays to be assigned to the given object.
// The kernel empties a ProgramArray once the last user space reference
// to it closes, which leads to failed tail calls. Combined with the library
// closing map fds via GC finalizers this can lead to surprising behaviour.
// Only allow unassigned ProgramArrays when the library hasn't pre-populated
// any entries from static value declarations. At this point, we know the map
// is empty and there's no way for the caller to interact with the map going
// forward.
if !assignedMaps[n] && len(cs.Maps[n].Contents) > 0 {
return fmt.Errorf("ProgramArray %s must be assigned to prevent missed tail calls", n)
}
}
}
// Prevent loader.cleanup() from closing assigned Maps and Programs.
for m := range assignedMaps {
delete(loader.maps, m)
}
for p := range assignedProgs {
delete(loader.programs, p)
}
return nil
}
// Collection is a collection of Programs and Maps associated
// with their symbols
type Collection struct {
Programs map[string]*Program
Maps map[string]*Map
}
// NewCollection creates a Collection from the given spec, creating and
// loading its declared resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollection(spec *CollectionSpec) (*Collection, error) {
return NewCollectionWithOptions(spec, CollectionOptions{})
}
// NewCollectionWithOptions creates a Collection from the given spec using
// options, creating and loading its declared resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) {
loader, err := newCollectionLoader(spec, &opts)
if err != nil {
return nil, err
}
defer loader.close()
// Create maps first, as their fds need to be linked into programs.
for mapName := range spec.Maps {
if _, err := loader.loadMap(mapName); err != nil {
return nil, err
}
}
for progName, prog := range spec.Programs {
if prog.Type == UnspecifiedProgram {
continue
}
if _, err := loader.loadProgram(progName); err != nil {
return nil, err
}
}
// Maps can contain Program and Map stubs, so populate them after
// all Maps and Programs have been successfully loaded.
if err := loader.populateMaps(); err != nil {
return nil, err
}
// Prevent loader.cleanup from closing maps and programs.
maps, progs := loader.maps, loader.programs
loader.maps, loader.programs = nil, nil
return &Collection{
progs,
maps,
}, nil
}
type handleCache struct {
btfHandles map[*btf.Spec]*btf.Handle
}
func newHandleCache() *handleCache {
return &handleCache{
btfHandles: make(map[*btf.Spec]*btf.Handle),
}
}
func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
if hc.btfHandles[spec] != nil {
return hc.btfHandles[spec], nil
}
handle, err := btf.NewHandle(spec)
if err != nil {
return nil, err
}
hc.btfHandles[spec] = handle
return handle, nil
}
func (hc handleCache) close() {
for _, handle := range hc.btfHandles {
handle.Close()
}
}
type collectionLoader struct {
coll *CollectionSpec
opts *CollectionOptions
maps map[string]*Map
programs map[string]*Program
handles *handleCache
}
func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) {
if opts == nil {
opts = &CollectionOptions{}
}
// Check for existing MapSpecs in the CollectionSpec for all provided replacement maps.
for name, m := range opts.MapReplacements {
spec, ok := coll.Maps[name]
if !ok {
return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name)
}
if err := spec.checkCompatibility(m); err != nil {
return nil, fmt.Errorf("using replacement map %s: %w", spec.Name, err)
}
}
return &collectionLoader{
coll,
opts,
make(map[string]*Map),
make(map[string]*Program),
newHandleCache(),
}, nil
}
// close all resources left over in the collectionLoader.
func (cl *collectionLoader) close() {
cl.handles.close()
for _, m := range cl.maps {
m.Close()
}
for _, p := range cl.programs {
p.Close()
}
}
func (cl *collectionLoader) loadMap(mapName string) (*Map, error) {
if m := cl.maps[mapName]; m != nil {
return m, nil
}
mapSpec := cl.coll.Maps[mapName]
if mapSpec == nil {
return nil, fmt.Errorf("missing map %s", mapName)
}
if mapSpec.BTF != nil && cl.coll.Types != mapSpec.BTF {
return nil, fmt.Errorf("map %s: BTF doesn't match collection", mapName)
}
if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok {
// Clone the map to avoid closing user's map later on.
m, err := replaceMap.Clone()
if err != nil {
return nil, err
}
cl.maps[mapName] = m
return m, nil
}
m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.handles)
if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err)
}
cl.maps[mapName] = m
return m, nil
}
func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {
if prog := cl.programs[progName]; prog != nil {
return prog, nil
}
progSpec := cl.coll.Programs[progName]
if progSpec == nil {
return nil, fmt.Errorf("unknown program %s", progName)
}
// Bail out early if we know the kernel is going to reject the program.
// This skips loading map dependencies, saving some cleanup work later.
if progSpec.Type == UnspecifiedProgram {
return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName)
}
if progSpec.BTF != nil && cl.coll.Types != progSpec.BTF {
return nil, fmt.Errorf("program %s: BTF doesn't match collection", progName)
}
progSpec = progSpec.Copy()
// Rewrite any reference to a valid map in the program's instructions,
// which includes all of its dependencies.
for i := range progSpec.Instructions {
ins := &progSpec.Instructions[i]
if !ins.IsLoadFromMap() || ins.Reference() == "" {
continue
}
// Don't overwrite map loads containing non-zero map fd's,
// they can be manually included by the caller.
// Map FDs/IDs are placed in the lower 32 bits of Constant.
if int32(ins.Constant) > 0 {
continue
}
m, err := cl.loadMap(ins.Reference())
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
}
if err := ins.AssociateMap(m); err != nil {
return nil, fmt.Errorf("program %s: map %s: %w", progName, ins.Reference(), err)
}
}
prog, err := newProgramWithOptions(progSpec, cl.opts.Programs, cl.handles)
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
}
cl.programs[progName] = prog
return prog, nil
}
func (cl *collectionLoader) populateMaps() error {
for mapName, m := range cl.maps {
mapSpec, ok := cl.coll.Maps[mapName]
if !ok {
return fmt.Errorf("missing map spec %s", mapName)
}
mapSpec = mapSpec.Copy()
// MapSpecs that refer to inner maps or programs within the same
// CollectionSpec do so using strings. These strings are used as the key
// to look up the respective object in the Maps or Programs fields.
// Resolve those references to actual Map or Program resources that
// have been loaded into the kernel.
for i, kv := range mapSpec.Contents {
if objName, ok := kv.Value.(string); ok {
switch mapSpec.Type {
case ProgramArray:
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(objName)
if err != nil {
return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
case ArrayOfMaps, HashOfMaps:
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(objName)
if err != nil {
return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, innerMap}
}
}
}
// Populate and freeze the map if specified.
if err := m.finalize(mapSpec); err != nil {
return fmt.Errorf("populating map %s: %w", mapName, err)
}
}
return nil
}
// LoadCollection reads an object file and creates and loads its declared
// resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func LoadCollection(file string) (*Collection, error) {
spec, err := LoadCollectionSpec(file)
if err != nil {
return nil, err
}
return NewCollection(spec)
}
// Close frees all maps and programs associated with the collection.
//
// The collection mustn't be used afterwards.
func (coll *Collection) Close() {
for _, prog := range coll.Programs {
prog.Close()
}
for _, m := range coll.Maps {
m.Close()
}
}
// DetachMap removes the named map from the Collection.
//
// This means that a later call to Close() will not affect this map.
//
// Returns nil if no map of that name exists.
func (coll *Collection) DetachMap(name string) *Map {
m := coll.Maps[name]
delete(coll.Maps, name)
return m
}
// DetachProgram removes the named program from the Collection.
//
// This means that a later call to Close() will not affect this program.
//
// Returns nil if no program of that name exists.
func (coll *Collection) DetachProgram(name string) *Program {
p := coll.Programs[name]
delete(coll.Programs, name)
return p
}
// structField represents a struct field containing the ebpf struct tag.
type structField struct {
reflect.StructField
value reflect.Value
}
// ebpfFields extracts field names tagged with 'ebpf' from a struct type.
// Keep track of visited types to avoid infinite recursion.
func ebpfFields(structVal reflect.Value, visited map[reflect.Type]bool) ([]structField, error) {
if visited == nil {
visited = make(map[reflect.Type]bool)
}
structType := structVal.Type()
if structType.Kind() != reflect.Struct {
return nil, fmt.Errorf("%s is not a struct", structType)
}
if visited[structType] {
return nil, fmt.Errorf("recursion on type %s", structType)
}
fields := make([]structField, 0, structType.NumField())
for i := 0; i < structType.NumField(); i++ {
field := structField{structType.Field(i), structVal.Field(i)}
// If the field is tagged, gather it and move on.
name := field.Tag.Get("ebpf")
if name != "" {
fields = append(fields, field)
continue
}
// If the field does not have an ebpf tag, but is a struct or a pointer
// to a struct, attempt to gather its fields as well.
var v reflect.Value
switch field.Type.Kind() {
case reflect.Ptr:
if field.Type.Elem().Kind() != reflect.Struct {
continue
}
if field.value.IsNil() {
return nil, fmt.Errorf("nil pointer to %s", structType)
}
// Obtain the destination type of the pointer.
v = field.value.Elem()
case reflect.Struct:
// Reference the value's type directly.
v = field.value
default:
continue
}
inner, err := ebpfFields(v, visited)
if err != nil {
return nil, fmt.Errorf("field %s: %w", field.Name, err)
}
fields = append(fields, inner...)
}
return fields, nil
}
// assignValues attempts to populate all fields of 'to' tagged with 'ebpf'.
//
// getValue is called for every tagged field of 'to' and must return the value
// to be assigned to the field with the given typ and name.
func assignValues(to interface{},
getValue func(typ reflect.Type, name string) (interface{}, error)) error {
toValue := reflect.ValueOf(to)
if toValue.Type().Kind() != reflect.Ptr {
return fmt.Errorf("%T is not a pointer to struct", to)
}
if toValue.IsNil() {
return fmt.Errorf("nil pointer to %T", to)
}
fields, err := ebpfFields(toValue.Elem(), nil)
if err != nil {
return err
}
type elem struct {
// Either *Map or *Program
typ reflect.Type
name string
}
assigned := make(map[elem]string)
for _, field := range fields {
// Get string value the field is tagged with.
tag := field.Tag.Get("ebpf")
if strings.Contains(tag, ",") {
return fmt.Errorf("field %s: ebpf tag contains a comma", field.Name)
}
// Check if the eBPF object with the requested
// type and tag was already assigned elsewhere.
e := elem{field.Type, tag}
if af := assigned[e]; af != "" {
return fmt.Errorf("field %s: object %q was already assigned to %s", field.Name, tag, af)
}
// Get the eBPF object referred to by the tag.
value, err := getValue(field.Type, tag)
if err != nil {
return fmt.Errorf("field %s: %w", field.Name, err)
}
if !field.value.CanSet() {
return fmt.Errorf("field %s: can't set value", field.Name)
}
field.value.Set(reflect.ValueOf(value))
assigned[e] = field.Name
}
return nil
}

25
vendor/github.com/cilium/ebpf/doc.go generated vendored
View file

@ -1,25 +0,0 @@
// Package ebpf is a toolkit for working with eBPF programs.
//
// eBPF programs are small snippets of code which are executed directly
// in a VM in the Linux kernel, which makes them very fast and flexible.
// Many Linux subsystems now accept eBPF programs. This makes it possible
// to implement highly application specific logic inside the kernel,
// without having to modify the actual kernel itself.
//
// This package is designed for long-running processes which
// want to use eBPF to implement part of their application logic. It has no
// run-time dependencies outside of the library and the Linux kernel itself.
// eBPF code should be compiled ahead of time using clang, and shipped with
// your application as any other resource.
//
// Use the link subpackage to attach a loaded program to a hook in the kernel.
//
// Note that losing all references to Map and Program resources will cause
// their underlying file descriptors to be closed, potentially removing those
// objects from the kernel. Always retain a reference by e.g. deferring a
// Close() of a Collection or LoadAndAssign object until application exit.
//
// Special care needs to be taken when handling maps of type ProgramArray,
// as the kernel erases its contents when the last userspace or bpffs
// reference disappears, regardless of the map being in active use.
package ebpf

File diff suppressed because it is too large Load diff

323
vendor/github.com/cilium/ebpf/info.go generated vendored
View file

@ -1,323 +0,0 @@
package ebpf
import (
"bufio"
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
"os"
"strings"
"syscall"
"time"
"unsafe"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// MapInfo describes a map.
type MapInfo struct {
Type MapType
id MapID
KeySize uint32
ValueSize uint32
MaxEntries uint32
Flags uint32
// Name as supplied by user space at load time. Available from 4.15.
Name string
}
func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) {
var info sys.MapInfo
err := sys.ObjInfo(fd, &info)
if errors.Is(err, syscall.EINVAL) {
return newMapInfoFromProc(fd)
}
if err != nil {
return nil, err
}
return &MapInfo{
MapType(info.Type),
MapID(info.Id),
info.KeySize,
info.ValueSize,
info.MaxEntries,
info.MapFlags,
unix.ByteSliceToString(info.Name[:]),
}, nil
}
func newMapInfoFromProc(fd *sys.FD) (*MapInfo, error) {
var mi MapInfo
err := scanFdInfo(fd, map[string]interface{}{
"map_type": &mi.Type,
"key_size": &mi.KeySize,
"value_size": &mi.ValueSize,
"max_entries": &mi.MaxEntries,
"map_flags": &mi.Flags,
})
if err != nil {
return nil, err
}
return &mi, nil
}
// ID returns the map ID.
//
// Available from 4.13.
//
// The bool return value indicates whether this optional field is available.
func (mi *MapInfo) ID() (MapID, bool) {
return mi.id, mi.id > 0
}
// programStats holds statistics of a program.
type programStats struct {
// Total accumulated runtime of the program ins ns.
runtime time.Duration
// Total number of times the program was called.
runCount uint64
}
// ProgramInfo describes a program.
type ProgramInfo struct {
Type ProgramType
id ProgramID
// Truncated hash of the BPF bytecode. Available from 4.13.
Tag string
// Name as supplied by user space at load time. Available from 4.15.
Name string
btf btf.ID
stats *programStats
maps []MapID
insns []byte
}
func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
var info sys.ProgInfo
err := sys.ObjInfo(fd, &info)
if errors.Is(err, syscall.EINVAL) {
return newProgramInfoFromProc(fd)
}
if err != nil {
return nil, err
}
pi := ProgramInfo{
Type: ProgramType(info.Type),
id: ProgramID(info.Id),
Tag: hex.EncodeToString(info.Tag[:]),
Name: unix.ByteSliceToString(info.Name[:]),
btf: btf.ID(info.BtfId),
stats: &programStats{
runtime: time.Duration(info.RunTimeNs),
runCount: info.RunCnt,
},
}
// Start with a clean struct for the second call, otherwise we may get EFAULT.
var info2 sys.ProgInfo
if info.NrMapIds > 0 {
pi.maps = make([]MapID, info.NrMapIds)
info2.NrMapIds = info.NrMapIds
info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0]))
}
if info.XlatedProgLen > 0 {
pi.insns = make([]byte, info.XlatedProgLen)
info2.XlatedProgLen = info.XlatedProgLen
info2.XlatedProgInsns = sys.NewSlicePointer(pi.insns)
}
if info.NrMapIds > 0 || info.XlatedProgLen > 0 {
if err := sys.ObjInfo(fd, &info2); err != nil {
return nil, err
}
}
return &pi, nil
}
func newProgramInfoFromProc(fd *sys.FD) (*ProgramInfo, error) {
var info ProgramInfo
err := scanFdInfo(fd, map[string]interface{}{
"prog_type": &info.Type,
"prog_tag": &info.Tag,
})
if errors.Is(err, errMissingFields) {
return nil, &internal.UnsupportedFeatureError{
Name: "reading program info from /proc/self/fdinfo",
MinimumVersion: internal.Version{4, 10, 0},
}
}
if err != nil {
return nil, err
}
return &info, nil
}
// ID returns the program ID.
//
// Available from 4.13.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) ID() (ProgramID, bool) {
return pi.id, pi.id > 0
}
// BTFID returns the BTF ID associated with the program.
//
// The ID is only valid as long as the associated program is kept alive.
// Available from 5.0.
//
// The bool return value indicates whether this optional field is available and
// populated. (The field may be available but not populated if the kernel
// supports the field but the program was loaded without BTF information.)
func (pi *ProgramInfo) BTFID() (btf.ID, bool) {
return pi.btf, pi.btf > 0
}
// RunCount returns the total number of times the program was called.
//
// Can return 0 if the collection of statistics is not enabled. See EnableStats().
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) RunCount() (uint64, bool) {
if pi.stats != nil {
return pi.stats.runCount, true
}
return 0, false
}
// Runtime returns the total accumulated runtime of the program.
//
// Can return 0 if the collection of statistics is not enabled. See EnableStats().
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) Runtime() (time.Duration, bool) {
if pi.stats != nil {
return pi.stats.runtime, true
}
return time.Duration(0), false
}
// Instructions returns the 'xlated' instruction stream of the program
// after it has been verified and rewritten by the kernel. These instructions
// cannot be loaded back into the kernel as-is, this is mainly used for
// inspecting loaded programs for troubleshooting, dumping, etc.
//
// For example, map accesses are made to reference their kernel map IDs,
// not the FDs they had when the program was inserted. Note that before
// the introduction of bpf_insn_prepare_dump in kernel 4.16, xlated
// instructions were not sanitized, making the output even less reusable
// and less likely to round-trip or evaluate to the same program Tag.
//
// The first instruction is marked as a symbol using the Program's name.
//
// Available from 4.13. Requires CAP_BPF or equivalent.
func (pi *ProgramInfo) Instructions() (asm.Instructions, error) {
// If the calling process is not BPF-capable or if the kernel doesn't
// support getting xlated instructions, the field will be zero.
if len(pi.insns) == 0 {
return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported)
}
r := bytes.NewReader(pi.insns)
var insns asm.Instructions
if err := insns.Unmarshal(r, internal.NativeEndian); err != nil {
return nil, fmt.Errorf("unmarshaling instructions: %w", err)
}
// Tag the first instruction with the name of the program, if available.
insns[0] = insns[0].WithSymbol(pi.Name)
return insns, nil
}
// MapIDs returns the maps related to the program.
//
// Available from 4.15.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) MapIDs() ([]MapID, bool) {
return pi.maps, pi.maps != nil
}
func scanFdInfo(fd *sys.FD, fields map[string]interface{}) error {
fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", fd.Int()))
if err != nil {
return err
}
defer fh.Close()
if err := scanFdInfoReader(fh, fields); err != nil {
return fmt.Errorf("%s: %w", fh.Name(), err)
}
return nil
}
var errMissingFields = errors.New("missing fields")
func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
var (
scanner = bufio.NewScanner(r)
scanned int
)
for scanner.Scan() {
parts := strings.SplitN(scanner.Text(), "\t", 2)
if len(parts) != 2 {
continue
}
name := strings.TrimSuffix(parts[0], ":")
field, ok := fields[string(name)]
if !ok {
continue
}
if n, err := fmt.Sscanln(parts[1], field); err != nil || n != 1 {
return fmt.Errorf("can't parse field %s: %v", name, err)
}
scanned++
}
if err := scanner.Err(); err != nil {
return err
}
if len(fields) > 0 && scanned == 0 {
return ErrNotSupported
}
if scanned != len(fields) {
return errMissingFields
}
return nil
}
// EnableStats starts the measuring of the runtime
// and run counts of eBPF programs.
//
// Collecting statistics can have an impact on the performance.
//
// Requires at least 5.8.
func EnableStats(which uint32) (io.Closer, error) {
fd, err := sys.EnableStats(&sys.EnableStatsAttr{
Type: which,
})
if err != nil {
return nil, err
}
return fd, nil
}

View file

@ -1,6 +0,0 @@
package internal
// Align returns 'n' updated to 'alignment' boundary.
func Align(n, alignment int) int {
return (int(n) + alignment - 1) / alignment * alignment
}

View file

@ -1,62 +0,0 @@
package internal
import (
"fmt"
"os"
"strings"
"sync"
)
var sysCPU struct {
once sync.Once
err error
num int
}
// PossibleCPUs returns the max number of CPUs a system may possibly have
// Logical CPU numbers must be of the form 0-n
func PossibleCPUs() (int, error) {
sysCPU.once.Do(func() {
sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
return sysCPU.num, sysCPU.err
}
func parseCPUsFromFile(path string) (int, error) {
spec, err := os.ReadFile(path)
if err != nil {
return 0, err
}
n, err := parseCPUs(string(spec))
if err != nil {
return 0, fmt.Errorf("can't parse %s: %v", path, err)
}
return n, nil
}
// parseCPUs parses the number of cpus from a string produced
// by bitmap_list_string() in the Linux kernel.
// Multiple ranges are rejected, since they can't be unified
// into a single number.
// This is the format of /sys/devices/system/cpu/possible, it
// is not suitable for /sys/devices/system/cpu/online, etc.
func parseCPUs(spec string) (int, error) {
if strings.Trim(spec, "\n") == "0" {
return 1, nil
}
var low, high int
n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
if n != 2 || err != nil {
return 0, fmt.Errorf("invalid format: %s", spec)
}
if low != 0 {
return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
}
// cpus is 0 indexed
return high + 1, nil
}

View file

@ -1,102 +0,0 @@
package internal
import (
"debug/elf"
"fmt"
"io"
)
type SafeELFFile struct {
*elf.File
}
// NewSafeELFFile reads an ELF safely.
//
// Any panic during parsing is turned into an error. This is necessary since
// there are a bunch of unfixed bugs in debug/elf.
//
// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle
func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) {
defer func() {
r := recover()
if r == nil {
return
}
safe = nil
err = fmt.Errorf("reading ELF file panicked: %s", r)
}()
file, err := elf.NewFile(r)
if err != nil {
return nil, err
}
return &SafeELFFile{file}, nil
}
// OpenSafeELFFile reads an ELF from a file.
//
// It works like NewSafeELFFile, with the exception that safe.Close will
// close the underlying file.
func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) {
defer func() {
r := recover()
if r == nil {
return
}
safe = nil
err = fmt.Errorf("reading ELF file panicked: %s", r)
}()
file, err := elf.Open(path)
if err != nil {
return nil, err
}
return &SafeELFFile{file}, nil
}
// Symbols is the safe version of elf.File.Symbols.
func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
defer func() {
r := recover()
if r == nil {
return
}
syms = nil
err = fmt.Errorf("reading ELF symbols panicked: %s", r)
}()
syms, err = se.File.Symbols()
return
}
// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
defer func() {
r := recover()
if r == nil {
return
}
syms = nil
err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r)
}()
syms, err = se.File.DynamicSymbols()
return
}
// SectionsByType returns all sections in the file with the specified section type.
func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section {
sections := make([]*elf.Section, 0, 1)
for _, section := range se.Sections {
if section.Type == typ {
sections = append(sections, section)
}
}
return sections
}

View file

@ -1,13 +0,0 @@
//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.BigEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "eb"

View file

@ -1,13 +0,0 @@
//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.LittleEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "el"

View file

@ -1,206 +0,0 @@
package internal
import (
"bytes"
"fmt"
"io"
"strings"
)
// ErrorWithLog returns an error which includes logs from the kernel verifier.
//
// The default error output is a summary of the full log. The latter can be
// accessed via VerifierError.Log or by formatting the error, see Format.
//
// A set of heuristics is used to determine whether the log has been truncated.
func ErrorWithLog(err error, log []byte) *VerifierError {
const whitespace = "\t\r\v\n "
// Convert verifier log C string by truncating it on the first 0 byte
// and trimming trailing whitespace before interpreting as a Go string.
truncated := false
if i := bytes.IndexByte(log, 0); i != -1 {
if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) {
// The null byte is at the end of the buffer and it's not preceded
// by a newline character. Most likely the buffer was too short.
truncated = true
}
log = log[:i]
} else if len(log) > 0 {
// No null byte? Dodgy!
truncated = true
}
log = bytes.Trim(log, whitespace)
logLines := bytes.Split(log, []byte{'\n'})
lines := make([]string, 0, len(logLines))
for _, line := range logLines {
// Don't remove leading white space on individual lines. We rely on it
// when outputting logs.
lines = append(lines, string(bytes.TrimRight(line, whitespace)))
}
return &VerifierError{err, lines, truncated}
}
// VerifierError includes information from the eBPF verifier.
//
// It summarises the log output, see Format if you want to output the full contents.
type VerifierError struct {
// The error which caused this error.
Cause error
// The verifier output split into lines.
Log []string
// Whether the log output is truncated, based on several heuristics.
Truncated bool
}
func (le *VerifierError) Unwrap() error {
return le.Cause
}
func (le *VerifierError) Error() string {
log := le.Log
if n := len(log); n > 0 && strings.HasPrefix(log[n-1], "processed ") {
// Get rid of "processed 39 insns (limit 1000000) ..." from summary.
log = log[:n-1]
}
n := len(log)
if n == 0 {
return le.Cause.Error()
}
lines := log[n-1:]
if n >= 2 && (includePreviousLine(log[n-1]) || le.Truncated) {
// Add one more line of context if it aids understanding the error.
lines = log[n-2:]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: ", le.Cause.Error())
for i, line := range lines {
b.WriteString(strings.TrimSpace(line))
if i != len(lines)-1 {
b.WriteString(": ")
}
}
omitted := len(le.Log) - len(lines)
if omitted == 0 && !le.Truncated {
return b.String()
}
b.WriteString(" (")
if le.Truncated {
b.WriteString("truncated")
}
if omitted > 0 {
if le.Truncated {
b.WriteString(", ")
}
fmt.Fprintf(&b, "%d line(s) omitted", omitted)
}
b.WriteString(")")
return b.String()
}
// includePreviousLine returns true if the given line likely is better
// understood with additional context from the preceding line.
func includePreviousLine(line string) bool {
// We need to find a good trade off between understandable error messages
// and too much complexity here. Checking the string prefix is ok, requiring
// regular expressions to do it is probably overkill.
if strings.HasPrefix(line, "\t") {
// [13] STRUCT drm_rect size=16 vlen=4
// \tx1 type_id=2
return true
}
if len(line) >= 2 && line[0] == 'R' && line[1] >= '0' && line[1] <= '9' {
// 0: (95) exit
// R0 !read_ok
return true
}
if strings.HasPrefix(line, "invalid bpf_context access") {
// 0: (79) r6 = *(u64 *)(r1 +0)
// func '__x64_sys_recvfrom' arg0 type FWD is not a struct
// invalid bpf_context access off=0 size=8
return true
}
return false
}
// Format the error.
//
// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
// allows outputting additional information using the following flags:
//
// + Output the first <width> lines, or all lines if no width is given.
// - Output the last <width> lines, or all lines if no width is given.
//
// Use width to specify how many lines to output. Use the '-' flag to output
// lines from the end of the log instead of the beginning.
func (le *VerifierError) Format(f fmt.State, verb rune) {
switch verb {
case 's':
_, _ = io.WriteString(f, le.Error())
case 'v':
n, haveWidth := f.Width()
if !haveWidth || n > len(le.Log) {
n = len(le.Log)
}
if !f.Flag('+') && !f.Flag('-') {
if haveWidth {
_, _ = io.WriteString(f, "%!v(BADWIDTH)")
return
}
_, _ = io.WriteString(f, le.Error())
return
}
if f.Flag('+') && f.Flag('-') {
_, _ = io.WriteString(f, "%!v(BADFLAG)")
return
}
fmt.Fprintf(f, "%s:", le.Cause.Error())
omitted := len(le.Log) - n
lines := le.Log[:n]
if f.Flag('-') {
// Print last instead of first lines.
lines = le.Log[len(le.Log)-n:]
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
for _, line := range lines {
fmt.Fprintf(f, "\n\t%s", line)
}
if !f.Flag('-') {
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
if le.Truncated {
fmt.Fprintf(f, "\n\t(truncated)")
}
default:
fmt.Fprintf(f, "%%!%c(BADVERB)", verb)
}
}

View file

@ -1,100 +0,0 @@
package internal
import (
"errors"
"fmt"
"sync"
)
// ErrNotSupported indicates that a feature is not supported by the current kernel.
var ErrNotSupported = errors.New("not supported")
// UnsupportedFeatureError is returned by FeatureTest() functions.
type UnsupportedFeatureError struct {
// The minimum Linux mainline version required for this feature.
// Used for the error string, and for sanity checking during testing.
MinimumVersion Version
// The name of the feature that isn't supported.
Name string
}
func (ufe *UnsupportedFeatureError) Error() string {
if ufe.MinimumVersion.Unspecified() {
return fmt.Sprintf("%s not supported", ufe.Name)
}
return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion)
}
// Is indicates that UnsupportedFeatureError is ErrNotSupported.
func (ufe *UnsupportedFeatureError) Is(target error) bool {
return target == ErrNotSupported
}
type featureTest struct {
sync.RWMutex
successful bool
result error
}
// FeatureTestFn is used to determine whether the kernel supports
// a certain feature.
//
// The return values have the following semantics:
//
// err == ErrNotSupported: the feature is not available
// err == nil: the feature is available
// err != nil: the test couldn't be executed
type FeatureTestFn func() error
// FeatureTest wraps a function so that it is run at most once.
//
// name should identify the tested feature, while version must be in the
// form Major.Minor[.Patch].
//
// Returns an error wrapping ErrNotSupported if the feature is not supported.
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
ft := new(featureTest)
return func() error {
ft.RLock()
if ft.successful {
defer ft.RUnlock()
return ft.result
}
ft.RUnlock()
ft.Lock()
defer ft.Unlock()
// check one more time on the off
// chance that two go routines
// were able to call into the write
// lock
if ft.successful {
return ft.result
}
err := fn()
switch {
case errors.Is(err, ErrNotSupported):
v, err := NewVersion(version)
if err != nil {
return err
}
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,
}
fallthrough
case err == nil:
ft.successful = true
default:
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("detect support for %s: %w", name, err)
}
return ft.result
}
}

View file

@ -1,62 +0,0 @@
package internal
import (
"bufio"
"compress/gzip"
"errors"
"io"
"os"
)
// NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized
// buffered reader. It is a convenience function for reading subsections of
// ELF sections while minimizing the amount of read() syscalls made.
//
// Syscall overhead is non-negligible in continuous integration context
// where ELFs might be accessed over virtual filesystems with poor random
// access performance. Buffering reads makes sense because (sub)sections
// end up being read completely anyway.
//
// Use instead of the r.Seek() + io.LimitReader() pattern.
func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader {
// Clamp the size of the buffer to one page to avoid slurping large parts
// of a file into memory. bufio.NewReader uses a hardcoded default buffer
// of 4096. Allow arches with larger pages to allocate more, but don't
// allocate a fixed 4k buffer if we only need to read a small segment.
buf := n
if ps := int64(os.Getpagesize()); n > ps {
buf = ps
}
return bufio.NewReaderSize(io.NewSectionReader(ra, off, n), int(buf))
}
// DiscardZeroes makes sure that all written bytes are zero
// before discarding them.
type DiscardZeroes struct{}
func (DiscardZeroes) Write(p []byte) (int, error) {
for _, b := range p {
if b != 0 {
return 0, errors.New("encountered non-zero byte")
}
}
return len(p), nil
}
// ReadAllCompressed decompresses a gzipped file into memory.
func ReadAllCompressed(file string) ([]byte, error) {
fh, err := os.Open(file)
if err != nil {
return nil, err
}
defer fh.Close()
gz, err := gzip.NewReader(fh)
if err != nil {
return nil, err
}
defer gz.Close()
return io.ReadAll(gz)
}

View file

@ -1,84 +0,0 @@
package internal
import (
"bytes"
"errors"
"go/format"
"go/scanner"
"io"
"strings"
"unicode"
)
// Identifier turns a C style type or field name into an exportable Go equivalent.
func Identifier(str string) string {
prev := rune(-1)
return strings.Map(func(r rune) rune {
// See https://golang.org/ref/spec#Identifiers
switch {
case unicode.IsLetter(r):
if prev == -1 {
r = unicode.ToUpper(r)
}
case r == '_':
switch {
// The previous rune was deleted, or we are at the
// beginning of the string.
case prev == -1:
fallthrough
// The previous rune is a lower case letter or a digit.
case unicode.IsDigit(prev) || (unicode.IsLetter(prev) && unicode.IsLower(prev)):
// delete the current rune, and force the
// next character to be uppercased.
r = -1
}
case unicode.IsDigit(r):
default:
// Delete the current rune. prev is unchanged.
return -1
}
prev = r
return r
}, str)
}
// WriteFormatted outputs a formatted src into out.
//
// If formatting fails it returns an informative error message.
func WriteFormatted(src []byte, out io.Writer) error {
formatted, err := format.Source(src)
if err == nil {
_, err = out.Write(formatted)
return err
}
var el scanner.ErrorList
if !errors.As(err, &el) {
return err
}
var nel scanner.ErrorList
for _, err := range el {
if !err.Pos.IsValid() {
nel = append(nel, err)
continue
}
buf := src[err.Pos.Offset:]
nl := bytes.IndexRune(buf, '\n')
if nl == -1 {
nel = append(nel, err)
continue
}
err.Msg += ": " + string(buf[:nl])
nel = append(nel, err)
}
return nel
}

View file

@ -1,77 +0,0 @@
package internal
import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
func Pin(currentPath, newPath string, fd *sys.FD) error {
const bpfFSType = 0xcafe4a11
if newPath == "" {
return errors.New("given pinning path cannot be empty")
}
if currentPath == newPath {
return nil
}
var statfs unix.Statfs_t
if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil {
return err
}
fsType := int64(statfs.Type)
if unsafe.Sizeof(statfs.Type) == 4 {
// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
// negative number when interpreted as int32 so we need to cast via
// uint32 to avoid sign extension.
fsType = int64(uint32(statfs.Type))
}
if fsType != bpfFSType {
return fmt.Errorf("%s is not on a bpf filesystem", newPath)
}
defer runtime.KeepAlive(fd)
if currentPath == "" {
return sys.ObjPin(&sys.ObjPinAttr{
Pathname: sys.NewStringPointer(newPath),
BpfFd: fd.Uint(),
})
}
// Renameat2 is used instead of os.Rename to disallow the new path replacing
// an existing path.
err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
if err == nil {
// Object is now moved to the new pinning path.
return nil
}
if !os.IsNotExist(err) {
return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err)
}
// Internal state not in sync with the file system so let's fix it.
return sys.ObjPin(&sys.ObjPinAttr{
Pathname: sys.NewStringPointer(newPath),
BpfFd: fd.Uint(),
})
}
func Unpin(pinnedPath string) error {
if pinnedPath == "" {
return nil
}
err := os.Remove(pinnedPath)
if err == nil || os.IsNotExist(err) {
return nil
}
return err
}

View file

@ -1,6 +0,0 @@
// Package sys contains bindings for the BPF syscall.
package sys
// Regenerate types.go by invoking go generate in the current directory.
//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz

View file

@ -1,96 +0,0 @@
package sys
import (
"fmt"
"math"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
var ErrClosedFd = unix.EBADF
type FD struct {
raw int
}
func newFD(value int) *FD {
fd := &FD{value}
runtime.SetFinalizer(fd, (*FD).Close)
return fd
}
// NewFD wraps a raw fd with a finalizer.
//
// You must not use the raw fd after calling this function, since the underlying
// file descriptor number may change. This is because the BPF UAPI assumes that
// zero is not a valid fd value.
func NewFD(value int) (*FD, error) {
if value < 0 {
return nil, fmt.Errorf("invalid fd %d", value)
}
fd := newFD(value)
if value != 0 {
return fd, nil
}
dup, err := fd.Dup()
_ = fd.Close()
return dup, err
}
func (fd *FD) String() string {
return strconv.FormatInt(int64(fd.raw), 10)
}
func (fd *FD) Int() int {
return fd.raw
}
func (fd *FD) Uint() uint32 {
if fd.raw < 0 || int64(fd.raw) > math.MaxUint32 {
// Best effort: this is the number most likely to be an invalid file
// descriptor. It is equal to -1 (on two's complement arches).
return math.MaxUint32
}
return uint32(fd.raw)
}
func (fd *FD) Close() error {
if fd.raw < 0 {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
}
func (fd *FD) Forget() {
runtime.SetFinalizer(fd, nil)
}
func (fd *FD) Dup() (*FD, error) {
if fd.raw < 0 {
return nil, ErrClosedFd
}
// Always require the fd to be larger than zero: the BPF API treats the value
// as "no argument provided".
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1)
if err != nil {
return nil, fmt.Errorf("can't dup fd: %v", err)
}
return newFD(dup), nil
}
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
}

View file

@ -1,38 +0,0 @@
package sys
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// NewPointer creates a 64-bit pointer from an unsafe Pointer.
func NewPointer(ptr unsafe.Pointer) Pointer {
return Pointer{ptr: ptr}
}
// NewSlicePointer creates a 64-bit pointer from a byte slice.
func NewSlicePointer(buf []byte) Pointer {
if len(buf) == 0 {
return Pointer{}
}
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}
// NewSlicePointer creates a 64-bit pointer from a byte slice.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
return NewSlicePointer(buf), uint32(len(buf))
}
// NewStringPointer creates a 64-bit pointer from a string.
func NewStringPointer(str string) Pointer {
p, err := unix.BytePtrFromString(str)
if err != nil {
return Pointer{}
}
return Pointer{ptr: unsafe.Pointer(p)}
}

View file

@ -1,15 +0,0 @@
//go:build armbe || mips || mips64p32
// +build armbe mips mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type Pointer struct {
pad uint32
ptr unsafe.Pointer
}

View file

@ -1,15 +0,0 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
// +build 386 amd64p32 arm mipsle mips64p32le
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type Pointer struct {
ptr unsafe.Pointer
pad uint32
}

View file

@ -1,14 +0,0 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type Pointer struct {
ptr unsafe.Pointer
}

View file

@ -1,126 +0,0 @@
package sys
import (
"runtime"
"syscall"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
for {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
// As of ~4.20 the verifier can be interrupted by a signal,
// and returns EAGAIN in that case.
if errNo == unix.EAGAIN && cmd == BPF_PROG_LOAD {
continue
}
var err error
if errNo != 0 {
err = wrappedErrno{errNo}
}
return r1, err
}
}
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
type Info interface {
info() (unsafe.Pointer, uint32)
}
var _ Info = (*MapInfo)(nil)
func (i *MapInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*ProgInfo)(nil)
func (i *ProgInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*LinkInfo)(nil)
func (i *LinkInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*BtfInfo)(nil)
func (i *BtfInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
// ObjInfo retrieves information about a BPF Fd.
//
// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo.
func ObjInfo(fd *FD, info Info) error {
ptr, len := info.info()
err := ObjGetInfoByFd(&ObjGetInfoByFdAttr{
BpfFd: fd.Uint(),
InfoLen: len,
Info: NewPointer(ptr),
})
runtime.KeepAlive(fd)
return err
}
// BPFObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters.
type ObjName [unix.BPF_OBJ_NAME_LEN]byte
// NewObjName truncates the result if it is too long.
func NewObjName(name string) ObjName {
var result ObjName
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
return result
}
// LinkID uniquely identifies a bpf_link.
type LinkID uint32
// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
// You should never export an error of this type.
type wrappedErrno struct {
syscall.Errno
}
func (we wrappedErrno) Unwrap() error {
return we.Errno
}
type syscallError struct {
error
errno syscall.Errno
}
func Error(err error, errno syscall.Errno) error {
return &syscallError{err, errno}
}
func (se *syscallError) Is(target error) bool {
return target == se.error
}
func (se *syscallError) Unwrap() error {
return se.errno
}

File diff suppressed because it is too large Load diff

View file

@ -1,210 +0,0 @@
//go:build linux
// +build linux
package unix
import (
"syscall"
linux "golang.org/x/sys/unix"
)
const (
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_SHARED = linux.MAP_SHARED
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
)
// Statfs_t is a wrapper
type Statfs_t = linux.Statfs_t
type Stat_t = linux.Stat_t
// Rlimit is a wrapper
type Rlimit = linux.Rlimit
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return linux.Syscall(trap, a1, a2, a3)
}
// FcntlInt is a wrapper
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return linux.FcntlInt(fd, cmd, arg)
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return linux.IoctlSetInt(fd, req, value)
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) (err error) {
return linux.Statfs(path, buf)
}
// Close is a wrapper
func Close(fd int) (err error) {
return linux.Close(fd)
}
// EpollEvent is a wrapper
type EpollEvent = linux.EpollEvent
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return linux.EpollWait(epfd, events, msec)
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return linux.EpollCtl(epfd, op, fd, event)
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return linux.Eventfd(initval, flags)
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return linux.Write(fd, p)
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return linux.EpollCreate1(flag)
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage linux.PerfEventMmapPage
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return linux.SetNonblock(fd, nonblocking)
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return linux.Mmap(fd, offset, length, prot, flags)
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return linux.Munmap(b)
}
// PerfEventAttr is a wrapper
type PerfEventAttr = linux.PerfEventAttr
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
// Utsname is a wrapper
type Utsname = linux.Utsname
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return linux.Uname(buf)
}
// Getpid is a wrapper
func Getpid() int {
return linux.Getpid()
}
// Gettid is a wrapper
func Gettid() int {
return linux.Gettid()
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return linux.Tgkill(tgid, tid, sig)
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return linux.BytePtrFromString(s)
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return linux.ByteSliceToString(s)
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
}
func Prlimit(pid, resource int, new, old *Rlimit) error {
return linux.Prlimit(pid, resource, new, old)
}
func Open(path string, mode int, perm uint32) (int, error) {
return linux.Open(path, mode, perm)
}
func Fstat(fd int, stat *Stat_t) error {
return linux.Fstat(fd, stat)
}

View file

@ -1,278 +0,0 @@
//go:build !linux
// +build !linux
package unix
import (
"fmt"
"runtime"
"syscall"
)
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
const (
ENOENT = syscall.ENOENT
EEXIST = syscall.EEXIST
EAGAIN = syscall.EAGAIN
ENOSPC = syscall.ENOSPC
EINVAL = syscall.EINVAL
EINTR = syscall.EINTR
EPERM = syscall.EPERM
ESRCH = syscall.ESRCH
ENODEV = syscall.ENODEV
EBADF = syscall.Errno(0)
E2BIG = syscall.Errno(0)
EFAULT = syscall.EFAULT
EACCES = syscall.Errno(0)
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = 0
BPF_F_NUMA_NODE = 0
BPF_F_RDONLY = 0
BPF_F_WRONLY = 0
BPF_F_RDONLY_PROG = 0
BPF_F_WRONLY_PROG = 0
BPF_F_SLEEPABLE = 0
BPF_F_MMAPABLE = 0
BPF_F_INNER_MAP = 0
BPF_OBJ_NAME_LEN = 0x10
BPF_TAG_SIZE = 0x8
BPF_RINGBUF_BUSY_BIT = 0
BPF_RINGBUF_DISCARD_BIT = 0
BPF_RINGBUF_HDR_SZ = 0
SYS_BPF = 321
F_DUPFD_CLOEXEC = 0x406
EPOLLIN = 0x1
EPOLL_CTL_ADD = 0x1
EPOLL_CLOEXEC = 0x80000
O_CLOEXEC = 0x80000
O_NONBLOCK = 0x800
PROT_READ = 0x1
PROT_WRITE = 0x2
MAP_SHARED = 0x1
PERF_ATTR_SIZE_VER1 = 0
PERF_TYPE_SOFTWARE = 0x1
PERF_TYPE_TRACEPOINT = 0
PERF_COUNT_SW_BPF_OUTPUT = 0xa
PERF_EVENT_IOC_DISABLE = 0
PERF_EVENT_IOC_ENABLE = 0
PERF_EVENT_IOC_SET_BPF = 0
PerfBitWatermark = 0x4000
PERF_SAMPLE_RAW = 0x400
PERF_FLAG_FD_CLOEXEC = 0x8
RLIM_INFINITY = 0x7fffffffffffffff
RLIMIT_MEMLOCK = 8
BPF_STATS_RUN_TIME = 0
PERF_RECORD_LOST = 2
PERF_RECORD_SAMPLE = 9
AT_FDCWD = -0x2
RENAME_NOREPLACE = 0x1
SO_ATTACH_BPF = 0x32
SO_DETACH_BPF = 0x1b
SOL_SOCKET = 0x1
)
// Statfs_t is a wrapper
type Statfs_t struct {
Type int64
Bsize int64
Blocks uint64
Bfree uint64
Bavail uint64
Files uint64
Ffree uint64
Fsid [2]int32
Namelen int64
Frsize int64
Flags int64
Spare [4]int64
}
type Stat_t struct{}
// Rlimit is a wrapper
type Rlimit struct {
Cur uint64
Max uint64
}
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.Errno(1)
}
// FcntlInt is a wrapper
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return -1, errNonLinux
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return errNonLinux
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) error {
return errNonLinux
}
// Close is a wrapper
func Close(fd int) (err error) {
return errNonLinux
}
// EpollEvent is a wrapper
type EpollEvent struct {
Events uint32
Fd int32
Pad int32
}
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return 0, errNonLinux
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return errNonLinux
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return 0, errNonLinux
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return 0, errNonLinux
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage struct {
Version uint32
Compat_version uint32
Lock uint32
Index uint32
Offset int64
Time_enabled uint64
Time_running uint64
Capabilities uint64
Pmc_width uint16
Time_shift uint16
Time_mult uint32
Time_offset uint64
Time_zero uint64
Size uint32
Data_head uint64
Data_tail uint64
Data_offset uint64
Data_size uint64
Aux_head uint64
Aux_tail uint64
Aux_offset uint64
Aux_size uint64
}
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return errNonLinux
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return []byte{}, errNonLinux
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return errNonLinux
}
// PerfEventAttr is a wrapper
type PerfEventAttr struct {
Type uint32
Size uint32
Config uint64
Sample uint64
Sample_type uint64
Read_format uint64
Bits uint64
Wakeup uint32
Bp_type uint32
Ext1 uint64
Ext2 uint64
Branch_sample_type uint64
Sample_regs_user uint64
Sample_stack_user uint32
Clockid int32
Sample_regs_intr uint64
Aux_watermark uint32
Sample_max_stack uint16
}
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Utsname is a wrapper
type Utsname struct {
Release [65]byte
Version [65]byte
}
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return errNonLinux
}
// Getpid is a wrapper
func Getpid() int {
return -1
}
// Gettid is a wrapper
func Gettid() int {
return -1
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return errNonLinux
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return nil, errNonLinux
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return ""
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return errNonLinux
}
func Prlimit(pid, resource int, new, old *Rlimit) error {
return errNonLinux
}
func Open(path string, mode int, perm uint32) (int, error) {
return -1, errNonLinux
}
func Fstat(fd int, stat *Stat_t) error {
return errNonLinux
}

View file

@ -1,150 +0,0 @@
package internal
import (
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/cilium/ebpf/internal/unix"
)
var (
errAuxvNoVDSO = errors.New("no vdso address found in auxv")
)
// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library
// linked into the current process image.
func vdsoVersion() (uint32, error) {
// Read data from the auxiliary vector, which is normally passed directly
// to the process. Go does not expose that data, so we must read it from procfs.
// https://man7.org/linux/man-pages/man3/getauxval.3.html
av, err := os.Open("/proc/self/auxv")
if err != nil {
return 0, fmt.Errorf("opening auxv: %w", err)
}
defer av.Close()
vdsoAddr, err := vdsoMemoryAddress(av)
if err != nil {
return 0, fmt.Errorf("finding vDSO memory address: %w", err)
}
// Use /proc/self/mem rather than unsafe.Pointer tricks.
mem, err := os.Open("/proc/self/mem")
if err != nil {
return 0, fmt.Errorf("opening mem: %w", err)
}
defer mem.Close()
// Open ELF at provided memory address, as offset into /proc/self/mem.
c, err := vdsoLinuxVersionCode(io.NewSectionReader(mem, int64(vdsoAddr), math.MaxInt64))
if err != nil {
return 0, fmt.Errorf("reading linux version code: %w", err)
}
return c, nil
}
// vdsoMemoryAddress returns the memory address of the vDSO library
// linked into the current process image. r is an io.Reader into an auxv blob.
func vdsoMemoryAddress(r io.Reader) (uint64, error) {
const (
_AT_NULL = 0 // End of vector
_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
)
// Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`,
// the address of a page containing the virtual Dynamic Shared Object (vDSO).
aux := struct{ Tag, Val uint64 }{}
for {
if err := binary.Read(r, NativeEndian, &aux); err != nil {
return 0, fmt.Errorf("reading auxv entry: %w", err)
}
switch aux.Tag {
case _AT_SYSINFO_EHDR:
if aux.Val != 0 {
return aux.Val, nil
}
return 0, fmt.Errorf("invalid vDSO address in auxv")
// _AT_NULL is always the last tag/val pair in the aux vector
// and can be treated like EOF.
case _AT_NULL:
return 0, errAuxvNoVDSO
}
}
}
// format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)'
type elfNoteHeader struct {
NameSize int32
DescSize int32
Type int32
}
// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in
// the ELF notes section of the binary provided by the reader.
func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
hdr, err := NewSafeELFFile(r)
if err != nil {
return 0, fmt.Errorf("reading vDSO ELF: %w", err)
}
sections := hdr.SectionsByType(elf.SHT_NOTE)
if len(sections) == 0 {
return 0, fmt.Errorf("no note section found in vDSO ELF")
}
for _, sec := range sections {
sr := sec.Open()
var n elfNoteHeader
// Read notes until we find one named 'Linux'.
for {
if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil {
if errors.Is(err, io.EOF) {
// We looked at all the notes in this section
break
}
return 0, fmt.Errorf("reading note header: %w", err)
}
// If a note name is defined, it follows the note header.
var name string
if n.NameSize > 0 {
// Read the note name, aligned to 4 bytes.
buf := make([]byte, Align(int(n.NameSize), 4))
if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil {
return 0, fmt.Errorf("reading note name: %w", err)
}
// Read nul-terminated string.
name = unix.ByteSliceToString(buf[:n.NameSize])
}
// If a note descriptor is defined, it follows the name.
// It is possible for a note to have a descriptor but not a name.
if n.DescSize > 0 {
// LINUX_VERSION_CODE is a uint32 value.
if name == "Linux" && n.DescSize == 4 && n.Type == 0 {
var version uint32
if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil {
return 0, fmt.Errorf("reading note descriptor: %w", err)
}
return version, nil
}
// Discard the note descriptor if it exists but we're not interested in it.
if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil {
return 0, err
}
}
}
}
return 0, fmt.Errorf("no Linux note in ELF")
}

View file

@ -1,122 +0,0 @@
package internal
import (
"fmt"
"sync"
"github.com/cilium/ebpf/internal/unix"
)
const (
// Version constant used in ELF binaries indicating that the loader needs to
// substitute the eBPF program's version with the value of the kernel's
// KERNEL_VERSION compile-time macro. Used for compatibility with BCC, gobpf
// and RedSift.
MagicKernelVersion = 0xFFFFFFFE
)
var (
kernelVersion = struct {
once sync.Once
version Version
err error
}{}
)
// A Version in the form Major.Minor.Patch.
type Version [3]uint16
// NewVersion creates a version from a string like "Major.Minor.Patch".
//
// Patch is optional.
func NewVersion(ver string) (Version, error) {
var major, minor, patch uint16
n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch)
if n < 2 {
return Version{}, fmt.Errorf("invalid version: %s", ver)
}
return Version{major, minor, patch}, nil
}
// NewVersionFromCode creates a version from a LINUX_VERSION_CODE.
func NewVersionFromCode(code uint32) Version {
return Version{
uint16(uint8(code >> 16)),
uint16(uint8(code >> 8)),
uint16(uint8(code)),
}
}
func (v Version) String() string {
if v[2] == 0 {
return fmt.Sprintf("v%d.%d", v[0], v[1])
}
return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2])
}
// Less returns true if the version is less than another version.
func (v Version) Less(other Version) bool {
for i, a := range v {
if a == other[i] {
continue
}
return a < other[i]
}
return false
}
// Unspecified returns true if the version is all zero.
func (v Version) Unspecified() bool {
return v[0] == 0 && v[1] == 0 && v[2] == 0
}
// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h.
// It represents the kernel version and patch level as a single value.
func (v Version) Kernel() uint32 {
// Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid
// overflowing into PATCHLEVEL.
// See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255").
s := v[2]
if s > 255 {
s = 255
}
// Truncate members to uint8 to prevent them from spilling over into
// each other when overflowing 8 bits.
return uint32(uint8(v[0]))<<16 | uint32(uint8(v[1]))<<8 | uint32(uint8(s))
}
// KernelVersion returns the version of the currently running kernel.
func KernelVersion() (Version, error) {
kernelVersion.once.Do(func() {
kernelVersion.version, kernelVersion.err = detectKernelVersion()
})
if kernelVersion.err != nil {
return Version{}, kernelVersion.err
}
return kernelVersion.version, nil
}
// detectKernelVersion returns the version of the running kernel.
func detectKernelVersion() (Version, error) {
vc, err := vdsoVersion()
if err != nil {
return Version{}, err
}
return NewVersionFromCode(vc), nil
}
// KernelRelease returns the release string of the running kernel.
// Its format depends on the Linux distribution and corresponds to directory
// names in /lib/modules by convention. Some examples are 5.15.17-1-lts and
// 4.19.0-16-amd64.
func KernelRelease() (string, error) {
var uname unix.Utsname
if err := unix.Uname(&uname); err != nil {
return "", fmt.Errorf("uname failed: %w", err)
}
return unix.ByteSliceToString(uname.Release[:]), nil
}

View file

@ -1,165 +0,0 @@
package link
import (
"errors"
"fmt"
"os"
"github.com/cilium/ebpf"
)
type cgroupAttachFlags uint32
// cgroup attach flags
const (
flagAllowOverride cgroupAttachFlags = 1 << iota
flagAllowMulti
flagReplace
)
type CgroupOptions struct {
// Path to a cgroupv2 folder.
Path string
// One of the AttachCgroup* constants
Attach ebpf.AttachType
// Program must be of type CGroup*, and the attach type must match Attach.
Program *ebpf.Program
}
// AttachCgroup links a BPF program to a cgroup.
func AttachCgroup(opts CgroupOptions) (Link, error) {
cgroup, err := os.Open(opts.Path)
if err != nil {
return nil, fmt.Errorf("can't open cgroup: %s", err)
}
clone, err := opts.Program.Clone()
if err != nil {
cgroup.Close()
return nil, err
}
var cg Link
cg, err = newLinkCgroup(cgroup, opts.Attach, clone)
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti)
}
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride)
}
if err != nil {
cgroup.Close()
clone.Close()
return nil, err
}
return cg, nil
}
type progAttachCgroup struct {
cgroup *os.File
current *ebpf.Program
attachType ebpf.AttachType
flags cgroupAttachFlags
}
var _ Link = (*progAttachCgroup)(nil)
func (cg *progAttachCgroup) isLink() {}
func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) {
if flags&flagAllowMulti > 0 {
if err := haveProgAttachReplace(); err != nil {
return nil, fmt.Errorf("can't support multiple programs: %w", err)
}
}
err := RawAttachProgram(RawAttachProgramOptions{
Target: int(cgroup.Fd()),
Program: prog,
Flags: uint32(flags),
Attach: attach,
})
if err != nil {
return nil, fmt.Errorf("cgroup: %w", err)
}
return &progAttachCgroup{cgroup, prog, attach, flags}, nil
}
func (cg *progAttachCgroup) Close() error {
defer cg.cgroup.Close()
defer cg.current.Close()
err := RawDetachProgram(RawDetachProgramOptions{
Target: int(cg.cgroup.Fd()),
Program: cg.current,
Attach: cg.attachType,
})
if err != nil {
return fmt.Errorf("close cgroup: %s", err)
}
return nil
}
func (cg *progAttachCgroup) Update(prog *ebpf.Program) error {
new, err := prog.Clone()
if err != nil {
return err
}
args := RawAttachProgramOptions{
Target: int(cg.cgroup.Fd()),
Program: prog,
Attach: cg.attachType,
Flags: uint32(cg.flags),
}
if cg.flags&flagAllowMulti > 0 {
// Atomically replacing multiple programs requires at least
// 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf
// program in MULTI mode")
args.Flags |= uint32(flagReplace)
args.Replace = cg.current
}
if err := RawAttachProgram(args); err != nil {
new.Close()
return fmt.Errorf("can't update cgroup: %s", err)
}
cg.current.Close()
cg.current = new
return nil
}
func (cg *progAttachCgroup) Pin(string) error {
return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
}
func (cg *progAttachCgroup) Unpin() error {
return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
}
func (cg *progAttachCgroup) Info() (*Info, error) {
return nil, fmt.Errorf("can't get cgroup info: %w", ErrNotSupported)
}
type linkCgroup struct {
RawLink
}
var _ Link = (*linkCgroup)(nil)
func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) {
link, err := AttachRawLink(RawLinkOptions{
Target: int(cgroup.Fd()),
Program: prog,
Attach: attach,
})
if err != nil {
return nil, err
}
return &linkCgroup{*link}, err
}

View file

@ -1,2 +0,0 @@
// Package link allows attaching eBPF programs to various kernel hooks.
package link

View file

@ -1,85 +0,0 @@
package link
import (
"fmt"
"io"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
)
type IterOptions struct {
// Program must be of type Tracing with attach type
// AttachTraceIter. The kind of iterator to attach to is
// determined at load time via the AttachTo field.
//
// AttachTo requires the kernel to include BTF of itself,
// and it to be compiled with a recent pahole (>= 1.16).
Program *ebpf.Program
// Map specifies the target map for bpf_map_elem and sockmap iterators.
// It may be nil.
Map *ebpf.Map
}
// AttachIter attaches a BPF seq_file iterator.
func AttachIter(opts IterOptions) (*Iter, error) {
if err := haveBPFLink(); err != nil {
return nil, err
}
progFd := opts.Program.FD()
if progFd < 0 {
return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
var info bpfIterLinkInfoMap
if opts.Map != nil {
mapFd := opts.Map.FD()
if mapFd < 0 {
return nil, fmt.Errorf("invalid map: %w", sys.ErrClosedFd)
}
info.map_fd = uint32(mapFd)
}
attr := sys.LinkCreateIterAttr{
ProgFd: uint32(progFd),
AttachType: sys.AttachType(ebpf.AttachTraceIter),
IterInfo: sys.NewPointer(unsafe.Pointer(&info)),
IterInfoLen: uint32(unsafe.Sizeof(info)),
}
fd, err := sys.LinkCreateIter(&attr)
if err != nil {
return nil, fmt.Errorf("can't link iterator: %w", err)
}
return &Iter{RawLink{fd, ""}}, err
}
// Iter represents an attached bpf_iter.
type Iter struct {
RawLink
}
// Open creates a new instance of the iterator.
//
// Reading from the returned reader triggers the BPF program.
func (it *Iter) Open() (io.ReadCloser, error) {
attr := &sys.IterCreateAttr{
LinkFd: it.fd.Uint(),
}
fd, err := sys.IterCreate(attr)
if err != nil {
return nil, fmt.Errorf("can't create iterator: %w", err)
}
return fd.File("bpf_iter"), nil
}
// union bpf_iter_link_info.map
type bpfIterLinkInfoMap struct {
map_fd uint32
}

View file

@ -1,568 +0,0 @@
package link
import (
"bytes"
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
var (
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
kprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
type probeType uint8
type probeArgs struct {
symbol, group, path string
offset, refCtrOffset, cookie uint64
pid int
ret bool
}
// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
// Offset of the kprobe relative to the traced symbol.
// Can be used to insert kprobes at arbitrary offsets in kernel functions,
// e.g. in places where functions have been inlined.
Offset uint64
}
const (
kprobeType probeType = iota
uprobeType
)
func (pt probeType) String() string {
if pt == kprobeType {
return "kprobe"
}
return "uprobe"
}
func (pt probeType) EventsPath() string {
if pt == kprobeType {
return kprobeEventsPath
}
return uprobeEventsPath
}
func (pt probeType) PerfEventType(ret bool) perfEventType {
if pt == kprobeType {
if ret {
return kretprobeEvent
}
return kprobeEvent
}
if ret {
return uretprobeEvent
}
return uprobeEvent
}
func (pt probeType) RetprobeBit() (uint64, error) {
if pt == kprobeType {
return kretprobeBit()
}
return uretprobeBit()
}
// Kprobe attaches the given eBPF program to a perf event that fires when the
// given kernel symbol starts executing. See /proc/kallsyms for available
// symbols. For example, printk():
//
// kp, err := Kprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, false)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}
return lnk, nil
}
// Kretprobe attaches the given eBPF program to a perf event that fires right
// before the given kernel symbol exits, with the function stack left intact.
// See /proc/kallsyms for available symbols. For example, printk():
//
// kp, err := Kretprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kretprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, true)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}
return lnk, nil
}
// isValidKprobeSymbol implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_.]*$".
func isValidKprobeSymbol(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
// Allow `.` in symbol name. GCC-compiled kernel may change symbol name
// to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
// See: https://gcc.gnu.org/gcc-10/changes.html
case i > 0 && c == '.':
default:
return false
}
}
return true
}
// kprobe opens a perf event on the given symbol and attaches prog to it.
// If ret is true, create a kretprobe.
func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) {
if symbol == "" {
return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !isValidKprobeSymbol(symbol) {
return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput)
}
if prog.Type() != ebpf.Kprobe {
return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
}
args := probeArgs{
pid: perfAllThreads,
symbol: symbol,
ret: ret,
}
if opts != nil {
args.cookie = opts.Cookie
args.offset = opts.Offset
}
// Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = pmuKprobe(args)
}
if err == nil {
return tp, nil
}
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err)
}
// Use tracefs if kprobe PMU is missing.
args.symbol = symbol
tp, err = tracefsKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = tracefsKprobe(args)
}
if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
}
return tp, nil
}
// pmuKprobe opens a perf event based on the kprobe PMU.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(kprobeType, args)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU.
et, err := getPMUEventType(typ)
if err != nil {
return nil, err
}
var config uint64
if args.ret {
bit, err := typ.RetprobeBit()
if err != nil {
return nil, err
}
config |= 1 << bit
}
var (
attr unix.PerfEventAttr
sp unsafe.Pointer
)
switch typ {
case kprobeType:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err = unsafeStringPtr(args.symbol)
if err != nil {
return nil, err
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Ext2: args.offset, // Kernel symbol offset
Config: config, // Retprobe flag
}
case uprobeType:
sp, err = unsafeStringPtr(args.path)
if err != nil {
return nil, err
}
if args.refCtrOffset != 0 {
config |= args.refCtrOffset << uprobeRefCtrOffsetShift
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
// size of the internal buffer the kernel allocates for reading the
// perf_event_attr argument from userspace.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: args.offset, // Uprobe offset
Config: config, // RefCtrOffset, Retprobe flag
}
}
rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") {
return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported)
}
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist)
}
// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
// when attempting to set a uprobe on a trap instruction.
if errors.Is(err, unix.ENOTSUPP) {
return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err)
}
if err != nil {
return nil, fmt.Errorf("opening perf event: %w", err)
}
// Ensure the string pointer is not collected before PerfEventOpen returns.
runtime.KeepAlive(sp)
fd, err := sys.NewFD(rawFd)
if err != nil {
return nil, err
}
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
return &perfEvent{
typ: typ.PerfEventType(args.ret),
name: args.symbol,
pmuID: et,
cookie: args.cookie,
fd: fd,
}, nil
}
// tracefsKprobe creates a Kprobe tracefs entry.
func tracefsKprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(kprobeType, args)
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
// A new trace event group name is generated on every call to support creating
// multiple trace events for the same kernel or userspace symbol.
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) {
// Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name.
group, err := randomGroup("ebpf")
if err != nil {
return nil, fmt.Errorf("randomizing group name: %w", err)
}
args.group = group
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
_, err = getTraceEventID(group, args.symbol)
if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err)
}
// Create the [k,u]probe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, args); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
defer func() {
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = closeTraceFSProbeEvent(typ, args.group, args.symbol)
}
}()
// Get the newly-created trace event's id.
tid, err := getTraceEventID(group, args.symbol)
if err != nil {
return nil, fmt.Errorf("getting trace event id: %w", err)
}
// Kprobes are ephemeral tracepoints and share the same perf event type.
fd, err := openTracepointPerfEvent(tid, args.pid)
if err != nil {
return nil, err
}
return &perfEvent{
typ: typ.PerfEventType(args.ret),
group: group,
name: args.symbol,
tracefsID: tid,
cookie: args.cookie,
fd: fd,
}, nil
}
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists.
func createTraceFSProbeEvent(typ probeType, args probeArgs) error {
// Open the kprobe_events file in tracefs.
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
}
defer f.Close()
var pe, token string
switch typ {
case kprobeType:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
token = kprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token)
case uprobeType:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
token = uprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
// EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token
// is resolved to an invalid insn boundary.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return fmt.Errorf("token %s: %w", token, os.ErrNotExist)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
}
return nil
}
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
// from <tracefs>/[k,u]probe_events.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
}
defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// for removals.
pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol))
if _, err = f.WriteString(pe); err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
}
return nil
}
// randomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by isValidTraceID.
func randomGroup(prefix string) (string, error) {
if !isValidTraceID(prefix) {
return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput)
}
b := make([]byte, 8)
if _, err := rand.Read(b); err != nil {
return "", fmt.Errorf("reading random bytes: %w", err)
}
group := fmt.Sprintf("%s_%x", prefix, b)
if len(group) > 63 {
return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput)
}
return group, nil
}
func probePrefix(ret bool) string {
if ret {
return "r"
}
return "p"
}
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
func determineRetprobeBit(typ probeType) (uint64, error) {
p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")
data, err := os.ReadFile(p)
if err != nil {
return 0, err
}
var rp uint64
n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp)
if err != nil {
return 0, fmt.Errorf("parse retprobe bit: %w", err)
}
if n != 1 {
return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
}
return rp, nil
}
func kretprobeBit() (uint64, error) {
kprobeRetprobeBit.once.Do(func() {
kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
})
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}
// kprobeToken creates the SYM[+offs] token for the tracefs api.
func kprobeToken(args probeArgs) string {
po := args.symbol
if args.offset != 0 {
po += fmt.Sprintf("+%#x", args.offset)
}
return po
}

View file

@ -1,313 +0,0 @@
package link
import (
"bytes"
"encoding/binary"
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
var ErrNotSupported = internal.ErrNotSupported
// Link represents a Program attached to a BPF hook.
type Link interface {
// Replace the current program with a new program.
//
// Passing a nil program is an error. May return an error wrapping ErrNotSupported.
Update(*ebpf.Program) error
// Persist a link by pinning it into a bpffs.
//
// May return an error wrapping ErrNotSupported.
Pin(string) error
// Undo a previous call to Pin.
//
// May return an error wrapping ErrNotSupported.
Unpin() error
// Close frees resources.
//
// The link will be broken unless it has been successfully pinned.
// A link may continue past the lifetime of the process if Close is
// not called.
Close() error
// Info returns metadata on a link.
//
// May return an error wrapping ErrNotSupported.
Info() (*Info, error)
// Prevent external users from implementing this interface.
isLink()
}
// LoadPinnedLink loads a link that was persisted into a bpffs.
func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
raw, err := loadPinnedRawLink(fileName, opts)
if err != nil {
return nil, err
}
return wrapRawLink(raw)
}
// wrap a RawLink in a more specific type if possible.
//
// The function takes ownership of raw and closes it on error.
func wrapRawLink(raw *RawLink) (Link, error) {
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
switch info.Type {
case RawTracepointType:
return &rawTracepoint{*raw}, nil
case TracingType:
return &tracing{*raw}, nil
case CgroupType:
return &linkCgroup{*raw}, nil
case IterType:
return &Iter{*raw}, nil
case NetNsType:
return &NetNsLink{*raw}, nil
default:
return raw, nil
}
}
// ID uniquely identifies a BPF link.
type ID = sys.LinkID
// RawLinkOptions control the creation of a raw link.
type RawLinkOptions struct {
// File descriptor to attach to. This differs for each attach type.
Target int
// Program to attach.
Program *ebpf.Program
// Attach must match the attach type of Program.
Attach ebpf.AttachType
// BTF is the BTF of the attachment target.
BTF btf.TypeID
// Flags control the attach behaviour.
Flags uint32
}
// Info contains metadata on a link.
type Info struct {
Type Type
ID ID
Program ebpf.ProgramID
extra interface{}
}
type TracingInfo sys.TracingLinkInfo
type CgroupInfo sys.CgroupLinkInfo
type NetNsInfo sys.NetNsLinkInfo
type XDPInfo sys.XDPLinkInfo
// Tracing returns tracing type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) Tracing() *TracingInfo {
e, _ := r.extra.(*TracingInfo)
return e
}
// Cgroup returns cgroup type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) Cgroup() *CgroupInfo {
e, _ := r.extra.(*CgroupInfo)
return e
}
// NetNs returns netns type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) NetNs() *NetNsInfo {
e, _ := r.extra.(*NetNsInfo)
return e
}
// ExtraNetNs returns XDP type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) XDP() *XDPInfo {
e, _ := r.extra.(*XDPInfo)
return e
}
// RawLink is the low-level API to bpf_link.
//
// You should consider using the higher level interfaces in this
// package instead.
type RawLink struct {
fd *sys.FD
pinnedPath string
}
// AttachRawLink creates a raw link.
func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
if err := haveBPFLink(); err != nil {
return nil, err
}
if opts.Target < 0 {
return nil, fmt.Errorf("invalid target: %s", sys.ErrClosedFd)
}
progFd := opts.Program.FD()
if progFd < 0 {
return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
attr := sys.LinkCreateAttr{
TargetFd: uint32(opts.Target),
ProgFd: uint32(progFd),
AttachType: sys.AttachType(opts.Attach),
TargetBtfId: uint32(opts.BTF),
Flags: opts.Flags,
}
fd, err := sys.LinkCreate(&attr)
if err != nil {
return nil, fmt.Errorf("can't create link: %s", err)
}
return &RawLink{fd, ""}, nil
}
func loadPinnedRawLink(fileName string, opts *ebpf.LoadPinOptions) (*RawLink, error) {
fd, err := sys.ObjGet(&sys.ObjGetAttr{
Pathname: sys.NewStringPointer(fileName),
FileFlags: opts.Marshal(),
})
if err != nil {
return nil, fmt.Errorf("load pinned link: %w", err)
}
return &RawLink{fd, fileName}, nil
}
func (l *RawLink) isLink() {}
// FD returns the raw file descriptor.
func (l *RawLink) FD() int {
return l.fd.Int()
}
// Close breaks the link.
//
// Use Pin if you want to make the link persistent.
func (l *RawLink) Close() error {
return l.fd.Close()
}
// Pin persists a link past the lifetime of the process.
//
// Calling Close on a pinned Link will not break the link
// until the pin is removed.
func (l *RawLink) Pin(fileName string) error {
if err := internal.Pin(l.pinnedPath, fileName, l.fd); err != nil {
return err
}
l.pinnedPath = fileName
return nil
}
// Unpin implements the Link interface.
func (l *RawLink) Unpin() error {
if err := internal.Unpin(l.pinnedPath); err != nil {
return err
}
l.pinnedPath = ""
return nil
}
// Update implements the Link interface.
func (l *RawLink) Update(new *ebpf.Program) error {
return l.UpdateArgs(RawLinkUpdateOptions{
New: new,
})
}
// RawLinkUpdateOptions control the behaviour of RawLink.UpdateArgs.
type RawLinkUpdateOptions struct {
New *ebpf.Program
Old *ebpf.Program
Flags uint32
}
// UpdateArgs updates a link based on args.
func (l *RawLink) UpdateArgs(opts RawLinkUpdateOptions) error {
newFd := opts.New.FD()
if newFd < 0 {
return fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
var oldFd int
if opts.Old != nil {
oldFd = opts.Old.FD()
if oldFd < 0 {
return fmt.Errorf("invalid replacement program: %s", sys.ErrClosedFd)
}
}
attr := sys.LinkUpdateAttr{
LinkFd: l.fd.Uint(),
NewProgFd: uint32(newFd),
OldProgFd: uint32(oldFd),
Flags: opts.Flags,
}
return sys.LinkUpdate(&attr)
}
// Info returns metadata about the link.
func (l *RawLink) Info() (*Info, error) {
var info sys.LinkInfo
if err := sys.ObjInfo(l.fd, &info); err != nil {
return nil, fmt.Errorf("link info: %s", err)
}
var extra interface{}
switch info.Type {
case CgroupType:
extra = &CgroupInfo{}
case IterType:
// not supported
case NetNsType:
extra = &NetNsInfo{}
case RawTracepointType:
// not supported
case TracingType:
extra = &TracingInfo{}
case XDPType:
extra = &XDPInfo{}
case PerfEventType:
// no extra
default:
return nil, fmt.Errorf("unknown link info type: %d", info.Type)
}
if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
buf := bytes.NewReader(info.Extra[:])
err := binary.Read(buf, internal.NativeEndian, extra)
if err != nil {
return nil, fmt.Errorf("can not read extra link info: %w", err)
}
}
return &Info{
info.Type,
info.Id,
ebpf.ProgramID(info.ProgId),
extra,
}, nil
}

View file

@ -1,36 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
)
// NetNsLink is a program attached to a network namespace.
type NetNsLink struct {
RawLink
}
// AttachNetNs attaches a program to a network namespace.
func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) {
var attach ebpf.AttachType
switch t := prog.Type(); t {
case ebpf.FlowDissector:
attach = ebpf.AttachFlowDissector
case ebpf.SkLookup:
attach = ebpf.AttachSkLookup
default:
return nil, fmt.Errorf("can't attach %v to network namespace", t)
}
link, err := AttachRawLink(RawLinkOptions{
Target: ns,
Program: prog,
Attach: attach,
})
if err != nil {
return nil, err
}
return &NetNsLink{*link}, nil
}

View file

@ -1,394 +0,0 @@
package link
import (
"bytes"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// Getting the terminology right is usually the hardest part. For posterity and
// for staying sane during implementation:
//
// - trace event: Representation of a kernel runtime hook. Filesystem entries
// under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
// Can be instantiated into perf events (see below).
// - tracepoint: A predetermined hook point in the kernel. Exposed as trace
// events in (sub)directories under <tracefs>/events. Cannot be closed or
// removed, they are static.
// - k(ret)probe: Ephemeral trace events based on entry or exit points of
// exported kernel symbols. kprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/kprobe_events file, or
// they can be scoped to the current process by creating PMU perf events.
// - u(ret)probe: Ephemeral trace events based on user provides ELF binaries
// and offsets. uprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/uprobe_events file, or
// they can be scoped to the current process by creating PMU perf events.
// - perf event: An object instantiated based on an existing trace event or
// kernel symbol. Referred to by fd in userspace.
// Exactly one eBPF program can be attached to a perf event. Multiple perf
// events can be created from a single trace event. Closing a perf event
// stops any further invocations of the attached eBPF program.
var (
tracefsPath = "/sys/kernel/debug/tracing"
errInvalidInput = errors.New("invalid input")
)
const (
perfAllThreads = -1
)
type perfEventType uint8
const (
tracepointEvent perfEventType = iota
kprobeEvent
kretprobeEvent
uprobeEvent
uretprobeEvent
)
// A perfEvent represents a perf event kernel object. Exactly one eBPF program
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
type perfEvent struct {
// The event type determines the types of programs that can be attached.
typ perfEventType
// Group and name of the tracepoint/kprobe/uprobe.
group string
name string
// PMU event ID read from sysfs. Valid IDs are non-zero.
pmuID uint64
// ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64
// User provided arbitrary value.
cookie uint64
// This is the perf event FD.
fd *sys.FD
}
func (pe *perfEvent) Close() error {
if err := pe.fd.Close(); err != nil {
return fmt.Errorf("closing perf event fd: %w", err)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent:
// Clean up kprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
}
case uprobeEvent, uretprobeEvent:
// Clean up uprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
}
case tracepointEvent:
// Tracepoint trace events don't hold any extra resources.
return nil
}
return nil
}
// perfEventLink represents a bpf perf link.
type perfEventLink struct {
RawLink
pe *perfEvent
}
func (pl *perfEventLink) isLink() {}
// Pinning requires the underlying perf event FD to stay open.
//
// | PerfEvent FD | BpfLink FD | Works |
// |--------------|------------|-------|
// | Open | Open | Yes |
// | Closed | Open | No |
// | Open | Closed | No (Pin() -> EINVAL) |
// | Closed | Closed | No (Pin() -> EINVAL) |
//
// There is currently no pretty way to recover the perf event FD
// when loading a pinned link, so leave as not supported for now.
func (pl *perfEventLink) Pin(string) error {
return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
}
func (pl *perfEventLink) Unpin() error {
return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
}
func (pl *perfEventLink) Close() error {
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event link close: %w", err)
}
return pl.fd.Close()
}
func (pl *perfEventLink) Update(prog *ebpf.Program) error {
return fmt.Errorf("perf event link update: %w", ErrNotSupported)
}
// perfEventIoctl implements Link and handles the perf event lifecycle
// via ioctl().
type perfEventIoctl struct {
*perfEvent
}
func (pi *perfEventIoctl) isLink() {}
// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
// owned by the perf event, which means multiple programs can be attached
// simultaneously.
//
// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
// returns EEXIST.
//
// Detaching a program from a perf event is currently not possible, so a
// program replacement mechanism cannot be implemented for perf events.
func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Pin(string) error {
return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Unpin() error {
return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Info() (*Info, error) {
return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
}
// attach the given eBPF prog to the perf event stored in pe.
// pe must contain a valid perf event fd.
// prog's type must match the program type stored in pe.
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
if prog == nil {
return nil, errors.New("cannot attach a nil program")
}
if prog.FD() < 0 {
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
}
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog)
}
return attachPerfEventIoctl(pe, prog)
}
func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
if pe.cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
return nil, fmt.Errorf("setting perf event bpf program: %w", err)
}
// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
return nil, fmt.Errorf("enable perf event: %s", err)
}
pi := &perfEventIoctl{pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
return pi, nil
}
// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
TargetFd: pe.fd.Uint(),
AttachType: sys.BPF_PERF_EVENT,
BpfCookie: pe.cookie,
})
if err != nil {
return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
}
pl := &perfEventLink{RawLink{fd: fd}, pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pl, (*perfEventLink).Close)
return pl, nil
}
// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
func unsafeStringPtr(str string) (unsafe.Pointer, error) {
p, err := unix.BytePtrFromString(str)
if err != nil {
return nil, err
}
return unsafe.Pointer(p), nil
}
// getTraceEventID reads a trace event's ID from tracefs given its group and name.
// The kernel requires group and name to be alphanumeric or underscore.
//
// name automatically has its invalid symbols converted to underscores so the caller
// can pass a raw symbol name, e.g. a kernel symbol containing dots.
func getTraceEventID(group, name string) (uint64, error) {
name = sanitizeSymbol(name)
tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
}
if err != nil {
return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
}
return tid, nil
}
// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
// from /sys/bus/event_source/devices/<pmu>/type.
//
// Returns ErrNotSupported if the pmu type is not supported.
func getPMUEventType(typ probeType) (uint64, error) {
et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
}
if err != nil {
return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
}
return et, nil
}
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
attr := unix.PerfEventAttr{
Type: unix.PERF_TYPE_TRACEPOINT,
Config: tid,
Sample_type: unix.PERF_SAMPLE_RAW,
Sample: 1,
Wakeup: 1,
}
fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
if err != nil {
return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
}
return sys.NewFD(fd)
}
// uint64FromFile reads a uint64 from a file. All elements of path are sanitized
// and joined onto base. Returns error if base no longer prefixes the path after
// joining all components.
func uint64FromFile(base string, path ...string) (uint64, error) {
l := filepath.Join(path...)
p := filepath.Join(base, l)
if !strings.HasPrefix(p, base) {
return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
}
data, err := os.ReadFile(p)
if err != nil {
return 0, fmt.Errorf("reading file %s: %w", p, err)
}
et := bytes.TrimSpace(data)
return strconv.ParseUint(string(et), 10, 64)
}
// Probe BPF perf link.
//
// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Name: "probe_bpf_perf_link",
Type: ebpf.Kprobe,
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
License: "MIT",
})
if err != nil {
return err
}
defer prog.Close()
_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
AttachType: sys.BPF_PERF_EVENT,
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
})
// isValidTraceID implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
func isValidTraceID(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
default:
return false
}
}
return true
}

View file

@ -1,25 +0,0 @@
package link
import (
"fmt"
"runtime"
)
func platformPrefix(symbol string) string {
prefix := runtime.GOARCH
// per https://github.com/golang/go/blob/master/src/go/build/syslist.go
switch prefix {
case "386":
prefix = "ia32"
case "amd64", "amd64p32":
prefix = "x64"
case "arm64", "arm64be":
prefix = "arm64"
default:
return symbol
}
return fmt.Sprintf("__%s_%s", prefix, symbol)
}

View file

@ -1,76 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
)
type RawAttachProgramOptions struct {
// File descriptor to attach to. This differs for each attach type.
Target int
// Program to attach.
Program *ebpf.Program
// Program to replace (cgroups).
Replace *ebpf.Program
// Attach must match the attach type of Program (and Replace).
Attach ebpf.AttachType
// Flags control the attach behaviour. This differs for each attach type.
Flags uint32
}
// RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH.
//
// You should use one of the higher level abstractions available in this
// package if possible.
func RawAttachProgram(opts RawAttachProgramOptions) error {
if err := haveProgAttach(); err != nil {
return err
}
var replaceFd uint32
if opts.Replace != nil {
replaceFd = uint32(opts.Replace.FD())
}
attr := sys.ProgAttachAttr{
TargetFd: uint32(opts.Target),
AttachBpfFd: uint32(opts.Program.FD()),
ReplaceBpfFd: replaceFd,
AttachType: uint32(opts.Attach),
AttachFlags: uint32(opts.Flags),
}
if err := sys.ProgAttach(&attr); err != nil {
return fmt.Errorf("can't attach program: %w", err)
}
return nil
}
type RawDetachProgramOptions struct {
Target int
Program *ebpf.Program
Attach ebpf.AttachType
}
// RawDetachProgram is a low level wrapper around BPF_PROG_DETACH.
//
// You should use one of the higher level abstractions available in this
// package if possible.
func RawDetachProgram(opts RawDetachProgramOptions) error {
if err := haveProgAttach(); err != nil {
return err
}
attr := sys.ProgDetachAttr{
TargetFd: uint32(opts.Target),
AttachBpfFd: uint32(opts.Program.FD()),
AttachType: uint32(opts.Attach),
}
if err := sys.ProgDetach(&attr); err != nil {
return fmt.Errorf("can't detach program: %w", err)
}
return nil
}

View file

@ -1,87 +0,0 @@
package link
import (
"errors"
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
)
type RawTracepointOptions struct {
// Tracepoint name.
Name string
// Program must be of type RawTracepoint*
Program *ebpf.Program
}
// AttachRawTracepoint links a BPF program to a raw_tracepoint.
//
// Requires at least Linux 4.17.
func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.RawTracepoint && t != ebpf.RawTracepointWritable {
return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t)
}
if opts.Program.FD() < 0 {
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
Name: sys.NewStringPointer(opts.Name),
ProgFd: uint32(opts.Program.FD()),
})
if err != nil {
return nil, err
}
err = haveBPFLink()
if errors.Is(err, ErrNotSupported) {
// Prior to commit 70ed506c3bbc ("bpf: Introduce pinnable bpf_link abstraction")
// raw_tracepoints are just a plain fd.
return &simpleRawTracepoint{fd}, nil
}
if err != nil {
return nil, err
}
return &rawTracepoint{RawLink{fd: fd}}, nil
}
type simpleRawTracepoint struct {
fd *sys.FD
}
var _ Link = (*simpleRawTracepoint)(nil)
func (frt *simpleRawTracepoint) isLink() {}
func (frt *simpleRawTracepoint) Close() error {
return frt.fd.Close()
}
func (frt *simpleRawTracepoint) Update(_ *ebpf.Program) error {
return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported)
}
func (frt *simpleRawTracepoint) Pin(string) error {
return fmt.Errorf("pin raw_tracepoint: %w", ErrNotSupported)
}
func (frt *simpleRawTracepoint) Unpin() error {
return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported)
}
func (frt *simpleRawTracepoint) Info() (*Info, error) {
return nil, fmt.Errorf("can't get raw_tracepoint info: %w", ErrNotSupported)
}
type rawTracepoint struct {
RawLink
}
var _ Link = (*rawTracepoint)(nil)
func (rt *rawTracepoint) Update(_ *ebpf.Program) error {
return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported)
}

View file

@ -1,40 +0,0 @@
package link
import (
"syscall"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/unix"
)
// AttachSocketFilter attaches a SocketFilter BPF program to a socket.
func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error {
rawConn, err := conn.SyscallConn()
if err != nil {
return err
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
})
if ssoErr != nil {
return ssoErr
}
return err
}
// DetachSocketFilter detaches a SocketFilter BPF program from a socket.
func DetachSocketFilter(conn syscall.Conn) error {
rawConn, err := conn.SyscallConn()
if err != nil {
return err
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
})
if ssoErr != nil {
return ssoErr
}
return err
}

View file

@ -1,103 +0,0 @@
package link
import (
"errors"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// Type is the kind of link.
type Type = sys.LinkType
// Valid link types.
const (
UnspecifiedType = sys.BPF_LINK_TYPE_UNSPEC
RawTracepointType = sys.BPF_LINK_TYPE_RAW_TRACEPOINT
TracingType = sys.BPF_LINK_TYPE_TRACING
CgroupType = sys.BPF_LINK_TYPE_CGROUP
IterType = sys.BPF_LINK_TYPE_ITER
NetNsType = sys.BPF_LINK_TYPE_NETNS
XDPType = sys.BPF_LINK_TYPE_XDP
PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT
)
var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
})
if err != nil {
return internal.ErrNotSupported
}
// BPF_PROG_ATTACH was introduced at the same time as CGgroupSKB,
// so being able to load the program is enough to infer that we
// have the syscall.
prog.Close()
return nil
})
var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error {
if err := haveProgAttach(); err != nil {
return err
}
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
AttachType: ebpf.AttachCGroupInetIngress,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
})
if err != nil {
return internal.ErrNotSupported
}
defer prog.Close()
// We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs.
// If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't
// present.
attr := sys.ProgAttachAttr{
// We rely on this being checked after attachFlags.
TargetFd: ^uint32(0),
AttachBpfFd: uint32(prog.FD()),
AttachType: uint32(ebpf.AttachCGroupInetIngress),
AttachFlags: uint32(flagReplace),
}
err = sys.ProgAttach(&attr)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
})
var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
attr := sys.LinkCreateAttr{
// This is a hopefully invalid file descriptor, which triggers EBADF.
TargetFd: ^uint32(0),
ProgFd: ^uint32(0),
AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress),
}
_, err := sys.LinkCreate(&attr)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
})

View file

@ -1,77 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
)
// TracepointOptions defines additional parameters that will be used
// when loading Tracepoints.
type TracepointOptions struct {
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
}
// Tracepoint attaches the given eBPF program to the tracepoint with the given
// group and name. See /sys/kernel/debug/tracing/events to find available
// tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example:
//
// tp, err := Tracepoint("syscalls", "sys_enter_fork", prog, nil)
//
// Losing the reference to the resulting Link (tp) will close the Tracepoint
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is
// only possible as of kernel 4.14 (commit cf5f5ce).
func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) (Link, error) {
if group == "" || name == "" {
return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !isValidTraceID(group) || !isValidTraceID(name) {
return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput)
}
if prog.Type() != ebpf.TracePoint {
return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput)
}
tid, err := getTraceEventID(group, name)
if err != nil {
return nil, err
}
fd, err := openTracepointPerfEvent(tid, perfAllThreads)
if err != nil {
return nil, err
}
var cookie uint64
if opts != nil {
cookie = opts.Cookie
}
pe := &perfEvent{
typ: tracepointEvent,
group: group,
name: name,
tracefsID: tid,
cookie: cookie,
fd: fd,
}
lnk, err := attachPerfEvent(pe, prog)
if err != nil {
pe.Close()
return nil, err
}
return lnk, nil
}

View file

@ -1,141 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal/sys"
)
type tracing struct {
RawLink
}
func (f *tracing) Update(new *ebpf.Program) error {
return fmt.Errorf("tracing update: %w", ErrNotSupported)
}
// AttachFreplace attaches the given eBPF program to the function it replaces.
//
// The program and name can either be provided at link time, or can be provided
// at program load time. If they were provided at load time, they should be nil
// and empty respectively here, as they will be ignored by the kernel.
// Examples:
//
// AttachFreplace(dispatcher, "function", replacement)
// AttachFreplace(nil, "", replacement)
func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (Link, error) {
if (name == "") != (targetProg == nil) {
return nil, fmt.Errorf("must provide both or neither of name and targetProg: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if prog.Type() != ebpf.Extension {
return nil, fmt.Errorf("eBPF program type %s is not an Extension: %w", prog.Type(), errInvalidInput)
}
var (
target int
typeID btf.TypeID
)
if targetProg != nil {
btfHandle, err := targetProg.Handle()
if err != nil {
return nil, err
}
defer btfHandle.Close()
spec, err := btfHandle.Spec(nil)
if err != nil {
return nil, err
}
var function *btf.Func
if err := spec.TypeByName(name, &function); err != nil {
return nil, err
}
target = targetProg.FD()
typeID, err = spec.TypeID(function)
if err != nil {
return nil, err
}
}
link, err := AttachRawLink(RawLinkOptions{
Target: target,
Program: prog,
Attach: ebpf.AttachNone,
BTF: typeID,
})
if err != nil {
return nil, err
}
return &tracing{*link}, nil
}
type TracingOptions struct {
// Program must be of type Tracing with attach type
// AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or
// AttachTraceRawTp.
Program *ebpf.Program
}
type LSMOptions struct {
// Program must be of type LSM with attach type
// AttachLSMMac.
Program *ebpf.Program
}
// attachBTFID links all BPF program types (Tracing/LSM) that they attach to a btf_id.
func attachBTFID(program *ebpf.Program) (Link, error) {
if program.FD() < 0 {
return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd)
}
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if err != nil {
return nil, err
}
raw := RawLink{fd: fd}
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
if info.Type == RawTracepointType {
// Sadness upon sadness: a Tracing program with AttachRawTp returns
// a raw_tracepoint link. Other types return a tracing link.
return &rawTracepoint{raw}, nil
}
return &tracing{RawLink: RawLink{fd: fd}}, nil
}
// AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or
// a BTF-powered raw tracepoint (tp_btf) BPF Program to a BPF hook defined
// in kernel modules.
func AttachTracing(opts TracingOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.Tracing {
return nil, fmt.Errorf("invalid program type %s, expected Tracing", t)
}
return attachBTFID(opts.Program)
}
// AttachLSM links a Linux security module (LSM) BPF Program to a BPF
// hook defined in kernel modules.
func AttachLSM(opts LSMOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.LSM {
return nil, fmt.Errorf("invalid program type %s, expected LSM", t)
}
return attachBTFID(opts.Program)
}

View file

@ -1,373 +0,0 @@
package link
import (
"debug/elf"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
)
var (
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799
uprobeRefCtrOffsetShift = 32
haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error {
_, err := os.Stat(uprobeRefCtrOffsetPMUPath)
if err != nil {
return internal.ErrNotSupported
}
return nil
})
// ErrNoSymbol indicates that the given symbol was not found
// in the ELF symbols table.
ErrNoSymbol = errors.New("not found")
)
// Executable defines an executable program on the filesystem.
type Executable struct {
// Path of the executable on the filesystem.
path string
// Parsed ELF and dynamic symbols' addresses.
addresses map[string]uint64
}
// UprobeOptions defines additional parameters that will be used
// when loading Uprobes.
type UprobeOptions struct {
// Symbol address. Must be provided in case of external symbols (shared libs).
// If set, overrides the address eventually parsed from the executable.
Address uint64
// The offset relative to given symbol. Useful when tracing an arbitrary point
// inside the frame of given symbol.
//
// Note: this field changed from being an absolute offset to being relative
// to Address.
Offset uint64
// Only set the uprobe on the given process ID. Useful when tracing
// shared library calls or programs that have many running instances.
PID int
// Automatically manage SDT reference counts (semaphores).
//
// If this field is set, the Kernel will increment/decrement the
// semaphore located in the process memory at the provided address on
// probe attach/detach.
//
// See also:
// sourceware.org/systemtap/wiki/UserSpaceProbeImplementation (Semaphore Handling)
// github.com/torvalds/linux/commit/1cc33161a83d
// github.com/torvalds/linux/commit/a6ca88b241d5
RefCtrOffset uint64
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
}
// To open a new Executable, use:
//
// OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
func OpenExecutable(path string) (*Executable, error) {
if path == "" {
return nil, fmt.Errorf("path cannot be empty")
}
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open file '%s': %w", path, err)
}
defer f.Close()
se, err := internal.NewSafeELFFile(f)
if err != nil {
return nil, fmt.Errorf("parse ELF file: %w", err)
}
if se.Type != elf.ET_EXEC && se.Type != elf.ET_DYN {
// ELF is not an executable or a shared object.
return nil, errors.New("the given file is not an executable or a shared object")
}
ex := Executable{
path: path,
addresses: make(map[string]uint64),
}
if err := ex.load(se); err != nil {
return nil, err
}
return &ex, nil
}
func (ex *Executable) load(f *internal.SafeELFFile) error {
syms, err := f.Symbols()
if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
return err
}
dynsyms, err := f.DynamicSymbols()
if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
return err
}
syms = append(syms, dynsyms...)
for _, s := range syms {
if elf.ST_TYPE(s.Info) != elf.STT_FUNC {
// Symbol not associated with a function or other executable code.
continue
}
address := s.Value
// Loop over ELF segments.
for _, prog := range f.Progs {
// Skip uninteresting segments.
if prog.Type != elf.PT_LOAD || (prog.Flags&elf.PF_X) == 0 {
continue
}
if prog.Vaddr <= s.Value && s.Value < (prog.Vaddr+prog.Memsz) {
// If the symbol value is contained in the segment, calculate
// the symbol offset.
//
// fn symbol offset = fn symbol VA - .text VA + .text offset
//
// stackoverflow.com/a/40249502
address = s.Value - prog.Vaddr + prog.Off
break
}
}
ex.addresses[s.Name] = address
}
return nil
}
// address calculates the address of a symbol in the executable.
//
// opts must not be nil.
func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error) {
if opts.Address > 0 {
return opts.Address + opts.Offset, nil
}
address, ok := ex.addresses[symbol]
if !ok {
return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
}
// Symbols with location 0 from section undef are shared library calls and
// are relocated before the binary is executed. Dynamic linking is not
// implemented by the library, so mark this as unsupported for now.
//
// Since only offset values are stored and not elf.Symbol, if the value is 0,
// assume it's an external symbol.
if address == 0 {
return 0, fmt.Errorf("cannot resolve %s library call '%s': %w "+
"(consider providing UprobeOptions.Address)", ex.path, symbol, ErrNotSupported)
}
return address + opts.Offset, nil
}
// Uprobe attaches the given eBPF program to a perf event that fires when the
// given symbol starts executing in the given Executable.
// For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog, nil)
//
// When using symbols which belongs to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
// Losing the reference to the resulting Link (up) will close the Uprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// Functions provided by shared libraries can currently not be traced and
// will result in an ErrNotSupported.
func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) {
u, err := ex.uprobe(symbol, prog, opts, false)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
if err != nil {
u.Close()
return nil, err
}
return lnk, nil
}
// Uretprobe attaches the given eBPF program to a perf event that fires right
// before the given symbol exits. For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog, nil)
//
// When using symbols which belongs to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
// Losing the reference to the resulting Link (up) will close the Uprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// Functions provided by shared libraries can currently not be traced and
// will result in an ErrNotSupported.
func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) {
u, err := ex.uprobe(symbol, prog, opts, true)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
if err != nil {
u.Close()
return nil, err
}
return lnk, nil
}
// uprobe opens a perf event for the given binary/symbol and attaches prog to it.
// If ret is true, create a uretprobe.
func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions, ret bool) (*perfEvent, error) {
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if prog.Type() != ebpf.Kprobe {
return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput)
}
if opts == nil {
opts = &UprobeOptions{}
}
offset, err := ex.address(symbol, opts)
if err != nil {
return nil, err
}
pid := opts.PID
if pid == 0 {
pid = perfAllThreads
}
if opts.RefCtrOffset != 0 {
if err := haveRefCtrOffsetPMU(); err != nil {
return nil, fmt.Errorf("uprobe ref_ctr_offset: %w", err)
}
}
args := probeArgs{
symbol: symbol,
path: ex.path,
offset: offset,
pid: pid,
refCtrOffset: opts.RefCtrOffset,
ret: ret,
cookie: opts.Cookie,
}
// Use uprobe PMU if the kernel has it available.
tp, err := pmuUprobe(args)
if err == nil {
return tp, nil
}
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err)
}
// Use tracefs if uprobe PMU is missing.
args.symbol = sanitizeSymbol(symbol)
tp, err = tracefsUprobe(args)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
}
return tp, nil
}
// pmuUprobe opens a perf event based on the uprobe PMU.
func pmuUprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(uprobeType, args)
}
// tracefsUprobe creates a Uprobe tracefs entry.
func tracefsUprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(uprobeType, args)
}
// sanitizeSymbol replaces every invalid character for the tracefs api with an underscore.
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeSymbol(s string) string {
var b strings.Builder
b.Grow(len(s))
var skip bool
for _, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z',
c >= 'A' && c <= 'Z',
c >= '0' && c <= '9':
skip = false
b.WriteByte(c)
default:
if !skip {
b.WriteByte('_')
skip = true
}
}
}
return b.String()
}
// uprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
func uprobeToken(args probeArgs) string {
po := fmt.Sprintf("%s:%#x", args.path, args.offset)
if args.refCtrOffset != 0 {
// This is not documented in Documentation/trace/uprobetracer.txt.
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
po += fmt.Sprintf("(%#x)", args.refCtrOffset)
}
return po
}
func uretprobeBit() (uint64, error) {
uprobeRetprobeBit.once.Do(func() {
uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType)
})
return uprobeRetprobeBit.value, uprobeRetprobeBit.err
}

View file

@ -1,54 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
)
// XDPAttachFlags represents how XDP program will be attached to interface.
type XDPAttachFlags uint32
const (
// XDPGenericMode (SKB) links XDP BPF program for drivers which do
// not yet support native XDP.
XDPGenericMode XDPAttachFlags = 1 << (iota + 1)
// XDPDriverMode links XDP BPF program into the drivers receive path.
XDPDriverMode
// XDPOffloadMode offloads the entire XDP BPF program into hardware.
XDPOffloadMode
)
type XDPOptions struct {
// Program must be an XDP BPF program.
Program *ebpf.Program
// Interface is the interface index to attach program to.
Interface int
// Flags is one of XDPAttachFlags (optional).
//
// Only one XDP mode should be set, without flag defaults
// to driver/generic mode (best effort).
Flags XDPAttachFlags
}
// AttachXDP links an XDP BPF program to an XDP hook.
func AttachXDP(opts XDPOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.XDP {
return nil, fmt.Errorf("invalid program type %s, expected XDP", t)
}
if opts.Interface < 1 {
return nil, fmt.Errorf("invalid interface index: %d", opts.Interface)
}
rawLink, err := AttachRawLink(RawLinkOptions{
Program: opts.Program,
Attach: ebpf.AttachXDP,
Target: opts.Interface,
Flags: uint32(opts.Flags),
})
return rawLink, err
}

View file

@ -1,238 +0,0 @@
package ebpf
import (
"errors"
"fmt"
"sync"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
)
// splitSymbols splits insns into subsections delimited by Symbol Instructions.
// insns cannot be empty and must start with a Symbol Instruction.
//
// The resulting map is indexed by Symbol name.
func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) {
if len(insns) == 0 {
return nil, errors.New("insns is empty")
}
if insns[0].Symbol() == "" {
return nil, errors.New("insns must start with a Symbol")
}
var name string
progs := make(map[string]asm.Instructions)
for _, ins := range insns {
if sym := ins.Symbol(); sym != "" {
if progs[sym] != nil {
return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym)
}
name = sym
}
progs[name] = append(progs[name], ins)
}
return progs, nil
}
// The linker is responsible for resolving bpf-to-bpf calls between programs
// within an ELF. Each BPF program must be a self-contained binary blob,
// so when an instruction in one ELF program section wants to jump to
// a function in another, the linker needs to pull in the bytecode
// (and BTF info) of the target function and concatenate the instruction
// streams.
//
// Later on in the pipeline, all call sites are fixed up with relative jumps
// within this newly-created instruction stream to then finally hand off to
// the kernel with BPF_PROG_LOAD.
//
// Each function is denoted by an ELF symbol and the compiler takes care of
// register setup before each jump instruction.
// hasFunctionReferences returns true if insns contains one or more bpf2bpf
// function references.
func hasFunctionReferences(insns asm.Instructions) bool {
for _, i := range insns {
if i.IsFunctionReference() {
return true
}
}
return false
}
// applyRelocations collects and applies any CO-RE relocations in insns.
//
// Passing a nil target will relocate against the running kernel. insns are
// modified in place.
func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error {
var relos []*btf.CORERelocation
var reloInsns []*asm.Instruction
iter := insns.Iterate()
for iter.Next() {
if relo := btf.CORERelocationMetadata(iter.Ins); relo != nil {
relos = append(relos, relo)
reloInsns = append(reloInsns, iter.Ins)
}
}
if len(relos) == 0 {
return nil
}
target, err := maybeLoadKernelBTF(target)
if err != nil {
return err
}
fixups, err := btf.CORERelocate(local, target, relos)
if err != nil {
return err
}
for i, fixup := range fixups {
if err := fixup.Apply(reloInsns[i]); err != nil {
return fmt.Errorf("apply fixup %s: %w", &fixup, err)
}
}
return nil
}
// flattenPrograms resolves bpf-to-bpf calls for a set of programs.
//
// Links all programs in names by modifying their ProgramSpec in progs.
func flattenPrograms(progs map[string]*ProgramSpec, names []string) {
// Pre-calculate all function references.
refs := make(map[*ProgramSpec][]string)
for _, prog := range progs {
refs[prog] = prog.Instructions.FunctionReferences()
}
// Create a flattened instruction stream, but don't modify progs yet to
// avoid linking multiple times.
flattened := make([]asm.Instructions, 0, len(names))
for _, name := range names {
flattened = append(flattened, flattenInstructions(name, progs, refs))
}
// Finally, assign the flattened instructions.
for i, name := range names {
progs[name].Instructions = flattened[i]
}
}
// flattenInstructions resolves bpf-to-bpf calls for a single program.
//
// Flattens the instructions of prog by concatenating the instructions of all
// direct and indirect dependencies.
//
// progs contains all referenceable programs, while refs contain the direct
// dependencies of each program.
func flattenInstructions(name string, progs map[string]*ProgramSpec, refs map[*ProgramSpec][]string) asm.Instructions {
prog := progs[name]
insns := make(asm.Instructions, len(prog.Instructions))
copy(insns, prog.Instructions)
// Add all direct references of prog to the list of to be linked programs.
pending := make([]string, len(refs[prog]))
copy(pending, refs[prog])
// All references for which we've appended instructions.
linked := make(map[string]bool)
// Iterate all pending references. We can't use a range since pending is
// modified in the body below.
for len(pending) > 0 {
var ref string
ref, pending = pending[0], pending[1:]
if linked[ref] {
// We've already linked this ref, don't append instructions again.
continue
}
progRef := progs[ref]
if progRef == nil {
// We don't have instructions that go with this reference. This
// happens when calling extern functions.
continue
}
insns = append(insns, progRef.Instructions...)
linked[ref] = true
// Make sure we link indirect references.
pending = append(pending, refs[progRef]...)
}
return insns
}
// fixupAndValidate is called by the ELF reader right before marshaling the
// instruction stream. It performs last-minute adjustments to the program and
// runs some sanity checks before sending it off to the kernel.
func fixupAndValidate(insns asm.Instructions) error {
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
// Map load was tagged with a Reference, but does not contain a Map pointer.
if ins.IsLoadFromMap() && ins.Reference() != "" && ins.Map() == nil {
return fmt.Errorf("instruction %d: map %s: %w", iter.Index, ins.Reference(), asm.ErrUnsatisfiedMapReference)
}
fixupProbeReadKernel(ins)
}
return nil
}
// fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str)
// with bpf_probe_read(_str) on kernels that don't support it yet.
func fixupProbeReadKernel(ins *asm.Instruction) {
if !ins.IsBuiltinCall() {
return
}
// Kernel supports bpf_probe_read_kernel, nothing to do.
if haveProbeReadKernel() == nil {
return
}
switch asm.BuiltinFunc(ins.Constant) {
case asm.FnProbeReadKernel, asm.FnProbeReadUser:
ins.Constant = int64(asm.FnProbeRead)
case asm.FnProbeReadKernelStr, asm.FnProbeReadUserStr:
ins.Constant = int64(asm.FnProbeReadStr)
}
}
var kernelBTF struct {
sync.Mutex
spec *btf.Spec
}
// maybeLoadKernelBTF loads the current kernel's BTF if spec is nil, otherwise
// it returns spec unchanged.
//
// The kernel BTF is cached for the lifetime of the process.
func maybeLoadKernelBTF(spec *btf.Spec) (*btf.Spec, error) {
if spec != nil {
return spec, nil
}
kernelBTF.Lock()
defer kernelBTF.Unlock()
if kernelBTF.spec != nil {
return kernelBTF.spec, nil
}
var err error
kernelBTF.spec, err = btf.LoadKernelSpec()
return kernelBTF.spec, err
}

1424
vendor/github.com/cilium/ebpf/map.go generated vendored

File diff suppressed because it is too large Load diff

View file

@ -1,247 +0,0 @@
package ebpf
import (
"bytes"
"encoding"
"encoding/binary"
"errors"
"fmt"
"reflect"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
// marshalPtr converts an arbitrary value into a pointer suitable
// to be passed to the kernel.
//
// As an optimization, it returns the original value if it is an
// unsafe.Pointer.
func marshalPtr(data interface{}, length int) (sys.Pointer, error) {
if ptr, ok := data.(unsafe.Pointer); ok {
return sys.NewPointer(ptr), nil
}
buf, err := marshalBytes(data, length)
if err != nil {
return sys.Pointer{}, err
}
return sys.NewSlicePointer(buf), nil
}
// marshalBytes converts an arbitrary value into a byte buffer.
//
// Prefer using Map.marshalKey and Map.marshalValue if possible, since
// those have special cases that allow more types to be encoded.
//
// Returns an error if the given value isn't representable in exactly
// length bytes.
func marshalBytes(data interface{}, length int) (buf []byte, err error) {
if data == nil {
return nil, errors.New("can't marshal a nil value")
}
switch value := data.(type) {
case encoding.BinaryMarshaler:
buf, err = value.MarshalBinary()
case string:
buf = []byte(value)
case []byte:
buf = value
case unsafe.Pointer:
err = errors.New("can't marshal from unsafe.Pointer")
case Map, *Map, Program, *Program:
err = fmt.Errorf("can't marshal %T", value)
default:
var wr bytes.Buffer
err = binary.Write(&wr, internal.NativeEndian, value)
if err != nil {
err = fmt.Errorf("encoding %T: %v", value, err)
}
buf = wr.Bytes()
}
if err != nil {
return nil, err
}
if len(buf) != length {
return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length)
}
return buf, nil
}
func makeBuffer(dst interface{}, length int) (sys.Pointer, []byte) {
if ptr, ok := dst.(unsafe.Pointer); ok {
return sys.NewPointer(ptr), nil
}
buf := make([]byte, length)
return sys.NewSlicePointer(buf), buf
}
var bytesReaderPool = sync.Pool{
New: func() interface{} {
return new(bytes.Reader)
},
}
// unmarshalBytes converts a byte buffer into an arbitrary value.
//
// Prefer using Map.unmarshalKey and Map.unmarshalValue if possible, since
// those have special cases that allow more types to be encoded.
//
// The common int32 and int64 types are directly handled to avoid
// unnecessary heap allocations as happening in the default case.
func unmarshalBytes(data interface{}, buf []byte) error {
switch value := data.(type) {
case unsafe.Pointer:
dst := unsafe.Slice((*byte)(value), len(buf))
copy(dst, buf)
runtime.KeepAlive(value)
return nil
case Map, *Map, Program, *Program:
return fmt.Errorf("can't unmarshal into %T", value)
case encoding.BinaryUnmarshaler:
return value.UnmarshalBinary(buf)
case *string:
*value = string(buf)
return nil
case *[]byte:
*value = buf
return nil
case *int32:
if len(buf) < 4 {
return errors.New("int32 requires 4 bytes")
}
*value = int32(internal.NativeEndian.Uint32(buf))
return nil
case *uint32:
if len(buf) < 4 {
return errors.New("uint32 requires 4 bytes")
}
*value = internal.NativeEndian.Uint32(buf)
return nil
case *int64:
if len(buf) < 8 {
return errors.New("int64 requires 8 bytes")
}
*value = int64(internal.NativeEndian.Uint64(buf))
return nil
case *uint64:
if len(buf) < 8 {
return errors.New("uint64 requires 8 bytes")
}
*value = internal.NativeEndian.Uint64(buf)
return nil
case string:
return errors.New("require pointer to string")
case []byte:
return errors.New("require pointer to []byte")
default:
rd := bytesReaderPool.Get().(*bytes.Reader)
rd.Reset(buf)
defer bytesReaderPool.Put(rd)
if err := binary.Read(rd, internal.NativeEndian, value); err != nil {
return fmt.Errorf("decoding %T: %v", value, err)
}
return nil
}
}
// marshalPerCPUValue encodes a slice containing one value per
// possible CPU into a buffer of bytes.
//
// Values are initialized to zero if the slice has less elements than CPUs.
//
// slice must have a type like []elementType.
func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error) {
sliceType := reflect.TypeOf(slice)
if sliceType.Kind() != reflect.Slice {
return sys.Pointer{}, errors.New("per-CPU value requires slice")
}
possibleCPUs, err := internal.PossibleCPUs()
if err != nil {
return sys.Pointer{}, err
}
sliceValue := reflect.ValueOf(slice)
sliceLen := sliceValue.Len()
if sliceLen > possibleCPUs {
return sys.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs")
}
alignedElemLength := internal.Align(elemLength, 8)
buf := make([]byte, alignedElemLength*possibleCPUs)
for i := 0; i < sliceLen; i++ {
elem := sliceValue.Index(i).Interface()
elemBytes, err := marshalBytes(elem, elemLength)
if err != nil {
return sys.Pointer{}, err
}
offset := i * alignedElemLength
copy(buf[offset:offset+elemLength], elemBytes)
}
return sys.NewSlicePointer(buf), nil
}
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
// possible CPU.
//
// valueOut must have a type like *[]elementType
func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
slicePtrType := reflect.TypeOf(slicePtr)
if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
return fmt.Errorf("per-cpu value requires pointer to slice")
}
possibleCPUs, err := internal.PossibleCPUs()
if err != nil {
return err
}
sliceType := slicePtrType.Elem()
slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
sliceElemType := sliceType.Elem()
sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
if sliceElemIsPointer {
sliceElemType = sliceElemType.Elem()
}
step := len(buf) / possibleCPUs
if step < elemLength {
return fmt.Errorf("per-cpu element length is larger than available data")
}
for i := 0; i < possibleCPUs; i++ {
var elem interface{}
if sliceElemIsPointer {
newElem := reflect.New(sliceElemType)
slice.Index(i).Set(newElem)
elem = newElem.Interface()
} else {
elem = slice.Index(i).Addr().Interface()
}
// Make a copy, since unmarshal can hold on to itemBytes
elemBytes := make([]byte, elemLength)
copy(elemBytes, buf[:elemLength])
err := unmarshalBytes(elem, elemBytes)
if err != nil {
return fmt.Errorf("cpu %d: %w", i, err)
}
buf = buf[step:]
}
reflect.ValueOf(slicePtr).Elem().Set(slice)
return nil
}

875
vendor/github.com/cilium/ebpf/prog.go generated vendored
View file

@ -1,875 +0,0 @@
package ebpf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// ErrNotSupported is returned whenever the kernel doesn't support a feature.
var ErrNotSupported = internal.ErrNotSupported
// ProgramID represents the unique ID of an eBPF program.
type ProgramID uint32
const (
// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
// This is currently the maximum of spare space allocated for SKB
// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
outputPad = 256 + 2
)
// DefaultVerifierLogSize is the default number of bytes allocated for the
// verifier log.
const DefaultVerifierLogSize = 64 * 1024
// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
// Controls the detail emitted by the kernel verifier. Set to non-zero
// to enable logging.
LogLevel uint32
// Controls the output buffer size for the verifier. Defaults to
// DefaultVerifierLogSize.
LogSize int
// Type information used for CO-RE relocations and when attaching to
// kernel functions.
//
// This is useful in environments where the kernel BTF is not available
// (containers) or where it is in a non-standard location. Defaults to
// use the kernel BTF from a well-known location if nil.
KernelTypes *btf.Spec
}
// ProgramSpec defines a Program.
type ProgramSpec struct {
// Name is passed to the kernel as a debug aid. Must only contain
// alpha numeric and '_' characters.
Name string
// Type determines at which hook in the kernel a program will run.
Type ProgramType
// AttachType of the program, needed to differentiate allowed context
// accesses in some newer program types like CGroupSockAddr.
//
// Available on kernels 4.17 and later.
AttachType AttachType
// Name of a kernel data structure or function to attach to. Its
// interpretation depends on Type and AttachType.
AttachTo string
// The program to attach to. Must be provided manually.
AttachTarget *Program
// The name of the ELF section this program orininated from.
SectionName string
Instructions asm.Instructions
// Flags is passed to the kernel and specifies additional program
// load attributes.
Flags uint32
// License of the program. Some helpers are only available if
// the license is deemed compatible with the GPL.
//
// See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1
License string
// Version used by Kprobe programs.
//
// Deprecated on kernels 5.0 and later. Leave empty to let the library
// detect this value automatically.
KernelVersion uint32
// The BTF associated with this program. Changing Instructions
// will most likely invalidate the contained data, and may
// result in errors when attempting to load it into the kernel.
BTF *btf.Spec
// The byte order this program was compiled for, may be nil.
ByteOrder binary.ByteOrder
}
// Copy returns a copy of the spec.
func (ps *ProgramSpec) Copy() *ProgramSpec {
if ps == nil {
return nil
}
cpy := *ps
cpy.Instructions = make(asm.Instructions, len(ps.Instructions))
copy(cpy.Instructions, ps.Instructions)
return &cpy
}
// Tag calculates the kernel tag for a series of instructions.
//
// Use asm.Instructions.Tag if you need to calculate for non-native endianness.
func (ps *ProgramSpec) Tag() (string, error) {
return ps.Instructions.Tag(internal.NativeEndian)
}
type VerifierError = internal.VerifierError
// Program represents BPF program loaded into the kernel.
//
// It is not safe to close a Program which is used by other goroutines.
type Program struct {
// Contains the output of the kernel verifier if enabled,
// otherwise it is empty.
VerifierLog string
fd *sys.FD
name string
pinnedPath string
typ ProgramType
}
// NewProgram creates a new Program.
//
// See NewProgramWithOptions for details.
func NewProgram(spec *ProgramSpec) (*Program, error) {
return NewProgramWithOptions(spec, ProgramOptions{})
}
// NewProgramWithOptions creates a new Program.
//
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
//
// Returns an error wrapping VerifierError if the program or its BTF is rejected
// by the kernel.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
if spec == nil {
return nil, errors.New("can't load a program from a nil spec")
}
handles := newHandleCache()
defer handles.close()
prog, err := newProgramWithOptions(spec, opts, handles)
if errors.Is(err, asm.ErrUnsatisfiedMapReference) {
return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}
return prog, err
}
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) {
if len(spec.Instructions) == 0 {
return nil, errors.New("instructions cannot be empty")
}
if spec.Type == UnspecifiedProgram {
return nil, errors.New("can't load program of unspecified type")
}
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
}
// Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load")
// require the version field to be set to the value of the KERNEL_VERSION
// macro for kprobe-type programs.
// Overwrite Kprobe program version if set to zero or the magic version constant.
kv := spec.KernelVersion
if spec.Type == Kprobe && (kv == 0 || kv == internal.MagicKernelVersion) {
v, err := internal.KernelVersion()
if err != nil {
return nil, fmt.Errorf("detecting kernel version: %w", err)
}
kv = v.Kernel()
}
attr := &sys.ProgLoadAttr{
ProgType: sys.ProgType(spec.Type),
ProgFlags: spec.Flags,
ExpectedAttachType: sys.AttachType(spec.AttachType),
License: sys.NewStringPointer(spec.License),
KernVersion: kv,
}
if haveObjName() == nil {
attr.ProgName = sys.NewObjName(spec.Name)
}
kernelTypes := opts.KernelTypes
insns := make(asm.Instructions, len(spec.Instructions))
copy(insns, spec.Instructions)
var btfDisabled bool
if spec.BTF != nil {
if err := applyRelocations(insns, spec.BTF, kernelTypes); err != nil {
return nil, fmt.Errorf("apply CO-RE relocations: %w", err)
}
handle, err := handles.btfHandle(spec.BTF)
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err)
}
if handle != nil {
attr.ProgBtfFd = uint32(handle.FD())
fib, lib, err := btf.MarshalExtInfos(insns, spec.BTF.TypeID)
if err != nil {
return nil, err
}
attr.FuncInfoRecSize = btf.FuncInfoSize
attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize
attr.FuncInfo = sys.NewSlicePointer(fib)
attr.LineInfoRecSize = btf.LineInfoSize
attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize
attr.LineInfo = sys.NewSlicePointer(lib)
}
}
if err := fixupAndValidate(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
err := insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
attr.Insns = sys.NewSlicePointer(bytecode)
attr.InsnCnt = uint32(len(bytecode) / asm.InstructionSize)
if spec.AttachTarget != nil {
targetID, err := findTargetInProgram(spec.AttachTarget, spec.AttachTo, spec.Type, spec.AttachType)
if err != nil {
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
attr.AttachBtfId = uint32(targetID)
attr.AttachProgFd = uint32(spec.AttachTarget.FD())
defer runtime.KeepAlive(spec.AttachTarget)
} else if spec.AttachTo != "" {
targetID, err := findTargetInKernel(kernelTypes, spec.AttachTo, spec.Type, spec.AttachType)
if err != nil && !errors.Is(err, errUnrecognizedAttachType) {
// We ignore errUnrecognizedAttachType since AttachTo may be non-empty
// for programs that don't attach anywhere.
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
attr.AttachBtfId = uint32(targetID)
}
logSize := DefaultVerifierLogSize
if opts.LogSize > 0 {
logSize = opts.LogSize
}
var logBuf []byte
if opts.LogLevel > 0 {
logBuf = make([]byte, logSize)
attr.LogLevel = opts.LogLevel
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.NewSlicePointer(logBuf)
}
fd, err := sys.ProgLoad(attr)
if err == nil {
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}
if opts.LogLevel == 0 && opts.LogSize >= 0 {
// Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize)
attr.LogLevel = 1
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.NewSlicePointer(logBuf)
_, _ = sys.ProgLoad(attr)
}
switch {
case errors.Is(err, unix.EPERM):
if len(logBuf) > 0 && logBuf[0] == 0 {
// EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can
// check that the log is empty to reduce false positives.
return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
fallthrough
case errors.Is(err, unix.EINVAL):
if hasFunctionReferences(spec.Instructions) {
if err := haveBPFToBPFCalls(); err != nil {
return nil, fmt.Errorf("load program: %w", err)
}
}
}
err = internal.ErrorWithLog(err, logBuf)
if btfDisabled {
return nil, fmt.Errorf("load program: %w (BTF disabled)", err)
}
return nil, fmt.Errorf("load program: %w", err)
}
// NewProgramFromFD creates a program from a raw fd.
//
// You should not use fd after calling this function.
//
// Requires at least Linux 4.10.
func NewProgramFromFD(fd int) (*Program, error) {
f, err := sys.NewFD(fd)
if err != nil {
return nil, err
}
return newProgramFromFD(f)
}
// NewProgramFromID returns the program for a given id.
//
// Returns ErrNotExist, if there is no eBPF program with the given id.
func NewProgramFromID(id ProgramID) (*Program, error) {
fd, err := sys.ProgGetFdById(&sys.ProgGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, fmt.Errorf("get program by id: %w", err)
}
return newProgramFromFD(fd)
}
func newProgramFromFD(fd *sys.FD) (*Program, error) {
info, err := newProgramInfoFromFd(fd)
if err != nil {
fd.Close()
return nil, fmt.Errorf("discover program type: %w", err)
}
return &Program{"", fd, "", "", info.Type}, nil
}
func (p *Program) String() string {
if p.name != "" {
return fmt.Sprintf("%s(%s)#%v", p.typ, p.name, p.fd)
}
return fmt.Sprintf("%s(%v)", p.typ, p.fd)
}
// Type returns the underlying type of the program.
func (p *Program) Type() ProgramType {
return p.typ
}
// Info returns metadata about the program.
//
// Requires at least 4.10.
func (p *Program) Info() (*ProgramInfo, error) {
return newProgramInfoFromFd(p.fd)
}
// Handle returns a reference to the program's type information in the kernel.
//
// Returns ErrNotSupported if the kernel has no BTF support, or if there is no
// BTF associated with the program.
func (p *Program) Handle() (*btf.Handle, error) {
info, err := p.Info()
if err != nil {
return nil, err
}
id, ok := info.BTFID()
if !ok {
return nil, fmt.Errorf("program %s: retrieve BTF ID: %w", p, ErrNotSupported)
}
return btf.NewHandleFromID(id)
}
// FD gets the file descriptor of the Program.
//
// It is invalid to call this function after Close has been called.
func (p *Program) FD() int {
return p.fd.Int()
}
// Clone creates a duplicate of the Program.
//
// Closing the duplicate does not affect the original, and vice versa.
//
// Cloning a nil Program returns nil.
func (p *Program) Clone() (*Program, error) {
if p == nil {
return nil, nil
}
dup, err := p.fd.Dup()
if err != nil {
return nil, fmt.Errorf("can't clone program: %w", err)
}
return &Program{p.VerifierLog, dup, p.name, "", p.typ}, nil
}
// Pin persists the Program on the BPF virtual file system past the lifetime of
// the process that created it
//
// Calling Pin on a previously pinned program will overwrite the path, except when
// the new path already exists. Re-pinning across filesystems is not supported.
//
// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs
func (p *Program) Pin(fileName string) error {
if err := internal.Pin(p.pinnedPath, fileName, p.fd); err != nil {
return err
}
p.pinnedPath = fileName
return nil
}
// Unpin removes the persisted state for the Program from the BPF virtual filesystem.
//
// Failed calls to Unpin will not alter the state returned by IsPinned.
//
// Unpinning an unpinned Program returns nil.
func (p *Program) Unpin() error {
if err := internal.Unpin(p.pinnedPath); err != nil {
return err
}
p.pinnedPath = ""
return nil
}
// IsPinned returns true if the Program has a non-empty pinned path.
func (p *Program) IsPinned() bool {
return p.pinnedPath != ""
}
// Close the Program's underlying file descriptor, which could unload
// the program from the kernel if it is not pinned or attached to a
// kernel hook.
func (p *Program) Close() error {
if p == nil {
return nil
}
return p.fd.Close()
}
// Various options for Run'ing a Program
type RunOptions struct {
// Program's data input. Required field.
Data []byte
// Program's data after Program has run. Caller must allocate. Optional field.
DataOut []byte
// Program's context input. Optional field.
Context interface{}
// Program's context after Program has run. Must be a pointer or slice. Optional field.
ContextOut interface{}
// Number of times to run Program. Optional field. Defaults to 1.
Repeat uint32
// Optional flags.
Flags uint32
// CPU to run Program on. Optional field.
// Note not all program types support this field.
CPU uint32
// Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer
// or similar. Typically used during benchmarking. Optional field.
Reset func()
}
// Test runs the Program in the kernel with the given input and returns the
// value returned by the eBPF program. outLen may be zero.
//
// Note: the kernel expects at least 14 bytes input for an ethernet header for
// XDP and SKB programs.
//
// This function requires at least Linux 4.12.
func (p *Program) Test(in []byte) (uint32, []byte, error) {
// Older kernels ignore the dataSizeOut argument when copying to user space.
// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
// size will be. Hence we allocate an output buffer which we hope will always be large
// enough, and panic if the kernel wrote past the end of the allocation.
// See https://patchwork.ozlabs.org/cover/1006822/
var out []byte
if len(in) > 0 {
out = make([]byte, len(in)+outputPad)
}
opts := RunOptions{
Data: in,
DataOut: out,
Repeat: 1,
}
ret, _, err := p.testRun(&opts)
if err != nil {
return ret, nil, fmt.Errorf("can't test program: %w", err)
}
return ret, opts.DataOut, nil
}
// Run runs the Program in kernel with given RunOptions.
//
// Note: the same restrictions from Test apply.
func (p *Program) Run(opts *RunOptions) (uint32, error) {
ret, _, err := p.testRun(opts)
if err != nil {
return ret, fmt.Errorf("can't test program: %w", err)
}
return ret, nil
}
// Benchmark runs the Program with the given input for a number of times
// and returns the time taken per iteration.
//
// Returns the result of the last execution of the program and the time per
// run or an error. reset is called whenever the benchmark syscall is
// interrupted, and should be set to testing.B.ResetTimer or similar.
//
// Note: profiling a call to this function will skew it's results, see
// https://github.com/cilium/ebpf/issues/24
//
// This function requires at least Linux 4.12.
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
if uint(repeat) > math.MaxUint32 {
return 0, 0, fmt.Errorf("repeat is too high")
}
opts := RunOptions{
Data: in,
Repeat: uint32(repeat),
Reset: reset,
}
ret, total, err := p.testRun(&opts)
if err != nil {
return ret, total, fmt.Errorf("can't benchmark program: %w", err)
}
return ret, total, nil
}
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error {
prog, err := NewProgram(&ProgramSpec{
// SocketFilter does not require privileges on newer kernels.
Type: SocketFilter,
Instructions: asm.Instructions{
asm.LoadImm(asm.R0, 0, asm.DWord),
asm.Return(),
},
License: "MIT",
})
if err != nil {
// This may be because we lack sufficient permissions, etc.
return err
}
defer prog.Close()
// Programs require at least 14 bytes input
in := make([]byte, 14)
attr := sys.ProgRunAttr{
ProgFd: uint32(prog.FD()),
DataSizeIn: uint32(len(in)),
DataIn: sys.NewSlicePointer(in),
}
err = sys.ProgRun(&attr)
switch {
case errors.Is(err, unix.EINVAL):
// Check for EINVAL specifically, rather than err != nil since we
// otherwise misdetect due to insufficient permissions.
return internal.ErrNotSupported
case errors.Is(err, unix.EINTR):
// We know that PROG_TEST_RUN is supported if we get EINTR.
return nil
case errors.Is(err, unix.ENOTSUPP):
// The first PROG_TEST_RUN patches shipped in 4.12 didn't include
// a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is
// supported, but not for the program type used in the probe.
return nil
}
return err
})
func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
if uint(len(opts.Data)) > math.MaxUint32 {
return 0, 0, fmt.Errorf("input is too long")
}
if err := haveProgTestRun(); err != nil {
return 0, 0, err
}
var ctxBytes []byte
if opts.Context != nil {
ctx := new(bytes.Buffer)
if err := binary.Write(ctx, internal.NativeEndian, opts.Context); err != nil {
return 0, 0, fmt.Errorf("cannot serialize context: %v", err)
}
ctxBytes = ctx.Bytes()
}
var ctxOut []byte
if opts.ContextOut != nil {
ctxOut = make([]byte, binary.Size(opts.ContextOut))
}
attr := sys.ProgRunAttr{
ProgFd: p.fd.Uint(),
DataSizeIn: uint32(len(opts.Data)),
DataSizeOut: uint32(len(opts.DataOut)),
DataIn: sys.NewSlicePointer(opts.Data),
DataOut: sys.NewSlicePointer(opts.DataOut),
Repeat: uint32(opts.Repeat),
CtxSizeIn: uint32(len(ctxBytes)),
CtxSizeOut: uint32(len(ctxOut)),
CtxIn: sys.NewSlicePointer(ctxBytes),
CtxOut: sys.NewSlicePointer(ctxOut),
Flags: opts.Flags,
Cpu: opts.CPU,
}
for {
err := sys.ProgRun(&attr)
if err == nil {
break
}
if errors.Is(err, unix.EINTR) {
if opts.Reset != nil {
opts.Reset()
}
continue
}
if errors.Is(err, unix.ENOTSUPP) {
return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported)
}
return 0, 0, fmt.Errorf("can't run test: %w", err)
}
if opts.DataOut != nil {
if int(attr.DataSizeOut) > cap(opts.DataOut) {
// Houston, we have a problem. The program created more data than we allocated,
// and the kernel wrote past the end of our buffer.
panic("kernel wrote past end of output buffer")
}
opts.DataOut = opts.DataOut[:int(attr.DataSizeOut)]
}
if len(ctxOut) != 0 {
b := bytes.NewReader(ctxOut)
if err := binary.Read(b, internal.NativeEndian, opts.ContextOut); err != nil {
return 0, 0, fmt.Errorf("failed to decode ContextOut: %v", err)
}
}
total := time.Duration(attr.Duration) * time.Nanosecond
return attr.Retval, total, nil
}
func unmarshalProgram(buf []byte) (*Program, error) {
if len(buf) != 4 {
return nil, errors.New("program id requires 4 byte value")
}
// Looking up an entry in a nested map or prog array returns an id,
// not an fd.
id := internal.NativeEndian.Uint32(buf)
return NewProgramFromID(ProgramID(id))
}
func marshalProgram(p *Program, length int) ([]byte, error) {
if length != 4 {
return nil, fmt.Errorf("can't marshal program to %d bytes", length)
}
buf := make([]byte, 4)
internal.NativeEndian.PutUint32(buf, p.fd.Uint())
return buf, nil
}
// LoadPinnedProgram loads a Program from a BPF file.
//
// Requires at least Linux 4.11.
func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) {
fd, err := sys.ObjGet(&sys.ObjGetAttr{
Pathname: sys.NewStringPointer(fileName),
FileFlags: opts.Marshal(),
})
if err != nil {
return nil, err
}
info, err := newProgramInfoFromFd(fd)
if err != nil {
_ = fd.Close()
return nil, fmt.Errorf("info for %s: %w", fileName, err)
}
return &Program{"", fd, filepath.Base(fileName), fileName, info.Type}, nil
}
// SanitizeName replaces all invalid characters in name with replacement.
// Passing a negative value for replacement will delete characters instead
// of replacing them. Use this to automatically generate valid names for maps
// and programs at runtime.
//
// The set of allowed characters depends on the running kernel version.
// Dots are only allowed as of kernel 5.2.
func SanitizeName(name string, replacement rune) string {
return strings.Map(func(char rune) rune {
if invalidBPFObjNameChar(char) {
return replacement
}
return char
}, name)
}
// ProgramGetNextID returns the ID of the next eBPF program.
//
// Returns ErrNotExist, if there is no next eBPF program.
func ProgramGetNextID(startID ProgramID) (ProgramID, error) {
attr := &sys.ProgGetNextIdAttr{Id: uint32(startID)}
return ProgramID(attr.NextId), sys.ProgGetNextId(attr)
}
// BindMap binds map to the program and is only released once program is released.
//
// This may be used in cases where metadata should be associated with the program
// which otherwise does not contain any references to the map.
func (p *Program) BindMap(m *Map) error {
attr := &sys.ProgBindMapAttr{
ProgFd: uint32(p.FD()),
MapFd: uint32(m.FD()),
}
return sys.ProgBindMap(attr)
}
var errUnrecognizedAttachType = errors.New("unrecognized attach type")
// find an attach target type in the kernel.
//
// spec may be nil and defaults to the canonical kernel BTF. name together with
// progType and attachType determine which type we need to attach to.
//
// Returns errUnrecognizedAttachType.
func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
type match struct {
p ProgramType
a AttachType
}
var (
typeName, featureName string
isBTFTypeFunc = true
)
switch (match{progType, attachType}) {
case match{LSM, AttachLSMMac}:
typeName = "bpf_lsm_" + name
featureName = name + " LSM hook"
case match{Tracing, AttachTraceIter}:
typeName = "bpf_iter_" + name
featureName = name + " iterator"
case match{Tracing, AttachTraceFEntry}:
typeName = name
featureName = fmt.Sprintf("fentry %s", name)
case match{Tracing, AttachTraceFExit}:
typeName = name
featureName = fmt.Sprintf("fexit %s", name)
case match{Tracing, AttachModifyReturn}:
typeName = name
featureName = fmt.Sprintf("fmod_ret %s", name)
case match{Tracing, AttachTraceRawTp}:
typeName = fmt.Sprintf("btf_trace_%s", name)
featureName = fmt.Sprintf("raw_tp %s", name)
isBTFTypeFunc = false
default:
return 0, errUnrecognizedAttachType
}
spec, err := maybeLoadKernelBTF(spec)
if err != nil {
return 0, fmt.Errorf("load kernel spec: %w", err)
}
var target btf.Type
if isBTFTypeFunc {
var targetFunc *btf.Func
err = spec.TypeByName(typeName, &targetFunc)
target = targetFunc
} else {
var targetTypedef *btf.Typedef
err = spec.TypeByName(typeName, &targetTypedef)
target = targetTypedef
}
if err != nil {
if errors.Is(err, btf.ErrNotFound) {
return 0, &internal.UnsupportedFeatureError{
Name: featureName,
}
}
return 0, fmt.Errorf("find target for %s: %w", featureName, err)
}
return spec.TypeID(target)
}
// find an attach target type in a program.
//
// Returns errUnrecognizedAttachType.
func findTargetInProgram(prog *Program, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
type match struct {
p ProgramType
a AttachType
}
var typeName string
switch (match{progType, attachType}) {
case match{Extension, AttachNone}:
typeName = name
default:
return 0, errUnrecognizedAttachType
}
btfHandle, err := prog.Handle()
if err != nil {
return 0, fmt.Errorf("load target BTF: %w", err)
}
defer btfHandle.Close()
spec, err := btfHandle.Spec(nil)
if err != nil {
return 0, err
}
var targetFunc *btf.Func
err = spec.TypeByName(typeName, &targetFunc)
if err != nil {
return 0, fmt.Errorf("find target %s: %w", typeName, err)
}
return spec.TypeID(targetFunc)
}

View file

@ -1,145 +0,0 @@
#!/usr/bin/env bash
# Test the current package under a different kernel.
# Requires virtme and qemu to be installed.
# Examples:
# Run all tests on a 5.4 kernel
# $ ./run-tests.sh 5.4
# Run a subset of tests:
# $ ./run-tests.sh 5.4 ./link
set -euo pipefail
script="$(realpath "$0")"
readonly script
# This script is a bit like a Matryoshka doll since it keeps re-executing itself
# in various different contexts:
#
# 1. invoked by the user like run-tests.sh 5.4
# 2. invoked by go test like run-tests.sh --exec-vm
# 3. invoked by init in the vm like run-tests.sh --exec-test
#
# This allows us to use all available CPU on the host machine to compile our
# code, and then only use the VM to execute the test. This is because the VM
# is usually slower at compiling than the host.
if [[ "${1:-}" = "--exec-vm" ]]; then
shift
input="$1"
shift
# Use sudo if /dev/kvm isn't accessible by the current user.
sudo=""
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
sudo="sudo"
fi
readonly sudo
testdir="$(dirname "$1")"
output="$(mktemp -d)"
printf -v cmd "%q " "$@"
if [[ "$(stat -c '%t:%T' -L /proc/$$/fd/0)" == "1:3" ]]; then
# stdin is /dev/null, which doesn't play well with qemu. Use a fifo as a
# blocking substitute.
mkfifo "${output}/fake-stdin"
# Open for reading and writing to avoid blocking.
exec 0<> "${output}/fake-stdin"
rm "${output}/fake-stdin"
fi
for ((i = 0; i < 3; i++)); do
if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" CI_MAX_KERNEL_VERSION="${CI_MAX_KERNEL_VERSION:-}" \"$script\" --exec-test $cmd" \
--kopt possible_cpus=2; then # need at least two CPUs for some tests
exit 23
fi
if [[ -e "${output}/status" ]]; then
break
fi
if [[ -v CI ]]; then
echo "Retrying test run due to qemu crash"
continue
fi
exit 42
done
rc=$(<"${output}/status")
$sudo rm -r "$output"
exit $rc
elif [[ "${1:-}" = "--exec-test" ]]; then
shift
mount -t bpf bpf /sys/fs/bpf
mount -t tracefs tracefs /sys/kernel/debug/tracing
if [[ -d "/run/input/bpf" ]]; then
export KERNEL_SELFTESTS="/run/input/bpf"
fi
if [[ -f "/run/input/bpf/bpf_testmod/bpf_testmod.ko" ]]; then
insmod "/run/input/bpf/bpf_testmod/bpf_testmod.ko"
fi
dmesg --clear
rc=0
"$@" || rc=$?
dmesg
echo $rc > "/run/output/status"
exit $rc # this return code is "swallowed" by qemu
fi
readonly kernel_version="${1:-}"
if [[ -z "${kernel_version}" ]]; then
echo "Expecting kernel version as first argument"
exit 1
fi
shift
readonly kernel="linux-${kernel_version}.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.tgz"
readonly input="$(mktemp -d)"
readonly tmp_dir="${TMPDIR:-/tmp}"
readonly branch="${BRANCH:-master}"
fetch() {
echo Fetching "${1}"
pushd "${tmp_dir}" > /dev/null
curl -s -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}"
local ret=$?
popd > /dev/null
return $ret
}
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then
echo "Decompressing selftests"
mkdir "${input}/bpf"
tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
else
echo "No selftests found, disabling"
fi
args=(-short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...)
if (( $# > 0 )); then
args=("$@")
fi
export GOFLAGS=-mod=readonly
export CGO_ENABLED=0
# LINUX_VERSION_CODE test compares this to discovered value.
export KERNEL_VERSION="${kernel_version}"
echo Testing on "${kernel_version}"
go test -exec "$script --exec-vm $input" "${args[@]}"
echo "Test successful on ${kernel_version}"
rm -r "${input}"

View file

@ -1,264 +0,0 @@
package ebpf
import (
"bytes"
"errors"
"fmt"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// invalidBPFObjNameChar returns true if char may not appear in
// a BPF object name.
func invalidBPFObjNameChar(char rune) bool {
dotAllowed := objNameAllowsDot() == nil
switch {
case char >= 'A' && char <= 'Z':
return false
case char >= 'a' && char <= 'z':
return false
case char >= '0' && char <= '9':
return false
case dotAllowed && char == '.':
return false
case char == '_':
return false
default:
return true
}
}
func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD, error) {
buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
if err := insns.Marshal(buf, internal.NativeEndian); err != nil {
return nil, err
}
bytecode := buf.Bytes()
return sys.ProgLoad(&sys.ProgLoadAttr{
ProgType: sys.ProgType(typ),
License: sys.NewStringPointer(license),
Insns: sys.NewSlicePointer(bytecode),
InsnCnt: uint32(len(bytecode) / asm.InstructionSize),
})
}
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error {
_, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(ArrayOfMaps),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
// Invalid file descriptor.
InnerMapFd: ^uint32(0),
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
})
var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error {
// This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since
// BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapFlags: unix.BPF_F_RDONLY_PROG,
})
if err != nil {
return internal.ErrNotSupported
}
_ = m.Close()
return nil
})
var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error {
// This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapFlags: unix.BPF_F_MMAPABLE,
})
if err != nil {
return internal.ErrNotSupported
}
_ = m.Close()
return nil
})
var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error {
// This checks BPF_F_INNER_MAP, which appeared in 5.10.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapFlags: unix.BPF_F_INNER_MAP,
})
if err != nil {
return internal.ErrNotSupported
}
_ = m.Close()
return nil
})
var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() error {
// This checks BPF_F_NO_PREALLOC, which appeared in 4.6.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Hash),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapFlags: unix.BPF_F_NO_PREALLOC,
})
if err != nil {
return internal.ErrNotSupported
}
_ = m.Close()
return nil
})
func wrapMapError(err error) error {
if err == nil {
return nil
}
if errors.Is(err, unix.ENOENT) {
return sys.Error(ErrKeyNotExist, unix.ENOENT)
}
if errors.Is(err, unix.EEXIST) {
return sys.Error(ErrKeyExist, unix.EEXIST)
}
if errors.Is(err, unix.ENOTSUPP) {
return sys.Error(ErrNotSupported, unix.ENOTSUPP)
}
if errors.Is(err, unix.E2BIG) {
return fmt.Errorf("key too big for map: %w", err)
}
return err
}
var haveObjName = internal.FeatureTest("object names", "4.15", func() error {
attr := sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapName: sys.NewObjName("feature_test"),
}
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
_ = fd.Close()
return nil
})
var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() error {
if err := haveObjName(); err != nil {
return err
}
attr := sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapName: sys.NewObjName(".test"),
}
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
_ = fd.Close()
return nil
})
var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error {
var maxEntries uint32 = 2
attr := sys.MapCreateAttr{
MapType: sys.MapType(Hash),
KeySize: 4,
ValueSize: 4,
MaxEntries: maxEntries,
}
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
defer fd.Close()
keys := []uint32{1, 2}
values := []uint32{3, 4}
kp, _ := marshalPtr(keys, 8)
vp, _ := marshalPtr(values, 8)
err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{
MapFd: fd.Uint(),
Keys: kp,
Values: vp,
Count: maxEntries,
})
if err != nil {
return internal.ErrNotSupported
}
return nil
})
var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", func() error {
insns := asm.Instructions{
asm.Mov.Reg(asm.R1, asm.R10),
asm.Add.Imm(asm.R1, -8),
asm.Mov.Imm(asm.R2, 8),
asm.Mov.Imm(asm.R3, 0),
asm.FnProbeReadKernel.Call(),
asm.Return(),
}
fd, err := progLoad(insns, Kprobe, "GPL")
if err != nil {
return internal.ErrNotSupported
}
_ = fd.Close()
return nil
})
var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() error {
insns := asm.Instructions{
asm.Call.Label("prog2").WithSymbol("prog1"),
asm.Return(),
asm.Mov.Imm(asm.R0, 0).WithSymbol("prog2"),
asm.Return(),
}
fd, err := progLoad(insns, SocketFilter, "MIT")
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
_ = fd.Close()
return nil
})

View file

@ -1,284 +0,0 @@
package ebpf
import (
"github.com/cilium/ebpf/internal/unix"
)
//go:generate stringer -output types_string.go -type=MapType,ProgramType,PinType
// MapType indicates the type map structure
// that will be initialized in the kernel.
type MapType uint32
// Max returns the latest supported MapType.
func (MapType) Max() MapType {
return maxMapType - 1
}
// All the various map types that can be created
const (
UnspecifiedMap MapType = iota
// Hash is a hash map
Hash
// Array is an array map
Array
// ProgramArray - A program array map is a special kind of array map whose map
// values contain only file descriptors referring to other eBPF
// programs. Thus, both the key_size and value_size must be
// exactly four bytes. This map is used in conjunction with the
// TailCall helper.
ProgramArray
// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
PerfEventArray
// PerCPUHash - This data structure is useful for people who have high performance
// network needs and can reconcile adds at the end of some cycle, so that
// hashes can be lock free without the use of XAdd, which can be costly.
PerCPUHash
// PerCPUArray - This data structure is useful for people who have high performance
// network needs and can reconcile adds at the end of some cycle, so that
// hashes can be lock free without the use of XAdd, which can be costly.
// Each CPU gets a copy of this hash, the contents of all of which can be reconciled
// later.
PerCPUArray
// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
// GetStackID
StackTrace
// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
// if an skb is from a socket belonging to a specific cgroup
CGroupArray
// LRUHash - This allows you to create a small hash structure that will purge the
// least recently used items rather than thow an error when you run out of memory
LRUHash
// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
// it has more to do with including the CPU id with the LRU calculation so that if a
// particular CPU is using a value over-and-over again, then it will be saved, but if
// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
// giving weight to CPU locality over overall usage.
LRUCPUHash
// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful,
// for storing things like IP addresses which can be bit masked allowing for keys of differing
// values to refer to the same reference based on their masks. See wikipedia for more details.
LPMTrie
// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
// itself.
ArrayOfMaps
// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
// itself.
HashOfMaps
// DevMap - Specialized map to store references to network devices.
DevMap
// SockMap - Specialized map to store references to sockets.
SockMap
// CPUMap - Specialized map to store references to CPUs.
CPUMap
// XSKMap - Specialized map for XDP programs to store references to open sockets.
XSKMap
// SockHash - Specialized hash to store references to sockets.
SockHash
// CGroupStorage - Special map for CGroups.
CGroupStorage
// ReusePortSockArray - Specialized map to store references to sockets that can be reused.
ReusePortSockArray
// PerCPUCGroupStorage - Special per CPU map for CGroups.
PerCPUCGroupStorage
// Queue - FIFO storage for BPF programs.
Queue
// Stack - LIFO storage for BPF programs.
Stack
// SkStorage - Specialized map for local storage at SK for BPF programs.
SkStorage
// DevMapHash - Hash-based indexing scheme for references to network devices.
DevMapHash
// StructOpsMap - This map holds a kernel struct with its function pointer implemented in a BPF
// program.
StructOpsMap
// RingBuf - Similar to PerfEventArray, but shared across all CPUs.
RingBuf
// InodeStorage - Specialized local storage map for inodes.
InodeStorage
// TaskStorage - Specialized local storage map for task_struct.
TaskStorage
// maxMapType - Bound enum of MapTypes, has to be last in enum.
maxMapType
)
// hasPerCPUValue returns true if the Map stores a value per CPU.
func (mt MapType) hasPerCPUValue() bool {
return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash || mt == PerCPUCGroupStorage
}
// canStoreMap returns true if the map type accepts a map fd
// for update and returns a map id for lookup.
func (mt MapType) canStoreMap() bool {
return mt == ArrayOfMaps || mt == HashOfMaps
}
// canStoreProgram returns true if the map type accepts a program fd
// for update and returns a program id for lookup.
func (mt MapType) canStoreProgram() bool {
return mt == ProgramArray
}
// hasBTF returns true if the map type supports BTF key/value metadata.
func (mt MapType) hasBTF() bool {
switch mt {
case PerfEventArray, CGroupArray, StackTrace, ArrayOfMaps, HashOfMaps, DevMap,
DevMapHash, CPUMap, XSKMap, SockMap, SockHash, Queue, Stack, RingBuf:
return false
default:
return true
}
}
// ProgramType of the eBPF program
type ProgramType uint32
// Max return the latest supported ProgramType.
func (ProgramType) Max() ProgramType {
return maxProgramType - 1
}
// eBPF program types
const (
UnspecifiedProgram ProgramType = iota
SocketFilter
Kprobe
SchedCLS
SchedACT
TracePoint
XDP
PerfEvent
CGroupSKB
CGroupSock
LWTIn
LWTOut
LWTXmit
SockOps
SkSKB
CGroupDevice
SkMsg
RawTracepoint
CGroupSockAddr
LWTSeg6Local
LircMode2
SkReuseport
FlowDissector
CGroupSysctl
RawTracepointWritable
CGroupSockopt
Tracing
StructOps
Extension
LSM
SkLookup
Syscall
maxProgramType
)
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
type AttachType uint32
//go:generate stringer -type AttachType -trimprefix Attach
// AttachNone is an alias for AttachCGroupInetIngress for readability reasons.
const AttachNone AttachType = 0
const (
AttachCGroupInetIngress AttachType = iota
AttachCGroupInetEgress
AttachCGroupInetSockCreate
AttachCGroupSockOps
AttachSkSKBStreamParser
AttachSkSKBStreamVerdict
AttachCGroupDevice
AttachSkMsgVerdict
AttachCGroupInet4Bind
AttachCGroupInet6Bind
AttachCGroupInet4Connect
AttachCGroupInet6Connect
AttachCGroupInet4PostBind
AttachCGroupInet6PostBind
AttachCGroupUDP4Sendmsg
AttachCGroupUDP6Sendmsg
AttachLircMode2
AttachFlowDissector
AttachCGroupSysctl
AttachCGroupUDP4Recvmsg
AttachCGroupUDP6Recvmsg
AttachCGroupGetsockopt
AttachCGroupSetsockopt
AttachTraceRawTp
AttachTraceFEntry
AttachTraceFExit
AttachModifyReturn
AttachLSMMac
AttachTraceIter
AttachCgroupInet4GetPeername
AttachCgroupInet6GetPeername
AttachCgroupInet4GetSockname
AttachCgroupInet6GetSockname
AttachXDPDevMap
AttachCgroupInetSockRelease
AttachXDPCPUMap
AttachSkLookup
AttachXDP
AttachSkSKBVerdict
AttachSkReuseportSelect
AttachSkReuseportSelectOrMigrate
AttachPerfEvent
)
// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
type AttachFlags uint32
// PinType determines whether a map is pinned into a BPFFS.
type PinType int
// Valid pin types.
//
// Mirrors enum libbpf_pin_type.
const (
PinNone PinType = iota
// Pin an object by using its name as the filename.
PinByName
)
// LoadPinOptions control how a pinned object is loaded.
type LoadPinOptions struct {
// Request a read-only or write-only object. The default is a read-write
// object. Only one of the flags may be set.
ReadOnly bool
WriteOnly bool
// Raw flags for the syscall. Other fields of this struct take precedence.
Flags uint32
}
// Marshal returns a value suitable for BPF_OBJ_GET syscall file_flags parameter.
func (lpo *LoadPinOptions) Marshal() uint32 {
if lpo == nil {
return 0
}
flags := lpo.Flags
if lpo.ReadOnly {
flags |= unix.BPF_F_RDONLY
}
if lpo.WriteOnly {
flags |= unix.BPF_F_WRONLY
}
return flags
}
// BatchOptions batch map operations options
//
// Mirrors libbpf struct bpf_map_batch_opts
// Currently BPF_F_FLAG is the only supported
// flag (for ElemFlags).
type BatchOptions struct {
ElemFlags uint64
Flags uint64
}

View file

@ -1,120 +0,0 @@
// Code generated by "stringer -output types_string.go -type=MapType,ProgramType,PinType"; DO NOT EDIT.
package ebpf
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[UnspecifiedMap-0]
_ = x[Hash-1]
_ = x[Array-2]
_ = x[ProgramArray-3]
_ = x[PerfEventArray-4]
_ = x[PerCPUHash-5]
_ = x[PerCPUArray-6]
_ = x[StackTrace-7]
_ = x[CGroupArray-8]
_ = x[LRUHash-9]
_ = x[LRUCPUHash-10]
_ = x[LPMTrie-11]
_ = x[ArrayOfMaps-12]
_ = x[HashOfMaps-13]
_ = x[DevMap-14]
_ = x[SockMap-15]
_ = x[CPUMap-16]
_ = x[XSKMap-17]
_ = x[SockHash-18]
_ = x[CGroupStorage-19]
_ = x[ReusePortSockArray-20]
_ = x[PerCPUCGroupStorage-21]
_ = x[Queue-22]
_ = x[Stack-23]
_ = x[SkStorage-24]
_ = x[DevMapHash-25]
_ = x[StructOpsMap-26]
_ = x[RingBuf-27]
_ = x[InodeStorage-28]
_ = x[TaskStorage-29]
_ = x[maxMapType-30]
}
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStoragemaxMapType"
var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290, 300}
func (i MapType) String() string {
if i >= MapType(len(_MapType_index)-1) {
return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _MapType_name[_MapType_index[i]:_MapType_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[UnspecifiedProgram-0]
_ = x[SocketFilter-1]
_ = x[Kprobe-2]
_ = x[SchedCLS-3]
_ = x[SchedACT-4]
_ = x[TracePoint-5]
_ = x[XDP-6]
_ = x[PerfEvent-7]
_ = x[CGroupSKB-8]
_ = x[CGroupSock-9]
_ = x[LWTIn-10]
_ = x[LWTOut-11]
_ = x[LWTXmit-12]
_ = x[SockOps-13]
_ = x[SkSKB-14]
_ = x[CGroupDevice-15]
_ = x[SkMsg-16]
_ = x[RawTracepoint-17]
_ = x[CGroupSockAddr-18]
_ = x[LWTSeg6Local-19]
_ = x[LircMode2-20]
_ = x[SkReuseport-21]
_ = x[FlowDissector-22]
_ = x[CGroupSysctl-23]
_ = x[RawTracepointWritable-24]
_ = x[CGroupSockopt-25]
_ = x[Tracing-26]
_ = x[StructOps-27]
_ = x[Extension-28]
_ = x[LSM-29]
_ = x[SkLookup-30]
_ = x[Syscall-31]
_ = x[maxProgramType-32]
}
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallmaxProgramType"
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 315}
func (i ProgramType) String() string {
if i >= ProgramType(len(_ProgramType_index)-1) {
return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[PinNone-0]
_ = x[PinByName-1]
}
const _PinType_name = "PinNonePinByName"
var _PinType_index = [...]uint8{0, 7, 16}
func (i PinType) String() string {
if i < 0 || i >= PinType(len(_PinType_index)-1) {
return "PinType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _PinType_name[_PinType_index[i]:_PinType_index[i+1]]
}

View file

@ -1,2 +0,0 @@
example/example
cmd/cgctl/cgctl

View file

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,26 +0,0 @@
# Copyright The containerd Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PACKAGES=$(shell go list ./... | grep -v /vendor/)
all: cgutil
go build -v
cgutil:
cd cmd/cgctl && go build -v
proto:
protobuild --quiet ${PACKAGES}
# Keep them Go-idiomatic and backward-compatible with the gogo/protobuf era.
go-fix-acronym -w -a '(Cpu|Tcp|Rss)' $(shell find cgroup1/stats/ cgroup2/stats/ -name '*.pb.go')

View file

@ -1,31 +0,0 @@
version = "2"
generators = ["go"]
# Control protoc include paths. Below are usually some good defaults, but feel
# free to try it without them if it works for your project.
[includes]
# Include paths that will be added before all others. Typically, you want to
# treat the root of the project as an include, but this may not be necessary.
# before = ["."]
# Paths that will be added untouched to the end of the includes. We use
# `/usr/local/include` to pickup the common install location of protobuf.
# This is the default.
after = ["/usr/local/include", "/usr/include"]
# Aggregrate the API descriptors to lock down API changes.
[[descriptors]]
prefix = "github.com/containerd/cgroups/cgroup1/stats"
target = "cgroup1/stats/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
]
[[descriptors]]
prefix = "github.com/containerd/cgroups/cgroup2/stats"
target = "cgroup2/stats/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
]
[parameters.go]
paths = "source_relative"

View file

@ -1,217 +0,0 @@
# cgroups
[![Build Status](https://github.com/containerd/cgroups/workflows/CI/badge.svg)](https://github.com/containerd/cgroups/actions?query=workflow%3ACI)
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/main/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
[![GoDoc](https://godoc.org/github.com/containerd/cgroups?status.svg)](https://godoc.org/github.com/containerd/cgroups)
[![Go Report Card](https://goreportcard.com/badge/github.com/containerd/cgroups)](https://goreportcard.com/report/github.com/containerd/cgroups)
Go package for creating, managing, inspecting, and destroying cgroups.
The resources format for settings on the cgroup uses the OCI runtime-spec found
[here](https://github.com/opencontainers/runtime-spec).
## Examples (v1)
### Create a new cgroup
This creates a new cgroup using a static path for all subsystems under `/test`.
* /sys/fs/cgroup/cpu/test
* /sys/fs/cgroup/memory/test
* etc....
It uses a single hierarchy and specifies cpu shares as a resource constraint and
uses the v1 implementation of cgroups.
```go
shares := uint64(100)
control, err := cgroup1.New(cgroup1.StaticPath("/test"), &specs.LinuxResources{
CPU: &specs.LinuxCPU{
Shares: &shares,
},
})
defer control.Delete()
```
### Create with systemd slice support
```go
control, err := cgroup1.New(cgroup1.Systemd, cgroup1.Slice("system.slice", "runc-test"), &specs.LinuxResources{
CPU: &specs.CPU{
Shares: &shares,
},
})
```
### Load an existing cgroup
```go
control, err = cgroup1.Load(cgroup1.Default, cgroups.StaticPath("/test"))
```
### Add a process to the cgroup
```go
if err := control.Add(cgroup1.Process{Pid:1234}); err != nil {
}
```
### Update the cgroup
To update the resources applied in the cgroup
```go
shares = uint64(200)
if err := control.Update(&specs.LinuxResources{
CPU: &specs.LinuxCPU{
Shares: &shares,
},
}); err != nil {
}
```
### Freeze and Thaw the cgroup
```go
if err := control.Freeze(); err != nil {
}
if err := control.Thaw(); err != nil {
}
```
### List all processes in the cgroup or recursively
```go
processes, err := control.Processes(cgroup1.Devices, recursive)
```
### Get Stats on the cgroup
```go
stats, err := control.Stat()
```
By adding `cgroups.IgnoreNotExist` all non-existent files will be ignored, e.g. swap memory stats without swap enabled
```go
stats, err := control.Stat(cgroup1.IgnoreNotExist)
```
### Move process across cgroups
This allows you to take processes from one cgroup and move them to another.
```go
err := control.MoveTo(destination)
```
### Create subcgroup
```go
subCgroup, err := control.New("child", resources)
```
### Registering for memory events
This allows you to get notified by an eventfd for v1 memory cgroups events.
```go
event := cgroup1.MemoryThresholdEvent(50 * 1024 * 1024, false)
efd, err := control.RegisterMemoryEvent(event)
```
```go
event := cgroup1.MemoryPressureEvent(cgroup1.MediumPressure, cgroup1.DefaultMode)
efd, err := control.RegisterMemoryEvent(event)
```
```go
efd, err := control.OOMEventFD()
// or by using RegisterMemoryEvent
event := cgroup1.OOMEvent()
efd, err := control.RegisterMemoryEvent(event)
```
## Examples (v2/unified)
### Check that the current system is running cgroups v2
```go
var cgroupV2 bool
if cgroups.Mode() == cgroups.Unified {
cgroupV2 = true
}
```
### Create a new cgroup
This creates a new systemd v2 cgroup slice. Systemd slices consider ["-" a special character](https://www.freedesktop.org/software/systemd/man/systemd.slice.html),
so the resulting slice would be located here on disk:
* /sys/fs/cgroup/my.slice/my-cgroup.slice/my-cgroup-abc.slice
```go
import (
"github.com/containerd/cgroups/v3/cgroup2"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
res := cgroup2.Resources{}
// dummy PID of -1 is used for creating a "general slice" to be used as a parent cgroup.
// see https://github.com/containerd/cgroups/blob/1df78138f1e1e6ee593db155c6b369466f577651/v2/manager.go#L732-L735
m, err := cgroup2.NewSystemd("/", "my-cgroup-abc.slice", -1, &res)
if err != nil {
return err
}
```
### Load an existing cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
```
### Delete a cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
err = m.DeleteSystemd()
if err != nil {
return err
}
```
### Kill all processes in a cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
err = m.Kill()
if err != nil {
return err
}
```
### Attention
All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name
## Project details
Cgroups is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE).
As a containerd sub-project, you will find the:
* [Project governance](https://github.com/containerd/project/blob/main/GOVERNANCE.md),
* [Maintainers](https://github.com/containerd/project/blob/main/MAINTAINERS),
* and [Contributing guidelines](https://github.com/containerd/project/blob/main/CONTRIBUTING.md)
information in our [`containerd/project`](https://github.com/containerd/project) repository.

View file

@ -1,362 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cgroup1
import (
"bufio"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewBlkio returns a Blkio controller given the root folder of cgroups.
// It may optionally accept other configuration options, such as ProcRoot(path)
func NewBlkio(root string, options ...func(controller *blkioController)) *blkioController {
ctrl := &blkioController{
root: filepath.Join(root, string(Blkio)),
procRoot: "/proc",
}
for _, opt := range options {
opt(ctrl)
}
return ctrl
}
// ProcRoot overrides the default location of the "/proc" filesystem
func ProcRoot(path string) func(controller *blkioController) {
return func(c *blkioController) {
c.procRoot = path
}
}
type blkioController struct {
root string
procRoot string
}
func (b *blkioController) Name() Name {
return Blkio
}
func (b *blkioController) Path(path string) string {
return filepath.Join(b.root, path)
}
func (b *blkioController) Create(path string, resources *specs.LinuxResources) error {
if err := os.MkdirAll(b.Path(path), defaultDirPerm); err != nil {
return err
}
if resources.BlockIO == nil {
return nil
}
for _, t := range createBlkioSettings(resources.BlockIO) {
if t.value != nil {
if err := os.WriteFile(
filepath.Join(b.Path(path), "blkio."+t.name),
t.format(t.value),
defaultFilePerm,
); err != nil {
return err
}
}
}
return nil
}
func (b *blkioController) Update(path string, resources *specs.LinuxResources) error {
return b.Create(path, resources)
}
func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
stats.Blkio = &v1.BlkIOStat{}
var settings []blkioStatSettings
// Try to read CFQ stats available on all CFQ enabled kernels first
if _, err := os.Lstat(filepath.Join(b.Path(path), "blkio.io_serviced_recursive")); err == nil {
settings = []blkioStatSettings{
{
name: "sectors_recursive",
entry: &stats.Blkio.SectorsRecursive,
},
{
name: "io_service_bytes_recursive",
entry: &stats.Blkio.IoServiceBytesRecursive,
},
{
name: "io_serviced_recursive",
entry: &stats.Blkio.IoServicedRecursive,
},
{
name: "io_queued_recursive",
entry: &stats.Blkio.IoQueuedRecursive,
},
{
name: "io_service_time_recursive",
entry: &stats.Blkio.IoServiceTimeRecursive,
},
{
name: "io_wait_time_recursive",
entry: &stats.Blkio.IoWaitTimeRecursive,
},
{
name: "io_merged_recursive",
entry: &stats.Blkio.IoMergedRecursive,
},
{
name: "time_recursive",
entry: &stats.Blkio.IoTimeRecursive,
},
}
}
f, err := os.Open(filepath.Join(b.procRoot, "partitions"))
if err != nil {
return err
}
defer f.Close()
devices, err := getDevices(f)
if err != nil {
return err
}
var size int
for _, t := range settings {
if err := b.readEntry(devices, path, t.name, t.entry); err != nil {
return err
}
size += len(*t.entry)
}
if size > 0 {
return nil
}
// Even the kernel is compiled with the CFQ scheduler, the cgroup may not use
// block devices with the CFQ scheduler. If so, we should fallback to throttle.* files.
settings = []blkioStatSettings{
{
name: "throttle.io_serviced",
entry: &stats.Blkio.IoServicedRecursive,
},
{
name: "throttle.io_service_bytes",
entry: &stats.Blkio.IoServiceBytesRecursive,
},
}
for _, t := range settings {
if err := b.readEntry(devices, path, t.name, t.entry); err != nil {
return err
}
}
return nil
}
func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]*v1.BlkIOEntry) error {
f, err := os.Open(filepath.Join(b.Path(path), "blkio."+name))
if err != nil {
return err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
// format: dev type amount
fields := strings.FieldsFunc(sc.Text(), splitBlkIOStatLine)
if len(fields) < 3 {
if len(fields) == 2 && fields[0] == "Total" {
// skip total line
continue
} else {
return fmt.Errorf("invalid line found while parsing %s: %s", path, sc.Text())
}
}
major, err := strconv.ParseUint(fields[0], 10, 64)
if err != nil {
return err
}
minor, err := strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return err
}
op := ""
valueField := 2
if len(fields) == 4 {
op = fields[2]
valueField = 3
}
v, err := strconv.ParseUint(fields[valueField], 10, 64)
if err != nil {
return err
}
*entry = append(*entry, &v1.BlkIOEntry{
Device: devices[deviceKey{major, minor}],
Major: major,
Minor: minor,
Op: op,
Value: v,
})
}
return sc.Err()
}
func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings {
settings := []blkioSettings{}
if blkio.Weight != nil {
settings = append(settings,
blkioSettings{
name: "weight",
value: blkio.Weight,
format: uintf,
})
}
if blkio.LeafWeight != nil {
settings = append(settings,
blkioSettings{
name: "leaf_weight",
value: blkio.LeafWeight,
format: uintf,
})
}
for _, wd := range blkio.WeightDevice {
if wd.Weight != nil {
settings = append(settings,
blkioSettings{
name: "weight_device",
value: wd,
format: weightdev,
})
}
if wd.LeafWeight != nil {
settings = append(settings,
blkioSettings{
name: "leaf_weight_device",
value: wd,
format: weightleafdev,
})
}
}
for _, t := range []struct {
name string
list []specs.LinuxThrottleDevice
}{
{
name: "throttle.read_bps_device",
list: blkio.ThrottleReadBpsDevice,
},
{
name: "throttle.read_iops_device",
list: blkio.ThrottleReadIOPSDevice,
},
{
name: "throttle.write_bps_device",
list: blkio.ThrottleWriteBpsDevice,
},
{
name: "throttle.write_iops_device",
list: blkio.ThrottleWriteIOPSDevice,
},
} {
for _, td := range t.list {
settings = append(settings, blkioSettings{
name: t.name,
value: td,
format: throttleddev,
})
}
}
return settings
}
type blkioSettings struct {
name string
value interface{}
format func(v interface{}) []byte
}
type blkioStatSettings struct {
name string
entry *[]*v1.BlkIOEntry
}
func uintf(v interface{}) []byte {
return []byte(strconv.FormatUint(uint64(*v.(*uint16)), 10))
}
func weightdev(v interface{}) []byte {
wd := v.(specs.LinuxWeightDevice)
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.Weight))
}
func weightleafdev(v interface{}) []byte {
wd := v.(specs.LinuxWeightDevice)
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.LeafWeight))
}
func throttleddev(v interface{}) []byte {
td := v.(specs.LinuxThrottleDevice)
return []byte(fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate))
}
func splitBlkIOStatLine(r rune) bool {
return r == ' ' || r == ':'
}
type deviceKey struct {
major, minor uint64
}
// getDevices makes a best effort attempt to read all the devices into a map
// keyed by major and minor number. Since devices may be mapped multiple times,
// we err on taking the first occurrence.
func getDevices(r io.Reader) (map[deviceKey]string, error) {
var (
s = bufio.NewScanner(r)
devices = make(map[deviceKey]string)
)
for i := 0; s.Scan(); i++ {
if i < 2 {
continue
}
fields := strings.Fields(s.Text())
major, err := strconv.Atoi(fields[0])
if err != nil {
return nil, err
}
minor, err := strconv.Atoi(fields[1])
if err != nil {
return nil, err
}
key := deviceKey{
major: uint64(major),
minor: uint64(minor),
}
if _, ok := devices[key]; ok {
continue
}
devices[key] = filepath.Join("/dev", fields[3])
}
return devices, s.Err()
}

View file

@ -1,571 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cgroup1
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
"github.com/opencontainers/runtime-spec/specs-go"
)
// New returns a new control via the cgroup cgroups interface
func New(path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) {
config := newInitConfig()
for _, o := range opts {
if err := o(config); err != nil {
return nil, err
}
}
subsystems, err := config.hiearchy()
if err != nil {
return nil, err
}
var active []Subsystem
for _, s := range subsystems {
// check if subsystem exists
if err := initializeSubsystem(s, path, resources); err != nil {
if err == ErrControllerNotActive {
if config.InitCheck != nil {
if skerr := config.InitCheck(s, path, err); skerr != nil {
if skerr != ErrIgnoreSubsystem {
return nil, skerr
}
}
}
continue
}
return nil, err
}
active = append(active, s)
}
return &cgroup{
path: path,
subsystems: active,
}, nil
}
// Load will load an existing cgroup and allow it to be controlled
// All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name
func Load(path Path, opts ...InitOpts) (Cgroup, error) {
config := newInitConfig()
for _, o := range opts {
if err := o(config); err != nil {
return nil, err
}
}
var activeSubsystems []Subsystem
subsystems, err := config.hiearchy()
if err != nil {
return nil, err
}
// check that the subsystems still exist, and keep only those that actually exist
for _, s := range pathers(subsystems) {
p, err := path(s.Name())
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil, ErrCgroupDeleted
}
if err == ErrControllerNotActive {
if config.InitCheck != nil {
if skerr := config.InitCheck(s, path, err); skerr != nil {
if skerr != ErrIgnoreSubsystem {
return nil, skerr
}
}
}
continue
}
return nil, err
}
if _, err := os.Lstat(s.Path(p)); err != nil {
if os.IsNotExist(err) {
continue
}
return nil, err
}
activeSubsystems = append(activeSubsystems, s)
}
// if we do not have any active systems then the cgroup is deleted
if len(activeSubsystems) == 0 {
return nil, ErrCgroupDeleted
}
return &cgroup{
path: path,
subsystems: activeSubsystems,
}, nil
}
type cgroup struct {
path Path
subsystems []Subsystem
mu sync.Mutex
err error
}
// New returns a new sub cgroup
func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
path := subPath(c.path, name)
for _, s := range c.subsystems {
if err := initializeSubsystem(s, path, resources); err != nil {
return nil, err
}
}
return &cgroup{
path: path,
subsystems: c.subsystems,
}, nil
}
// Subsystems returns all the subsystems that are currently being
// consumed by the group
func (c *cgroup) Subsystems() []Subsystem {
return c.subsystems
}
func (c *cgroup) subsystemsFilter(subsystems ...Name) []Subsystem {
if len(subsystems) == 0 {
return c.subsystems
}
var filteredSubsystems = []Subsystem{}
for _, s := range c.subsystems {
for _, f := range subsystems {
if s.Name() == f {
filteredSubsystems = append(filteredSubsystems, s)
break
}
}
}
return filteredSubsystems
}
// Add moves the provided process into the new cgroup.
// Without additional arguments, the process is added to all the cgroup subsystems.
// When giving Add a list of subsystem names, the process is only added to those
// subsystems, provided that they are active in the targeted cgroup.
func (c *cgroup) Add(process Process, subsystems ...Name) error {
return c.add(process, cgroupProcs, subsystems...)
}
// AddProc moves the provided process id into the new cgroup.
// Without additional arguments, the process with the given id is added to all
// the cgroup subsystems. When giving AddProc a list of subsystem names, the process
// id is only added to those subsystems, provided that they are active in the targeted
// cgroup.
func (c *cgroup) AddProc(pid uint64, subsystems ...Name) error {
return c.add(Process{Pid: int(pid)}, cgroupProcs, subsystems...)
}
// AddTask moves the provided tasks (threads) into the new cgroup.
// Without additional arguments, the task is added to all the cgroup subsystems.
// When giving AddTask a list of subsystem names, the task is only added to those
// subsystems, provided that they are active in the targeted cgroup.
func (c *cgroup) AddTask(process Process, subsystems ...Name) error {
return c.add(process, cgroupTasks, subsystems...)
}
// writeCgroupsProcs writes to the file, but retries on EINVAL.
func writeCgroupProcs(path string, content []byte, perm fs.FileMode) error {
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, perm)
if err != nil {
return err
}
defer f.Close()
for i := 0; i < 5; i++ {
_, err = f.Write(content)
if err == nil {
return nil
}
// If the process's associated task's state is TASK_NEW, the kernel
// returns EINVAL. The function will retry on the error like runc.
// https://github.com/torvalds/linux/blob/v6.0/kernel/sched/core.c#L10308-L10337
// https://github.com/opencontainers/runc/pull/1950
if !errors.Is(err, syscall.EINVAL) {
return err
}
time.Sleep(30 * time.Millisecond)
}
return err
}
func (c *cgroup) add(process Process, pType procType, subsystems ...Name) error {
if process.Pid <= 0 {
return ErrInvalidPid
}
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
for _, s := range pathers(c.subsystemsFilter(subsystems...)) {
p, err := c.path(s.Name())
if err != nil {
return err
}
err = writeCgroupProcs(
filepath.Join(s.Path(p), pType),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
)
if err != nil {
return err
}
}
return nil
}
// Delete will remove the control group from each of the subsystems registered
func (c *cgroup) Delete() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
var errs []string
for _, s := range c.subsystems {
// kernel prevents cgroups with running process from being removed, check the tree is empty
procs, err := c.processes(s.Name(), true, cgroupProcs)
if err != nil {
return err
}
if len(procs) > 0 {
errs = append(errs, fmt.Sprintf("%s (contains running processes)", string(s.Name())))
continue
}
if d, ok := s.(deleter); ok {
sp, err := c.path(s.Name())
if err != nil {
return err
}
if err := d.Delete(sp); err != nil {
errs = append(errs, string(s.Name()))
}
continue
}
if p, ok := s.(pather); ok {
sp, err := c.path(s.Name())
if err != nil {
return err
}
path := p.Path(sp)
if err := remove(path); err != nil {
errs = append(errs, path)
}
continue
}
}
if len(errs) > 0 {
return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(errs, ", "))
}
c.err = ErrCgroupDeleted
return nil
}
// Stat returns the current metrics for the cgroup
func (c *cgroup) Stat(handlers ...ErrorHandler) (*v1.Metrics, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
if len(handlers) == 0 {
handlers = append(handlers, errPassthrough)
}
var (
stats = &v1.Metrics{
CPU: &v1.CPUStat{
Throttling: &v1.Throttle{},
Usage: &v1.CPUUsage{},
},
}
wg = &sync.WaitGroup{}
errs = make(chan error, len(c.subsystems))
)
for _, s := range c.subsystems {
if ss, ok := s.(stater); ok {
sp, err := c.path(s.Name())
if err != nil {
return nil, err
}
wg.Add(1)
go func() {
defer wg.Done()
if err := ss.Stat(sp, stats); err != nil {
for _, eh := range handlers {
if herr := eh(err); herr != nil {
errs <- herr
}
}
}
}()
}
}
wg.Wait()
close(errs)
for err := range errs {
return nil, err
}
return stats, nil
}
// Update updates the cgroup with the new resource values provided
//
// Be prepared to handle EBUSY when trying to update a cgroup with
// live processes and other operations like Stats being performed at the
// same time
func (c *cgroup) Update(resources *specs.LinuxResources) error {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
for _, s := range c.subsystems {
if u, ok := s.(updater); ok {
sp, err := c.path(s.Name())
if err != nil {
return err
}
if err := u.Update(sp, resources); err != nil {
return err
}
}
}
return nil
}
// Processes returns the processes running inside the cgroup along
// with the subsystem used, pid, and path
func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
return c.processes(subsystem, recursive, cgroupProcs)
}
// Tasks returns the tasks running inside the cgroup along
// with the subsystem used, pid, and path
func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
return c.processes(subsystem, recursive, cgroupTasks)
}
func (c *cgroup) processes(subsystem Name, recursive bool, pType procType) ([]Process, error) {
s := c.getSubsystem(subsystem)
sp, err := c.path(subsystem)
if err != nil {
return nil, err
}
if s == nil {
return nil, fmt.Errorf("cgroups: %s doesn't exist in %s subsystem", sp, subsystem)
}
path := s.(pather).Path(sp)
var processes []Process
err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !recursive && info.IsDir() {
if p == path {
return nil
}
return filepath.SkipDir
}
dir, name := filepath.Split(p)
if name != pType {
return nil
}
procs, err := readPids(dir, subsystem, pType)
if err != nil {
return err
}
processes = append(processes, procs...)
return nil
})
return processes, err
}
// Freeze freezes the entire cgroup and all the processes inside it
func (c *cgroup) Freeze() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
s := c.getSubsystem(Freezer)
if s == nil {
return ErrFreezerNotSupported
}
sp, err := c.path(Freezer)
if err != nil {
return err
}
return s.(*freezerController).Freeze(sp)
}
// Thaw thaws out the cgroup and all the processes inside it
func (c *cgroup) Thaw() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
s := c.getSubsystem(Freezer)
if s == nil {
return ErrFreezerNotSupported
}
sp, err := c.path(Freezer)
if err != nil {
return err
}
return s.(*freezerController).Thaw(sp)
}
// OOMEventFD returns the memory cgroup's out of memory event fd that triggers
// when processes inside the cgroup receive an oom event. Returns
// ErrMemoryNotSupported if memory cgroups is not supported.
func (c *cgroup) OOMEventFD() (uintptr, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return 0, c.err
}
s := c.getSubsystem(Memory)
if s == nil {
return 0, ErrMemoryNotSupported
}
sp, err := c.path(Memory)
if err != nil {
return 0, err
}
return s.(*memoryController).memoryEvent(sp, OOMEvent())
}
// RegisterMemoryEvent allows the ability to register for all v1 memory cgroups
// notifications.
func (c *cgroup) RegisterMemoryEvent(event MemoryEvent) (uintptr, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return 0, c.err
}
s := c.getSubsystem(Memory)
if s == nil {
return 0, ErrMemoryNotSupported
}
sp, err := c.path(Memory)
if err != nil {
return 0, err
}
return s.(*memoryController).memoryEvent(sp, event)
}
// State returns the state of the cgroup and its processes
func (c *cgroup) State() State {
c.mu.Lock()
defer c.mu.Unlock()
c.checkExists()
if c.err != nil && c.err == ErrCgroupDeleted {
return Deleted
}
s := c.getSubsystem(Freezer)
if s == nil {
return Thawed
}
sp, err := c.path(Freezer)
if err != nil {
return Unknown
}
state, err := s.(*freezerController).state(sp)
if err != nil {
return Unknown
}
return state
}
// MoveTo does a recursive move subsystem by subsystem of all the processes
// inside the group
func (c *cgroup) MoveTo(destination Cgroup) error {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
for _, s := range c.subsystems {
processes, err := c.processes(s.Name(), true, cgroupProcs)
if err != nil {
return err
}
for _, p := range processes {
if err := destination.Add(p); err != nil {
if strings.Contains(err.Error(), "no such process") {
continue
}
return err
}
}
}
return nil
}
func (c *cgroup) getSubsystem(n Name) Subsystem {
for _, s := range c.subsystems {
if s.Name() == n {
return s
}
}
return nil
}
func (c *cgroup) checkExists() {
for _, s := range pathers(c.subsystems) {
p, err := c.path(s.Name())
if err != nil {
return
}
if _, err := os.Lstat(s.Path(p)); err != nil {
if os.IsNotExist(err) {
c.err = ErrCgroupDeleted
return
}
}
}
}

View file

@ -1,99 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cgroup1
import (
"os"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
type procType = string
const (
cgroupProcs procType = "cgroup.procs"
cgroupTasks procType = "tasks"
defaultDirPerm = 0755
)
// defaultFilePerm is a var so that the test framework can change the filemode
// of all files created when the tests are running. The difference between the
// tests and real world use is that files like "cgroup.procs" will exist when writing
// to a read cgroup filesystem and do not exist prior when running in the tests.
// this is set to a non 0 value in the test code
var defaultFilePerm = os.FileMode(0)
type Process struct {
// Subsystem is the name of the subsystem that the process / task is in.
Subsystem Name
// Pid is the process id of the process / task.
Pid int
// Path is the full path of the subsystem and location that the process / task is in.
Path string
}
type Task = Process
// Cgroup handles interactions with the individual groups to perform
// actions on them as them main interface to this cgroup package
type Cgroup interface {
// New creates a new cgroup under the calling cgroup
New(string, *specs.LinuxResources) (Cgroup, error)
// Add adds a process to the cgroup (cgroup.procs). Without additional arguments,
// the process is added to all the cgroup subsystems. When giving Add a list of
// subsystem names, the process is only added to those subsystems, provided that
// they are active in the targeted cgroup.
Add(Process, ...Name) error
// AddProc adds the process with the given id to the cgroup (cgroup.procs).
// Without additional arguments, the process with the given id is added to all
// the cgroup subsystems. When giving AddProc a list of subsystem names, the process
// id is only added to those subsystems, provided that they are active in the targeted
// cgroup.
AddProc(uint64, ...Name) error
// AddTask adds a process to the cgroup (tasks). Without additional arguments, the
// task is added to all the cgroup subsystems. When giving AddTask a list of subsystem
// names, the task is only added to those subsystems, provided that they are active in
// the targeted cgroup.
AddTask(Process, ...Name) error
// Delete removes the cgroup as a whole
Delete() error
// MoveTo moves all the processes under the calling cgroup to the provided one
// subsystems are moved one at a time
MoveTo(Cgroup) error
// Stat returns the stats for all subsystems in the cgroup
Stat(...ErrorHandler) (*v1.Metrics, error)
// Update updates all the subsystems with the provided resource changes
Update(resources *specs.LinuxResources) error
// Processes returns all the processes in a select subsystem for the cgroup
Processes(Name, bool) ([]Process, error)
// Tasks returns all the tasks in a select subsystem for the cgroup
Tasks(Name, bool) ([]Task, error)
// Freeze freezes or pauses all processes inside the cgroup
Freeze() error
// Thaw thaw or resumes all processes inside the cgroup
Thaw() error
// OOMEventFD returns the memory subsystem's event fd for OOM events
OOMEventFD() (uintptr, error)
// RegisterMemoryEvent returns the memory subsystems event fd for whatever memory event was
// registered for. Can alternatively register for the oom event with this method.
RegisterMemoryEvent(MemoryEvent) (uintptr, error)
// State returns the cgroups current state
State() State
// Subsystems returns all the subsystems in the cgroup
Subsystems() []Subsystem
}

View file

@ -1,125 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cgroup1
import (
"bufio"
"os"
"path/filepath"
"strconv"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func NewCpu(root string) *cpuController {
return &cpuController{
root: filepath.Join(root, string(Cpu)),
}
}
type cpuController struct {
root string
}
func (c *cpuController) Name() Name {
return Cpu
}
func (c *cpuController) Path(path string) string {
return filepath.Join(c.root, path)
}
func (c *cpuController) Create(path string, resources *specs.LinuxResources) error {
if err := os.MkdirAll(c.Path(path), defaultDirPerm); err != nil {
return err
}
if cpu := resources.CPU; cpu != nil {
for _, t := range []struct {
name string
ivalue *int64
uvalue *uint64
}{
{
name: "rt_period_us",
uvalue: cpu.RealtimePeriod,
},
{
name: "rt_runtime_us",
ivalue: cpu.RealtimeRuntime,
},
{
name: "shares",
uvalue: cpu.Shares,
},
{
name: "cfs_period_us",
uvalue: cpu.Period,
},
{
name: "cfs_quota_us",
ivalue: cpu.Quota,
},
} {
var value []byte
if t.uvalue != nil {
value = []byte(strconv.FormatUint(*t.uvalue, 10))
} else if t.ivalue != nil {
value = []byte(strconv.FormatInt(*t.ivalue, 10))
}
if value != nil {
if err := os.WriteFile(
filepath.Join(c.Path(path), "cpu."+t.name),
value,
defaultFilePerm,
); err != nil {
return err
}
}
}
}
return nil
}
func (c *cpuController) Update(path string, resources *specs.LinuxResources) error {
return c.Create(path, resources)
}
func (c *cpuController) Stat(path string, stats *v1.Metrics) error {
f, err := os.Open(filepath.Join(c.Path(path), "cpu.stat"))
if err != nil {
return err
}
defer f.Close()
// get or create the cpu field because cpuacct can also set values on this struct
sc := bufio.NewScanner(f)
for sc.Scan() {
key, v, err := parseKV(sc.Text())
if err != nil {
return err
}
switch key {
case "nr_periods":
stats.CPU.Throttling.Periods = v
case "nr_throttled":
stats.CPU.Throttling.ThrottledPeriods = v
case "throttled_time":
stats.CPU.Throttling.ThrottledTime = v
}
}
return sc.Err()
}

Some files were not shown because too many files have changed in this diff Show more