From af685b3294375154cab93f51fc35717dacbb1dd3 Mon Sep 17 00:00:00 2001 From: Keisuke Umegaki <41987730+keisku@users.noreply.github.com> Date: Sun, 19 May 2024 23:08:48 +0900 Subject: [PATCH] Use BPF ringbuf to send collected data to user-space (#10) * use ring buffer for data sharing * require kernel version >= 5.8 * improve error handling --- README.md | 1 + ebpf/bpf_x86_bpfel.go | 24 ++----- ebpf/c/gmon.c | 39 ++++++----- ebpf/c/maps.h | 26 ++----- ebpf/event_handler.go | 158 +++++++++++++----------------------------- ebpf/gmon.go | 9 ++- gmon.sh | 10 +++ go.mod | 1 + go.sum | 2 + 9 files changed, 107 insertions(+), 163 deletions(-) diff --git a/README.md b/README.md index 8b901c3..7bc554d 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ # Prerequisites - amd64 (x86_64) +- Linux Kernel 5.8+ since `gmon` uses [BPF ring buffer](https://nakryiko.com/posts/bpf-ringbuf/) # Usage diff --git a/ebpf/bpf_x86_bpfel.go b/ebpf/bpf_x86_bpfel.go index 00f1d06..700fc3c 100644 --- a/ebpf/bpf_x86_bpfel.go +++ b/ebpf/bpf_x86_bpfel.go @@ -12,18 +12,11 @@ import ( "github.com/cilium/ebpf" ) -type bpfGoexit1Event struct{ StackId int32 } - -type bpfGoexit1EventKey struct { - GoroutineId int64 - Ktime uint64 -} - -type bpfNewproc1Event struct{ StackId int32 } - -type bpfNewproc1EventKey struct { +type bpfEvent struct { GoroutineId int64 - Ktime uint64 + StackId int32 + Exit bool + _ [3]byte } type bpfStackTraceT [20]uint64 @@ -77,8 +70,7 @@ type bpfProgramSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { - Goexit1Events *ebpf.MapSpec `ebpf:"goexit1_events"` - Newproc1Events *ebpf.MapSpec `ebpf:"newproc1_events"` + Events *ebpf.MapSpec `ebpf:"events"` StackAddresses *ebpf.MapSpec `ebpf:"stack_addresses"` } @@ -101,15 +93,13 @@ func (o *bpfObjects) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { - Goexit1Events *ebpf.Map `ebpf:"goexit1_events"` - Newproc1Events *ebpf.Map `ebpf:"newproc1_events"` + Events *ebpf.Map `ebpf:"events"` StackAddresses *ebpf.Map `ebpf:"stack_addresses"` } func (m *bpfMaps) Close() error { return _BpfClose( - m.Goexit1Events, - m.Newproc1Events, + m.Events, m.StackAddresses, ) } diff --git a/ebpf/c/gmon.c b/ebpf/c/gmon.c index accb178..7f68569 100644 --- a/ebpf/c/gmon.c +++ b/ebpf/c/gmon.c @@ -39,14 +39,18 @@ int runtime_newproc1(struct pt_regs *ctx) { bpf_printk("%s:%d | failed to read stackid\n", __FILE__, __LINE__); return 0; } - struct newproc1_event_key key = { - .goroutine_id = goid, - .ktime = bpf_ktime_get_ns(), - }; - struct newproc1_event event = { - .stack_id = stack_id, - }; - bpf_map_update_elem(&newproc1_events, &key, &event, BPF_ANY); + + struct event *ev; + ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0); + if (!ev) { + bpf_printk("%s:%d | failed to reserve ringbuf\n", __FILE__, __LINE__); + return 0; + } + ev->goroutine_id = goid; + ev->stack_id = stack_id; + ev->exit = false; + bpf_ringbuf_submit(ev, 0); + return 0; } @@ -65,14 +69,17 @@ int runtime_goexit1(struct pt_regs *ctx) { return 0; } - struct goexit1_event_key key = { - .goroutine_id = go_id, - .ktime = bpf_ktime_get_ns(), - }; - struct goexit1_event event = { - .stack_id = stack_id, - }; - bpf_map_update_elem(&goexit1_events, &key, &event, BPF_ANY); + struct event *ev; + ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0); + if (!ev) { + bpf_printk("%s:%d | failed to reserve ringbuf\n", __FILE__, __LINE__); + return 0; + } + ev->goroutine_id = go_id; + ev->stack_id = stack_id; + ev->exit = true; + bpf_ringbuf_submit(ev, 0); + return 0; } diff --git a/ebpf/c/maps.h b/ebpf/c/maps.h index eb7c58e..c9efe8c 100644 --- a/ebpf/c/maps.h +++ b/ebpf/c/maps.h @@ -16,9 +16,6 @@ __type(value, _value_type); \ } _name SEC(".maps"); -#define BPF_HASH(_name, _key_type, _value_type, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries) - // stack traces: the value is 1 big byte array of the stack addresses typedef __u64 stack_trace_t[MAX_STACK_DEPTH]; #define BPF_STACK_TRACE(_name, _max_entries) \ @@ -26,26 +23,17 @@ typedef __u64 stack_trace_t[MAX_STACK_DEPTH]; BPF_STACK_TRACE(stack_addresses, MAX_STACK_ADDRESSES); // store stack traces -struct newproc1_event_key { - int64_t goroutine_id; - uint64_t ktime; // To make this struct unique -}; +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 24); +} events SEC(".maps"); -struct newproc1_event { - int stack_id; -}; - -BPF_HASH(newproc1_events, struct newproc1_event_key, struct newproc1_event, 10240); - -struct goexit1_event_key { +struct event { int64_t goroutine_id; - uint64_t ktime; // To make this struct unique -}; - -struct goexit1_event { int stack_id; + bool exit; }; -BPF_HASH(goexit1_events, struct goexit1_event_key, struct goexit1_event, 10240); +struct event *unused __attribute__((unused)); #endif /* __MAPS_H__ */ diff --git a/ebpf/event_handler.go b/ebpf/event_handler.go index 08d6662..15e2355 100644 --- a/ebpf/event_handler.go +++ b/ebpf/event_handler.go @@ -1,16 +1,19 @@ package ebpf import ( + "bytes" "context" "encoding/binary" + "errors" "fmt" "log/slog" "runtime/trace" "strconv" "time" - "github.com/cilium/ebpf" + "github.com/cilium/ebpf/ringbuf" "github.com/go-delve/delve/pkg/proc" + lru "github.com/hashicorp/golang-lru/v2" "github.com/keisku/gmon/bininfo" ) @@ -18,39 +21,64 @@ type eventHandler struct { goroutineQueue chan<- goroutine objs *bpfObjects biTranslator bininfo.Translator + reader *ringbuf.Reader } func (h *eventHandler) run(ctx context.Context) { - ticker := time.NewTicker(200 * time.Millisecond) + var event bpfEvent + stackIdCache, _ := lru.New[int32, struct{}](16) for { - select { - case <-ctx.Done(): - ticker.Stop() - return - case <-ticker.C: - ctx, task := trace.NewTask(ctx, "event_handler.handle") - trace.WithRegion(ctx, "event_handler.handle_newproc1", func() { - if err := h.handleNewproc1(); err != nil { - slog.Warn("Failed to handle newproc1", slog.Any("error", err)) - } - }) - trace.WithRegion(ctx, "event_handler.handle_goexit1", func() { - if err := h.handleGoexit1(); err != nil { - slog.Warn("Failed to handle goexit1", slog.Any("error", err)) - } - }) - task.End() + if err := h.readRecord(ctx, &event); err != nil { + if errors.Is(err, ringbuf.ErrClosed) { + slog.Debug("ring buffer is closed") + return + } + slog.Warn("Failed to read bpf ring buffer", slog.Any("error", err)) + continue + } + stack, err := h.lookupStack(ctx, event.StackId) + if err != nil { + slog.Warn(err.Error()) + continue + } + h.sendGoroutine(goroutine{ + Id: event.GoroutineId, + ObservedAt: time.Now(), + Stack: stack, + Exit: event.Exit, + }) + contains, _ := stackIdCache.ContainsOrAdd(event.StackId, struct{}{}) + if !contains { + slog.Debug("delete stack_addresses", slog.Int("stack_id", int(event.StackId))) + if err := h.objs.StackAddresses.Delete(event.StackId); err != nil { + slog.Debug("Failed to delete stack_addresses", slog.Any("error", err)) + } } } } +func (h *eventHandler) readRecord(ctx context.Context, event *bpfEvent) error { + _, task := trace.NewTask(ctx, "event_handler.read_ring_buffer") + defer task.End() + record, err := h.reader.Read() + if err != nil { + return err + } + if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, event); err != nil { + return fmt.Errorf("decode ring buffer record: %w", err) + } + return nil +} + // lookupStack is a copy of the function in tracee. // https://github.com/aquasecurity/tracee/blob/f61866b4e2277d2a7dddc6cd77a67cd5a5da3b14/pkg/ebpf/events_pipeline.go#L642-L681 const maxStackDepth = 20 var stackFrameSize = (strconv.IntSize / 8) -func (h *eventHandler) lookupStack(stackId int32) ([]*proc.Function, error) { +func (h *eventHandler) lookupStack(ctx context.Context, stackId int32) ([]*proc.Function, error) { + _, task := trace.NewTask(ctx, "event_handler.lookup_stack") + defer task.End() stackBytes, err := h.objs.StackAddresses.LookupBytes(stackId) if err != nil { return nil, fmt.Errorf("failed to lookup stack addresses: %w", err) @@ -83,38 +111,6 @@ func (h *eventHandler) lookupStack(stackId int32) ([]*proc.Function, error) { return stack[0:stackCounter], nil } -func (h *eventHandler) handle( - stackAddrs, eventMap *ebpf.Map, - // stackIdSet is the set of stack_id to delete later. - // keysToDelete is the slice of eBPF map keys to delete later. - // keyLength holds the count of keys in keysToDelete to determine if BatchDelete is required. - processMap func(iter *ebpf.MapIterator, stackIdSet map[int32]struct{}) (keysToDelete any, keyLength int), -) error { - stackIdSetToDelete := make(map[int32]struct{}) - mapIter := eventMap.Iterate() - keysToDelete, keyLength := processMap(mapIter, stackIdSetToDelete) - if err := mapIter.Err(); err != nil { - return fmt.Errorf("failed to iterate eBPF map: %w", err) - } - if 0 < keyLength { - if n, err := eventMap.BatchDelete(keysToDelete, nil); err == nil { - slog.Debug("Deleted eBPF map", slog.Int("deleted", n), slog.Int("expected", keyLength)) - } else { - slog.Warn("Failed to delete eBPF map", slog.Any("err", err)) - } - } - // Don't use BatchDelete for stack addresses because the opration is not supported. - // If we do it, we will see "batch delete: not supported" error. - for stackId := range stackIdSetToDelete { - if err := stackAddrs.Delete(stackId); err != nil { - slog.Debug("Failed to delete stack_addresses", slog.Any("error", err)) - continue - } - slog.Debug("Deleted stack address map", slog.Int("stack_id", int(stackId))) - } - return nil -} - func (h *eventHandler) sendGoroutine(g goroutine) { maxRetries := 3 retryInterval := 10 * time.Millisecond @@ -145,61 +141,3 @@ func (h *eventHandler) sendGoroutine(g goroutine) { } } } - -func (h *eventHandler) handleNewproc1() error { - var key bpfNewproc1EventKey - var value bpfNewproc1Event - var keysToDelete []bpfNewproc1EventKey - - return h.handle( - h.objs.StackAddresses, - h.objs.Newproc1Events, - func(mapIter *ebpf.MapIterator, stackIdSet map[int32]struct{}) (any, int) { - for mapIter.Next(&key, &value) { - stack, err := h.lookupStack(value.StackId) - if err != nil { - slog.Warn(err.Error()) - continue - } - stackIdSet[value.StackId] = struct{}{} - keysToDelete = append(keysToDelete, key) - h.sendGoroutine(goroutine{ - Id: key.GoroutineId, - ObservedAt: time.Now(), - Stack: stack, - Exit: false, - }) - } - return keysToDelete, len(keysToDelete) - }, - ) -} - -func (h *eventHandler) handleGoexit1() error { - var key bpfGoexit1EventKey - var value bpfGoexit1Event - var keysToDelete []bpfGoexit1EventKey - - return h.handle( - h.objs.StackAddresses, - h.objs.Goexit1Events, - func(mapIter *ebpf.MapIterator, stackIdSet map[int32]struct{}) (any, int) { - for mapIter.Next(&key, &value) { - stack, err := h.lookupStack(value.StackId) - if err != nil { - slog.Warn(err.Error()) - continue - } - stackIdSet[value.StackId] = struct{}{} - keysToDelete = append(keysToDelete, key) - h.sendGoroutine(goroutine{ - Id: key.GoroutineId, - ObservedAt: time.Now(), - Stack: stack, - Exit: false, - }) - } - return keysToDelete, len(keysToDelete) - }, - ) -} diff --git a/ebpf/gmon.go b/ebpf/gmon.go index 4dc860d..9bac72e 100644 --- a/ebpf/gmon.go +++ b/ebpf/gmon.go @@ -8,11 +8,12 @@ import ( "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" + "github.com/cilium/ebpf/ringbuf" "github.com/keisku/gmon/bininfo" ) // $BPF_CLANG and $BPF_CFLAGS are set by the Makefile. -//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -target amd64 -cflags $BPF_CFLAGS bpf ./c/gmon.c -- -I./c +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type event -cc $BPF_CLANG -target amd64 -cflags $BPF_CFLAGS bpf ./c/gmon.c -- -I./c func Run(ctx context.Context, config Config) (func(), error) { slog.Debug("eBPF programs start with config", slog.String("config", config.String())) @@ -51,11 +52,16 @@ func Run(ctx context.Context, config Config) (func(), error) { if err != nil { return func() {}, err } + ringbufReader, err := ringbuf.NewReader(objs.Events) + if err != nil { + return func() {}, err + } goroutineQueue := make(chan goroutine, 100) eventhandler := &eventHandler{ goroutineQueue: goroutineQueue, objs: &objs, biTranslator: biTranslator, + reader: ringbufReader, } reporter := &reporter{ goroutineQueue: goroutineQueue, @@ -63,6 +69,7 @@ func Run(ctx context.Context, config Config) (func(), error) { go reporter.run(ctx) go eventhandler.run(ctx) return func() { + ringbufReader.Close() for i := range links { if err := links[i].Close(); err != nil { slog.Warn("Failed to close link", slog.Any("error", err)) diff --git a/gmon.sh b/gmon.sh index ae0dae9..b074ce6 100755 --- a/gmon.sh +++ b/gmon.sh @@ -8,6 +8,16 @@ if [ "$arch" != "x86_64" ]; then exit 1 fi +kernel_version=$(uname -r) +major_version=$(echo $kernel_version | cut -d. -f1) +minor_version=$(echo $kernel_version | cut -d. -f2) +if [ "$major_version" -gt 5 ] || ([ "$major_version" -eq 5 ] && [ "$minor_version" -ge 8 ]); then + echo "Your kernel version is $kernel_version" +else + echo "Your kernel version should be >= 5.8, got $kernel_version" + exit 1 +fi + if [ "$1" = "build" ] || [ "$1" = "install" ] || [ "$1" = "test" ] || [ "$1" = "format" ]; then echo "Running $1 on $arch" else diff --git a/go.mod b/go.mod index c97b263..a5a839a 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.22.1 require ( github.com/cilium/ebpf v0.14.0 github.com/go-delve/delve v1.22.1 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/prometheus/client_golang v1.19.0 github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.52.3 diff --git a/go.sum b/go.sum index 1e2b519..a3d5a0d 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=