-
Notifications
You must be signed in to change notification settings - Fork 78
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Instrument gRPC server span status codes #1127
base: main
Are you sure you want to change the base?
Changes from all commits
5910feb
4bbe040
8deb716
d45da58
02c614e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ struct grpc_request_t | |
{ | ||
BASE_SPAN_PROPERTIES | ||
char method[MAX_SIZE]; | ||
u32 status_code; | ||
}; | ||
|
||
struct | ||
|
@@ -59,6 +60,8 @@ volatile const u64 frame_stream_id_pod; | |
volatile const u64 stream_id_pos; | ||
volatile const u64 stream_ctx_pos; | ||
volatile const bool is_new_frame_pos; | ||
volatile const u64 status_s_pos; | ||
volatile const u64 status_code_pos; | ||
|
||
static __always_inline long dummy_extract_span_context_from_headers(void *stream_id, struct span_context *parent_span_context) { | ||
return 0; | ||
|
@@ -113,14 +116,14 @@ int uprobe_server_handleStream(struct pt_regs *ctx) | |
if (!get_go_string_from_user_ptr((void *)(stream_ptr + stream_method_ptr_pos), grpcReq->method, sizeof(grpcReq->method))) | ||
{ | ||
bpf_printk("Failed to read gRPC method from stream"); | ||
goto done; | ||
bpf_map_delete_elem(&streamid_to_grpc_events, &stream_id); | ||
return 0; | ||
} | ||
|
||
// Write event | ||
bpf_map_update_elem(&grpc_events, &key, grpcReq, 0); | ||
start_tracking_span(go_context.data, &grpcReq->sc); | ||
done: | ||
bpf_map_delete_elem(&streamid_to_grpc_events, &stream_id); | ||
|
||
return 0; | ||
} | ||
|
||
|
@@ -167,3 +170,65 @@ int uprobe_http2Server_operateHeader(struct pt_regs *ctx) | |
|
||
return 0; | ||
} | ||
|
||
// func (ht *http2Server) WriteStatus(s *Stream, st *status.Status) | ||
// https://github.com/grpc/grpc-go/blob/bcf9171a20e44ed81a6eb152e3ca9e35b2c02c5d/internal/transport/http2_server.go#L1049 | ||
//
// Entry probe for http2Server.WriteStatus: copies the gRPC status code of the
// stream being closed into the span's grpc_request_t and stores it in
// grpc_events under the key derived from the stream's Go context.
//
// Arguments read from the probed call: argument 2 is used as the stream
// pointer, argument 3 as the status pointer.
//
// If no event is stored in grpc_events for this key yet, a span is started
// here and the method name is read from the stream.
//
// Returns 0 on every path except when the grpc_storage_map lookup fails, which
// returns -1.
// NOTE(review): the other exits in this file's probes return 0 — confirm -1 is intended.
SEC("uprobe/http2Server_WriteStatus") | ||
int uprobe_http2Server_WriteStatus(struct pt_regs *ctx) { | ||
struct go_iface go_context = {0}; | ||
get_Go_context(ctx, 2, stream_ctx_pos, true, &go_context); | ||
void *key = get_consistent_key(ctx, go_context.data); | ||
|
||
// Get parent context if exists | ||
void *stream_ptr = get_argument(ctx, 2); | ||
u32 stream_id = 0; | ||
// NOTE(review): this read uses bpf_probe_read while the status reads below use
// bpf_probe_read_user; the return value is not checked, so a failed read is
// not distinguished from a real stream id — confirm this is acceptable.
bpf_probe_read(&stream_id, sizeof(stream_id), (void *)(stream_ptr + stream_id_pos)); | ||
// Prefer the request state stored per stream id (the existing comments say the
// operateHeader probe sets the parent span context); otherwise fall back to
// slot 0 of grpc_storage_map.
struct grpc_request_t *grpcReq = bpf_map_lookup_elem(&streamid_to_grpc_events, &stream_id); | ||
if (grpcReq == NULL) { | ||
// No parent span context, generate new span context | ||
u32 zero = 0; | ||
grpcReq = bpf_map_lookup_elem(&grpc_storage_map, &zero); | ||
if (grpcReq == NULL) { | ||
bpf_printk("failed to get grpcReq from storage map"); | ||
return -1; | ||
} | ||
} | ||
|
||
// The lookup result is only tested for NULL; its contents are never read, and
// the later update writes grpcReq rather than the stored event.
// NOTE(review): if an event already exists, its fields are replaced by
// whatever grpcReq currently holds — confirm grpcReq carries the same span data.
void *grpcReq_event_ptr = bpf_map_lookup_elem(&grpc_events, &key); | ||
// if grpcReq_event is null, then handleStream probe didn't run. Try starting a new span here | ||
if (grpcReq_event_ptr == NULL) | ||
{ | ||
grpcReq->start_time = bpf_ktime_get_ns(); | ||
|
||
start_span_params_t start_span_params = { | ||
.ctx = ctx, | ||
.sc = &grpcReq->sc, | ||
.psc = &grpcReq->psc, | ||
.go_context = &go_context, | ||
// The parent span context is set by operateHeader probe | ||
.get_parent_span_context_fn = dummy_extract_span_context_from_headers, | ||
.get_parent_span_context_arg = NULL, | ||
}; | ||
start_span(&start_span_params); | ||
|
||
// Set attributes | ||
if (!get_go_string_from_user_ptr((void *)(stream_ptr + stream_method_ptr_pos), grpcReq->method, sizeof(grpcReq->method))) | ||
{ | ||
bpf_printk("Failed to read gRPC method from stream"); | ||
// Bail out without writing to grpc_events; only the per-stream entry is removed.
bpf_map_delete_elem(&streamid_to_grpc_events, &stream_id); | ||
return 0; | ||
damemi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
// Two-step read: first the pointer stored at status_ptr + status_s_pos, then
// the status code at s_ptr + status_code_pos.
// NOTE(review): neither read's return value is checked and s_ptr is not tested
// for NULL — confirm what status_code holds when st or st.s is nil or a read fails.
void *status_ptr = get_argument(ctx, 3); | ||
void *s_ptr = 0; | ||
bpf_probe_read_user(&s_ptr, sizeof(s_ptr), (void *)(status_ptr + status_s_pos)); | ||
// Get status code from Status.s pointer | ||
bpf_probe_read_user(&grpcReq->status_code, sizeof(grpcReq->status_code), (void *)(s_ptr + status_code_pos)); | ||
|
||
// Store the request (now carrying status_code) under the context key, then
// drop the per-stream-id entry.
bpf_map_update_elem(&grpc_events, &key, grpcReq, 0); | ||
bpf_map_delete_elem(&streamid_to_grpc_events, &stream_id); | ||
return 0; | ||
} | ||
|
||
// Return probe for http2Server_WriteStatus, generated by the UPROBE_RETURN
// macro (defined outside this view).
// NOTE(review): presumably it looks up the grpc_request_t in grpc_events using
// the context at argument 2 / stream_ctx_pos and emits it to `events` —
// confirm against the macro definition, and check whether it duplicates work
// done by the handleStream return probe.
UPROBE_RETURN(http2Server_WriteStatus, struct grpc_request_t, grpc_events, events, 2, stream_ctx_pos, false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure about this return probe, since we already have |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be better to look here at the `grpc_events` map. If I understand this correctly, this probe should be called between the entry probe of `handleStream` (where an entry is created for the stream in the `grpc_events` map) and the return probe of `handleStream`.
If we can't find an entry, it means the entry probe didn't run properly or didn't run at all; hence in that case we won't have a valid start time. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good question. If `handleStream` isn't called (say the server hits an error before that), then we might still want to record the status from `WriteStatus`. Maybe in that case, this probe should check if an event already exists in `grpc_events` and, if not, start a new span with the status?
It seems like `WriteStatus` might be used in places besides `handleStream` (SendMsg and RecvMsg are 2 instances I found). Do you think these are relevant? If not, then I agree with your suggestion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that depends on the RPC life cycle the application is using.
The creation of a new stream can be called in the initialization phase of the instrumented process; that is also a case where we'll miss the entry probe of `handleStream`.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds like a case where we would want to create a new trace in the WriteStatus probe, is that what you mean?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure. We won't have the RPC method in that case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`WriteStatus` does have the same `Stream` argument that is passed to `handleStream`, so we could try to get the RPC method from there. I just pushed an update that does that, ptal. I think this would be neat to cover the cases where we miss the original initialization like you mentioned. If not, I'm all for just simplifying this down.