Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Verifier failure on cos-beta-117-18613-0-66 #2118

Open
Andreagit97 opened this issue Oct 16, 2024 · 3 comments
Open

[BUG] Verifier failure on cos-beta-117-18613-0-66 #2118

Andreagit97 opened this issue Oct 16, 2024 · 3 comments
Assignees
Labels
kind/bug Something isn't working
Milestone

Comments

@Andreagit97
Copy link
Member

Andreagit97 commented Oct 16, 2024

Describe the bug

Running the modern BPF probe on COS (`cos-beta-117-18613-0-66`), we hit the following verifier error:

libbpf: prog 'capset_x': BPF program load failed: Permission denied
libbpf: prog 'capset_x': -- BEGIN PROG LOAD LOG --
reg type unsupported for arg#0 function capset_x#982
0: R1=ctx(off=0,imm=0) R10=fp0
; int BPF_PROG(capset_x,
0: (bf) r7 = r1                       ; R1=ctx(off=0,imm=0) R7_w=ctx(off=0,imm=0)
; int BPF_PROG(capset_x,
1: (79) r9 = *(u64 *)(r7 +8)          ; R7_w=ctx(off=0,imm=0) R9_w=scalar()
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
2: (85) call bpf_get_smp_processor_id#8       ; R0_w=scalar(umax=1,var_off=(0x0; 0x1))
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
3: (63) *(u32 *)(r10 -8) = r0         ; R0_w=scalar(umax=1,var_off=(0x0; 0x1)) R10=fp0 fp-8=
4: (bf) r2 = r10                      ; R2_w=fp0 R10=fp0
;
5: (07) r2 += -8                      ; R2_w=fp-8
; return (struct ringbuf_map *)bpf_map_lookup_elem(&ringbuf_maps, &cpu_id);
6: (18) r1 = 0xffff88810ab0fe00       ; R1_w=map_ptr(off=0,ks=4,vs=4,imm=0)
8: (85) call bpf_map_lookup_elem#1    ; R0=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0)
9: (bf) r6 = r0                       ; R0=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0) R6_w=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0)
; if(!rb)
10: (55) if r6 != 0x0 goto pc+6 17: R0=map_ptr(off=0,ks=0,vs=0,imm=0) R6=map_ptr(off=0,ks=0,vs=0,imm=0) R7=ctx(off=0,imm=0) R9=scalar() R10=fp0 fp-8=????mmmm
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
17: (85) call bpf_get_smp_processor_id#8      ; R0_w=scalar(umax=1,var_off=(0x0; 0x1))
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
18: (63) *(u32 *)(r10 -8) = r0        ; R0_w=scalar(umax=1,var_off=(0x0; 0x1)) R10=fp0 fp-8=
19: (bf) r2 = r10                     ; R2_w=fp0 R10=fp0
;
20: (07) r2 += -8                     ; R2_w=fp-8
; return (struct counter_map *)bpf_map_lookup_elem(&counter_maps, &cpu_id);
21: (18) r1 = 0xffff88812a85cc00      ; R1_w=map_ptr(off=0,ks=4,vs=136,imm=0)
23: (85) call bpf_map_lookup_elem#1   ; R0_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0)
24: (bf) r7 = r0                      ; R0_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0) R7_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0)
; if(!counter)
25: (15) if r7 == 0x0 goto pc+330     ; R7_w=map_value(off=0,ks=4,vs=136,imm=0)
; counter->n_evts++;
26: (79) r1 = *(u64 *)(r7 +0)         ; R1_w=scalar() R7_w=map_value(off=0,ks=4,vs=136,imm=0)
27: (07) r1 += 1                      ; R1_w=scalar()
28: (7b) *(u64 *)(r7 +0) = r1         ; R1_w=scalar() R7_w=map_value(off=0,ks=4,vs=136,imm=0)
; uint8_t *space = bpf_ringbuf_reserve(rb, event_size, 0);
29: (bf) r1 = r6                      ; R1_w=map_ptr(off=0,ks=0,vs=0,imm=0) R6=map_ptr(off=0,ks=0,vs=0,imm=0)
30: (b7) r2 = 66                      ; R2_w=66
31: (b7) r3 = 0                       ; R3_w=0
32: (85) call bpf_ringbuf_reserve#131         ; R0=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) refs=4
33: (bf) r6 = r0                      ; R0=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) R6_w=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) refs=4
; if(!space)
34: (55) if r6 != 0x0 goto pc+7 42: R0=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R6_w=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R7=map_value(off=0,ks=4,vs=136,imm=0) R9=scalar() R10=fp0 fp-8=????mmmm refs=4
; return g_event_params_table[event_id];
42: (18) r1 = 0xffffc900044cc010      ; R1_w=map_value(off=16,ks=4,vs=245818,imm=0) refs=4
44: (71) r2 = *(u8 *)(r1 +353)        ; R1_w=map_value(off=16,ks=4,vs=245818,imm=0) R2_w=4 refs=4
; ringbuf->payload_pos = sizeof(struct ppm_evt_hdr) + nparams * sizeof(uint16_t);
45: (bf) r7 = r2                      ; R2_w=4 R7_w=4 refs=4
46: (67) r7 <<= 1                     ; R7_w=8 refs=4
47: (7b) *(u64 *)(r10 -24) = r7       ; R7_w=8 R10=fp0 fp-24_w=8 refs=4
; ringbuf->payload_pos = sizeof(struct ppm_evt_hdr) + nparams * sizeof(uint16_t);
48: (07) r7 += 26                     ; R7_w=34 refs=4
49: (b7) r1 = 20                      ; R1_w=20 refs=4
50: (7b) *(u64 *)(r10 -32) = r2       ; R2_w=4 R10=fp0 fp-32_w=4 refs=4
; PUSH_FIXED_SIZE_TO_RINGBUF(ringbuf, param, sizeof(int64_t));
51: (2d) if r1 > r2 goto pc+1         ; R1_w=20 R2_w=4 refs=4
; return g_settings.boot_time;
53: (18) r1 = 0xffffc9000450bdb0      ; R1_w=map_value(off=3504,ks=4,vs=591841,imm=0) refs=4
55: (79) r8 = *(u64 *)(r1 +0)         ; R1_w=map_value(off=3504,ks=4,vs=591841,imm=0) R8_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
56: (85) call bpf_ktime_get_boot_ns#125       ; R0_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
57: (0f) r0 += r8                     ; R0_w=scalar() R8_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
58: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
59: (77) r1 >>= 56                    ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
60: (73) *(u8 *)(r6 +7) = r1          ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
61: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
62: (77) r1 >>= 48                    ; R1_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
63: (73) *(u8 *)(r6 +6) = r1          ; R1_w=scalar(umax=65535,var_off=(0x0; 0xffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
64: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
65: (77) r1 >>= 40                    ; R1_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
66: (73) *(u8 *)(r6 +5) = r1          ; R1_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
67: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
68: (77) r1 >>= 32                    ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
69: (73) *(u8 *)(r6 +4) = r1          ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
70: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
71: (77) r1 >>= 24                    ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
72: (73) *(u8 *)(r6 +3) = r1          ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
73: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
74: (77) r1 >>= 16                    ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
75: (73) *(u8 *)(r6 +2) = r1          ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
76: (73) *(u8 *)(r6 +0) = r0          ; R0_w=scalar(id=5) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
77: (77) r0 >>= 8                     ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
78: (73) *(u8 *)(r6 +1) = r0          ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
79: (85) call bpf_get_current_pid_tgid#14     ; R0=scalar() refs=4
80: (b7) r1 = 1                       ; R1_w=1 refs=4
; hdr->type = ringbuf->event_type;
81: (73) *(u8 *)(r6 +21) = r1         ; R1_w=1 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
82: (b7) r1 = 97                      ; R1_w=97 refs=4
83: (73) *(u8 *)(r6 +20) = r1         ; R1_w=97 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
84: (b7) r1 = 0                       ; R1_w=0 refs=4
; hdr->nparams = nparams;
85: (73) *(u8 *)(r6 +25) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
86: (73) *(u8 *)(r6 +24) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
87: (73) *(u8 *)(r6 +23) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
88: (73) *(u8 *)(r6 +15) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
89: (73) *(u8 *)(r6 +14) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
90: (73) *(u8 *)(r6 +13) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
91: (73) *(u8 *)(r6 +12) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->len = ringbuf->reserved_event_size;
92: (73) *(u8 *)(r6 +19) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
93: (73) *(u8 *)(r6 +18) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
94: (73) *(u8 *)(r6 +17) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
95: (b7) r1 = 66                      ; R1_w=66 refs=4
96: (73) *(u8 *)(r6 +16) = r1         ; R1_w=66 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
97: (bf) r1 = r0                      ; R0=scalar(id=6) R1_w=scalar(id=6) refs=4
98: (77) r1 >>= 24                    ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
99: (73) *(u8 *)(r6 +11) = r1         ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
100: (bf) r1 = r0                     ; R0=scalar(id=6) R1_w=scalar(id=6) refs=4
101: (77) r1 >>= 16                   ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
102: (73) *(u8 *)(r6 +10) = r1        ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
103: (73) *(u8 *)(r6 +8) = r0         ; R0=scalar(id=6) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
104: (77) r0 >>= 8                    ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
105: (73) *(u8 *)(r6 +9) = r0         ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
106: (79) r1 = *(u64 *)(r10 -32)      ; R1_w=4 R10=fp0 fp-32=4 refs=4
107: (bf) r8 = r1                     ; R1_w=4 R8_w=4 refs=4
; hdr->nparams = nparams;
108: (73) *(u8 *)(r6 +22) = r1        ; R1_w=4 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; PUSH_FIXED_SIZE_TO_RINGBUF(ringbuf, param, sizeof(int64_t));
109: (bf) r1 = r6                     ; R1_w=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
110: (0f) r1 += r7                    ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R7=34 refs=4
111: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
112: (77) r2 >>= 48                   ; R2_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
113: (73) *(u8 *)(r1 +6) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
114: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
115: (77) r2 >>= 56                   ; R2_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
116: (73) *(u8 *)(r1 +7) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
117: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
118: (77) r2 >>= 32                   ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
119: (73) *(u8 *)(r1 +4) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
120: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
121: (77) r2 >>= 40                   ; R2_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
122: (73) *(u8 *)(r1 +5) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
123: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
124: (77) r2 >>= 16                   ; R2_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
125: (73) *(u8 *)(r1 +2) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
126: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
127: (77) r2 >>= 24                   ; R2_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
128: (73) *(u8 *)(r1 +3) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
129: (73) *(u8 *)(r1 +0) = r9         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R9=scalar(id=7) refs=4
130: (77) r9 >>= 8                    ; R9_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
131: (73) *(u8 *)(r1 +1) = r9         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R9_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
132: (b7) r1 = 8                      ; R1_w=8 refs=4
133: (6b) *(u16 *)(r6 +26) = r1       ; R1_w=8 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
134: (18) r1 = 0x1                    ; R1_w=1 refs=4
; && (bpf_core_enum_value(enum bpf_func_id, BPF_FUNC_get_current_task_btf) == BPF_FUNC_get_current_task_btf))
136: (15) if r1 == 0x0 goto pc+5      ; R1_w=1 refs=4
137: (18) r1 = 0x9e                   ; R1_w=158 refs=4
; if(bpf_core_enum_value_exists(enum bpf_func_id, BPF_FUNC_get_current_task_btf)
139: (55) if r1 != 0x9e goto pc+2     ; R1_w=158 refs=4
; return (struct task_struct *)bpf_get_current_task_btf();
140: (85) call bpf_get_current_task_btf#158   ; R0=trusted_ptr_task_struct(off=0,imm=0) refs=4
141: (05) goto pc+1
;
143: (bf) r7 = r0                     ; R0=trusted_ptr_task_struct(off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
144: (18) r1 = 0x1                    ; R1_w=1 refs=4
146: (79) r9 = *(u64 *)(r10 -24)      ; R9_w=8 R10=fp0 fp-24=8 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
147: (15) if r1 == 0x0 goto pc+7      ; R1_w=1 refs=4
148: (18) r1 = 0x9e                   ; R1_w=158 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
150: (55) if r1 != 0x9e goto pc+4     ; R1_w=158 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
151: (79) r1 = *(u64 *)(r7 +1984)     ; R1_w=rcu_ptr_or_null_cred(id=8,off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
152: (79) r1 = *(u64 *)(r1 +48)
R1 invalid mem access 'rcu_ptr_or_null_'
processed 146 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 5
-- END PROG LOAD LOG --

In more detail, the relevant part of the log is:

; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
151: (79) r1 = *(u64 *)(r7 +1984)     ; R1_w=rcu_ptr_or_null_cred(id=8,off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
152: (79) r1 = *(u64 *)(r1 +48)
R1 invalid mem access 'rcu_ptr_or_null_'
processed 146 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 5
-- END PROG LOAD LOG --

The problem occurs when we try to access the `task->cred` field. Looking at the same program (`capset_x`) loaded on another kernel (6.8.0-45-generic #45~22.04.1-Ubuntu), we obtain the following register state:

; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_permitted);
238: (79) r1 = *(u64 *)(r7 +2992)     ; R1_w=ptr_cred() R7=trusted_ptr_task_struct() refs=4
239: (79) r1 = *(u64 *)(r1 +56)       ; R1_w=scalar() refs=4
240: (7b) *(u64 *)(r10 -8) = r1       ; R1_w=scalar() R10=fp0 fp-8_w=mmmmmmmm refs=4

As you can see, the same `cred` field is seen as a plain pointer (`ptr_cred`) on the Ubuntu kernel, while on COS it is seen as `rcu_ptr_or_null_cred`, and so we hit the following verifier branch:

	} else if (base_type(reg->type) == PTR_TO_MEM) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);

		if (type_may_be_null(reg->type)) {
			verbose(env, "R%d invalid mem access '%s'\n", regno,
				reg_type_str(env, reg->type));
			return -EACCES;
		}

The reason why the type differs on COS probably lies in how the `cred` field is tagged in the kernel BTF:

			t = btf_type_by_id(btf, mtype->type);
			if (btf_type_is_type_tag(t)) {
				tag_value = __btf_name_by_offset(btf, t->name_off);
				/* check __user tag */
				if (strcmp(tag_value, "user") == 0)
					tmp_flag = MEM_USER;
				/* check __percpu tag */
				if (strcmp(tag_value, "percpu") == 0)
					tmp_flag = MEM_PERCPU;
				/* check __rcu tag */
				if (strcmp(tag_value, "rcu") == 0)
					tmp_flag = MEM_RCU;
			}

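For some reason, on COS we enter the `rcu` branch, and once the `MEM_RCU` flag is set we also acquire the `PTR_MAYBE_NULL` flag (which matches the `rcu_ptr_or_null_` register type seen in the log above). This happens in the following verifier code: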

		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL; // <--- here we acquire the `PTR_MAYBE_NULL`

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		}

I will propose a possible fix for this in the short term, but we should look into why this is happening on the COS kernel, and whether it is a bug or intended behavior.

@Andreagit97 Andreagit97 added the kind/bug Something isn't working label Oct 16, 2024
@Andreagit97 Andreagit97 added this to the 0.19.0 milestone Oct 16, 2024
@Andreagit97 Andreagit97 self-assigned this Oct 16, 2024
@Andreagit97
Copy link
Member Author

@Molter73 this is the same issue we talked about some time ago

@Molter73
Copy link
Contributor

Molter73 commented Oct 16, 2024

We had a fix for this that I thought we had already upstreamed; sorry this fell through the cracks. It's pretty close to your PR, though: stackrox/falcosecurity-libs#82

If I recall correctly, @erthalion looked into it: COS compiles the kernel with clang, which emits some additional safety annotations that GCC ignores, and these cause this verifier issue. That also matches your analysis.

@Andreagit97
Copy link
Member Author

Oh, that explains why `__rcu` markers are taken into account on COS and not on other kernels. Thank you for the info!
As for the proposed fixes, they are almost identical. I avoided the extra null check since `BPF_CORE_READ_INTO` should handle it for us: if `unsafe_ptr` is 0 plus some offset, `copy_from_kernel_nofault` will fail because this is not a kernel address, so the output will be memset to 0. The same happens at each subsequent step until `BPF_CORE_READ_INTO` finishes its iterations.

bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret = -EFAULT;

	if (IS_ENABLED(CONFIG_BPF_EVENTS))
		ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);
	return ret;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
kind/bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants