diff options
120 files changed, 3519 insertions, 895 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4877563241f3..c7525942f12c 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - s390x - riscv64 - riscv32 + - loongarch64 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst index ad4d4d5eecb0..63bb88846e50 100644 --- a/Documentation/bpf/libbpf/program_types.rst +++ b/Documentation/bpf/libbpf/program_types.rst @@ -56,6 +56,16 @@ described in more detail in the footnotes. | | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | | + +----------------------------------------+----------------------------------+-----------+ | | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | | +-------------------------------------------+----------------------------------------+----------------------------------+-----------+ | ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | | + +----------------------------------------+----------------------------------+-----------+ diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index f69da5074860..7d8c5380492f 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF -instruction set. +sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation +from eBPF instruction set. Testing ------- diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6b93a68c5f84..bf06b7283f0c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -670,15 +670,18 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) static int get_probe_mem_regno(const u8 *insn) { /* - * insn must point to llgc, llgh, llgf or lg, which have destination - * register at the same position. + * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have + * destination register at the same position. */ - if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + if (insn[0] != 0xe3) /* common prefix */ return -1; if (insn[5] != 0x90 && /* llgc */ insn[5] != 0x91 && /* llgh */ insn[5] != 0x16 && /* llgf */ - insn[5] != 0x04) /* lg */ + insn[5] != 0x04 && /* lg */ + insn[5] != 0x77 && /* lgb */ + insn[5] != 0x15 && /* lgh */ + insn[5] != 0x14) /* lgf */ return -1; return insn[1] >> 4; } @@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; + s16 branch_oc_off = insn->off; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; int last, insn_count = 1; @@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int err; if (BPF_CLASS(insn->code) == BPF_LDX && - BPF_MODE(insn->code) == BPF_PROBE_MEM) + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) probe_prg = jit->prg; switch (insn->code) { /* * BPF_MOV */ - case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ - /* llgfr %dst,%src */ - EMIT4(0xb9160000, dst_reg, src_reg); - if (insn_is_zext(&insn[1])) - insn_count = 2; + case BPF_ALU | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = (u32) SRC */ + /* llgfr %dst,%src */ + EMIT4(0xb9160000, dst_reg, src_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; + break; + case 8: /* DST = (u32)(s8) SRC */ + /* lbr %dst,%src */ + EMIT4(0xb9260000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + case 16: /* DST = (u32)(s16) SRC */ + /* lhr %dst,%src */ + EMIT4(0xb9270000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + } break; - case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ - /* lgr %dst,%src */ - EMIT4(0xb9040000, dst_reg, src_reg); + case BPF_ALU64 | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = SRC */ + /* lgr %dst,%src */ + EMIT4(0xb9040000, dst_reg, src_reg); + break; + case 8: /* DST = (s8) SRC */ + /* lgbr %dst,%src */ + EMIT4(0xb9060000, dst_reg, src_reg); + break; + case 16: /* DST = (s16) SRC */ + /* lghr %dst,%src */ + EMIT4(0xb9070000, dst_reg, src_reg); + break; + case 32: /* DST = (s32) SRC */ + /* lgfr %dst,%src */ + EMIT4(0xb9140000, dst_reg, src_reg); + break; + } break; case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ /* llilf %dst,imm */ @@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * BPF_DIV / BPF_MOD */ - case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */ - case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); - /* dlr %w0,%src */ - EMIT4(0xb9970000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) src */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dlr %w0,%src */ + EMIT4(0xb9970000, REG_W0, src_reg); + break; + case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgfr %r0,%src */ + EMIT4(0xb91d0000, REG_W0, src_reg); + break; + } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); if (insn_is_zext(&insn[1])) insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = dst {/,%} src */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlgr %w0,%src */ + EMIT4(0xb9870000, REG_W0, src_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) src */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsgr %w0,%src */ + EMIT4(0xb90d0000, REG_W0, src_reg); + break; + } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; } - case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */ - case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; if (imm == 1) { if (BPF_OP(insn->code) == BPF_MOD) - /* lhgi %dst,0 */ + /* lghi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); else EMIT_ZERO(dst_reg); break; } - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) { - /* dl %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, - EMIT_CONST_U32(imm)); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dl %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgf %r0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + } } else { - /* lgfrl %dst,imm */ - EMIT6_PCREL_RILB(0xc40c0000, dst_reg, - _EMIT_CONST_U32(imm)); - jit->seen |= SEEN_LITERAL; - /* dlr %w0,%dst */ - EMIT4(0xb9970000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* lrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40d0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dlr %w0,%dst */ + EMIT4(0xb9970000, REG_W0, dst_reg); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %w1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* lgfrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40c0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); @@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7090000, dst_reg, 0); break; } - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { - /* dlg %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64(imm)); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + } } else { - /* lgrl %dst,imm */ - EMIT6_PCREL_RILB(0xc4080000, dst_reg, - _EMIT_CONST_U64(imm)); - jit->seen |= SEEN_LITERAL; - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); @@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, } break; case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: /* dst = (u16) cpu_to_le16(dst) */ /* lrvr %dst,%dst */ @@ -1374,6 +1490,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + /* lgb %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ @@ -1382,6 +1504,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + /* lgh %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ @@ -1390,6 +1518,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* lgf %dst,off(%src) */ + jit->seen |= SEEN_MEM; + EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off); + break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ @@ -1570,6 +1704,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * instruction itself (loop) and for BPF with offset 0 we * branch to the instruction behind the branch. */ + case BPF_JMP32 | BPF_JA: /* if (true) */ + branch_oc_off = imm; + fallthrough; case BPF_JMP | BPF_JA: /* if (true) */ mask = 0xf000; /* j */ goto branch_oc; @@ -1738,14 +1875,16 @@ branch_xu: break; branch_oc: if (!is_first_pass(jit) && - can_use_rel(jit, addrs[i + off + 1])) { + can_use_rel(jit, addrs[i + branch_oc_off + 1])) { /* brc mask,off */ EMIT4_PCREL_RIC(0xa7040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } else { /* brcl mask,off */ EMIT6_PCREL_RILC(0xc0040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } break; } diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a34ac7f00c86..d3c51a507508 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2922,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, diff --git a/include/linux/filter.h b/include/linux/filter.h index e8822bd595f9..a4953fafc8cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -1329,6 +1329,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5f13db15a3c7..7ba61b75bc0e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3264,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5096,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6532,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6547,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6960,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6972,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7006,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7307,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 96856f130cbf..833faa04461b 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -793,8 +793,6 @@ __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); - BTF_TYPE_EMIT(struct btf_iter_num); - /* start == end is legit, it's an empty range and we'll just get NULL * on first (and any subsequent) bpf_iter_num_next() call */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 69101200c124..15d71d2986d3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7850,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..74ad2215e1ba 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,18 +1450,22 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). * - * socket is expected to be of type INET or INET6. + * socket is expected to be of type INET, INET6 or UNIX. * * This function will return %-EPERM if an attached program is found and * returned value != 1 during execution. In all other cases, 0 is returned. */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,21 +1477,31 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). */ - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 && + sk->sk_family != AF_UNIX) return 0; if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -2520,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_get_retval_proto; @@ -2535,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_set_retval_proto; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index dd1c69ee3375..62a53ebfedf9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1272,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla if (in_nmi()) return -EOPNOTSUPP; - if (flags > BPF_F_TIMER_ABS) + if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN)) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; @@ -1286,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla else mode = HRTIMER_MODE_REL_SOFT; + if (flags & BPF_F_TIMER_CPU_PIN) + mode |= HRTIMER_MODE_PINNED; + hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -2549,6 +2552,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) +BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 458bb80b14d5..d6b277482085 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -28,7 +28,7 @@ struct bpf_stack_map { void *elems; struct pcpu_freelist freelist; u32 n_buckets; - struct stack_map_bucket *buckets[]; + struct stack_map_bucket *buckets[] __counted_by(n_buckets); }; static inline bool stack_map_use_build_id(struct bpf_map *map) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fc088fd4874f..341f8cb4405c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2446,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return 0; default: return -EINVAL; @@ -3374,7 +3379,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3385,7 +3390,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3408,6 +3413,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3416,7 +3422,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3425,6 +3431,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3444,7 +3451,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3480,7 +3487,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -3676,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; case BPF_CGROUP_SOCK_OPS: return BPF_PROG_TYPE_SOCK_OPS; @@ -3949,14 +3961,19 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: @@ -4822,7 +4839,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 7473068ed313..fef17628341f 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -7,7 +7,9 @@ #include <linux/fs.h> #include <linux/fdtable.h> #include <linux/filter.h> +#include <linux/bpf_mem_alloc.h> #include <linux/btf_ids.h> +#include <linux/mm_types.h> #include "mmap_unlock_work.h" static const char * const iter_task_type_names[] = { @@ -803,6 +805,95 @@ const struct bpf_func_proto bpf_find_vma_proto = { .arg5_type = ARG_ANYTHING, }; +struct bpf_iter_task_vma_kern_data { + struct task_struct *task; + struct mm_struct *mm; + struct mmap_unlock_irq_work *work; + struct vma_iterator vmi; +}; + +struct bpf_iter_task_vma { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_iter_task_vma */ +struct bpf_iter_task_vma_kern { + struct bpf_iter_task_vma_kern_data *data; +} __attribute__((aligned(8))); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, u64 addr) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + bool irq_work_busy = false; + int err; + + BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma)); + + /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized + * before, so non-NULL kit->data doesn't point to previously + * bpf_mem_alloc'd bpf_iter_task_vma_kern_data + */ + kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data)); + if (!kit->data) + return -ENOMEM; + + kit->data->task = get_task_struct(task); + kit->data->mm = task->mm; + if (!kit->data->mm) { + err = -ENOENT; + goto err_cleanup_iter; + } + + /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */ + irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work); + if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) { + err = -EBUSY; + goto err_cleanup_iter; + } + + vma_iter_init(&kit->data->vmi, kit->data->mm, addr); + return 0; + +err_cleanup_iter: + if (kit->data->task) + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + /* NULL kit->data signals failed bpf_iter_task_vma initialization */ + kit->data = NULL; + return err; +} + +__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (!kit->data) /* bpf_iter_task_vma_new failed */ + return NULL; + return vma_next(&kit->data->vmi); +} + +__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (kit->data) { + bpf_mmap_unlock_mm(kit->data->work, kit->data->mm); + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + } +} + +__diag_pop(); + DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); static void do_mmap_read_unlock(struct irq_work *entry) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e59d6c2c6bf4..bb58987e4844 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1342,6 +1342,50 @@ static void scrub_spilled_slot(u8 *stype) *stype = STACK_MISC; } +static void print_scalar_ranges(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + const char **sep) +{ + struct { + const char *name; + u64 val; + bool omit; + } minmaxs[] = { + {"smin", reg->smin_value, reg->smin_value == S64_MIN}, + {"smax", reg->smax_value, reg->smax_value == S64_MAX}, + {"umin", reg->umin_value, reg->umin_value == 0}, + {"umax", reg->umax_value, reg->umax_value == U64_MAX}, + {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, + {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, + }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; + bool neg1, neg2; + + for (m1 = &minmaxs[0]; m1 < mend; m1++) { + if (m1->omit) + continue; + + neg1 = m1->name[0] == 's' && (s64)m1->val < 0; + + verbose(env, "%s%s=", *sep, m1->name); + *sep = ","; + + for (m2 = m1 + 2; m2 < mend; m2 += 2) { + if (m2->omit || m2->val != m1->val) + continue; + /* don't mix negatives with positives */ + neg2 = m2->name[0] == 's' && (s64)m2->val < 0; + if (neg2 != neg1) + continue; + m2->omit = true; + verbose(env, "%s=", m2->name); + } + + verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); + } +} + static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, bool print_all) @@ -1405,34 +1449,13 @@ static void print_verifier_state(struct bpf_verifier_env *env, */ verbose_a("imm=%llx", reg->var_off.value); } else { - if (reg->smin_value != reg->umin_value && - reg->smin_value != S64_MIN) - verbose_a("smin=%lld", (long long)reg->smin_value); - if (reg->smax_value != reg->umax_value && - reg->smax_value != S64_MAX) - verbose_a("smax=%lld", (long long)reg->smax_value); - if (reg->umin_value != 0) - verbose_a("umin=%llu", (unsigned long long)reg->umin_value); - if (reg->umax_value != U64_MAX) - verbose_a("umax=%llu", (unsigned long long)reg->umax_value); + print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose_a("var_off=%s", tn_buf); } - if (reg->s32_min_value != reg->smin_value && - reg->s32_min_value != S32_MIN) - verbose_a("s32_min=%d", (int)(reg->s32_min_value)); - if (reg->s32_max_value != reg->smax_value && - reg->s32_max_value != S32_MAX) - verbose_a("s32_max=%d", (int)(reg->s32_max_value)); - if (reg->u32_min_value != reg->umin_value && - reg->u32_min_value != U32_MIN) - verbose_a("u32_min=%d", (int)(reg->u32_min_value)); - if (reg->u32_max_value != reg->umax_value && - reg->u32_max_value != U32_MAX) - verbose_a("u32_max=%d", (int)(reg->u32_max_value)); } #undef verbose_a @@ -1516,7 +1539,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); - mark_verifier_state_clean(env); + if (!print_all) + mark_verifier_state_clean(env); } static inline u32 vlog_alignment(u32 pos) @@ -3114,7 +3138,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, if (class == BPF_LDX) { if (t != SRC_OP) - return BPF_SIZE(code) == BPF_DW; + return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX; /* LDX source must be ptr. */ return true; } @@ -14383,6 +14407,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -14395,6 +14421,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx + insn->off + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -14795,10 +14823,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) range = tnum_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 868008f56fec..df697c74d519 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * and don't send kprobe event into ring-buffer, * so return zero here */ + rcu_read_lock(); + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); + rcu_read_unlock(); ret = 0; goto out; } @@ -2384,7 +2387,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2419,7 +2423,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS @@ -2614,6 +2618,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; info->kprobe_multi.flags = kmulti_link->flags; + info->kprobe_multi.missed = kmulti_link->fp.nmissed; if (!uaddrs) return 0; @@ -2710,6 +2715,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + bpf_prog_inc_misses_counter(link->link.prog); err = 0; goto out; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..961a78ffd6d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1189,6 +1189,12 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1200,8 +1206,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1547,7 +1552,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1566,6 +1572,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? (unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index de753403cdaf..9c581d6da843 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long args[SYSCALL_DEFINE_MAXARGS]; } __aligned(8) param; int i; @@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long ret; } __aligned(8) param; diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..cc2e4babc85f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include <net/xdp.h> #include <net/mptcp.h> #include <net/netfilter/nf_conntrack_bpf.h> +#include <linux/un.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -5850,6 +5851,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5992,6 +5996,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6010,7 +6026,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -7858,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7876,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8931,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size, if (off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. */ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): @@ -11752,6 +11779,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, return 0; } + +__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const u8 *sun_path, u32 sun_path__sz) +{ + struct sockaddr_un *un; + + if (sa_kern->sk->sk_family != AF_UNIX) + return -EINVAL; + + /* We do not allow changing the address to unnamed or larger than the + * maximum allowed address size for a unix sockaddr. + */ + if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) + return -EINVAL; + + un = (struct sockaddr_un *)sa_kern->uaddr; + memcpy(un->sun_path, sun_path, sun_path__sz); + sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11776,6 +11824,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) +BTF_SET8_END(bpf_kfunc_check_set_sock_addr) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11786,6 +11838,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .set = &bpf_kfunc_check_set_xdp, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_addr, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11800,7 +11857,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); } late_initcall(bpf_kfunc_init); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..5ce275b2d7ef 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -788,6 +788,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -800,7 +801,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -808,12 +809,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet_getname); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c61f444e1c7..823306487a82 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f603ad9307af..2519f530b114 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7f7724beca33..1734fd6a1ce0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1144,7 +1144,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1867,7 +1869,8 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1906,7 +1909,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6ad0d6e99b5..c35d302a3da9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -454,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -520,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -539,7 +540,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -547,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); @@ -1061,6 +1062,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e8fb0d275cc2..d2098dd4ceae 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bfe7d19ff4fd..d410703bb5a1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5e9312eefed0..622b10a549f7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,7 +410,8 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1157,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1510,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..e10d07c76044 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include <linux/freezer.h> #include <linux/file.h> #include <linux/btf_ids.h> +#include <linux/bpf-cgroup.h> #include "scm.h" @@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2744,6 +2772,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6c707ebcebb9..90af76fa9dd8 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -169,6 +169,9 @@ endif TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(call try-run,\ + printf "int main() { return 0; }" |\ + $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 7a788bb837fc..7a09ac74fac0 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -17,9 +17,9 @@ static void usage(const char *cmd) { - printf("USAGE: %s [-i num_progs] [-h]\n", cmd); - printf(" -i num_progs # number of progs of the test\n"); - printf(" -h # help\n"); + printf("USAGE: %s [-i nr_tests] [-h]\n", cmd); + printf(" -i nr_tests # rounds of test to run\n"); + printf(" -h # help\n"); } static void verify_map(int map_id) @@ -45,14 +45,14 @@ static void verify_map(int map_id) } } -static int test(char *filename, int num_progs) +static int test(char *filename, int nr_tests) { - int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; - struct bpf_link *links[num_progs * 4]; - struct bpf_object *objs[num_progs]; + int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; + struct bpf_link **links = NULL; + struct bpf_object *objs[nr_tests]; struct bpf_program *prog; - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { objs[i] = bpf_object__open_file(filename, NULL); if (libbpf_get_error(objs[i])) { fprintf(stderr, "opening BPF object file failed\n"); @@ -60,6 +60,19 @@ static int test(char *filename, int num_progs) goto cleanup; } + /* One-time initialization */ + if (!links) { + int nr_progs = 0; + + bpf_object__for_each_program(prog, objs[i]) + nr_progs += 1; + + links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *)); + + if (!links) + goto cleanup; + } + /* load BPF program */ if (bpf_object__load(objs[i])) { fprintf(stderr, "loading BPF object file failed\n"); @@ -101,14 +114,18 @@ static int test(char *filename, int num_progs) close(fd); /* verify the map */ - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { verify_map(map0_fds[i]); verify_map(map1_fds[i]); } cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); + if (links) { + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + free(links); + } for (i--; i >= 0; i--) bpf_object__close(objs[i]); @@ -117,13 +134,13 @@ cleanup: int main(int argc, char **argv) { - int opt, num_progs = 1; + int opt, nr_tests = 1; char filename[256]; while ((opt = getopt(argc, argv, "i:h")) != -1) { switch (opt) { case 'i': - num_progs = atoi(optarg); + nr_tests = atoi(optarg); break; case 'h': default: @@ -134,5 +151,5 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - return test(filename, num_progs); + return test(filename, nr_tests); } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index bd015ec9847b..2ce900f66d6e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -36,11 +36,14 @@ CGROUP COMMANDS | **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | | **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | | **cgroup_inet4_connect** | **cgroup_inet6_connect** | -| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_unix_connect** | **cgroup_inet4_getpeername** | +| **cgroup_inet6_getpeername** | **cgroup_unix_getpeername** | | **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | -| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| **cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** | +| **cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** | | **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | -| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_unix_recvmsg** | **cgroup_sysctl** | +| **cgroup_getsockopt** | **cgroup_setsockopt** | | **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } @@ -102,21 +105,28 @@ DESCRIPTION **post_bind6** return from bind(2) for an inet6 socket (since 4.17); **connect4** call to connect(2) for an inet4 socket (since 4.17); **connect6** call to connect(2) for an inet6 socket (since 4.17); + **connect_unix** call to connect(2) for a unix socket (since 6.7); **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp4 socket (since 4.18); **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); + **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for + an unconnected unix socket (since 6.7); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp6 socket (since 5.2); + **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for + an unconnected unix socket (since 6.7); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8); **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); + **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **getsockname_unix** call to getsockname(2) for a unix socket (since 6.7); **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index dcae81bd27ed..58e6a5b10ef7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -47,9 +47,11 @@ PROG COMMANDS | **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | | **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | -| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | -| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | -| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** | +| **cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** | +| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** | +| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** | +| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** | | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 085bf18f3659..6e4f7ce6bc01 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -480,13 +480,13 @@ _bpftool() action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ cgroup/dev lwt_in lwt_out lwt_xmit \ - lwt_seg6local sockops sk_skb sk_msg \ - lirc_mode2 cgroup/bind4 cgroup/bind6 \ - cgroup/connect4 cgroup/connect6 \ - cgroup/getpeername4 cgroup/getpeername6 \ - cgroup/getsockname4 cgroup/getsockname6 \ - cgroup/sendmsg4 cgroup/sendmsg6 \ - cgroup/recvmsg4 cgroup/recvmsg6 \ + lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \ + cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 cgroup/connect_unix \ + cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \ + cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \ + cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \ + cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index ac846b0805b4..af6898c0f388 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,13 +28,15 @@ " cgroup_device | cgroup_inet4_bind |\n" \ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ - " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ - " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ - " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ - " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ - " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ - " cgroup_getsockopt | cgroup_setsockopt |\n" \ - " cgroup_inet_sock_release }" + " cgroup_inet6_connect | cgroup_unix_connect |\n" \ + " cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \ + " cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \ + " cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \ + " cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \ + " cgroup_sysctl | cgroup_getsockopt |\n" \ + " cgroup_setsockopt | cgroup_inet_sock_release }" static unsigned int query_flags; static struct btf *btf_vmlinux; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 04c47745b3ea..ee3ce2b8000d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -708,17 +708,22 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h codegen("\ \n\ - skel->%1$s = skel_prep_map_data((void *)\"\\ \n\ - ", ident); + { \n\ + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + "); mmap_data = bpf_map__initial_value(map, &mmap_size); print_hex(mmap_data, mmap_size); codegen("\ \n\ - \", %1$zd, %2$zd); \n\ - if (!skel->%3$s) \n\ - goto cleanup; \n\ - skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\ - ", bpf_map_mmap_sz(map), mmap_size, ident); + \"; \n\ + \n\ + skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\ + sizeof(data) - 1);\n\ + if (!skel->%1$s) \n\ + goto cleanup; \n\ + skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\ + } \n\ + ", ident, bpf_map_mmap_sz(map)); } codegen("\ \n\ @@ -733,32 +738,30 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h { \n\ struct bpf_load_and_run_opts opts = {}; \n\ int err; \n\ - \n\ - opts.ctx = (struct bpf_loader_ctx *)skel; \n\ - opts.data_sz = %2$d; \n\ - opts.data = (void *)\"\\ \n\ + static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\ ", - obj_name, opts.data_sz); + obj_name); print_hex(opts.data, opts.data_sz); codegen("\ \n\ \"; \n\ + static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\ "); - - codegen("\ - \n\ - opts.insns_sz = %d; \n\ - opts.insns = (void *)\"\\ \n\ - ", - opts.insns_sz); print_hex(opts.insns, opts.insns_sz); codegen("\ \n\ \"; \n\ + \n\ + opts.ctx = (struct bpf_loader_ctx *)skel; \n\ + opts.data_sz = sizeof(opts_data) - 1; \n\ + opts.data = (void *)opts_data; \n\ + opts.insns_sz = sizeof(opts_insn) - 1; \n\ + opts.insns = (void *)opts_insn; \n\ + \n\ err = bpf_load_and_run(&opts); \n\ if (err < 0) \n\ return err; \n\ - ", obj_name); + "); bpf_object__for_each_map(map, obj) { const char *mmap_flags; @@ -1209,7 +1212,7 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - s->data = %2$s__elf_bytes(&s->data_sz); \n\ + s->data = %1$s__elf_bytes(&s->data_sz); \n\ \n\ obj->skeleton = s; \n\ return 0; \n\ @@ -1218,12 +1221,12 @@ static int do_skeleton(int argc, char **argv) return err; \n\ } \n\ \n\ - static inline const void *%2$s__elf_bytes(size_t *sz) \n\ + static inline const void *%1$s__elf_bytes(size_t *sz) \n\ { \n\ - *sz = %1$d; \n\ - return (const void *)\"\\ \n\ - " - , file_sz, obj_name); + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + ", + obj_name + ); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@ -1231,6 +1234,9 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ + \n\ + *sz = sizeof(data) - 1; \n\ + return (const void *)data; \n\ } \n\ \n\ #ifdef __cplusplus \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2e5c231e08ac..4b1407b05056 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -265,6 +265,7 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_bool_field(json_wtr, "retprobe", info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count); + jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); jsonw_name(json_wtr, "funcs"); jsonw_start_array(json_wtr); addrs = u64_to_ptr(info->kprobe_multi.addrs); @@ -301,6 +302,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "func", u64_to_ptr(info->perf_event.kprobe.func_name)); jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); + jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); } static void @@ -641,6 +643,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) else printf("\n\tkprobe.multi "); printf("func_cnt %u ", info->kprobe_multi.count); + if (info->kprobe_multi.missed) + printf("missed %llu ", info->kprobe_multi.missed); addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs); qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64); @@ -683,6 +687,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info) printf("%s", buf); if (info->perf_event.kprobe.offset) printf("+%#x", info->perf_event.kprobe.offset); + if (info->perf_event.kprobe.missed) + printf(" missed %llu", info->perf_event.kprobe.missed); printf(" "); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8443a149dd17..7ec4f5671e7a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2475,9 +2475,10 @@ static int do_help(int argc, char **argv) " sk_reuseport | flow_dissector | cgroup/sysctl |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" - " cgroup/getpeername4 | cgroup/getpeername6 |\n" - " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" - " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" + " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n" + " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n" + " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n" + " cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5f13db15a3c7..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3264,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5096,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6532,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6547,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6960,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6972,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7006,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7307,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 3803479dbe10..1c13f8e88833 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -362,8 +362,6 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG a6 #define __PT_PARM8_REG a7 -/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ -#define PT_REGS_SYSCALL_REGS(ctx) ctx #define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 9d0296c1726a..2a158e8a8b7c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <libelf.h> #include <gelf.h> #include <fcntl.h> @@ -10,6 +13,17 @@ #define STRERR_BUFSIZE 128 +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + int elf_open(const char *binary_path, struct elf_fd *elf_fd) { char errmsg[STRERR_BUFSIZE]; @@ -64,13 +78,18 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; size_t nr_syms; size_t strtabidx; + size_t verdef_strtabidx; size_t next_sym_idx; struct elf_sym sym; int st_type; @@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) { + pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path); + return -ENOENT; + } + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + iter->verdef_strtabidx = sh.sh_link; + iter->verdefs = elf_getdata(scn, 0); + return 0; } @@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } return ret; } return NULL; } +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym) long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; + const char *at_symbol, *lib_ver; + bool is_shared_lib; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; @@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) /* for shared lib case, we do not need to calculate relative offset */ is_shared_lib = ehdr.e_type == ET_DYN; - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically @@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) continue; cur_bind = GELF_ST_BIND(sym->sym.st_info); if (ret > 0) { /* handle multiple matches */ - if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { /* Only accept one non-weak bind. */ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", sym->name, name, binary_path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3a6108e3238b..a295f6acf98f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -82,17 +82,22 @@ static const char * const attach_type_name[] = { [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", @@ -8960,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), @@ -11114,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", &probe_type, &binary_path, &func_name); switch (n) { case 1: @@ -11624,14 +11634,14 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; - int n, ret = -EINVAL; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; long offset = 0; *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", - &probe_type, &binary_path, &func_name, &offset); + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); switch (n) { case 1: /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ @@ -11642,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf prog->name, prog->sec_name); break; case 3: - case 4: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba43..475378438545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57712321490f..cc973b678a39 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -400,4 +400,11 @@ LIBBPF_1.3.0 { bpf_program__attach_netfilter; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 02199364db13..aacb64278a01 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -34,7 +34,7 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; @@ -57,7 +57,7 @@ struct ringbuf_hdr { __u32 pad; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->consumer_pos = tmp; @@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, */ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); - return libbpf_err(-E2BIG); + goto err_out; } tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; err = ringbuf_process_ring(ring); if (err < 0) @@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; err = ringbuf_process_ring(ring); if (err < 0) @@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) return rb->epoll_fd; } +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume(struct ring *r) +{ + int64_t res; + + res = ringbuf_process_ring(r); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 1e827c24761c..5c2cc7e8c5d0 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -10,3 +10,4 @@ fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_ma fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index ce6f291665cf..1a63996c0304 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,30 +1,5 @@ # TEMPORARY # Alphabetical order -bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) -bpf_cookie # failed to open_and_load program: -524 (trampoline) -bpf_loop # attaches to __x64_sys_nanosleep -cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) -dynptr/test_dynptr_skb_data -dynptr/test_skb_readonly exceptions # JIT does not support calling kfunc bpf_throw (exceptions) -fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) -iters/testmod_seq* # s390x doesn't support kfuncs in modules yet -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test # relies on fentry -ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc) -ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) -ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) -ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) -module_attach # skel_attach skeleton attach failed: -524 (trampoline) -ringbuf # skel_load skeleton load failed (?) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -test_lsm # attach unexpected error: -524 (trampoline) -trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) -trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) -unpriv_bpf_disabled # fentry -user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) -verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) -xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -xdp_metadata # JIT does not support calling kernel function (kfunc) -test_task_under_cgroup # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index caede9b574cb..4225f975fce3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,11 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) -CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ +RELEASE ?= +OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) +CFLAGS += -g $(OPT_FLAGS) -rdynamic \ + -Wall -Werror \ + $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) @@ -104,7 +108,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -188,7 +192,7 @@ $(OUTPUT)/%:%.c $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ # LLVM's ld.lld doesn't support all the architectures, so use it only on x86 -ifeq ($(SRCARCH),x86) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv)) LLD := lld else LLD := ld @@ -196,17 +200,20 @@ endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. -$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c +$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map $(call msg,LIB,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ - $^ $(filter-out -static,$(LDLIBS)) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ + $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,--version-script=liburandom_read.map \ -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,-rpath=. -o $@ @@ -238,7 +245,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ @@ -276,7 +283,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -287,7 +294,7 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -310,7 +317,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -319,7 +326,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers @@ -640,7 +647,9 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ -$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) +# Include find_bit.c to compile xskxceiver. +EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c +$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 9aa29564bd74..2c8cb3f61529 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -159,6 +159,14 @@ extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym; */ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; +struct bpf_iter_task_vma; + +extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, + unsigned long addr) __ksym; +extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; +extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; + /* Convenience macro to wrap over bpf_obj_drop_impl */ #define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 642dda0e758a..5ca68ff0b59f 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -1,6 +1,8 @@ #ifndef __BPF_KFUNCS__ #define __BPF_KFUNCS__ +struct bpf_sock_addr_kern; + /* Description * Initializes an skb-type dynptr * Returns @@ -41,4 +43,16 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; +/* Description + * Modify the address of a AF_UNIX sockaddr. + * Returns__bpf_kfunc + * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified. + */ +extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const __u8 *sun_path, __u32 sun_path__sz) __ksym; + +void *bpf_cast_to_kern_ctx(void *) __ksym; + +void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index cefc5dd72573..a5e246f7b202 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -138,6 +138,10 @@ __bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) it->cnt = 0; } +__bpf_kfunc void bpf_kfunc_common_test(void) +{ +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -343,6 +347,7 @@ BTF_SET8_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_kfunc_common_test) BTF_SET8_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index f5c5b1375c24..7c664dd61059 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -104,4 +104,6 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); + +void bpf_kfunc_common_test(void) __ksym; #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 2caee8423ee0..5b1da2a32ea7 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -49,6 +49,10 @@ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ CGROUP_WORK_DIR) +static __thread bool cgroup_workdir_mounted; + +static void __cleanup_cgroup_environment(void); + static int __enable_controllers(const char *cgroup_path, const char *controllers) { char path[PATH_MAX + 1]; @@ -195,6 +199,11 @@ int setup_cgroup_environment(void) format_cgroup_path(cgroup_workdir, ""); + if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir mount"); + return 1; + } + if (unshare(CLONE_NEWNS)) { log_err("unshare"); return 1; @@ -209,9 +218,10 @@ int setup_cgroup_environment(void) log_err("mount cgroup2"); return 1; } + cgroup_workdir_mounted = true; /* Cleanup existing failed runs, now that the environment is setup */ - cleanup_cgroup_environment(); + __cleanup_cgroup_environment(); if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { log_err("mkdir cgroup work dir"); @@ -306,10 +316,25 @@ int join_parent_cgroup(const char *relative_path) } /** + * __cleanup_cgroup_environment() - Delete temporary cgroups + * + * This is a helper for cleanup_cgroup_environment() that is responsible for + * deletion of all temporary cgroups that have been created during the test. + */ +static void __cleanup_cgroup_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, ""); + join_cgroup_from_top(CGROUP_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} + +/** * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment * * This is an idempotent function to delete all temporary cgroups that - * have been created during the test, including the cgroup testing work + * have been created during the test and unmount the cgroup testing work * directory. * * At call time, it moves the calling process to the root cgroup, and then @@ -320,11 +345,10 @@ int join_parent_cgroup(const char *relative_path) */ void cleanup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; - - format_cgroup_path(cgroup_workdir, ""); - join_cgroup_from_top(CGROUP_MOUNT_PATH); - nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); + __cleanup_cgroup_environment(); + if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) + log_err("umount cgroup2"); + cgroup_workdir_mounted = false; } /** diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index e41eb33b2704..02dd4409200e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -84,3 +84,4 @@ CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y +CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map new file mode 100644 index 000000000000..38a97a419a04 --- /dev/null +++ b/tools/testing/selftests/bpf/liburandom_read.map @@ -0,0 +1,15 @@ +LIBURANDOM_READ_1.0.0 { + global: + urandlib_api; + urandlib_api_sameoffset; + urandlib_read_without_sema; + urandlib_read_with_sema; + urandlib_read_with_sema_semaphore; + local: + *; +}; + +LIBURANDOM_READ_2.0.0 { + global: + urandlib_api; +} LIBURANDOM_READ_1.0.0; diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 16f1671e4bde..66191ae9863c 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -33,11 +33,11 @@ static void create_inner_maps(enum bpf_map_type map_type, { int map_fd, map_index, ret; __u32 map_key = 0, map_id; - char map_name[15]; + char map_name[16]; for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { memset(map_name, 0, sizeof(map_name)); - sprintf(map_name, "inner_map_fd_%d", map_index); + snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index); map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), sizeof(__u32), 1, NULL); CHECK(map_fd < 0, diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index da72a3a66230..6db27a9088e9 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -11,6 +11,7 @@ #include <arpa/inet.h> #include <sys/mount.h> #include <sys/stat.h> +#include <sys/un.h> #include <linux/err.h> #include <linux/in.h> @@ -257,6 +258,26 @@ static int connect_fd_to_addr(int fd, return 0; } +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type) +{ + int fd; + + fd = socket(addr->ss_family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (connect_fd_to_addr(fd, addr, addrlen, false)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static const struct network_helper_opts default_opts; int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) @@ -380,6 +401,19 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, if (len) *len = sizeof(*sin6); return 0; + } else if (family == AF_UNIX) { + /* Note that we always use abstract unix sockets to avoid having + * to clean up leftover files. + */ + struct sockaddr_un *sun = (void *)addr; + + memset(addr, 0, sizeof(*sun)); + sun->sun_family = family; + sun->sun_path[0] = 0; + strcpy(sun->sun_path + 1, addr_str); + if (len) + *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); + return 0; } return -1; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5eccc67d1a99..34f1200a781b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -51,6 +51,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); void free_fds(int *fds, unsigned int nr_close_fds); +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type); int connect_to_fd(int server_fd, int timeout_ms); int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index b92770592563..465c1c3a3d3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -6,6 +6,7 @@ struct bpf_reg_match { unsigned int line; + const char *reg; const char *match; }; @@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=2"}, - {1, "R3_w=4"}, - {2, "R3_w=8"}, - {3, "R3_w=16"}, - {4, "R3_w=32"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "2"}, + {1, "R3_w", "4"}, + {2, "R3_w", "8"}, + {3, "R3_w", "16"}, + {4, "R3_w", "32"}, }, }, { @@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=1"}, - {1, "R3_w=2"}, - {2, "R3_w=4"}, - {3, "R3_w=8"}, - {4, "R3_w=16"}, - {5, "R3_w=1"}, - {6, "R4_w=32"}, - {7, "R4_w=16"}, - {8, "R4_w=8"}, - {9, "R4_w=4"}, - {10, "R4_w=2"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "1"}, + {1, "R3_w", "2"}, + {2, "R3_w", "4"}, + {3, "R3_w", "8"}, + {4, "R3_w", "16"}, + {5, "R3_w", "1"}, + {6, "R4_w", "32"}, + {7, "R4_w", "16"}, + {8, "R4_w", "8"}, + {9, "R4_w", "4"}, + {10, "R4_w", "2"}, }, }, { @@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=4"}, - {1, "R3_w=8"}, - {2, "R3_w=10"}, - {3, "R4_w=8"}, - {4, "R4_w=12"}, - {5, "R4_w=14"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "4"}, + {1, "R3_w", "8"}, + {2, "R3_w", "10"}, + {3, "R4_w", "8"}, + {4, "R4_w", "12"}, + {5, "R4_w", "14"}, }, }, { @@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=7"}, - {1, "R3_w=7"}, - {2, "R3_w=14"}, - {3, "R3_w=56"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "7"}, + {1, "R3_w", "7"}, + {2, "R3_w", "14"}, + {3, "R3_w", "56"}, }, }, @@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w=pkt(off=8,r=8,imm=0)"}, - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {12, "R3_w=pkt_end(off=0,imm=0)"}, - {17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"}, - {19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, + {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R3_w", "var_off=(0x0; 0x1fe)"}, + {8, "R3_w", "var_off=(0x0; 0x3fc)"}, + {9, "R3_w", "var_off=(0x0; 0x7f8)"}, + {10, "R3_w", "var_off=(0x0; 0xff0)"}, + {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {17, "R4_w", "var_off=(0x0; 0xff)"}, + {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, + {19, "R4_w", "var_off=(0x0; 0xff0)"}, + {20, "R4_w", "var_off=(0x0; 0x7f8)"}, + {21, "R4_w", "var_off=(0x0; 0x3fc)"}, + {22, "R4_w", "var_off=(0x0; 0x1fe)"}, }, }, { @@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R4_w", "var_off=(0x0; 0xff)"}, + {8, "R4_w", "var_off=(0x0; 0xff)"}, + {9, "R4_w", "var_off=(0x0; 0xff)"}, + {10, "R4_w", "var_off=(0x0; 0x1fe)"}, + {11, "R4_w", "var_off=(0x0; 0xff)"}, + {12, "R4_w", "var_off=(0x0; 0x3fc)"}, + {13, "R4_w", "var_off=(0x0; 0xff)"}, + {14, "R4_w", "var_off=(0x0; 0x7f8)"}, + {15, "R4_w", "var_off=(0x0; 0xff0)"}, }, }, { @@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w=pkt(off=0,r=0,imm=0)"}, - {4, "R5_w=pkt(off=14,r=0,imm=0)"}, - {5, "R4_w=pkt(off=14,r=0,imm=0)"}, - {9, "R2=pkt(off=0,r=18,imm=0)"}, - {10, "R5=pkt(off=14,r=18,imm=0)"}, - {10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, - {14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, + {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, + {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, + {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, + {9, "R2", "pkt(off=0,r=18,imm=0)"}, + {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {10, "R4_w", "var_off=(0x0; 0xff)"}, + {13, "R4_w", "var_off=(0x0; 0xffff)"}, + {14, "R4_w", "var_off=(0x0; 0xffff)"}, }, }, { @@ -298,20 +299,20 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w", "pkt(id=1,off=14,"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable * offset is considered using reg->aux_off_align which * is 4 and meets the load's requirements. */ - {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {15, "R4", "var_off=(0x0; 0x3fc)"}, + {15, "R5", "var_off=(0x0; 0x3fc)"}, /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. To avoid BPF * verifier's precision backtracking logging @@ -319,46 +320,46 @@ static struct bpf_align_test tests[] = { * instruction to validate R5 state. We also check * that R4 is what it should be in such case. */ - {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w", "var_off=(0x0; 0x3fc)"}, + {18, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w", "pkt(id=2,off=14,"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4", "var_off=(0x0; 0x3fc)"}, + {24, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w", "pkt(off=14,r=8,"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. */ - {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w", "var_off=(0x0; 0x3fc)"}, + {28, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w", "pkt(id=3,off=18,"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, - {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w", "var_off=(0x0; 0x7fc)"}, + {31, "R5_w", "var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4", "var_off=(0x0; 0x7fc)"}, + {35, "R5", "var_off=(0x0; 0x7fc)"}, }, }, { @@ -396,36 +397,36 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {8, "R6_w", "var_off=(0x2; 0x7fc)"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, - {12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {11, "R5_w", "var_off=(0x2; 0x7fc)"}, + {12, "R4", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {15, "R5", "var_off=(0x2; 0x7fc)"}, /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {17, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, - {20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {19, "R5_w", "var_off=(0x2; 0xffc)"}, + {20, "R4", "var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {23, "R5", "var_off=(0x2; 0xffc)"}, }, }, { @@ -458,18 +459,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w=pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end(off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -477,7 +478,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -512,24 +513,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w", "var_off=(0x2; 0x7fc)"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {10, "R7_w", "var_off=(0x0; 0x3fc)"}, /* Subtracting it from R6 blows our unsigned bounds */ - {11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ - {14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"}, + {14, "R6", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"}, - + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, { @@ -566,23 +566,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"}, + {10, "R6_w", "var_off=(0x2; 0x7c)"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"}, + {14, "R7_w", "var_off=(0x0; 0x7fc)"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {16, "R5_w", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, }; @@ -635,6 +635,7 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; + const char *p; int tmp; if (!m.match) @@ -649,8 +650,8 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(NULL, "\n"); } if (!line_ptr) { - printf("Failed to find line %u for match: %s\n", - m.line, m.match); + printf("Failed to find line %u for match: %s=%s\n", + m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; @@ -667,15 +668,15 @@ static int do_test_single(struct bpf_align_test *test) * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - while (!strstr(line_ptr, m.match)) { + while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); sscanf(line_ptr ?: "", "%u: ", &cur_line); if (!line_ptr || cur_line != m.line) break; } - if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", m.line, m.match); + if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { + printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index d2d9e965eba5..053f4d6da77a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -193,8 +193,8 @@ error: void test_bloom_filter_map(void) { - __u32 *rand_vals, nr_rand_vals; - struct bloom_filter_map *skel; + __u32 *rand_vals = NULL, nr_rand_vals = 0; + struct bloom_filter_map *skel = NULL; int err; test_fail_cases(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 1f02168103dd..41aba139b20b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -10,7 +10,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" -#include "bpf_iter_task_vma.skel.h" +#include "bpf_iter_task_vmas.skel.h" #include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -1399,19 +1399,19 @@ static void str_strip_first_line(char *str) static void test_task_vma_common(struct bpf_iter_attach_opts *opts) { int err, iter_fd = -1, proc_maps_fd = -1; - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int len, read_size = 4; char maps_path[64]; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); skel->bss->one_task = opts ? 1 : 0; - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1462,25 +1462,25 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts) out: close(proc_maps_fd); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } static void test_task_vma_dead_task(void) { - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int wstatus, child_pid = -1; time_t start_tm, cur_tm; int err, iter_fd = -1; int wait_sec = 3; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1533,7 +1533,7 @@ static void test_task_vma_dead_task(void) out: waitpid(child_pid, &wstatus, 0); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } void test_bpf_sockmap_map_iter_fd(void) diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c index 289218c2216c..40fe571f2fe7 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_ping.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c @@ -28,9 +28,9 @@ static void subtest(int cgroup_fd, struct connect_ping *skel, .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; - struct sockaddr *sa; + struct sockaddr *sa = NULL; socklen_t sa_len; - int protocol; + int protocol = -1; int sock_fd; switch (family) { diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 2fd05649bad1..4ad4cd69152e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -11,9 +11,13 @@ #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" +#define IPV6_IFACE_ADDR_SEC "cafe::cafe" +#define IPV6_ADDR_DST "face::3" #define IPV6_NUD_FAILED_ADDR "face::1" #define IPV6_NUD_STALE_ADDR "face::2" #define IPV4_IFACE_ADDR "10.0.0.254" +#define IPV4_IFACE_ADDR_SEC "10.1.0.254" +#define IPV4_ADDR_DST "10.2.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" #define IPV4_TBID_ADDR "172.0.0.254" @@ -31,6 +35,7 @@ struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; + const char *expected_src; int lookup_flags; __u32 tbid; __u8 dmac[6]; @@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = { .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, .dmac = DMAC_INIT2, }, + { .desc = "IPv4 set src addr from netdev", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set src addr from netdev", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 set prefsrc addr from route", + .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set prefsrc addr route", + .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, }; static int ifindex; @@ -97,6 +118,13 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for prefsrc IP addr selection */ + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC); + + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC); + /* Setup for tbid lookup tests */ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); @@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; - ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) - return -1; + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) + return -1; + } + return 0; } @@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; - ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) - return -1; + + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) + return -1; + } return 0; } @@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } +static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src) +{ + int ret; + __u32 src6[4]; + __be32 src4; + + switch (fib_params->family) { + case AF_INET6: + ret = inet_pton(AF_INET6, expected_src, src6); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src)); + if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) { + char str_src6[64]; + + inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6, + sizeof(str_src6)); + printf("ipv6 expected %s actual %s ", expected_src, + str_src6); + } + + break; + case AF_INET: + ret = inet_pton(AF_INET, expected_src, &src4); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src"); + + break; + default: + PRINT_FAIL("invalid addr family: %d", fib_params->family); + } +} + void test_fib_lookup(void) { struct bpf_fib_lookup *fib_params; @@ -207,6 +275,9 @@ void test_fib_lookup(void) ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); + if (tests[i].expected_src) + assert_src_ip(fib_params, tests[i].expected_src); + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); if (!ASSERT_EQ(ret, 0, "dmac not match")) { char expected[18], actual[18]; diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 10804ae5ae97..b696873c5455 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -8,6 +8,7 @@ #include "iters_looping.skel.h" #include "iters_num.skel.h" #include "iters_testmod_seq.skel.h" +#include "iters_task_vma.skel.h" static void subtest_num_iters(void) { @@ -90,6 +91,61 @@ cleanup: iters_testmod_seq__destroy(skel); } +static void subtest_task_vma_iters(void) +{ + unsigned long start, end, bpf_iter_start, bpf_iter_end; + struct iters_task_vma *skel; + char rest_of_line[1000]; + unsigned int seen; + FILE *f = NULL; + int err; + + skel = iters_task_vma__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + skel->bss->target_pid = getpid(); + + err = iters_task_vma__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + getpgid(skel->bss->target_pid); + iters_task_vma__detach(skel); + + if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero")) + goto cleanup; + + f = fopen("/proc/self/maps", "r"); + if (!ASSERT_OK_PTR(f, "proc_maps_fopen")) + goto cleanup; + + seen = 0; + while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) { + /* [vsyscall] vma isn't _really_ part of task->mm vmas. + * /proc/PID/maps returns it when out of vmas - see get_gate_vma + * calls in fs/proc/task_mmu.c + */ + if (strstr(rest_of_line, "[vsyscall]")) + continue; + + bpf_iter_start = skel->bss->vm_ranges[seen].vm_start; + bpf_iter_end = skel->bss->vm_ranges[seen].vm_end; + + ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match"); + ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match"); + seen++; + } + + if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + iters_task_vma__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); @@ -103,4 +159,6 @@ void test_iters(void) subtest_num_iters(); if (test__start_subtest("testmod_seq")) subtest_testmod_seq_iters(); + if (test__start_subtest("task_vma")) + subtest_task_vma_iters(); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index db3bf6bbe01a..69dc31383b78 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -268,7 +268,7 @@ end: static void list_and_rb_node_same_struct(bool refcount_field) { - int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id; + int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id; struct btf *btf; int id, err; diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index 61333f2a03f9..e9190574e79f 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -49,7 +49,8 @@ static int open_tuntap(const char *dev_name, bool need_mac) return -1; ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); + strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c new file mode 100644 index 000000000000..70d90c43537c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/missed.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "missed_kprobe.skel.h" +#include "missed_kprobe_recursion.skel.h" +#include "missed_tp_recursion.skel.h" + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc, which has also kprobe on. The latter won't get triggered due + * to kprobe recursion check and kprobe missed counter is incremented. + */ +static void test_missed_perf_kprobe(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link_info info = {}; + struct missed_kprobe *skel; + __u32 len = sizeof(info); + int err, prog_fd; + + skel = missed_kprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load")) + goto cleanup; + + err = missed_kprobe__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + goto cleanup; + + ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type"); + ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type"); + ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed"); + +cleanup: + missed_kprobe__destroy(skel); +} + +static __u64 get_missed_count(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + int err; + + err = bpf_prog_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + return (__u64) -1; + return info.recursion_misses; +} + +/* + * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached. + * + * Because fprobe (kprobe.multi attach layear) does not have strict recursion + * check the kprobe's bpf_prog_active check is hit for test2-5. + */ +static void test_missed_kprobe_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_kprobe_recursion *skel; + int err, prog_fd; + + skel = missed_kprobe_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load")) + goto cleanup; + + err = missed_kprobe_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses"); + +cleanup: + missed_kprobe_recursion__destroy(skel); +} + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes + * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234] + * programs attached to it. + * + * Because kprobe execution goes through bpf_prog_active check, programs + * attached to the tracepoint will fail the recursion check and increment + * the recursion_misses stats. + */ +static void test_missed_tp_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_tp_recursion *skel; + int err, prog_fd; + + skel = missed_tp_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load")) + goto cleanup; + + err = missed_tp_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_tp_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + +cleanup: + missed_tp_recursion__destroy(skel); +} + +void test_missed(void) +{ + if (test__start_subtest("perf_kprobe")) + test_missed_perf_kprobe(); + if (test__start_subtest("kprobe_recursion")) + test_missed_kprobe_recursion(); + if (test__start_subtest("tp_recursion")) + test_missed_tp_recursion(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c index 9541e9b3a034..343da65864d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c +++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c @@ -19,6 +19,7 @@ static void test_array(void) bpf_program__set_autoload(skel->progs.test_array_map_3, true); bpf_program__set_autoload(skel->progs.test_array_map_4, true); + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_array__load(skel); @@ -51,6 +52,7 @@ static void test_array_sleepable(void) bpf_program__set_autoload(skel->progs.test_array_map_10, true); + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_array__load(skel); @@ -85,6 +87,7 @@ static void test_cgrp_local_storage(void) if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open")) goto close_fd; + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_cgrp_local_storage__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index 722c5f2a7776..a043af9cd6d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type) int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; - struct iphdr iph; + struct iphdr iph = {}; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index ac104dc652e3..48c5695b7abf 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -91,6 +91,9 @@ static void ringbuf_subtest(void) int err, cnt, rb_fd; int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; + struct ring *ring; + int map_fd; + unsigned long avail_data, ring_size, cons_pos, prod_pos; skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -162,6 +165,13 @@ static void ringbuf_subtest(void) trigger_samples(); + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + + map_fd = ring__map_fd(ring); + ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd"); + /* 2 submitted + 1 discarded records */ CHECK(skel->bss->avail_data != 3 * rec_sz, "err_avail_size", "exp %ld, got %ld\n", @@ -176,6 +186,18 @@ static void ringbuf_subtest(void) "err_prod_pos", "exp %ld, got %ld\n", 3L * rec_sz, skel->bss->prod_pos); + /* verify getting this data directly via the ring object yields the same + * results + */ + avail_data = ring__avail_data_size(ring); + ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size"); + ring_size = ring__size(ring); + ASSERT_EQ(ring_size, page_size, "ring_ring_size"); + cons_pos = ring__consumer_pos(ring); + ASSERT_EQ(cons_pos, 0, "ring_cons_pos"); + prod_pos = ring__producer_pos(ring); + ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos"); + /* poll for samples */ err = ring_buffer__poll(ringbuf, -1); @@ -282,6 +304,10 @@ static void ringbuf_subtest(void) err = ring_buffer__consume(ringbuf); CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* also consume using ring__consume to make sure it works the same */ + err = ring__consume(ring); + ASSERT_GE(err, 0, "ring_consume"); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 1455911d9fcb..58522195081b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -42,6 +42,8 @@ void test_ringbuf_multi(void) { struct test_ringbuf_multi *skel; struct ring_buffer *ringbuf = NULL; + struct ring *ring_old; + struct ring *ring; int err; int page_size = getpagesize(); int proto_fd = -1; @@ -84,11 +86,24 @@ void test_ringbuf_multi(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; + /* verify ring_buffer__ring returns expected results */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + ring_old = ring; + ring = ring_buffer__ring(ringbuf, 1); + ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1"); + err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2), process_sample, (void *)(long)2); if (CHECK(err, "ringbuf_add", "failed to add another ring\n")) goto cleanup; + /* verify adding a new ring didn't invalidate our older pointer */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again")) + goto cleanup; + err = test_ringbuf_multi__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 8b571890c57e..c3d78846f31a 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -124,6 +124,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_INET6_CONNECT}, }, { + "cgroup/connect_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT}, + {0, BPF_CGROUP_UNIX_CONNECT}, + }, + { "cgroup/sendmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, {0, BPF_CGROUP_UDP4_SENDMSG}, @@ -134,6 +139,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_SENDMSG}, }, { + "cgroup/sendmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG}, + {0, BPF_CGROUP_UNIX_SENDMSG}, + }, + { "cgroup/recvmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG}, {0, BPF_CGROUP_UDP4_RECVMSG}, @@ -144,6 +154,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_RECVMSG}, }, { + "cgroup/recvmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG}, + {0, BPF_CGROUP_UNIX_RECVMSG}, + }, + { "cgroup/sysctl", {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, {0, BPF_CGROUP_SYSCTL}, @@ -158,6 +173,36 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, {0, BPF_CGROUP_SETSOCKOPT}, }, + { + "cgroup/getpeername4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME}, + {0, BPF_CGROUP_INET4_GETPEERNAME}, + }, + { + "cgroup/getpeername6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME}, + {0, BPF_CGROUP_INET6_GETPEERNAME}, + }, + { + "cgroup/getpeername_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME}, + {0, BPF_CGROUP_UNIX_GETPEERNAME}, + }, + { + "cgroup/getsockname4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME}, + {0, BPF_CGROUP_INET4_GETSOCKNAME}, + }, + { + "cgroup/getsockname6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME}, + {0, BPF_CGROUP_INET6_GETSOCKNAME}, + }, + { + "cgroup/getsockname_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME}, + {0, BPF_CGROUP_UNIX_GETSOCKNAME}, + }, }; static void test_prog_type_by_name(const struct sec_name_test *test) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c new file mode 100644 index 000000000000..5fd617718991 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/un.h> + +#include "test_progs.h" + +#include "connect_unix_prog.skel.h" +#include "sendmsg_unix_prog.skel.h" +#include "recvmsg_unix_prog.skel.h" +#include "getsockname_unix_prog.skel.h" +#include "getpeername_unix_prog.skel.h" +#include "network_helpers.h" + +#define SERVUN_ADDRESS "bpf_cgroup_unix_test" +#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite" +#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src" + +enum sock_addr_test_type { + SOCK_ADDR_TEST_BIND, + SOCK_ADDR_TEST_CONNECT, + SOCK_ADDR_TEST_SENDMSG, + SOCK_ADDR_TEST_RECVMSG, + SOCK_ADDR_TEST_GETSOCKNAME, + SOCK_ADDR_TEST_GETPEERNAME, +}; + +typedef void *(*load_fn)(int cgroup_fd); +typedef void (*destroy_fn)(void *skel); + +struct sock_addr_test { + enum sock_addr_test_type type; + const char *name; + /* BPF prog properties */ + load_fn loadfn; + destroy_fn destroyfn; + /* Socket properties */ + int socket_family; + int socket_type; + /* IP:port pairs for BPF prog to override */ + const char *requested_addr; + unsigned short requested_port; + const char *expected_addr; + unsigned short expected_port; + const char *expected_src_addr; +}; + +static void *connect_unix_prog_load(int cgroup_fd) +{ + struct connect_unix_prog *skel; + + skel = connect_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.connect_unix_prog = bpf_program__attach_cgroup( + skel->progs.connect_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + connect_unix_prog__destroy(skel); + return NULL; +} + +static void connect_unix_prog_destroy(void *skel) +{ + connect_unix_prog__destroy(skel); +} + +static void *sendmsg_unix_prog_load(int cgroup_fd) +{ + struct sendmsg_unix_prog *skel; + + skel = sendmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.sendmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + sendmsg_unix_prog__destroy(skel); + return NULL; +} + +static void sendmsg_unix_prog_destroy(void *skel) +{ + sendmsg_unix_prog__destroy(skel); +} + +static void *recvmsg_unix_prog_load(int cgroup_fd) +{ + struct recvmsg_unix_prog *skel; + + skel = recvmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.recvmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + recvmsg_unix_prog__destroy(skel); + return NULL; +} + +static void recvmsg_unix_prog_destroy(void *skel) +{ + recvmsg_unix_prog__destroy(skel); +} + +static void *getsockname_unix_prog_load(int cgroup_fd) +{ + struct getsockname_unix_prog *skel; + + skel = getsockname_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getsockname_unix_prog = bpf_program__attach_cgroup( + skel->progs.getsockname_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getsockname_unix_prog__destroy(skel); + return NULL; +} + +static void getsockname_unix_prog_destroy(void *skel) +{ + getsockname_unix_prog__destroy(skel); +} + +static void *getpeername_unix_prog_load(int cgroup_fd) +{ + struct getpeername_unix_prog *skel; + + skel = getpeername_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getpeername_unix_prog = bpf_program__attach_cgroup( + skel->progs.getpeername_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getpeername_unix_prog__destroy(skel); + return NULL; +} + +static void getpeername_unix_prog_destroy(void *skel) +{ + getpeername_unix_prog__destroy(skel); +} + +static struct sock_addr_test tests[] = { + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix", + connect_unix_prog_load, + connect_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix", + sendmsg_unix_prog_load, + sendmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-dgram", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-stream", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix", + getsockname_unix_prog_load, + getsockname_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix", + getpeername_unix_prog_load, + getpeername_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, +}; + +typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); + +static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len, + const struct sockaddr_storage *addr2, socklen_t addr2_len, + bool cmp_port) +{ + const struct sockaddr_in *four1, *four2; + const struct sockaddr_in6 *six1, *six2; + const struct sockaddr_un *un1, *un2; + + if (addr1->ss_family != addr2->ss_family) + return -1; + + if (addr1_len != addr2_len) + return -1; + + if (addr1->ss_family == AF_INET) { + four1 = (const struct sockaddr_in *)addr1; + four2 = (const struct sockaddr_in *)addr2; + return !((four1->sin_port == four2->sin_port || !cmp_port) && + four1->sin_addr.s_addr == four2->sin_addr.s_addr); + } else if (addr1->ss_family == AF_INET6) { + six1 = (const struct sockaddr_in6 *)addr1; + six2 = (const struct sockaddr_in6 *)addr2; + return !((six1->sin6_port == six2->sin6_port || !cmp_port) && + !memcmp(&six1->sin6_addr, &six2->sin6_addr, + sizeof(struct in6_addr))); + } else if (addr1->ss_family == AF_UNIX) { + un1 = (const struct sockaddr_un *)addr1; + un2 = (const struct sockaddr_un *)addr2; + return memcmp(un1, un2, addr1_len); + } + + return -1; +} + +static int cmp_sock_addr(info_fn fn, int sock1, + const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + struct sockaddr_storage addr1; + socklen_t len1 = sizeof(addr1); + + memset(&addr1, 0, len1); + if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0) + return -1; + + return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port); +} + +static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port); +} + +static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port); +} + +static void test_bind(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + + /* Try to connect to server just in case */ + client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_connect(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr, expected_src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage), + expected_src_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &expected_src_addr, &expected_src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + } + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + + if (test->expected_src_addr) { + err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + } +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_xmsg(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + src_addr_len = sizeof(struct sockaddr_storage); + struct msghdr hdr; + struct iovec iov; + char data = 'a'; + int serv = -1, client = -1, err; + + /* Unlike the other tests, here we test that we can rewrite the src addr + * with a recvmsg() hook. + */ + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + client = socket(test->socket_family, test->socket_type, 0); + if (!ASSERT_GE(client, 0, "socket")) + goto cleanup; + + /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */ + if (test->socket_family == AF_UNIX) { + err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len); + if (!ASSERT_OK(err, "bind")) + goto cleanup; + } + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->socket_type == SOCK_DGRAM) { + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = (void *)&addr; + hdr.msg_namelen = addr_len; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + err = sendmsg(client, &hdr, 0); + if (!ASSERT_EQ(err, sizeof(data), "sendmsg")) + goto cleanup; + } else { + /* Testing with connection-oriented sockets is only valid for + * recvmsg() tests. + */ + if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg")) + goto cleanup; + + err = connect(client, (const struct sockaddr *)&addr, addr_len); + if (!ASSERT_OK(err, "connect")) + goto cleanup; + + err = send(client, &data, sizeof(data), 0); + if (!ASSERT_EQ(err, sizeof(data), "send")) + goto cleanup; + + err = listen(serv, 0); + if (!ASSERT_OK(err, "listen")) + goto cleanup; + + err = accept(serv, NULL, NULL); + if (!ASSERT_GE(err, 0, "accept")) + goto cleanup; + + close(serv); + serv = err; + } + + addr_len = src_addr_len = sizeof(struct sockaddr_storage); + + err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, sizeof(data), "recvfrom")) + goto cleanup; + + ASSERT_EQ(data, 'a', "data mismatch"); + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_addr")) + goto cleanup; + } + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_getsockname(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + +cleanup: + if (serv != -1) + close(serv); +} + +static void test_getpeername(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +void test_sock_addr(void) +{ + int cgroup_fd = -1; + void *skel; + + cgroup_fd = test__join_cgroup("/sock_addr"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) { + struct sock_addr_test *test = &tests[i]; + + if (!test__start_subtest(test->name)) + continue; + + skel = test->loadfn(cgroup_fd); + if (!skel) + continue; + + switch (test->type) { + /* Not exercised yet but we leave this code here for when the + * INET and INET6 sockaddr tests are migrated to this file in + * the future. + */ + case SOCK_ADDR_TEST_BIND: + test_bind(test); + break; + case SOCK_ADDR_TEST_CONNECT: + test_connect(test); + break; + case SOCK_ADDR_TEST_SENDMSG: + case SOCK_ADDR_TEST_RECVMSG: + test_xmsg(test); + break; + case SOCK_ADDR_TEST_GETSOCKNAME: + test_getsockname(test); + break; + case SOCK_ADDR_TEST_GETPEERNAME: + test_getpeername(test); + break; + default: + ASSERT_TRUE(false, "Unknown sock addr test type"); + break; + } + + test->destroyfn(skel); + } + +cleanup: + if (cgroup_fd >= 0) + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index dda7060e86a0..f75f84d0b3d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -359,7 +359,7 @@ out: static void test_sockmap_skb_verdict_shutdown(void) { struct epoll_event ev, events[MAX_EVENTS]; - int n, err, map, verdict, s, c1, p1; + int n, err, map, verdict, s, c1 = -1, p1 = -1; struct test_sockmap_pass_prog *skel; int epollfd; int zero = 0; @@ -414,9 +414,9 @@ out: static void test_sockmap_skb_verdict_fionread(bool pass_prog) { int expected, zero = 0, sent, recvd, avail; - int err, map, verdict, s, c0, c1, p0, p1; - struct test_sockmap_pass_prog *pass; - struct test_sockmap_drop_prog *drop; + int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; + struct test_sockmap_pass_prog *pass = NULL; + struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; if (pass_prog) { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 36d829a65aa4..e880f97bc44d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -378,7 +378,7 @@ static inline int enable_reuseport(int s, int progfd) static inline int socket_loopback_reuseport(int family, int sotype, int progfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; int err, s; init_addr_loopback(family, &addr, &len); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 8df8cbb447f1..a934d430c20c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -73,7 +73,7 @@ static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, int family, int sotype, int mapfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; u32 key = 0; u64 value; int err, s; @@ -871,7 +871,7 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, static void redir_partial(int family, int sotype, int sock_map, int parser_map) { - int s, c0, c1, p0, p1; + int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int err, n, key, value; char buf[] = "abc"; @@ -1336,53 +1336,59 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, + int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - int c0, c1, p0, p1; unsigned int pass; int err, n; - int sfd[2]; u32 key; char b; zero_verdict_count(verd_mapfd); - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - err = add_to_sockmap(sock_mapfd, p0, p1); + err = add_to_sockmap(sock_mapfd, peer0, peer1); if (err) - goto close; + return; - n = write(c1, "a", 1); + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); if (n == 0) FAIL("%s: incomplete write", log_prefix); if (n < 1) - goto close; + return; key = SK_PASS; err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) - goto close; + return; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); if (n < 0) FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) FAIL("%s: incomplete recv", log_prefix); +} + +static void unix_redir_to_connected(int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + int c0, c1, p0, p1; + int sfd[2]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + goto close0; + c1 = sfd[0], p1 = sfd[1]; + + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close0: @@ -1661,14 +1667,8 @@ close_peer0: static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1677,32 +1677,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - -close_cli1: xclose(c1); xclose(p1); close_cli0: @@ -1747,15 +1723,9 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) return; @@ -1765,32 +1735,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close_cli1: xclose(c1); xclose(p1); close: @@ -1827,15 +1773,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1845,32 +1785,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close_cli0: diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index ce2c61d62fc6..760ad96b4be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -15,6 +15,7 @@ static int timer(struct timer *timer_skel) ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1"); ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1"); prog_fd = bpf_program__fd(timer_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -33,6 +34,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb3() was executed twice */ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that timer_cb_pinned() was executed twice */ + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c new file mode 100644 index 000000000000..cf3e0e7a64fa --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include <test_progs.h> +#include "test_uprobe.skel.h" + +static FILE *urand_spawn(int *pid) +{ + FILE *f; + + /* urandom_read's stdout is wired into f */ + f = popen("./urandom_read 1 report-pid", "r"); + if (!f) + return NULL; + + if (fscanf(f, "%d", pid) != 1) { + pclose(f); + errno = EINVAL; + return NULL; + } + + return f; +} + +static int urand_trigger(FILE **urand_pipe) +{ + int exit_code; + + /* pclose() waits for child process to exit and returns their exit code */ + exit_code = pclose(*urand_pipe); + *urand_pipe = NULL; + + return exit_code; +} + +void test_uprobe(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + FILE *urand_pipe = NULL; + int urand_pid = 0, err; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + urand_pipe = urand_spawn(&urand_pid); + if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn")) + goto cleanup; + + skel->bss->my_pid = urand_pid; + + /* Manual attach uprobe to urandlib_api + * There are two `urandlib_api` symbols in .dynsym section: + * - urandlib_api@LIBURANDOM_READ_1.0.0 + * - urandlib_api@@LIBURANDOM_READ_2.0.0 + * Both are global bind and would cause a conflict if user + * specify the symbol name without a version suffix + */ + uprobe_opts.func_name = "urandlib_api"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict")) + goto cleanup; + + uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok")) + goto cleanup; + + /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */ + err = test_uprobe__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger urandom_read */ + ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code"); + + ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset"); + ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1"); + ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2"); + ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api"); + +cleanup: + if (urand_pipe) + pclose(urand_pipe); + test_uprobe__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 626c461fa34d..4439ba9392f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -226,7 +226,7 @@ static int verify_xsk_metadata(struct xsk *xsk) __u64 comp_addr; void *data; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637d5..dd923dc637d5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 38a57a2e70db..799fff4995d8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -99,6 +99,9 @@ #elif defined(__TARGET_ARCH_arm64) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__arm64_" +#elif defined(__TARGET_ARCH_riscv) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__riscv_" #else #define SYSCALL_WRAPPER 0 #define SYS_PREFIX "__se_" diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c new file mode 100644 index 000000000000..ca8aa2f116b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/connect_unix") +int connect_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index fa35832e6748..e1e5c54a6a11 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -31,35 +31,35 @@ check_assert(s64, eq, llong_max, LLONG_MAX); __msg(": R0_w=scalar(smax=2147483646) R10=fp0") check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); __msg(": R0_w=scalar(smax=2147483647) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); __msg(": R0_w=scalar(smin=-2147483647) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); __msg(": R0_w=scalar(smin=-2147483648) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=-2147483646,smax=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(umin=4096,umax=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c new file mode 100644 index 000000000000..9c078f34bbb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getpeername_unix") +int getpeername_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c new file mode 100644 index 000000000000..ac7145111497 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getsockname_unix") +int getsockname_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c new file mode 100644 index 000000000000..44edecfdfaee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +pid_t target_pid = 0; +unsigned int vmas_seen = 0; + +struct { + __u64 vm_start; + __u64 vm_end; +} vm_ranges[1000]; + +SEC("raw_tp/sys_enter") +int iter_task_vma_for_each(const void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct vm_area_struct *vma; + unsigned int seen = 0; + + if (task->pid != target_pid) + return 0; + + if (vmas_seen) + return 0; + + bpf_for_each(task_vma, vma, task, 0) { + if (seen >= 1000) + break; + + vm_ranges[seen].vm_start = vma->vm_start; + vm_ranges[seen].vm_end = vma->vm_end; + seen++; + } + + vmas_seen = seen; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c new file mode 100644 index 000000000000..7f9ef701f5de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c new file mode 100644 index 000000000000..8ea71cbd6c45 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe.multi/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test4(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe.multi/bpf_kfunc_common_test") +int test5(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c new file mode 100644 index 000000000000..762385f827c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_printk("test"); + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test4(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c index bbc45346e006..37c2d2608ec0 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c @@ -71,6 +71,7 @@ int BPF_PROG(test_array_map_2) } int cpu0_field_d, sum_field_c; +int my_pid; /* Summarize percpu data */ SEC("?fentry/bpf_fentry_test3") @@ -81,6 +82,9 @@ int BPF_PROG(test_array_map_3) struct val_t *v; struct elem *e; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + e = bpf_map_lookup_elem(&array, &index); if (!e) return 0; @@ -130,6 +134,9 @@ int BPF_PROG(test_array_map_10) struct val_t *v; struct elem *e; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + e = bpf_map_lookup_elem(&array, &index); if (!e) return 0; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c index 1c36a241852c..a2acf9aa6c24 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c @@ -70,6 +70,7 @@ int BPF_PROG(test_cgrp_local_storage_2) } int cpu0_field_d, sum_field_c; +int my_pid; /* Summarize percpu data collection */ SEC("fentry/bpf_fentry_test3") @@ -81,6 +82,9 @@ int BPF_PROG(test_cgrp_local_storage_3) struct elem *e; int i; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + task = bpf_get_current_task_btf(); e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0); if (!e) diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index f799d87e8700..897061930cb7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -609,7 +609,7 @@ out: } SEC("tracepoint/syscalls/sys_enter_kill") -int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) { struct bpf_func_stats_ctx stats_ctx; diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c new file mode 100644 index 000000000000..4dfbc8552558 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test"; + +SEC("cgroup/recvmsg_unix") +int recvmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c new file mode 100644 index 000000000000..1f67e832666e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/sendmsg_unix") +int sendmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 67c14ba1e87b..3ddcb3777912 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -6,7 +6,8 @@ #include <bpf/bpf_tracing.h> #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; @@ -104,7 +105,11 @@ int _tc(volatile struct __sk_buff *skb) "%[tmp_mark] = r1" : [tmp_mark]"=r"(tmp_mark) : [ctx]"r"(skb), - [off_mark]"i"(offsetof(struct __sk_buff, mark)) + [off_mark]"i"(offsetof(struct __sk_buff, mark) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + + sizeof(skb->mark) - 1 +#endif + ) : "r1"); #else tmp_mark = (char)skb->mark; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c new file mode 100644 index 000000000000..896c88a4960d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_uprobe.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +pid_t my_pid = 0; + +int test1_result = 0; +int test2_result = 0; +int test3_result = 0; +int test4_result = 0; + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset") +int BPF_UPROBE(test1) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test1_result = 1; + return 0; +} + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0") +int BPF_UPROBE(test2) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test2_result = 1; + return 0; +} + +SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0") +int BPF_URETPROBE(test3, int ret) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test3_result = ret; + return 0; +} + +SEC("uprobe") +int BPF_UPROBE(test4) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test4_result = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 4b8e37f7fd06..78b23934d9f8 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -16,12 +16,12 @@ bool kprobe_called = false; bool fentry_called = false; SEC("tp/syscalls/sys_enter_nanosleep") -int handle__tp(struct trace_event_raw_sys_enter *args) +int handle__tp(struct syscall_trace_enter *args) { struct __kernel_timespec *ts; long tv_nsec; - if (args->id != __NR_nanosleep) + if (args->nr != __NR_nanosleep) return 0; ts = (void *)args->args[0]; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 9a16d95213e1..8b946c8188c6 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,7 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -59,6 +59,8 @@ __u64 err; __u64 ok; __u64 callback_check = 52; __u64 callback2_check = 52; +__u64 pinned_callback_check; +__s32 pinned_cpu; #define ARRAY 1 #define HTAB 2 @@ -329,3 +331,62 @@ int BPF_PROG2(test3, int, a) return 0; } + +/* callback for pinned timer */ +static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer) +{ + __s32 cpu = bpf_get_smp_processor_id(); + + if (cpu != pinned_cpu) + err |= 16384; + + pinned_callback_check++; + return 0; +} + +static void test_pinned_timer(bool soft) +{ + int key = 0; + void *map; + struct bpf_timer *timer; + __u64 flags = BPF_F_TIMER_CPU_PIN; + __u64 start_time; + + if (soft) { + map = &soft_timer_pinned; + start_time = 0; + } else { + map = &abs_timer_pinned; + start_time = bpf_ktime_get_boot_ns(); + flags |= BPF_F_TIMER_ABS; + } + + timer = bpf_map_lookup_elem(map, &key); + if (timer) { + if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0) + err |= 4096; + bpf_timer_set_callback(timer, timer_cb_pinned); + pinned_cpu = bpf_get_smp_processor_id(); + bpf_timer_start(timer, start_time + 1000, flags); + } else { + err |= 8192; + } +} + +SEC("fentry/bpf_fentry_test4") +int BPF_PROG2(test4, int, a) +{ + bpf_printk("test4"); + test_pinned_timer(true); + + return 0; +} + +SEC("fentry/bpf_fentry_test5") +int BPF_PROG2(test5, int, a) +{ + bpf_printk("test5"); + test_pinned_timer(false); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 5d54f8eae6a1..107525fb4a6a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index aa54ecd5829e..9f202eda952f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 1e1bc379c44f..375525329637 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") @@ -13,12 +14,16 @@ __description("LDSX, S8") __success __success_unpriv __retval(-2) __naked void ldsx_s8(void) { - asm volatile (" \ - r1 = 0x3fe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s8 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s8 *)(r10 - 8);" +#else + "r0 = *(s8 *)(r10 - 1);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -26,12 +31,16 @@ __description("LDSX, S16") __success __success_unpriv __retval(-2) __naked void ldsx_s16(void) { - asm volatile (" \ - r1 = 0x3fffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s16 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s16 *)(r10 - 8);" +#else + "r0 = *(s16 *)(r10 - 2);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -39,35 +48,43 @@ __description("LDSX, S32") __success __success_unpriv __retval(-1) __naked void ldsx_s32(void) { - asm volatile (" \ - r1 = 0xfffffffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s32 *)(r10 - 8); \ - r0 >>= 1; \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0xfffffffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s32 *)(r10 - 8);" +#else + "r0 = *(s32 *)(r10 - 4);" +#endif + "r0 >>= 1;" + "exit;" + ::: __clobber_all); } SEC("socket") __description("LDSX, S8 range checking, privileged") __log_level(2) __success __retval(1) -__msg("R1_w=scalar(smin=-128,smax=127)") +__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)") __naked void ldsx_s8_range_priv(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s8 *)(r10 - 8); \ - /* r1 with s8 range */ \ - if r1 s> 0x7f goto l0_%=; \ - if r1 s< -0x80 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s8 *)(r10 - 8);" +#else + "r1 = *(s8 *)(r10 - 1);" +#endif + /* r1 with s8 range */ + "if r1 s> 0x7f goto l0_%=;" + "if r1 s< -0x80 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -77,20 +94,24 @@ __description("LDSX, S16 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s16_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s16 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fff goto l0_%=; \ - if r1 s< -0x8000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s16 *)(r10 - 8);" +#else + "r1 = *(s16 *)(r10 - 2);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fff goto l0_%=;" + "if r1 s< -0x8000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -100,20 +121,24 @@ __description("LDSX, S32 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s32_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s32 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fffFFFF goto l0_%=; \ - if r1 s< -0x80000000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s32 *)(r10 - 8);" +#else + "r1 = *(s32 *)(r10 - 4);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fffFFFF goto l0_%=;" + "if r1 s< -0x80000000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index ca11fd5dafd1..b2a04d1179d0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index fb039722b639..8fc5174808b2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 24369f242853..ccde6a4c6319 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -3,11 +3,12 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "xsk_xdp_metadata.h" +#include <linux/if_ether.h> +#include "xsk_xdp_common.h" struct { __uint(type, BPF_MAP_TYPE_XSKMAP); - __uint(max_entries, 1); + __uint(max_entries, 2); __uint(key_size, sizeof(int)); __uint(value_size, sizeof(int)); } xsk SEC(".maps"); @@ -52,4 +53,21 @@ SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp) return bpf_redirect_map(&xsk, 0, XDP_DROP); } +SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_DROP; + + /* Redirecting packets based on the destination MAC address */ + idx = ((unsigned int)(eth->h_dest[5])) / 2; + if (idx > MAX_SOCKETS) + return XDP_DROP; + + return bpf_redirect_map(&xsk, idx, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index b4edd8454934..37ffa57f28a1 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -69,7 +69,7 @@ static int tester_init(struct test_loader *tester) { if (!tester->log_buf) { tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; - tester->log_buf = malloc(tester->log_buf_sz); + tester->log_buf = calloc(tester->log_buf_sz, 1); if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) return -ENOMEM; } @@ -538,7 +538,7 @@ void run_subtest(struct test_loader *tester, bool unpriv) { struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; - struct bpf_program *tprog, *tprog_iter; + struct bpf_program *tprog = NULL, *tprog_iter; struct test_spec *spec_iter; struct cap_state caps = {}; struct bpf_object *tobj; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4d582cac2c09..1b9387890148 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -255,7 +255,7 @@ static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) { - char test_num_str[TEST_NUM_WIDTH + 1]; + char test_num_str[32]; snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 77bd492c6024..2f9f6f250f17 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -417,6 +417,8 @@ int get_bpf_max_tramp_links(void); #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" #elif defined(__aarch64__) #define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep" +#elif defined(__riscv) +#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep" #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index e92644d0fa75..4ed795655b9f 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -11,6 +11,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) #define BUF_SIZE 256 @@ -21,10 +24,14 @@ void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz); +int urandlib_api(void); +COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_old(void); +int urandlib_api_sameoffset(void); + unsigned short urand_read_with_sema_semaphore SEC(".probes"); -static __attribute__((noinline)) -void urandom_read(int fd, int count) +static noinline void urandom_read(int fd, int count) { char buf[BUF_SIZE]; int i; @@ -83,6 +90,10 @@ int main(int argc, char *argv[]) urandom_read(fd, count); + urandlib_api(); + urandlib_api_old(); + urandlib_api_sameoffset(); + close(fd); return 0; } diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c index 86186e24b740..8c1356d8b4ee 100644 --- a/tools/testing/selftests/bpf/urandom_read_lib1.c +++ b/tools/testing/selftests/bpf/urandom_read_lib1.c @@ -3,6 +3,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) unsigned short urandlib_read_with_sema_semaphore SEC(".probes"); @@ -11,3 +14,22 @@ void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz) { STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz); } + +COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_v1(void) +{ + return 1; +} + +DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0) +int urandlib_api_v2(void) +{ + return 2; +} + +COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0) +DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0) +int urandlib_api_sameoffset(void) +{ + return 3; +} diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index b449788fbd39..595c79141cf3 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -360,9 +360,9 @@ static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val, static int dut_run(struct xdp_features *skel) { int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE; - int state, err, *sockfd, ctrl_sockfd, echo_sockfd; + int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd; struct sockaddr_storage ctrl_addr; - pthread_t dut_thread; + pthread_t dut_thread = 0; socklen_t addrlen; sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL, diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 613321eb84c1..17c980138796 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -234,7 +234,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t struct pollfd fds[rxq + 1]; __u64 comp_addr; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; int i; diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index d9fb2b730a2c..e574711eeb84 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -442,10 +442,9 @@ void xsk_clear_xskmap(struct bpf_map *map) bpf_map_delete_elem(map_fd, &index); } -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk) +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index) { int map_fd, sock_fd; - u32 index = 0; map_fd = bpf_map__fd(map); sock_fd = xsk_socket__fd(xsk); diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index d93200fdaa8d..771570bc3731 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -204,7 +204,7 @@ struct xsk_umem_config { int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); void xsk_detach_xdp_program(int ifindex, u32 xdp_flags); -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk); +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index); void xsk_clear_xskmap(struct bpf_map *map); bool xsk_is_in_mode(u32 ifindex, int mode); diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h new file mode 100644 index 000000000000..5a6f36f07383 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef XSK_XDP_COMMON_H_ +#define XSK_XDP_COMMON_H_ + +#define MAX_SOCKETS 2 + +struct xdp_info { + __u64 count; +} __attribute__((aligned(32))); + +#endif /* XSK_XDP_COMMON_H_ */ diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h deleted file mode 100644 index 943133da378a..000000000000 --- a/tools/testing/selftests/bpf/xsk_xdp_metadata.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -struct xdp_info { - __u64 count; -} __attribute__((aligned(32))); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 43e0a5796929..591ca9637b23 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -80,6 +80,7 @@ #include <linux/if_ether.h> #include <linux/mman.h> #include <linux/netdev.h> +#include <linux/bitmap.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> @@ -102,10 +103,7 @@ #include <bpf/bpf.h> #include <linux/filter.h> #include "../kselftest.h" -#include "xsk_xdp_metadata.h" - -static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; -static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; +#include "xsk_xdp_common.h" static bool opt_verbose; static bool opt_print_tests; @@ -159,10 +157,10 @@ static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) ptr[i] = htonl(pkt_nb << 16 | (i + start)); } -static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) +static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) { - memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); + memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } @@ -260,7 +258,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags = ifobject->bind_flags; if (shared) cfg.bind_flags |= XDP_SHARED_UMEM; - if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE) + if (ifobject->mtu > MAX_ETH_PKT_SIZE) cfg.bind_flags |= XDP_USE_SG; txr = ifobject->tx_on ? &xsk->tx : NULL; @@ -429,11 +427,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, if (i == 0) { ifobj->rx_on = false; ifobj->tx_on = true; - ifobj->pkt_stream = test->tx_pkt_stream_default; } else { ifobj->rx_on = true; ifobj->tx_on = false; - ifobj->pkt_stream = test->rx_pkt_stream_default; } memset(ifobj->umem, 0, sizeof(*ifobj->umem)); @@ -443,6 +439,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + if (i == 0) + ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; + else + ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; + + memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); + memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); + ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); + ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); } } @@ -526,8 +531,10 @@ static int test_spec_set_mtu(struct test_spec *test, int mtu) static void pkt_stream_reset(struct pkt_stream *pkt_stream) { - if (pkt_stream) + if (pkt_stream) { pkt_stream->current_pkt_nb = 0; + pkt_stream->nb_rx_pkts = 0; + } } static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) @@ -557,17 +564,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream) static void pkt_stream_restore_default(struct test_spec *test) { - struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; - struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream; + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; if (tx_pkt_stream != test->tx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->pkt_stream); - test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default; + pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); + test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; } if (rx_pkt_stream != test->rx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_rx->pkt_stream); - test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default; + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; } } @@ -627,14 +634,16 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) +static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) + if (len > MAX_ETH_JUMBO_SIZE) { pkt->valid = false; - else + } else { pkt->valid = true; + pkt_stream->nb_valid_entries++; + } } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -642,7 +651,7 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) return ceil_u32(len, umem->frame_size) * umem->frame_size; } -static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) { struct pkt_stream *pkt_stream; u32 i; @@ -656,41 +665,45 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(umem, pkt, 0, pkt_len); - pkt->pkt_nb = i; + pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt->pkt_nb = nb_start + i * nb_off; } return pkt_stream; } -static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, - struct pkt_stream *pkt_stream) +static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) { - return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); + return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); +} + +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); - test->ifobj_tx->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len); - test->ifobj_rx->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_tx->xsk->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, int offset) { - struct xsk_umem_info *umem = ifobj->umem; struct pkt_stream *pkt_stream; u32 i; - pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); - for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); + for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) + pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); - ifobj->pkt_stream = pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream; + pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) @@ -701,15 +714,34 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off static void pkt_stream_receive_half(struct test_spec *test) { - struct xsk_umem_info *umem = test->ifobj_rx->umem; - struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; u32 i; - test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, - pkt_stream->pkts[0].len); - pkt_stream = test->ifobj_rx->pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->xsk->pkt_stream; for (i = 1; i < pkt_stream->nb_pkts; i += 2) pkt_stream->pkts[i].valid = false; + + pkt_stream->nb_valid_entries /= 2; +} + +static void pkt_stream_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *pkt_stream; + u32 i; + + for (i = 0; i < test->nb_sockets; i++) { + pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; + + pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; + } } static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) @@ -724,16 +756,16 @@ static void pkt_stream_cancel(struct pkt_stream *pkt_stream) pkt_stream->current_pkt_nb--; } -static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, - u32 bytes_written) +static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, + u32 pkt_nb, u32 bytes_written) { - void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); + void *data = xsk_umem__get_data(umem->buffer, addr); if (len < MIN_PKT_SIZE) return; if (!bytes_written) { - gen_eth_hdr(ifobject, data); + gen_eth_hdr(xsk, data); len -= PKT_HDR_SIZE; data += PKT_HDR_SIZE; @@ -783,6 +815,10 @@ static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, s if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) pkt_stream->max_pkt_len = pkt->len; + + if (pkt->valid) + pkt_stream->nb_valid_entries++; + pkt_nb++; } @@ -796,10 +832,10 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, struct pkt_stream *pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); - test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_tx->xsk->pkt_stream = pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); - test->ifobj_rx->pkt_stream = pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void pkt_print_data(u32 *data, u32 cnt) @@ -1004,145 +1040,190 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) return TEST_PASS; } -static int receive_pkts(struct test_spec *test, struct pollfd *fds) +static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) { - struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; - struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; - struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; + struct pollfd fds = { }; struct pkt *pkt; + u64 first_addr = 0; int ret; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - timeradd(&tv_now, &tv_timeout, &tv_end); - - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - while (pkt) { - u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; - u64 first_addr; + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLIN; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); - return TEST_FAILURE; - } + ret = kick_rx(xsk); + if (ret) + return TEST_FAILURE; - ret = kick_rx(xsk); - if (ret) + if (ifobj->use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) return TEST_FAILURE; - if (ifobj->use_poll) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - - if (!ret) { - if (!is_umem_valid(test->ifobj_tx)) - return TEST_PASS; - - ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); - return TEST_FAILURE; - } + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; - if (!(fds->revents & POLLIN)) - continue; + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_CONTINUE; } - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) - continue; + if (!(fds.revents & POLLIN)) + return TEST_CONTINUE; + } - if (ifobj->use_fill_ring) { - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) + return TEST_CONTINUE; + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } + } - while (frags_processed < rcvd) { - const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); - u64 addr = desc->addr, orig; + while (frags_processed < rcvd) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; - orig = xsk_umem__extract_addr(addr); - addr = xsk_umem__add_offset_to_addr(addr); + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + if (!nb_frags) { + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); if (!pkt) { ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", __func__, addr, desc->len); return TEST_FAILURE; } + } - print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", - addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); + print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", + addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); - if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || - !is_offset_correct(umem, pkt, addr) || - (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) - return TEST_FAILURE; + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && + !is_metadata_correct(pkt, umem->buffer, addr))) + return TEST_FAILURE; - if (!nb_frags++) - first_addr = addr; - frags_processed++; - pkt_len += desc->len; - if (ifobj->use_fill_ring) - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + if (!nb_frags++) + first_addr = addr; + frags_processed++; + pkt_len += desc->len; + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - if (pkt_continues(desc->options)) - continue; + if (pkt_continues(desc->options)) + continue; - /* The complete packet has been received */ - if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || - !is_offset_correct(umem, pkt, addr)) - return TEST_FAILURE; + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - nb_frags = 0; - pkt_len = 0; - } + pkt_stream->nb_rx_pkts++; + nb_frags = 0; + pkt_len = 0; + } - if (nb_frags) { - /* In the middle of a packet. Start over from beginning of packet. */ - idx_rx -= nb_frags; - xsk_ring_cons__cancel(&xsk->rx, nb_frags); - if (ifobj->use_fill_ring) { - idx_fq -= nb_frags; - xsk_ring_prod__cancel(&umem->fq, nb_frags); - } - frags_processed -= nb_frags; + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); } + frags_processed -= nb_frags; + } - if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, frags_processed); - if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, frags_processed); + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, frags_processed); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, frags_processed); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= pkts_sent; - pthread_mutex_unlock(&pacing_mutex); - pkts_sent = 0; +return TEST_CONTINUE; +} + +bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, + unsigned long *bitmap) +{ + struct pkt_stream *pkt_stream = xsk->pkt_stream; + + if (!pkt_stream) { + __set_bit(sock_num, bitmap); + return false; + } + + if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { + __set_bit(sock_num, bitmap); + if (bitmap_full(bitmap, test->nb_sockets)) + return true; + } + + return false; +} + +static int receive_pkts(struct test_spec *test) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + DECLARE_BITMAP(bitmap, test->nb_sockets); + struct xsk_socket_info *xsk; + u32 sock_num = 0; + int res, ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (1) { + xsk = &test->ifobj_rx->xsk_arr[sock_num]; + + if ((all_packets_received(test, xsk, sock_num, bitmap))) + break; + + res = __receive_pkts(test, xsk); + if (!(res == TEST_PASS || res == TEST_CONTINUE)) + return res; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + sock_num = (sock_num + 1) % test->nb_sockets; } return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) { u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; - struct pkt_stream *pkt_stream = ifobject->pkt_stream; - struct xsk_socket_info *xsk = ifobject->xsk; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct xsk_umem_info *umem = ifobject->umem; bool use_poll = ifobject->use_poll; + struct pollfd fds = { }; int ret; buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); @@ -1154,9 +1235,12 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo return TEST_CONTINUE; } + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLOUT; + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (timeout) { if (ret < 0) { ksft_print_msg("ERROR: [%s] Poll error %d\n", @@ -1207,7 +1291,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo tx_desc->options = 0; } if (pkt->valid) - pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, bytes_written); bytes_written += tx_desc->len; @@ -1235,7 +1319,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo xsk->outstanding_tx += valid_frags; if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (ret <= 0) { if (ret == 0 && timeout) return TEST_PASS; @@ -1281,27 +1365,43 @@ static int wait_for_tx_completion(struct xsk_socket_info *xsk) return TEST_PASS; } +bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) +{ + return bitmap_full(bitmap, test->nb_sockets); +} + static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { - struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); - struct pollfd fds = { }; - u32 ret; + DECLARE_BITMAP(bitmap, test->nb_sockets); + u32 i, ret; - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLOUT; + while (!(all_packets_sent(test, bitmap))) { + for (i = 0; i < test->nb_sockets; i++) { + struct pkt_stream *pkt_stream; - while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &fds, timeout); - if (ret == TEST_CONTINUE && !test->fail) - continue; - if ((ret || test->fail) && !timeout) - return TEST_FAILURE; - if (ret == TEST_PASS && timeout) - return ret; + pkt_stream = ifobject->xsk_arr[i].pkt_stream; + if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { + __set_bit(i, bitmap); + continue; + } + ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; + + if ((ret || test->fail) && !timeout) + return TEST_FAILURE; + + if (ret == TEST_PASS && timeout) + return ret; + + ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); + if (ret) + return TEST_FAILURE; + } } - return wait_for_tx_completion(ifobject->xsk); + return TEST_PASS; } static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) @@ -1347,8 +1447,8 @@ static int validate_rx_dropped(struct ifobject *ifobject) * packet being invalid). Since the last packet may or may not have * been dropped already, both outcomes must be allowed. */ - if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 || - stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1) + if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) return TEST_PASS; return TEST_FAILURE; @@ -1412,9 +1512,10 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) return TEST_FAILURE; } - if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) { + if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + __func__, stats.tx_invalid_descs, + ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } @@ -1506,6 +1607,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) LIBBPF_OPTS(bpf_xdp_query_opts, opts); void *bufs; int ret; + u32 i; if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; @@ -1528,11 +1630,14 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring); - ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); - if (ret) - exit_with_error(errno); + for (i = 0; i < test->nb_sockets; i++) { + ifobject->xsk = &ifobject->xsk_arr[i]; + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); + if (ret) + exit_with_error(errno); + } } static void *worker_testapp_validate_tx(void *arg) @@ -1562,14 +1667,13 @@ static void *worker_testapp_validate_rx(void *arg) { struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; - struct pollfd fds = { }; int err; if (test->current_step == 1) { thread_common_ops(test, ifobject); } else { xsk_clear_xskmap(ifobject->xskmap); - err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); if (err) { ksft_print_msg("Error: Failed to update xskmap, error %s\n", strerror(-err)); @@ -1577,12 +1681,9 @@ static void *worker_testapp_validate_rx(void *arg) } } - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLIN; - pthread_barrier_wait(&barr); - err = receive_pkts(test, &fds); + err = receive_pkts(test); if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); @@ -1691,11 +1792,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); - pkt_stream_reset(ifobj2->pkt_stream); + pkt_stream_reset(ifobj2->xsk->pkt_stream); } test->current_step++; - pkt_stream_reset(ifobj1->pkt_stream); + pkt_stream_reset(ifobj1->xsk->pkt_stream); pkts_in_flight = 0; signal(SIGUSR1, handler); @@ -1719,9 +1820,15 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i pthread_join(t0, NULL); if (test->total_steps == test->current_step || test->fail) { + u32 i; + if (ifobj2) - xsk_socket__delete(ifobj2->xsk->xsk); - xsk_socket__delete(ifobj1->xsk->xsk); + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj2->xsk_arr[i].xsk); + + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj1->xsk_arr[i].xsk); + testapp_clean_xsk_umem(ifobj1); if (ifobj2 && !ifobj2->shared_umem) testapp_clean_xsk_umem(ifobj2); @@ -1793,16 +1900,18 @@ static int testapp_bidirectional(struct test_spec *test) return res; } -static int swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) +static int swap_xsk_resources(struct test_spec *test) { int ret; - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_socket__delete(ifobj_rx->xsk->xsk); - ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; + test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; + test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; + test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; - ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk); + ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); if (ret) return TEST_FAILURE; @@ -1816,7 +1925,7 @@ static int testapp_xdp_prog_cleanup(struct test_spec *test) if (testapp_validate_traffic(test)) return TEST_FAILURE; - if (swap_xsk_resources(test->ifobj_tx, test->ifobj_rx)) + if (swap_xsk_resources(test)) return TEST_FAILURE; return testapp_validate_traffic(test); } @@ -1852,8 +1961,7 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test) static int testapp_stats_rx_full(struct test_spec *test) { pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; @@ -1864,8 +1972,7 @@ static int testapp_stats_rx_full(struct test_spec *test) static int testapp_stats_fill_empty(struct test_spec *test) { pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; @@ -2031,6 +2138,23 @@ static int testapp_xdp_metadata_copy(struct test_spec *test) return testapp_validate_traffic(test); } +static int testapp_xdp_shared_umem(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test->total_steps = 1; + test->nb_sockets = 2; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem, + skel_tx->progs.xsk_xdp_shared_umem, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + pkt_stream_even_odd_sequence(test); + + return testapp_validate_traffic(test); +} + static int testapp_poll_txq_tmout(struct test_spec *test) { test->ifobj_tx->use_poll = true; @@ -2117,15 +2241,11 @@ static bool hugepages_present(void) return true; } -static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - thread_func_t func_ptr) +static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr) { LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); int err; - memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); - memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); @@ -2336,6 +2456,7 @@ static const struct test_spec tests[] = { {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty}, {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup}, {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop}, + {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem}, {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata}, {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb}, {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb}, @@ -2401,12 +2522,12 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); + init_iface(ifobj_rx, worker_testapp_validate_rx); + init_iface(ifobj_tx, worker_testapp_validate_tx); test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 8015aeea839d..f174df2d693f 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -8,6 +8,7 @@ #include <limits.h> #include "xsk_xdp_progs.skel.h" +#include "xsk_xdp_common.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -35,7 +36,6 @@ #define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 -#define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 48 #define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ @@ -59,6 +59,7 @@ #define HUGEPAGE_SIZE (2 * 1024 * 1024) #define PKT_DUMP_NB_TO_PRINT 16 #define RUN_ALL_TESTS UINT_MAX +#define NUM_MAC_ADDRESSES 4 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -87,8 +88,11 @@ struct xsk_socket_info { struct xsk_ring_prod tx; struct xsk_umem_info *umem; struct xsk_socket *xsk; + struct pkt_stream *pkt_stream; u32 outstanding_tx; u32 rxqsize; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; }; struct pkt { @@ -104,6 +108,8 @@ struct pkt_stream { u32 current_pkt_nb; struct pkt *pkts; u32 max_pkt_len; + u32 nb_rx_pkts; + u32 nb_valid_entries; bool verbatim; }; @@ -120,7 +126,6 @@ struct ifobject { struct xsk_umem_info *umem; thread_func_t func_ptr; validation_func_t validation_func; - struct pkt_stream *pkt_stream; struct xsk_xdp_progs *xdp_progs; struct bpf_map *xskmap; struct bpf_program *xdp_prog; @@ -140,8 +145,6 @@ struct ifobject { bool unaligned_supp; bool multi_buff_supp; bool multi_buff_zc_supp; - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; }; struct test_spec { @@ -168,4 +171,6 @@ pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; int pkts_in_flight; +static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + #endif /* XSKXCEIVER_H_ */ |