diff options
Diffstat (limited to 'tools/lib/bpf/xsk.c')
-rw-r--r-- | tools/lib/bpf/xsk.c | 173 |
1 files changed, 135 insertions, 38 deletions
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a902838f9fcc..8e0ffa800a71 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -73,6 +73,21 @@ struct xsk_nl_info { int fd; }; +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + int xsk_umem__fd(const struct xsk_umem *umem) { return umem ? umem->fd : -EINVAL; @@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, return 0; } +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ + struct xdp_mmap_offsets_v1 off_v1; + + /* getsockopt on a kernel <= 5.3 has no flags fields. + * Copy over the offsets to the correct places in the >=5.4 format + * and put the flags where they would have been on that kernel. + */ + memcpy(&off_v1, off, sizeof(off_v1)); + + off->rx.producer = off_v1.rx.producer; + off->rx.consumer = off_v1.rx.consumer; + off->rx.desc = off_v1.rx.desc; + off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + + off->tx.producer = off_v1.tx.producer; + off->tx.consumer = off_v1.tx.consumer; + off->tx.desc = off_v1.tx.desc; + off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + + off->fr.producer = off_v1.fr.producer; + off->fr.consumer = off_v1.fr.consumer; + off->fr.desc = off_v1.fr.desc; + off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + + off->cr.producer = off_v1.cr.producer; + off->cr.consumer = off_v1.cr.consumer; + off->cr.desc = off_v1.cr.desc; + off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ + socklen_t optlen; + int err; + + optlen = sizeof(*off); + err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); + if (err) + return err; + + if (optlen == sizeof(*off)) + return 0; + + if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { + xsk_mmap_offsets_v1(off); + return 0; + } + + return -EINVAL; +} + int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xsk_umem *umem; - socklen_t optlen; void *map; int err; @@ -163,6 +229,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, umem->umem_area = umem_area; xsk_set_umem_config(&umem->config, usr_config); + memset(&mr, 0, sizeof(mr)); mr.addr = (uintptr_t)umem_area; mr.len = size; mr.chunk_size = umem->config.frame_size; @@ -189,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, goto out_socket; } - optlen = sizeof(off); - err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(umem->fd, &off); if (err) { err = -errno; goto out_socket; @@ -273,33 +339,55 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* This is the C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { - * int index = ctx->rx_queue_index; + * int ret, index = ctx->rx_queue_index; * * // A set entry here means that the correspnding queue_id * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. * if (bpf_map_lookup_elem(&xsks_map, &index)) * return bpf_redirect_map(&xsks_map, index, 0); - * * return XDP_PASS; * } */ struct bpf_insn prog[] = { - /* r1 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r1 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV32_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto +5 */ + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), /* r2 = *(u32 *)(r10 - 4) */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - BPF_MOV32_IMM(BPF_REG_3, 0), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ BPF_EMIT_CALL(BPF_FUNC_redirect_map), /* The jumps are to this instruction */ BPF_EXIT_INSN(), @@ -310,7 +398,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", log_buf); + pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; } @@ -343,13 +431,18 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) goto out; } - if (err || channels.max_combined == 0) + if (err) { /* If the device says it has no channels, then all traffic * is sent to a single stream, so max queues = 1. */ ret = 1; - else - ret = channels.max_combined; + } else { + /* Take the max of rx, tx, combined. Drivers return + * the number of channels in different ways. + */ + ret = max(channels.max_rx, channels.max_tx); + ret = max(ret, (int)channels.max_combined); + } out: close(fd); @@ -465,6 +558,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } else { xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (xsk->prog_fd < 0) + return -errno; err = xsk_lookup_bpf_maps(xsk); if (err) { close(xsk->prog_fd); @@ -472,7 +567,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } - err = xsk_set_bpf_maps(xsk); + if (xsk->rx) + err = xsk_set_bpf_maps(xsk); if (err) { xsk_delete_bpf_maps(xsk); close(xsk->prog_fd); @@ -491,21 +587,26 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; - socklen_t optlen; int err; - if (!umem || !xsk_ptr || !rx || !tx) + if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; - if (umem->refcount) { - pr_warning("Error: shared umems not supported by libbpf.\n"); - return -EBUSY; - } - xsk = calloc(1, sizeof(*xsk)); if (!xsk) return -ENOMEM; + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + if (umem->refcount && + !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); + err = -EBUSY; + goto out_xsk_alloc; + } + if (umem->refcount++ > 0) { xsk->fd = socket(AF_XDP, SOCK_RAW, 0); if (xsk->fd < 0) { @@ -527,10 +628,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); xsk->ifname[IFNAMSIZ - 1] = '\0'; - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_socket; - if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, &xsk->config.rx_size, @@ -550,8 +647,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } } - optlen = sizeof(off); - err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(xsk->fd, &off); if (err) { err = -errno; goto out_socket; @@ -599,7 +695,12 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = xsk->ifindex; sxdp.sxdp_queue_id = xsk->queue_id; - sxdp.sxdp_flags = xsk->config.bind_flags; + if (umem->refcount > 1) { + sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); if (err) { @@ -637,7 +738,6 @@ out_xsk_alloc: int xsk_umem__delete(struct xsk_umem *umem) { struct xdp_mmap_offsets off; - socklen_t optlen; int err; if (!umem) @@ -646,8 +746,7 @@ int xsk_umem__delete(struct xsk_umem *umem) if (umem->refcount) return -EBUSY; - optlen = sizeof(off); - err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(umem->fd, &off); if (!err) { munmap(umem->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * sizeof(__u64)); @@ -665,7 +764,6 @@ void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; - socklen_t optlen; int err; if (!xsk) @@ -676,8 +774,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) close(xsk->prog_fd); } - optlen = sizeof(off); - err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(xsk->fd, &off); if (!err) { if (xsk->rx) { munmap(xsk->rx->ring - off.rx.desc, |