diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 08:26:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 08:26:13 -0800 |
commit | 6456300356433873309a1cae6aa05e77d6b59153 (patch) | |
tree | 3158f04f2ca63a48e4d3021aba31aee8f18221cf /tools/lib/bpf/xsk.c | |
parent | cd2a3bf02625ffad02a6b9f7df758ee36cf12769 (diff) | |
parent | 18a4d8bf250a33c015955f0dec27259780ef6448 (diff) | |
download | linux-6456300356433873309a1cae6aa05e77d6b59153.tar.gz linux-6456300356433873309a1cae6aa05e77d6b59153.tar.bz2 linux-6456300356433873309a1cae6aa05e77d6b59153.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Here we go, another merge window full of networking and #ebpf changes:
1) Snoop DHCPACKS in batman-adv to learn MAC/IP pairs in the DHCP
range without dealing with floods of ARP traffic, from Linus
Lüssing.
2) Throttle buffered multicast packet transmission in mt76, from
Felix Fietkau.
3) Support adaptive interrupt moderation in ice, from Brett Creeley.
4) A lot of struct_size conversions, from Gustavo A. R. Silva.
5) Add peek/push/pop commands to bpftool, as well as bash completion,
from Stanislav Fomichev.
6) Optimize sk_msg_clone(), from Vakul Garg.
7) Add SO_BINDTOIFINDEX, from David Herrmann.
8) Be more conservative with local resends due to local congestion,
from Yuchung Cheng.
9) Allow vetoing of unsupported VXLAN FDBs, from Petr Machata.
10) Add health buffer support to devlink, from Eran Ben Elisha.
11) Add TXQ scheduling API to mac80211, from Toke Høiland-Jørgensen.
12) Add statistics to basic packet scheduler filter, from Cong Wang.
13) Add GRE tunnel support for mlxsw Spectrum-2, from Nir Dotan.
14) Lots of new IP tunneling forwarding tests, also from Nir Dotan.
15) Add 3ad stats to bonding, from Nikolay Aleksandrov.
16) Lots of probing improvements for bpftool, from Quentin Monnet.
17) Various nfp drive #ebpf JIT improvements from Jakub Kicinski.
18) Allow #ebpf programs to access gso_segs from skb shared info, from
Eric Dumazet.
19) Add sock_diag support for AF_XDP sockets, from Björn Töpel.
20) Support 22260 iwlwifi devices, from Luca Coelho.
21) Use rbtree for ipv6 defragmentation, from Peter Oskolkov.
22) Add JMP32 instruction class support to #ebpf, from Jiong Wang.
23) Add spinlock support to #ebpf, from Alexei Starovoitov.
24) Support 256-bit keys and TLS 1.3 in ktls, from Dave Watson.
25) Add device infomation API to devlink, from Jakub Kicinski.
26) Add new timestamping socket options which are y2038 safe, from
Deepa Dinamani.
27) Add RX checksum offloading for various sh_eth chips, from Sergei
Shtylyov.
28) Flow offload infrastructure, from Pablo Neira Ayuso.
29) Numerous cleanups, improvements, and bug fixes to the PHY layer
and many drivers from Heiner Kallweit.
30) Lots of changes to try and make packet scheduler classifiers run
lockless as much as possible, from Vlad Buslov.
31) Support BCM957504 chip in bnxt_en driver, from Erik Burrows.
32) Add concurrency tests to tc-tests infrastructure, from Vlad
Buslov.
33) Add hwmon support to aquantia, from Heiner Kallweit.
34) Allow 64-bit values for SO_MAX_PACING_RATE, from Eric Dumazet.
And I would be remiss if I didn't thank the various major networking
subsystem maintainers for integrating much of this work before I even
saw it. Alexei Starovoitov, Daniel Borkmann, Pablo Neira Ayuso,
Johannes Berg, Kalle Valo, and many others. Thank you!"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2207 commits)
net/sched: avoid unused-label warning
net: ignore sysctl_devconf_inherit_init_net without SYSCTL
phy: mdio-mux: fix Kconfig dependencies
net: phy: use phy_modify_mmd_changed in genphy_c45_an_config_aneg
net: dsa: mv88e6xxx: add call to mv88e6xxx_ports_cmode_init to probe for new DSA framework
selftest/net: Remove duplicate header
sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79
net/mlx5e: Update tx reporter status in case channels were successfully opened
devlink: Add support for direct reporter health state update
devlink: Update reporter state to error even if recover aborted
sctp: call iov_iter_revert() after sending ABORT
team: Free BPF filter when unregistering netdev
ip6mr: Do not call __IP6_INC_STATS() from preemptible context
isdn: mISDN: Fix potential NULL pointer dereference of kzalloc
net: dsa: mv88e6xxx: support in-band signalling on SGMII ports with external PHYs
cxgb4/chtls: Prefix adapter flags with CXGB4
net-sysfs: Switch to bitmap_zalloc()
mellanox: Switch to bitmap_zalloc()
bpf: add test cases for non-pointer sanitiation logic
mlxsw: i2c: Extend initialization by querying resources data
...
Diffstat (limited to 'tools/lib/bpf/xsk.c')
-rw-r--r-- | tools/lib/bpf/xsk.c | 723 |
1 files changed, 723 insertions, 0 deletions
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c new file mode 100644 index 000000000000..f98ac82c9aea --- /dev/null +++ b/tools/lib/bpf/xsk.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <asm/barrier.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_xdp.h> +#include <linux/sockios.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +struct xsk_umem { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_umem *umem; + struct xsk_socket_config config; + int fd; + int xsks_map; + int ifindex; + int prog_fd; + int qidconf_map_fd; + int xsks_map_fd; + __u32 queue_id; + char ifname[IFNAMSIZ]; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit. + * Unfortunately, it is not part of glibc. + */ +static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags, + int fd, __u64 offset) +{ +#ifdef __NR_mmap2 + unsigned int page_shift = __builtin_ffs(getpagesize()) - 1; + long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd, + (off_t)(offset >> page_shift)); + + return (void *)ret; +#else + return mmap(addr, length, prot, flags, fd, offset); +#endif +} + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; +} + +static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return; + } + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; +} + +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, + struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_mmap_offsets off; + struct xdp_umem_reg mr; + struct xsk_umem *umem; + socklen_t optlen; + void *map; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + xsk_set_umem_config(&umem->config, usr_config); + + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) { + err = -errno; + goto out_socket; + } + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + map = xsk_mmap(NULL, off.fr.desc + + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + umem->fill = fill; + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = xsk_mmap(NULL, + off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + umem->comp = comp; + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->ring = map + off.cr.desc; + + *umem_ptr = umem; + return 0; + +out_mmap: + munmap(umem->fill, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); +out_socket: + close(umem->fd); +out_umem_alloc: + free(umem); + return err; +} + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + int err, prog_fd; + + /* This is the C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int *qidconf, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * qidconf = bpf_map_lookup_elem(&qidconf_map, &index); + * if (!qidconf) + * return XDP_ABORTED; + * + * if (*qidconf) + * return bpf_redirect_map(&xsks_map, index, 0); + * + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r1 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r1 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_0, 0), + /* if r1 == 0 goto +8 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + BPF_MOV32_IMM(BPF_REG_0, 2), + /* r1 = *(u32 *)(r1 + 0) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* if r1 == 0 goto +5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + BPF_MOV32_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, + BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { + pr_warning("BPF log buffer:\n%s", bpf_log_buf); + return prog_fd; + } + + err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + if (err) { + close(prog_fd); + return err; + } + + xsk->prog_fd = prog_fd; + return 0; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels; + struct ifreq ifr; + int fd, err, ret; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + channels.cmd = ETHTOOL_GCHANNELS; + ifr.ifr_data = (void *)&channels; + strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (channels.max_combined == 0 || errno == EOPNOTSUPP) + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + else + ret = channels.max_combined; + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) + return fd; + xsk->qidconf_map_fd = fd; + + fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) { + close(xsk->qidconf_map_fd); + return fd; + } + xsk->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + close(xsk->qidconf_map_fd); + close(xsk->xsks_map_fd); +} + +static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, + int xsks_value) +{ + bool qidconf_map_updated = false, xsks_map_updated = false; + struct bpf_prog_info prog_info = {}; + __u32 prog_len = sizeof(prog_info); + struct bpf_map_info map_info; + __u32 map_len = sizeof(map_info); + __u32 *map_ids; + int reset_value = 0; + __u32 num_maps; + unsigned int i; + int err; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + int fd; + + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) { + err = -errno; + goto out_maps; + } + + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) + goto out_maps; + + if (!strcmp(map_info.name, "qidconf_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &qidconf_value, 0); + if (err) + goto out_maps; + qidconf_map_updated = true; + xsk->qidconf_map_fd = fd; + } else if (!strcmp(map_info.name, "xsks_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &xsks_value, 0); + if (err) + goto out_maps; + xsks_map_updated = true; + xsk->xsks_map_fd = fd; + } + + if (qidconf_map_updated && xsks_map_updated) + break; + } + + if (!(qidconf_map_updated && xsks_map_updated)) { + err = -ENOENT; + goto out_maps; + } + + err = 0; + goto out_success; + +out_maps: + if (qidconf_map_updated) + (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, + &reset_value, 0); + if (xsks_map_updated) + (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + &reset_value, 0); +out_success: + if (qidconf_map_updated) + close(xsk->qidconf_map_fd); + if (xsks_map_updated) + close(xsk->xsks_map_fd); +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +{ + bool prog_attached = false; + __u32 prog_id = 0; + int err; + + err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + xsk->config.xdp_flags); + if (err) + return err; + + if (!prog_id) { + prog_attached = true; + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto out_maps; + } else { + xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + } + + err = xsk_update_bpf_maps(xsk, true, xsk->fd); + if (err) + goto out_load; + + return 0; + +out_load: + if (prog_attached) + close(xsk->prog_fd); +out_maps: + if (prog_attached) + xsk_delete_bpf_maps(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + socklen_t optlen; + void *map; + int err; + + if (!umem || !xsk_ptr || !rx || !tx) + return -EFAULT; + + if (umem->refcount) { + pr_warning("Error: shared umems not supported by libbpf.\n"); + return -EBUSY; + } + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + } + + xsk->outstanding_tx = 0; + xsk->queue_id = queue_id; + xsk->umem = umem; + xsk->ifindex = if_nametoindex(ifname); + if (!xsk->ifindex) { + err = -errno; + goto out_socket; + } + strncpy(xsk->ifname, ifname, IFNAMSIZ); + + xsk_set_xdp_socket_config(&xsk->config, usr_config); + + if (rx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + if (tx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + if (rx) { + map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = map + off.rx.producer; + rx->consumer = map + off.rx.consumer; + rx->ring = map + off.rx.desc; + } + xsk->rx = rx; + + if (tx) { + map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = map + off.tx.producer; + tx->consumer = map + off.tx.consumer; + tx->ring = map + off.tx.desc; + tx->cached_cons = xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = xsk->ifindex; + sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_flags = xsk->config.bind_flags; + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = xsk_setup_xdp_prog(xsk); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + return 0; + +out_mmap_tx: + if (tx) + munmap(xsk->tx, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(xsk->rx, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + munmap(umem->fill->ring, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + (void)xsk_update_bpf_maps(xsk, 0, 0); + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) + munmap(xsk->rx->ring, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); + if (xsk->tx) + munmap(xsk->tx->ring, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); + } + + xsk->umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != xsk->umem->fd) + close(xsk->fd); + free(xsk); +} |