summaryrefslogtreecommitdiffstats
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
authorBjörn Töpel <bjorn.topel@intel.com>2018-06-04 13:57:13 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-06-04 17:21:02 +0200
commitbbff2f321a864ee07c9d3d1245af498023146951 (patch)
tree962414a03f749ce22e52ed553780a74216992d0a /net/xdp/xsk.c
parenta509a95536a86ef84deb16c656d741437791b414 (diff)
downloadlinux-bbff2f321a864ee07c9d3d1245af498023146951.tar.gz
linux-bbff2f321a864ee07c9d3d1245af498023146951.tar.bz2
linux-bbff2f321a864ee07c9d3d1245af498023146951.zip
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where each frame is referenced via an index (idx). A user passes the frame index to the kernel, and the kernel acts upon the data. Some NICs, however, do not have a fixed frame-size model; instead, they have a model where a memory window is passed to the hardware and multiple frames are filled into that window (referred to as the "type-writer" model). By changing the descriptor format from the current frame index addressing scheme, AF_XDP can in the future be extended to support these kinds of NICs. In the index-based model, an idx refers to a frame of size frame_size. Addressing a frame in the UMEM is done by offsetting the UMEM starting address by a global offset, idx * frame_size + offset. Communicating via the fill- and completion-rings is done by means of idx. In this commit, the idx is removed in favor of an address (addr), which is a relative address ranging over the UMEM. Converting an idx-based address to the new addr is simply: addr = idx * frame_size + offset. We also stop referring to the UMEM "frame" as a frame. Instead it is simply called a chunk. To transfer ownership of a chunk to the kernel, the addr of the chunk is passed in the fill-ring. Note that the kernel will mask addr to make it chunk-aligned, so there is no need for userspace to do that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or 3000 to the fill-ring will refer to the same chunk. On the completion-ring, the addr will match that of the Tx descriptor passed to the kernel. Changing the descriptor format to use chunks/addr will allow for future changes to move to a type-writer based model, where multiple frames can reside in one chunk. In this model, passing one single chunk into the fill-ring would potentially result in multiple Rx descriptors. This commit changes the uapi of AF_XDP sockets, and updates the documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c30
1 files changed, 17 insertions, 13 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 966307ce4b8e..4688c750df1d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -41,24 +41,27 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 id, len = xdp->data_end - xdp->data;
+ u32 len = xdp->data_end - xdp->data;
void *buffer;
+ u64 addr;
int err;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
- if (!xskq_peek_id(xs->umem->fq, &id)) {
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
xs->rx_dropped++;
return -ENOSPC;
}
- buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
+ addr += xs->umem->headroom;
+
+ buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data, len);
- err = xskq_produce_batch_desc(xs->rx, id, len,
- xs->umem->frame_headroom);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err)
- xskq_discard_id(xs->umem->fq);
+ xskq_discard_addr(xs->umem->fq);
else
xs->rx_dropped++;
@@ -95,10 +98,10 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_destruct_skb(struct sk_buff *skb)
{
- u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+ u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
- WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+ WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
sock_wfree(skb);
}
@@ -123,14 +126,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
- u32 id, len;
+ u64 addr;
+ u32 len;
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
- if (xskq_reserve_id(xs->umem->cq)) {
+ if (xskq_reserve_addr(xs->umem->cq)) {
err = -EAGAIN;
goto out;
}
@@ -153,8 +157,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
}
skb_put(skb, len);
- id = desc.idx;
- buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+ addr = desc.addr;
+ buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err)) {
kfree_skb(skb);
@@ -164,7 +168,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);