summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- drivers/net/vxlan.c 117
-rw-r--r-- include/linux/netdevice.h 10
-rw-r--r-- include/net/protocol.h 3
-rw-r--r-- include/net/vxlan.h 1
-rw-r--r-- net/core/dev.c 3
-rw-r--r-- net/ipv4/udp_offload.c 143
6 files changed, 269 insertions, 8 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0fa4b9108e82..942acc20d394 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -40,6 +40,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>
+#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -554,13 +555,106 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
return 1;
}
+/*
+ * GRO receive handler for VXLAN: locates the VXLAN header and the inner
+ * Ethernet header, clears same_flow on every held packet whose VNI or inner
+ * Ethernet header differs, then hands the skb to the gro_receive callback
+ * registered for the inner EtherType.
+ *
+ * @head: list of packets currently held for aggregation
+ * @skb:  incoming packet, GRO offset positioned at the VXLAN header
+ *
+ * Returns the value of the inner gro_receive callback, or NULL when a header
+ * cannot be reached or no callback exists for the inner protocol (in which
+ * case the skb is also marked for flush).
+ */
+static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct vxlanhdr *vh, *vh2;
+	struct ethhdr *eh, *eh2;
+	unsigned int hlen, off_vx, off_eth;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_vx = skb_gro_offset(skb);
+	hlen = off_vx + sizeof(*vh);
+	vh = skb_gro_header_fast(skb, off_vx);
+	if (skb_gro_header_hard(skb, hlen)) {
+		vh = skb_gro_header_slow(skb, hlen, off_vx);
+		if (unlikely(!vh))
+			goto out;
+	}
+	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+
+	off_eth = skb_gro_offset(skb);
+	hlen = off_eth + sizeof(*eh);
+	eh = skb_gro_header_fast(skb, off_eth);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eh = skb_gro_header_slow(skb, hlen, off_eth);
+		if (unlikely(!eh))
+			goto out;
+	}
+
+	flush = 0;
+
+	/*
+	 * Walk ALL held packets: stopping at the first match would leave
+	 * later candidates with same_flow still set even though their VNI or
+	 * inner Ethernet header was never compared against this skb.
+	 */
+	for (p = *head; p; p = p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		vh2 = (struct vxlanhdr *)(p->data + off_vx);
+		eh2 = (struct ethhdr *)(p->data + off_eth);
+		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2))
+			NAPI_GRO_CB(p)->same_flow = 0;
+	}
+
+	type = eh->h_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL) {
+		/* no GRO support for the inner protocol: do not hold this skb */
+		flush = 1;
+		goto out_unlock;
+	}
+
+	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+/*
+ * GRO complete handler for VXLAN: reads the inner EtherType found right
+ * behind the VXLAN header at @nhoff and delegates to the gro_complete
+ * callback registered for it, passing the offset just past the inner
+ * Ethernet header.
+ *
+ * Returns the callback's result, or -ENOSYS when no callback is registered.
+ */
+static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	const int encap_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
+	struct packet_offload *inner_ops;
+	struct ethhdr *inner_eth;
+	__be16 inner_proto;
+	int ret = -ENOSYS;
+
+	inner_eth = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
+	inner_proto = inner_eth->h_proto;
+
+	rcu_read_lock();
+	inner_ops = gro_find_complete_by_type(inner_proto);
+	if (inner_ops)
+		ret = inner_ops->callbacks.gro_complete(skb, nhoff + encap_len);
+	rcu_read_unlock();
+
+	return ret;
+}
+
/* Notify netdevs that UDP port started listening */
-static void vxlan_notify_add_rx_port(struct sock *sk)
+static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
+ struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
+ int err;
+
+ if (sa_family == AF_INET) {
+ err = udp_add_offload(&vs->udp_offloads);
+ if (err)
+ pr_warn("vxlan: udp_add_offload failed with status %d\n", err);
+ }
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
@@ -572,9 +666,10 @@ static void vxlan_notify_add_rx_port(struct sock *sk)
}
/* Notify netdevs that UDP port is no more listening */
-static void vxlan_notify_del_rx_port(struct sock *sk)
+static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
+ struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
@@ -586,6 +681,9 @@ static void vxlan_notify_del_rx_port(struct sock *sk)
port);
}
rcu_read_unlock();
+
+ if (sa_family == AF_INET)
+ udp_del_offload(&vs->udp_offloads);
}
/* Add new entry to forwarding table -- assumes lock held */
@@ -963,7 +1061,7 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
rcu_assign_sk_user_data(vs->sock->sk, NULL);
- vxlan_notify_del_rx_port(sk);
+ vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock);
queue_work(vxlan_wq, &vs->del_work);
@@ -1124,8 +1222,8 @@ static void vxlan_rcv(struct vxlan_sock *vs,
* leave the CHECKSUM_UNNECESSARY, the device checksummed it
* for us. Otherwise force the upper layers to verify it.
*/
- if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation ||
- !(vxlan->dev->features & NETIF_F_RXCSUM))
+ if ((skb->ip_summed != CHECKSUM_UNNECESSARY && skb->ip_summed != CHECKSUM_PARTIAL) ||
+ !skb->encapsulation || !(vxlan->dev->features & NETIF_F_RXCSUM))
skb->ip_summed = CHECKSUM_NONE;
skb->encapsulation = 0;
@@ -2303,7 +2401,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct sock *sk;
unsigned int h;
- vs = kmalloc(sizeof(*vs), GFP_KERNEL);
+ vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
return ERR_PTR(-ENOMEM);
@@ -2328,9 +2426,14 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vs->data = data;
rcu_assign_sk_user_data(vs->sock->sk, vs);
+ /* Initialize the vxlan udp offloads structure */
+ vs->udp_offloads.port = port;
+ vs->udp_offloads.callbacks.gro_receive = vxlan_gro_receive;
+ vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;
+
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
- vxlan_notify_add_rx_port(sk);
+ vxlan_notify_add_rx_port(vs);
spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 83ce2aee65e6..c31022980e18 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1675,7 +1675,10 @@ struct napi_gro_cb {
unsigned long age;
/* Used in ipv6_gro_receive() */
- int proto;
+ u16 proto;
+
+ /* Used in udp_gro_receive */
+ u16 udp_mark;
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@@ -1714,6 +1717,11 @@ struct packet_offload {
struct list_head list;
};
+struct udp_offload { /* offload registration handed to udp_add_offload()/udp_del_offload() */
+ __be16 port; /* network-order UDP port; compared against the UDP header's dest field */
+ struct offload_callbacks callbacks; /* gro_receive/gro_complete invoked for matching packets */
+};
+
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_sw_netstats {
u64 rx_packets;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 0e5f8665d7fb..a7e986b08147 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -108,6 +108,9 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num);
void inet_register_protosw(struct inet_protosw *p);
void inet_unregister_protosw(struct inet_protosw *p);
+int udp_add_offload(struct udp_offload *prot);
+void udp_del_offload(struct udp_offload *prot);
+
#if IS_ENABLED(CONFIG_IPV6)
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num);
int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6b6d180fb91a..5deef1ae78c9 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -21,6 +21,7 @@ struct vxlan_sock {
struct rcu_head rcu;
struct hlist_head vni_list[VNI_HASH_SIZE];
atomic_t refcnt;
+ struct udp_offload udp_offloads;
};
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
diff --git a/net/core/dev.c b/net/core/dev.c
index a578af589198..d89931bae25b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3893,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
+ NAPI_GRO_CB(skb)->udp_mark = 0;
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
@@ -3983,6 +3984,7 @@ struct packet_offload *gro_find_receive_by_type(__be16 type)
}
return NULL;
}
+EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
@@ -3996,6 +3998,7 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
return NULL;
}
+EXPORT_SYMBOL(gro_find_complete_by_type);
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 79c62bdcd3c5..ee853c55deea 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -14,6 +14,15 @@
#include <net/udp.h>
#include <net/protocol.h>
+/* Serializes writers (add/del) of the udp offload list. */
+static DEFINE_SPINLOCK(udp_offload_lock);
+/*
+ * Head of the singly linked list of registered UDP offloads. Readers walk it
+ * with rcu_dereference() under rcu_read_lock(), hence the __rcu annotation.
+ */
+static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
+
+/* One list node per registered udp_offload. */
+struct udp_offload_priv {
+	struct udp_offload	*offload;	/* descriptor passed to udp_add_offload(); stored by pointer, not copied */
+	struct rcu_head		rcu;		/* used by call_rcu() to free the node */
+	struct udp_offload_priv __rcu *next;	/* next node, NULL at the tail */
+};
+
static int udp4_ufo_send_check(struct sk_buff *skb)
{
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -89,10 +98,144 @@ out:
return segs;
}
+/*
+ * Register @uo at the head of the UDP offload list.
+ *
+ * The pointer to @uo is stored as-is, so @uo must stay valid until
+ * udp_del_offload() has been called for it.
+ *
+ * Returns 0 on success or -ENOMEM if the list node cannot be allocated.
+ */
+int udp_add_offload(struct udp_offload *uo)
+{
+	struct udp_offload_priv *new_offload;
+
+	/*
+	 * GFP_ATOMIC: vxlan_notify_add_rx_port() calls this while holding
+	 * vn->sock_lock (a spinlock), so the allocation must not sleep.
+	 */
+	new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
+	if (!new_offload)
+		return -ENOMEM;
+
+	new_offload->offload = uo;
+
+	spin_lock(&udp_offload_lock);
+	/* The node is not visible to readers yet, so no barrier is needed here. */
+	RCU_INIT_POINTER(new_offload->next,
+			 rcu_dereference_protected(udp_offload_base,
+						   lockdep_is_held(&udp_offload_lock)));
+	/* Publish the fully initialized node to RCU readers. */
+	rcu_assign_pointer(udp_offload_base, new_offload);
+	spin_unlock(&udp_offload_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(udp_add_offload);
+
+/* call_rcu() callback: frees the list node that embeds @head. */
+static void udp_offload_free_routine(struct rcu_head *head)
+{
+	kfree(container_of(head, struct udp_offload_priv, rcu));
+}
+
+/*
+ * Unregister @uo: unlink the list node that refers to it and free that node
+ * via call_rcu(). Emits a warning when @uo is not on the list.
+ */
+void udp_del_offload(struct udp_offload *uo)
+{
+	struct udp_offload_priv __rcu **head = &udp_offload_base;
+	struct udp_offload_priv *uo_priv;
+
+	spin_lock(&udp_offload_lock);
+
+	/*
+	 * Update side: the list is protected by udp_offload_lock rather than
+	 * rcu_read_lock(), so use rcu_dereference_protected().
+	 */
+	uo_priv = rcu_dereference_protected(*head, lockdep_is_held(&udp_offload_lock));
+	for (; uo_priv != NULL;
+	     uo_priv = rcu_dereference_protected(*head, lockdep_is_held(&udp_offload_lock))) {
+		if (uo_priv->offload == uo) {
+			/* unlink; readers already on this node can still follow ->next */
+			rcu_assign_pointer(*head,
+					   rcu_dereference_protected(uo_priv->next,
+								     lockdep_is_held(&udp_offload_lock)));
+			goto unlock;
+		}
+		head = &uo_priv->next;
+	}
+	/* uo->port is in network byte order; convert to host order for printing */
+	pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
+unlock:
+	spin_unlock(&udp_offload_lock);
+	/* uo_priv is NULL here when nothing was found */
+	if (uo_priv != NULL)
+		call_rcu(&uo_priv->rcu, udp_offload_free_routine);
+}
+EXPORT_SYMBOL(udp_del_offload);
+
+/*
+ * GRO receive handler for UDP. Looks up a registered udp_offload whose port
+ * equals the packet's destination port and which provides gro_receive,
+ * clears same_flow on held packets whose source/dest port pair differs, and
+ * passes the skb on to that callback.
+ *
+ * Packets already marked with udp_mark, and packets that are neither flagged
+ * as encapsulated nor CHECKSUM_COMPLETE, are marked for flush without lookup.
+ *
+ * Returns the callback's result, or NULL when nothing was dispatched.
+ */
+static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct udp_offload_priv *entry;
+	struct sk_buff **merged = NULL;
+	struct sk_buff *held;
+	struct udphdr *hdr;
+	unsigned int hdr_off, hdr_end;
+	int flush = 1;
+
+	if (NAPI_GRO_CB(skb)->udp_mark)
+		goto out;
+	if (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)
+		goto out;
+
+	/* mark that this skb passed once through the udp gro layer */
+	NAPI_GRO_CB(skb)->udp_mark = 1;
+
+	hdr_off = skb_gro_offset(skb);
+	hdr_end = hdr_off + sizeof(*hdr);
+	hdr = skb_gro_header_fast(skb, hdr_off);
+	if (skb_gro_header_hard(skb, hdr_end)) {
+		hdr = skb_gro_header_slow(skb, hdr_end, hdr_off);
+		if (unlikely(!hdr))
+			goto out;
+	}
+
+	rcu_read_lock();
+	for (entry = rcu_dereference(udp_offload_base); entry != NULL;
+	     entry = rcu_dereference(entry->next)) {
+		if (entry->offload->port == hdr->dest &&
+		    entry->offload->callbacks.gro_receive)
+			break;
+	}
+	if (entry == NULL)
+		goto out_unlock;
+
+	flush = 0;
+
+	for (held = *head; held; held = held->next) {
+		struct udphdr *held_hdr;
+
+		if (!NAPI_GRO_CB(held)->same_flow)
+			continue;
+
+		held_hdr = (struct udphdr *)(held->data + hdr_off);
+		/* one 32-bit load covers both the source and dest ports */
+		if (*(u32 *)&hdr->source != *(u32 *)&held_hdr->source)
+			NAPI_GRO_CB(held)->same_flow = 0;
+	}
+
+	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
+	merged = entry->offload->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+	return merged;
+}
+
+/*
+ * GRO complete handler for UDP: rewrites the UDP length field to cover the
+ * merged packet from @nhoff to the end, then delegates to the gro_complete
+ * callback of the registered offload matching the destination port.
+ *
+ * Returns the callback's result, or -ENOSYS when no offload matches.
+ */
+static int udp_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct udphdr *hdr = (struct udphdr *)(skb->data + nhoff);
+	struct udp_offload_priv *entry;
+	int ret = -ENOSYS;
+
+	hdr->len = htons(skb->len - nhoff);
+
+	rcu_read_lock();
+	entry = rcu_dereference(udp_offload_base);
+	while (entry != NULL) {
+		if (entry->offload->port == hdr->dest &&
+		    entry->offload->callbacks.gro_complete) {
+			ret = entry->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+			break;
+		}
+		entry = rcu_dereference(entry->next);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
static const struct net_offload udpv4_offload = {
.callbacks = {
.gso_send_check = udp4_ufo_send_check,
.gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp_gro_receive, /* dispatches to the udp_offload registered for the dest port */
+ .gro_complete = udp_gro_complete, /* fixes up the UDP length, then calls the matching gro_complete */
},
};