summaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_input.c
diff options
context:
space:
mode:
authorsubashab@codeaurora.org <subashab@codeaurora.org>2017-03-23 13:34:16 -0600
committerDavid S. Miller <davem@davemloft.net>2017-03-24 13:17:07 -0700
commitdddb64bcb34615bf48a2c9cb9881eb76795cc5c5 (patch)
treedff7f083a71c3016dd2d09c0f9fac18546e9eddf /net/ipv4/ip_input.c
parent8fa96e3bf6c04af61fc0a32ad64d648322c4d29d (diff)
downloadlinux-stable-dddb64bcb34615bf48a2c9cb9881eb76795cc5c5.tar.gz
linux-stable-dddb64bcb34615bf48a2c9cb9881eb76795cc5c5.tar.bz2
linux-stable-dddb64bcb34615bf48a2c9cb9881eb76795cc5c5.zip
net: Add sysctl to toggle early demux for tcp and udp
Certain systems process a significant unconnected UDP workload. It would be preferable to disable UDP early demux for those systems and enable it for TCP only. By disabling UDP demux, we see these slight gains on an ARM64 system: 782 -> 788 Mbps for unconnected single-stream UDPv4, and 633 -> 654 Mbps for unconnected UDPv4 from different sources. The performance impact can change based on CPU architecture and cache sizes. There will not be much difference seen if the entire UDP hash table is in cache. Both sysctls are enabled by default to preserve existing behavior. v1->v2: Change function pointer instead of adding conditional as suggested by Stephen. v2->v3: Read once in callers to avoid issues due to compiler optimizations. Also update commit message with the tests. v3->v4: Store and use read once result instead of querying pointer again incorrectly. v4->v5: Refactor to avoid errors due to compilation with IPV6={m,n} Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Suggested-by: Eric Dumazet <edumazet@google.com> Cc: Stephen Hemminger <stephen@networkplumber.org> Cc: Tom Herbert <tom@herbertland.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_input.c')
-rw-r--r--net/ipv4/ip_input.c5
1 files changed, 3 insertions, 2 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb03516..fa2dc8f692c6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
+ void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+ edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}