From 954fba0274058d27c7c07b5ea07c41b3b7477894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 19:30:21 +0000 Subject: netpoll: fix netpoll_send_udp() bugs Bogdan Hamciuc diagnosed and fixed following bug in netpoll_send_udp() : "skb->len += len;" instead of "skb_put(skb, len);" Meaning that _if_ a network driver needs to call skb_realloc_headroom(), only packet headers would be copied, leaving garbage in the payload. However the skb_realloc_headroom() must be avoided as much as possible since it requires memory and netpoll tries hard to work even if memory is exhausted (using a pool of preallocated skbs) It appears netpoll_send_udp() reserved 16 bytes for the ethernet header, which happens to work for typicall drivers but not all. Right thing is to use LL_RESERVED_SPACE(dev) (And also add dev->needed_tailroom of tailroom) This patch combines both fixes. Many thanks to Bogdan for raising this issue. Reported-by: Bogdan Hamciuc Signed-off-by: Eric Dumazet Tested-by: Bogdan Hamciuc Cc: Herbert Xu Cc: Neil Horman Reviewed-by: Neil Horman Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3d84fb9d8873..f9f40b932e4b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { - int total_len, eth_len, ip_len, udp_len; + int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; udp_len = len + sizeof(*udph); - ip_len = eth_len = udp_len + sizeof(*iph); - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); - skb = find_skb(np, total_len, total_len - len); + skb = find_skb(np, total_len + np->dev->needed_tailroom, + total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); - skb->len += len; + skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); -- cgit v1.2.3 From 62b1a8ab9b3660bb820d8dfe23148ed6cda38574 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jun 2012 06:42:44 +0000 Subject: net: remove skb_orphan_try() Orphaning skb in dev_hard_start_xmit() makes bonding behavior unfriendly for applications sending big UDP bursts : Once packets pass the bonding device and come to real device, they might hit a full qdisc and be dropped. Without orphaning, the sender is automatically throttled because sk->sk_wmemalloc reaches sk->sk_sndbuf (assuming sk_sndbuf is not too big) We could try to defer the orphaning adding another test in dev_hard_start_xmit(), but all this seems of little gain, now that BQL tends to make packets more likely to be parked in Qdisc queues instead of NIC TX ring, in cases where performance matters. Reverts commits : fc6055a5ba31 net: Introduce skb_orphan_try() 87fd308cfc6b net: skb_tx_hash() fix relative to skb_orphan_try() and removes SKBTX_DRV_NEEDS_SK_REF flag Reported-and-bisected-by: Jean-Michel Hautbois Signed-off-by: Eric Dumazet Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 3 --- net/core/dev.c | 23 +---------------------- net/iucv/af_iucv.c | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index cde1b4a20f75..46cca3a91d19 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -681,9 +681,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (err < 0) goto free_skb; - /* to be able to check the received tx sock reference in raw_rcv() */ - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; - skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index cd0981977f5c..6df214041a5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -/* - * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested or the sk-reference - * is needed on driver level for other reasons, e.g. see net/can/raw.c - */ -static inline void skb_orphan_try(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && !skb_shinfo(skb)->tx_flags) { - /* skb_tx_hash() wont be able to get sk. - * We copy sk_hash into skb->rxhash - */ - if (!skb->rxhash) - skb->rxhash = sk->sk_hash; - skb_orphan(skb); - } -} - static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || @@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - skb_orphan_try(skb); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol ^ skb->rxhash; + hash = (__force u16) skb->protocol; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * qcount) >> 32) + qoffset; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 07d7d55a1b93..cd6f7a991d80 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -372,7 +372,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, skb_trim(skb, skb->dev->mtu); } skb->protocol = ETH_P_AF_IUCV; - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; -- cgit v1.2.3 From 2a0c451ade8e1783c5d453948289e4a978d417c9 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 14 Jun 2012 23:00:17 +0000 Subject: ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route /proc/net/ipv6_route reflects the contents of fib_table_hash. The proc handler is installed in ip6_route_net_init() whereas fib_table_hash is allocated in fib6_net_init() _after_ the proc handler has been installed. This opens up a short time frame to access fib_table_hash with its pants down. fib6_init() as a whole can't be moved to an earlier position as it also registers the rtnetlink message handlers which should be registered at the end. Therefore split it into fib6_init() which is run early and fib6_init_late() to register the rtnetlink message handlers. Signed-off-by: Thomas Graf Reviewed-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 18 +++++++++++------- net/ipv6/route.c | 16 +++++++++++----- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 74c21b924a79..fbd4afff05fa 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1692,21 +1692,25 @@ int __init fib6_init(void) ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; - - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); - if (ret) - goto out_unregister_subsys; out: return ret; -out_unregister_subsys: - unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } +int __init fib6_init_late(void) +{ + return __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); +} + +void fib6_cleanup_late(void) +{ + rtnl_unregister(PF_INET6, RTM_GETROUTE); +} + void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3fd..dc60bf585966 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3018,10 +3018,14 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = fib6_init(); if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_fib6_init; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3035,13 +3039,13 @@ int __init ip6_route_init(void) init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif - ret = fib6_init(); + ret = fib6_init_late(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_fib6_init; + goto out_fib6_init_late; ret = fib6_rules_init(); if (ret) @@ -3064,10 +3068,12 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_fib6_init: - fib6_gc_cleanup(); +out_fib6_init_late: + fib6_cleanup_late(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_fib6_init: + fib6_gc_cleanup(); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: -- cgit v1.2.3