From 035d210f928ce083435b4fd351a26d126c02c927 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Jul 2015 00:06:02 +0200 Subject: rtnetlink: reject non-IFLA_VF_PORT attributes inside IFLA_VF_PORTS Similarly as in commit 4f7d2cdfdde7 ("rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver"), we have a double nesting of netlink attributes, i.e. IFLA_VF_PORTS only contains IFLA_VF_PORT that is nested itself. While IFLA_VF_PORTS is a verified attribute from ifla_policy[], we only check if the IFLA_VF_PORTS container has IFLA_VF_PORT attributes and then pass the attribute's content itself via nla_parse_nested(). It would be more correct to reject inner types other than IFLA_VF_PORT instead of continuing parsing and also similarly as in commit 4f7d2cdfdde7, to check for a minimum of NLA_HDRLEN. Signed-off-by: Daniel Borkmann Cc: Roopa Prabhu Cc: Scott Feldman Cc: Jason Gunthorpe Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9e433d58d265..dc004b1e1f85 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1804,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb, goto errout; nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { - if (nla_type(attr) != IFLA_VF_PORT) - continue; - err = nla_parse_nested(port, IFLA_PORT_MAX, - attr, ifla_port_policy); + if (nla_type(attr) != IFLA_VF_PORT || + nla_len(attr) < NLA_HDRLEN) { + err = -EINVAL; + goto errout; + } + err = nla_parse_nested(port, IFLA_PORT_MAX, attr, + ifla_port_policy); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { -- cgit v1.2.3 From 738ac1ebb96d02e0d23bc320302a6ea94c612dec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 16:04:13 +0800 Subject: net: Clone skb before setting peeked flag Shared skbs must not be modified and this is crucial for broadcast and/or multicast paths where we use it as an optimisation to avoid unnecessary cloning. The function skb_recv_datagram breaks this rule by setting peeked without cloning the skb first. This causes funky races which leads to double-free. This patch fixes this by cloning the skb and replacing the skb in the list when setting skb->peeked. Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv") Reported-by: Konstantin Khlebnikov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/datagram.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index b80fb91bb3f7..4e9a3f690b7e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,6 +131,35 @@ out_noerr: goto out; } +static int skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return 0; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return 0; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -165,7 +194,9 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + error = skb_set_peeked(skb); + if (error) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; -- cgit v1.2.3 From 89c22d8c3b278212eef6a8cc66b570bc840a6f5a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 20:01:42 +0800 Subject: net: Fix skb csum races when peeking When we calculate the checksum on the recv path, we store the result in the skb as an optimisation in case we need the checksum again down the line. This is in fact bogus for the MSG_PEEK case as this is done without any locking. So multiple threads can peek and then store the result to the same skb, potentially resulting in bogus skb states. This patch fixes this by only storing the result if the skb is not shared. This preserves the optimisations for the few cases where it can be done safely due to locking or other reasons, e.g., SIOCINQ. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index 4e9a3f690b7e..4967262b2707 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -657,7 +657,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); } - skb->csum_valid = !sum; + if (!skb_shared(skb)) + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); @@ -677,11 +678,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); } - /* Save full packet checksum */ - skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - skb->csum_valid = !sum; + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } return sum; } -- cgit v1.2.3 From 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 17 Jul 2015 14:01:11 +0300 Subject: net: ratelimit warnings about dst entry refcount underflow or overflow Kernel generates a lot of warnings when dst entry reference counter overflows and becomes negative. That bug was seen several times at machines with outdated 3.10.y kernels. Most like it's already fixed in upstream. Anyway that flood completely kills machine and makes further debugging impossible. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/core/dst.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6d1378..002144bea935 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst) int newrefcnt; newrefcnt = atomic_dec_return(&dst->__refcnt); - WARN_ON(newrefcnt < 0); + if (unlikely(newrefcnt < 0)) + net_warn_ratelimited("%s: dst:%p refcnt:%d\n", + __func__, dst, newrefcnt); if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) call_rcu(&dst->rcu_head, dst_destroy_rcu); } -- cgit v1.2.3