From bafa6d9d89072c1a18853afe9ee5de05c491c13a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 7 Sep 2012 00:45:29 +0000
Subject: ipv4/route: arg delay is useless in rt_cache_flush()

Since route cache deletion (89aef8921bfbac22f), delay is no
more used. Remove it.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c          |  2 +-
 net/ipv4/devinet.c      |  6 +++---
 net/ipv4/fib_frontend.c | 20 ++++++++++----------
 net/ipv4/fib_rules.c    |  2 +-
 net/ipv4/fib_trie.c     |  6 +++---
 net/ipv4/route.c        | 19 +++----------------
 6 files changed, 21 insertions(+), 34 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 77e87aff419a..47800459e4cb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
-		rt_cache_flush(dev_net(dev), 0);
+		rt_cache_flush(dev_net(dev));
 		break;
 	default:
 		break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e3aef7..9b55b6f5a585 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
 			if ((new_value == 0) && (old_value != 0))
-				rt_cache_flush(net, 0);
+				rt_cache_flush(net);
 	}
 
 	return ret;
@@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				dev_disable_lro(idev->dev);
 			}
 			rtnl_unlock();
-			rt_cache_flush(net, 0);
+			rt_cache_flush(net);
 		}
 	}
 
@@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	struct net *net = ctl->extra2;
 
 	if (write && *valp != val)
-		rt_cache_flush(net, 0);
+		rt_cache_flush(net);
 
 	return ret;
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c43ae3fba792..8e2b475da9fa 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -148,7 +148,7 @@ static void fib_flush(struct net *net)
 	}
 
 	if (flushed)
-		rt_cache_flush(net, -1);
+		rt_cache_flush(net);
 }
 
 /*
@@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net)
 	net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, int force, int delay)
+static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev_net(dev));
-	rt_cache_flush(dev_net(dev), delay);
+	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
 }
 
@@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(dev_net(dev), -1);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa, NULL);
@@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			 * Disable IP.
 			 */
-			fib_disable_ip(dev, 1, 0);
+			fib_disable_ip(dev, 1);
 		} else {
-			rt_cache_flush(dev_net(dev), -1);
+			rt_cache_flush(dev_net(dev));
 		}
 		break;
 	}
@@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
-		fib_disable_ip(dev, 2, -1);
+		fib_disable_ip(dev, 2);
 		rt_flush_dev(dev);
 		return NOTIFY_DONE;
 	}
@@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(dev_net(dev), -1);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_DOWN:
-		fib_disable_ip(dev, 0, 0);
+		fib_disable_ip(dev, 0);
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-		rt_cache_flush(dev_net(dev), 0);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_UNREGISTER_BATCH:
 		break;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a83d74e498d2..274309d3aded 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 
 static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 {
-	rt_cache_flush(ops->fro_net, -1);
+	rt_cache_flush(ops->fro_net);
 }
 
 static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 57bd978483e1..d1b93595b4a7 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 
@@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
@@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
-		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+		rt_cache_flush(cfg->fc_nlinfo.nl_net);
 
 	fib_release_info(fa->fa_info);
 	alias_free_mem_rcu(fa);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 82cf2a722b23..f6436d3b207a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net)
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
-/*
- * delay < 0  : invalidate cache (fast : entries will be deleted later)
- * delay >= 0 : invalidate & flush cache (can be long)
- */
-void rt_cache_flush(struct net *net, int delay)
+void rt_cache_flush(struct net *net)
 {
 	rt_cache_invalidate(net);
 }
@@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(dev_net(in_dev->dev), 0);
+	rt_cache_flush(dev_net(in_dev->dev));
 }
 
 #ifdef CONFIG_SYSCTL
@@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
-		int flush_delay;
-		ctl_table ctl;
-		struct net *net;
-
-		memcpy(&ctl, __ctl, sizeof(ctl));
-		ctl.data = &flush_delay;
-		proc_dointvec(&ctl, write, buffer, lenp, ppos);
-
-		net = (struct net *)__ctl->extra1;
-		rt_cache_flush(net, flush_delay);
+		rt_cache_flush((struct net *)__ctl->extra1);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 2885da72966fcb89f48d554339d347fb02b5ea78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 7 Sep 2012 22:27:11 +0200
Subject: net: rt_cache_flush() cleanup

We dont use jhash anymore since route cache removal,
so we can get rid of get_random_bytes() calls for rt_genid
changes.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6436d3b207a..be27cfa96e88 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -447,23 +447,9 @@ static inline bool rt_is_expired(const struct rtable *rth)
 	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
-/*
- * Perturbation of rt_genid by a small quantity [1..256]
- * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
- * many times (2^24) without giving recent rt_genid.
- * Jenkins hash is strong enough that litle changes of rt_genid are OK.
- */
-static void rt_cache_invalidate(struct net *net)
-{
-	unsigned char shuffle;
-
-	get_random_bytes(&shuffle, sizeof(shuffle));
-	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-}
-
 void rt_cache_flush(struct net *net)
 {
-	rt_cache_invalidate(net);
+	atomic_inc(&net->ipv4.rt_genid);
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -2520,8 +2506,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_genid_init(struct net *net)
 {
-	get_random_bytes(&net->ipv4.rt_genid,
-			 sizeof(net->ipv4.rt_genid));
+	atomic_set(&net->ipv4.rt_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
 			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
-- 
cgit v1.2.3


From b42664f898c976247f7f609b8bb9c94d7475ca10 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 10 Sep 2012 22:09:44 +0000
Subject: netns: move net->ipv4.rt_genid to net->rt_genid

This commit prepares the use of rt_genid by both IPv4 and IPv6.
Initialization is left in IPv4 part.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be27cfa96e88..fd9af60397b5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio);
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
 
-static inline int rt_genid(struct net *net)
-{
-	return atomic_read(&net->ipv4.rt_genid);
-}
-
 #ifdef CONFIG_PROC_FS
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 {
@@ -449,7 +444,7 @@ static inline bool rt_is_expired(const struct rtable *rth)
 
 void rt_cache_flush(struct net *net)
 {
-	atomic_inc(&net->ipv4.rt_genid);
+	rt_genid_bump(net);
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -2506,7 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_genid_init(struct net *net)
 {
-	atomic_set(&net->ipv4.rt_genid, 0);
+	atomic_set(&net->rt_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
 			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
-- 
cgit v1.2.3


From 1d57f19539c074105791da6384a8ad674bba8037 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Sep 2012 12:51:39 +0000
Subject: tcp: fix regression in urgent data handling

Stephan Springl found that commit 1402d366019fed "tcp: introduce
tcp_try_coalesce" introduced a regression for rlogin

It turns out problem comes from TCP urgent data handling and
a change in behavior in input path.

rlogin sends two one-byte packets with URG ptr set, and when next data
frame is coalesced, we lack sk_data_ready() calls to wakeup consumer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Stephan Springl <springl-k@lar.bfw.de>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6e38c6c23caa..d377f4854cb8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4661,7 +4661,7 @@ queue_and_out:
 
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
-		else if (!sock_flag(sk, SOCK_DEAD))
+		if (!sock_flag(sk, SOCK_DEAD))
 			sk->sk_data_ready(sk, 0);
 		return;
 	}
@@ -5556,8 +5556,7 @@ no_ack:
 #endif
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
-			else
-				sk->sk_data_ready(sk, 0);
+			sk->sk_data_ready(sk, 0);
 			return 0;
 		}
 	}
-- 
cgit v1.2.3


From 15c041759bfcd9ab0a4e43f1c16e2644977d0467 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
Date: Fri, 14 Sep 2012 04:59:52 +0000
Subject: tcp: flush DMA queue before sk_wait_data if rcv_wnd is zero

If recv() syscall is called for a TCP socket so that
  - IOAT DMA is used
  - MSG_WAITALL flag is used
  - requested length is bigger than sk_rcvbuf
  - enough data has already arrived to bring rcv_wnd to zero
then when tcp_recvmsg() gets to calling sk_wait_data(), receive
window can be still zero while sk_async_wait_queue exhausts
enough space to keep it zero. As this queue isn't cleaned until
the tcp_service_net_dma() call, sk_wait_data() cannot receive
any data and blocks forever.

If zero receive window and non-empty sk_async_wait_queue is
detected before calling sk_wait_data(), process the queue first.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2109ff4a1daf..bf9a8ab29459 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1762,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		}
 
 #ifdef CONFIG_NET_DMA
-		if (tp->ucopy.dma_chan)
-			dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+		if (tp->ucopy.dma_chan) {
+			if (tp->rcv_wnd == 0 &&
+			    !skb_queue_empty(&sk->sk_async_wait_queue)) {
+				tcp_service_net_dma(sk, true);
+				tcp_cleanup_rbuf(sk, copied);
+			} else
+				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+		}
 #endif
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
-- 
cgit v1.2.3


From bc26ccd8fc756749de95606d28314efd0ce5aec3 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Wed, 19 Sep 2012 09:40:00 +0000
Subject: tcp: restore rcv_wscale in a repair mode (v2)

rcv_wscale is a symetric parameter with snd_wscale.

Both this parameters are set on a connection handshake.

Without this value a remote window size can not be interpreted correctly,
because a value from a packet should be shifted on rcv_wscale.

And one more thing is that wscale_ok should be set too.

This patch doesn't break a backward compatibility.
If someone uses it in a old scheme, a rcv window
will be restored with the same bug (rcv_wscale = 0).

v2: Save backward compatibility on big-endian system. Before
    the first two bytes were snd_wscale and the second two bytes were
    rcv_wscale. Now snd_wscale is opt_val & 0xFFFF and rcv_wscale >> 16.
    This approach is independent on byte ordering.

Cc: David S. Miller <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
CC: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf9a8ab29459..5f6419341821 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2331,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
 			tp->rx_opt.mss_clamp = opt.opt_val;
 			break;
 		case TCPOPT_WINDOW:
-			if (opt.opt_val > 14)
-				return -EFBIG;
+			{
+				u16 snd_wscale = opt.opt_val & 0xFFFF;
+				u16 rcv_wscale = opt.opt_val >> 16;
+
+				if (snd_wscale > 14 || rcv_wscale > 14)
+					return -EFBIG;
 
-			tp->rx_opt.snd_wscale = opt.opt_val;
+				tp->rx_opt.snd_wscale = snd_wscale;
+				tp->rx_opt.rcv_wscale = rcv_wscale;
+				tp->rx_opt.wscale_ok = 1;
+			}
 			break;
 		case TCPOPT_SACK_PERM:
 			if (opt.opt_val != 0)
-- 
cgit v1.2.3


From bf5b30b8a4416de04f1ac1196281ddb318669464 Mon Sep 17 00:00:00 2001
From: Zhao Hongjiang <zhaohongjiang@huawei.com>
Date: Thu, 20 Sep 2012 22:37:25 +0000
Subject: net: change return values from -EACCES to -EPERM

Change return value from -EACCES to -EPERM when the permission check fails.

Signed-off-by: Zhao Hongjiang <zhaohongjiang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9b55b6f5a585..e12fad773852 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 
 	case SIOCSIFFLAGS:
-		ret = -EACCES;
+		ret = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			goto out;
 		break;
@@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
-		ret = -EACCES;
+		ret = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			goto out;
 		ret = -EINVAL;
-- 
cgit v1.2.3


From ab43ed8b7490cb387782423ecf74aeee7237e591 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 22 Sep 2012 00:08:29 +0000
Subject: ipv4: raw: fix icmp_filter()

icmp_filter() should not modify its input, or else its caller
would need to recompute ip_hdr() if skb->head is reallocated.

Use skb_header_pointer() instead of pskb_may_pull() and
change the prototype to make clear both sk and skb are const.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff0f071969ea..d23c6571ba1c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -131,18 +131,20 @@ found:
  *	0 - deliver
  *	1 - block
  */
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
 {
-	int type;
+	struct icmphdr _hdr;
+	const struct icmphdr *hdr;
 
-	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+	hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+				 sizeof(_hdr), &_hdr);
+	if (!hdr)
 		return 1;
 
-	type = icmp_hdr(skb)->type;
-	if (type < 32) {
+	if (hdr->type < 32) {
 		__u32 data = raw_sk(sk)->filter.data;
 
-		return ((1 << type) & data) != 0;
+		return ((1U << hdr->type) & data) != 0;
 	}
 
 	/* Do not block unknown ICMP types */
-- 
cgit v1.2.3


From bc9259a8bae9e814fc1f775a1b3effa13e6ad330 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 27 Sep 2012 04:11:00 +0000
Subject: inetpeer: fix token initialization

When jiffies wraps around (for example, 5 minutes after the boot, see
INITIAL_JIFFIES) and peer has just been created, now - peer->rate_last can be
< XRLIM_BURST_FACTOR * timeout, so token is not set to the maximum value, thus
some icmp packets can be unexpectedly dropped.

Fix this case by initializing last_rate to 60 seconds in the past.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e1e0a4e8fd34..c7527f6b9ad9 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -510,7 +510,10 @@ relookup:
 					secure_ipv6_id(daddr->addr.a6));
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
-		p->rate_last = 0;
+		/* 60*HZ is arbitrary, but chosen enough high so that the first
+		 * calculation of tokens is at its maximum.
+		 */
+		p->rate_last = jiffies - 60*HZ;
 		INIT_LIST_HEAD(&p->gc_list);
 
 		/* Link the node. */
-- 
cgit v1.2.3